Skip to content

Commit 9e20b41

Browse files
ohaucke authored and eXpl0it3r committed
Support for case sensitive html entities
1 parent 21abffb commit 9e20b41

File tree

1 file changed

+126
-120
lines changed

1 file changed

+126
-120
lines changed

Source/HtmlRenderer/Core/Utils/HtmlUtils.cs

Lines changed: 126 additions & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ internal static class HtmlUtils
4646
/// the html decode only pairs
4747
/// </summary>
4848
private static readonly Dictionary<string, char> _decodeOnly = new Dictionary<string, char>(StringComparer.InvariantCultureIgnoreCase);
49+
private static readonly Dictionary<string, char> _decodeOnlyCaseSensitive = new Dictionary<string, char>(StringComparer.InvariantCulture);
4950

5051
#endregion
5152

@@ -96,68 +97,68 @@ static HtmlUtils()
9697
_decodeOnly["divide"] = Convert.ToChar(247);
9798

9899
// ISO 8859-1 Characters
99-
_decodeOnly["Agrave"] = Convert.ToChar(192);
100-
_decodeOnly["Aacute"] = Convert.ToChar(193);
101-
_decodeOnly["Acirc"] = Convert.ToChar(194);
102-
_decodeOnly["Atilde"] = Convert.ToChar(195);
103-
_decodeOnly["Auml"] = Convert.ToChar(196);
104-
_decodeOnly["Aring"] = Convert.ToChar(197);
105-
_decodeOnly["AElig"] = Convert.ToChar(198);
106-
_decodeOnly["Ccedil"] = Convert.ToChar(199);
107-
_decodeOnly["Egrave"] = Convert.ToChar(200);
108-
_decodeOnly["Eacute"] = Convert.ToChar(201);
109-
_decodeOnly["Ecirc"] = Convert.ToChar(202);
110-
_decodeOnly["Euml"] = Convert.ToChar(203);
111-
_decodeOnly["Igrave"] = Convert.ToChar(204);
112-
_decodeOnly["Iacute"] = Convert.ToChar(205);
113-
_decodeOnly["Icirc"] = Convert.ToChar(206);
114-
_decodeOnly["Iuml"] = Convert.ToChar(207);
115-
_decodeOnly["ETH"] = Convert.ToChar(208);
116-
_decodeOnly["Ntilde"] = Convert.ToChar(209);
117-
_decodeOnly["Ograve"] = Convert.ToChar(210);
118-
_decodeOnly["Oacute"] = Convert.ToChar(211);
119-
_decodeOnly["Ocirc"] = Convert.ToChar(212);
120-
_decodeOnly["Otilde"] = Convert.ToChar(213);
121-
_decodeOnly["Ouml"] = Convert.ToChar(214);
122-
_decodeOnly["Oslash"] = Convert.ToChar(216);
123-
_decodeOnly["Ugrave"] = Convert.ToChar(217);
124-
_decodeOnly["Uacute"] = Convert.ToChar(218);
125-
_decodeOnly["Ucirc"] = Convert.ToChar(219);
126-
_decodeOnly["Uuml"] = Convert.ToChar(220);
127-
_decodeOnly["Yacute"] = Convert.ToChar(221);
128-
_decodeOnly["THORN"] = Convert.ToChar(222);
100+
_decodeOnlyCaseSensitive["Agrave"] = Convert.ToChar(192);
101+
_decodeOnlyCaseSensitive["Aacute"] = Convert.ToChar(193);
102+
_decodeOnlyCaseSensitive["Acirc"] = Convert.ToChar(194);
103+
_decodeOnlyCaseSensitive["Atilde"] = Convert.ToChar(195);
104+
_decodeOnlyCaseSensitive["Auml"] = Convert.ToChar(196);
105+
_decodeOnlyCaseSensitive["Aring"] = Convert.ToChar(197);
106+
_decodeOnlyCaseSensitive["AElig"] = Convert.ToChar(198);
107+
_decodeOnlyCaseSensitive["Ccedil"] = Convert.ToChar(199);
108+
_decodeOnlyCaseSensitive["Egrave"] = Convert.ToChar(200);
109+
_decodeOnlyCaseSensitive["Eacute"] = Convert.ToChar(201);
110+
_decodeOnlyCaseSensitive["Ecirc"] = Convert.ToChar(202);
111+
_decodeOnlyCaseSensitive["Euml"] = Convert.ToChar(203);
112+
_decodeOnlyCaseSensitive["Igrave"] = Convert.ToChar(204);
113+
_decodeOnlyCaseSensitive["Iacute"] = Convert.ToChar(205);
114+
_decodeOnlyCaseSensitive["Icirc"] = Convert.ToChar(206);
115+
_decodeOnlyCaseSensitive["Iuml"] = Convert.ToChar(207);
116+
_decodeOnlyCaseSensitive["ETH"] = Convert.ToChar(208);
117+
_decodeOnlyCaseSensitive["Ntilde"] = Convert.ToChar(209);
118+
_decodeOnlyCaseSensitive["Ograve"] = Convert.ToChar(210);
119+
_decodeOnlyCaseSensitive["Oacute"] = Convert.ToChar(211);
120+
_decodeOnlyCaseSensitive["Ocirc"] = Convert.ToChar(212);
121+
_decodeOnlyCaseSensitive["Otilde"] = Convert.ToChar(213);
122+
_decodeOnlyCaseSensitive["Ouml"] = Convert.ToChar(214);
123+
_decodeOnlyCaseSensitive["Oslash"] = Convert.ToChar(216);
124+
_decodeOnlyCaseSensitive["Ugrave"] = Convert.ToChar(217);
125+
_decodeOnlyCaseSensitive["Uacute"] = Convert.ToChar(218);
126+
_decodeOnlyCaseSensitive["Ucirc"] = Convert.ToChar(219);
127+
_decodeOnlyCaseSensitive["Uuml"] = Convert.ToChar(220);
128+
_decodeOnlyCaseSensitive["Yacute"] = Convert.ToChar(221);
129+
_decodeOnlyCaseSensitive["THORN"] = Convert.ToChar(222);
129130
_decodeOnly["szlig"] = Convert.ToChar(223);
130-
_decodeOnly["agrave"] = Convert.ToChar(224);
131-
_decodeOnly["aacute"] = Convert.ToChar(225);
132-
_decodeOnly["acirc"] = Convert.ToChar(226);
133-
_decodeOnly["atilde"] = Convert.ToChar(227);
134-
_decodeOnly["auml"] = Convert.ToChar(228);
135-
_decodeOnly["aring"] = Convert.ToChar(229);
136-
_decodeOnly["aelig"] = Convert.ToChar(230);
137-
_decodeOnly["ccedil"] = Convert.ToChar(231);
138-
_decodeOnly["egrave"] = Convert.ToChar(232);
139-
_decodeOnly["eacute"] = Convert.ToChar(233);
140-
_decodeOnly["ecirc"] = Convert.ToChar(234);
141-
_decodeOnly["euml"] = Convert.ToChar(235);
142-
_decodeOnly["igrave"] = Convert.ToChar(236);
143-
_decodeOnly["iacute"] = Convert.ToChar(237);
144-
_decodeOnly["icirc"] = Convert.ToChar(238);
145-
_decodeOnly["iuml"] = Convert.ToChar(239);
146-
_decodeOnly["eth"] = Convert.ToChar(240);
147-
_decodeOnly["ntilde"] = Convert.ToChar(241);
148-
_decodeOnly["ograve"] = Convert.ToChar(242);
149-
_decodeOnly["oacute"] = Convert.ToChar(243);
150-
_decodeOnly["ocirc"] = Convert.ToChar(244);
151-
_decodeOnly["otilde"] = Convert.ToChar(245);
152-
_decodeOnly["ouml"] = Convert.ToChar(246);
153-
_decodeOnly["oslash"] = Convert.ToChar(248);
154-
_decodeOnly["ugrave"] = Convert.ToChar(249);
155-
_decodeOnly["uacute"] = Convert.ToChar(250);
156-
_decodeOnly["ucirc"] = Convert.ToChar(251);
157-
_decodeOnly["uuml"] = Convert.ToChar(252);
158-
_decodeOnly["yacute"] = Convert.ToChar(253);
159-
_decodeOnly["thorn"] = Convert.ToChar(254);
160-
_decodeOnly["yuml"] = Convert.ToChar(255);
131+
_decodeOnlyCaseSensitive["agrave"] = Convert.ToChar(224);
132+
_decodeOnlyCaseSensitive["aacute"] = Convert.ToChar(225);
133+
_decodeOnlyCaseSensitive["acirc"] = Convert.ToChar(226);
134+
_decodeOnlyCaseSensitive["atilde"] = Convert.ToChar(227);
135+
_decodeOnlyCaseSensitive["auml"] = Convert.ToChar(228);
136+
_decodeOnlyCaseSensitive["aring"] = Convert.ToChar(229);
137+
_decodeOnlyCaseSensitive["aelig"] = Convert.ToChar(230);
138+
_decodeOnlyCaseSensitive["ccedil"] = Convert.ToChar(231);
139+
_decodeOnlyCaseSensitive["egrave"] = Convert.ToChar(232);
140+
_decodeOnlyCaseSensitive["eacute"] = Convert.ToChar(233);
141+
_decodeOnlyCaseSensitive["ecirc"] = Convert.ToChar(234);
142+
_decodeOnlyCaseSensitive["euml"] = Convert.ToChar(235);
143+
_decodeOnlyCaseSensitive["igrave"] = Convert.ToChar(236);
144+
_decodeOnlyCaseSensitive["iacute"] = Convert.ToChar(237);
145+
_decodeOnlyCaseSensitive["icirc"] = Convert.ToChar(238);
146+
_decodeOnlyCaseSensitive["iuml"] = Convert.ToChar(239);
147+
_decodeOnlyCaseSensitive["eth"] = Convert.ToChar(240);
148+
_decodeOnlyCaseSensitive["ntilde"] = Convert.ToChar(241);
149+
_decodeOnlyCaseSensitive["ograve"] = Convert.ToChar(242);
150+
_decodeOnlyCaseSensitive["oacute"] = Convert.ToChar(243);
151+
_decodeOnlyCaseSensitive["ocirc"] = Convert.ToChar(244);
152+
_decodeOnlyCaseSensitive["otilde"] = Convert.ToChar(245);
153+
_decodeOnlyCaseSensitive["ouml"] = Convert.ToChar(246);
154+
_decodeOnlyCaseSensitive["oslash"] = Convert.ToChar(248);
155+
_decodeOnlyCaseSensitive["ugrave"] = Convert.ToChar(249);
156+
_decodeOnlyCaseSensitive["uacute"] = Convert.ToChar(250);
157+
_decodeOnlyCaseSensitive["ucirc"] = Convert.ToChar(251);
158+
_decodeOnlyCaseSensitive["uuml"] = Convert.ToChar(252);
159+
_decodeOnlyCaseSensitive["yacute"] = Convert.ToChar(253);
160+
_decodeOnlyCaseSensitive["thorn"] = Convert.ToChar(254);
161+
_decodeOnlyCaseSensitive["yuml"] = Convert.ToChar(255);
161162

162163
// Math Symbols Supported by HTML
163164
_decodeOnly["forall"] = Convert.ToChar(8704);
@@ -200,65 +201,65 @@ static HtmlUtils()
200201
_decodeOnly["sdot"] = Convert.ToChar(8901);
201202

202203
// Greek Letters Supported by HTML
203-
_decodeOnly["Alpha"] = Convert.ToChar(913);
204-
_decodeOnly["Beta"] = Convert.ToChar(914);
205-
_decodeOnly["Gamma"] = Convert.ToChar(915);
206-
_decodeOnly["Delta"] = Convert.ToChar(916);
207-
_decodeOnly["Epsilon"] = Convert.ToChar(917);
208-
_decodeOnly["Zeta"] = Convert.ToChar(918);
209-
_decodeOnly["Eta"] = Convert.ToChar(919);
210-
_decodeOnly["Theta"] = Convert.ToChar(920);
211-
_decodeOnly["Iota"] = Convert.ToChar(921);
212-
_decodeOnly["Kappa"] = Convert.ToChar(922);
213-
_decodeOnly["Lambda"] = Convert.ToChar(923);
214-
_decodeOnly["Mu"] = Convert.ToChar(924);
215-
_decodeOnly["Nu"] = Convert.ToChar(925);
216-
_decodeOnly["Xi"] = Convert.ToChar(926);
217-
_decodeOnly["Omicron"] = Convert.ToChar(927);
218-
_decodeOnly["Pi"] = Convert.ToChar(928);
219-
_decodeOnly["Rho"] = Convert.ToChar(929);
220-
_decodeOnly["Sigma"] = Convert.ToChar(931);
221-
_decodeOnly["Tau"] = Convert.ToChar(932);
222-
_decodeOnly["Upsilon"] = Convert.ToChar(933);
223-
_decodeOnly["Phi"] = Convert.ToChar(934);
224-
_decodeOnly["Chi"] = Convert.ToChar(935);
225-
_decodeOnly["Psi"] = Convert.ToChar(936);
226-
_decodeOnly["Omega"] = Convert.ToChar(937);
227-
_decodeOnly["alpha"] = Convert.ToChar(945);
228-
_decodeOnly["beta"] = Convert.ToChar(946);
229-
_decodeOnly["gamma"] = Convert.ToChar(947);
230-
_decodeOnly["delta"] = Convert.ToChar(948);
231-
_decodeOnly["epsilon"] = Convert.ToChar(949);
232-
_decodeOnly["zeta"] = Convert.ToChar(950);
233-
_decodeOnly["eta"] = Convert.ToChar(951);
234-
_decodeOnly["theta"] = Convert.ToChar(952);
235-
_decodeOnly["iota"] = Convert.ToChar(953);
236-
_decodeOnly["kappa"] = Convert.ToChar(954);
237-
_decodeOnly["lambda"] = Convert.ToChar(955);
238-
_decodeOnly["mu"] = Convert.ToChar(956);
239-
_decodeOnly["nu"] = Convert.ToChar(957);
240-
_decodeOnly["xi"] = Convert.ToChar(958);
241-
_decodeOnly["omicron"] = Convert.ToChar(959);
242-
_decodeOnly["pi"] = Convert.ToChar(960);
243-
_decodeOnly["rho"] = Convert.ToChar(961);
244-
_decodeOnly["sigmaf"] = Convert.ToChar(962);
245-
_decodeOnly["sigma"] = Convert.ToChar(963);
246-
_decodeOnly["tau"] = Convert.ToChar(964);
247-
_decodeOnly["upsilon"] = Convert.ToChar(965);
248-
_decodeOnly["phi"] = Convert.ToChar(966);
249-
_decodeOnly["chi"] = Convert.ToChar(967);
250-
_decodeOnly["psi"] = Convert.ToChar(968);
251-
_decodeOnly["omega"] = Convert.ToChar(969);
204+
_decodeOnlyCaseSensitive["Alpha"] = Convert.ToChar(913);
205+
_decodeOnlyCaseSensitive["Beta"] = Convert.ToChar(914);
206+
_decodeOnlyCaseSensitive["Gamma"] = Convert.ToChar(915);
207+
_decodeOnlyCaseSensitive["Delta"] = Convert.ToChar(916);
208+
_decodeOnlyCaseSensitive["Epsilon"] = Convert.ToChar(917);
209+
_decodeOnlyCaseSensitive["Zeta"] = Convert.ToChar(918);
210+
_decodeOnlyCaseSensitive["Eta"] = Convert.ToChar(919);
211+
_decodeOnlyCaseSensitive["Theta"] = Convert.ToChar(920);
212+
_decodeOnlyCaseSensitive["Iota"] = Convert.ToChar(921);
213+
_decodeOnlyCaseSensitive["Kappa"] = Convert.ToChar(922);
214+
_decodeOnlyCaseSensitive["Lambda"] = Convert.ToChar(923);
215+
_decodeOnlyCaseSensitive["Mu"] = Convert.ToChar(924);
216+
_decodeOnlyCaseSensitive["Nu"] = Convert.ToChar(925);
217+
_decodeOnlyCaseSensitive["Xi"] = Convert.ToChar(926);
218+
_decodeOnlyCaseSensitive["Omicron"] = Convert.ToChar(927);
219+
_decodeOnlyCaseSensitive["Pi"] = Convert.ToChar(928);
220+
_decodeOnlyCaseSensitive["Rho"] = Convert.ToChar(929);
221+
_decodeOnlyCaseSensitive["Sigma"] = Convert.ToChar(931);
222+
_decodeOnlyCaseSensitive["Tau"] = Convert.ToChar(932);
223+
_decodeOnlyCaseSensitive["Upsilon"] = Convert.ToChar(933);
224+
_decodeOnlyCaseSensitive["Phi"] = Convert.ToChar(934);
225+
_decodeOnlyCaseSensitive["Chi"] = Convert.ToChar(935);
226+
_decodeOnlyCaseSensitive["Psi"] = Convert.ToChar(936);
227+
_decodeOnlyCaseSensitive["Omega"] = Convert.ToChar(937);
228+
_decodeOnlyCaseSensitive["alpha"] = Convert.ToChar(945);
229+
_decodeOnlyCaseSensitive["beta"] = Convert.ToChar(946);
230+
_decodeOnlyCaseSensitive["gamma"] = Convert.ToChar(947);
231+
_decodeOnlyCaseSensitive["delta"] = Convert.ToChar(948);
232+
_decodeOnlyCaseSensitive["epsilon"] = Convert.ToChar(949);
233+
_decodeOnlyCaseSensitive["zeta"] = Convert.ToChar(950);
234+
_decodeOnlyCaseSensitive["eta"] = Convert.ToChar(951);
235+
_decodeOnlyCaseSensitive["theta"] = Convert.ToChar(952);
236+
_decodeOnlyCaseSensitive["iota"] = Convert.ToChar(953);
237+
_decodeOnlyCaseSensitive["kappa"] = Convert.ToChar(954);
238+
_decodeOnlyCaseSensitive["lambda"] = Convert.ToChar(955);
239+
_decodeOnlyCaseSensitive["mu"] = Convert.ToChar(956);
240+
_decodeOnlyCaseSensitive["nu"] = Convert.ToChar(957);
241+
_decodeOnlyCaseSensitive["xi"] = Convert.ToChar(958);
242+
_decodeOnlyCaseSensitive["omicron"] = Convert.ToChar(959);
243+
_decodeOnlyCaseSensitive["pi"] = Convert.ToChar(960);
244+
_decodeOnlyCaseSensitive["rho"] = Convert.ToChar(961);
245+
_decodeOnlyCaseSensitive["sigmaf"] = Convert.ToChar(962);
246+
_decodeOnlyCaseSensitive["sigma"] = Convert.ToChar(963);
247+
_decodeOnlyCaseSensitive["tau"] = Convert.ToChar(964);
248+
_decodeOnlyCaseSensitive["upsilon"] = Convert.ToChar(965);
249+
_decodeOnlyCaseSensitive["phi"] = Convert.ToChar(966);
250+
_decodeOnlyCaseSensitive["chi"] = Convert.ToChar(967);
251+
_decodeOnlyCaseSensitive["psi"] = Convert.ToChar(968);
252+
_decodeOnlyCaseSensitive["omega"] = Convert.ToChar(969);
252253
_decodeOnly["thetasym"] = Convert.ToChar(977);
253254
_decodeOnly["upsih"] = Convert.ToChar(978);
254255
_decodeOnly["piv"] = Convert.ToChar(982);
255256

256257
// Other Entities Supported by HTML
257-
_decodeOnly["OElig"] = Convert.ToChar(338);
258-
_decodeOnly["oelig"] = Convert.ToChar(339);
259-
_decodeOnly["Scaron"] = Convert.ToChar(352);
260-
_decodeOnly["scaron"] = Convert.ToChar(353);
261-
_decodeOnly["Yuml"] = Convert.ToChar(376);
258+
_decodeOnlyCaseSensitive["OElig"] = Convert.ToChar(338);
259+
_decodeOnlyCaseSensitive["oelig"] = Convert.ToChar(339);
260+
_decodeOnlyCaseSensitive["Scaron"] = Convert.ToChar(352);
261+
_decodeOnlyCaseSensitive["scaron"] = Convert.ToChar(353);
262+
_decodeOnlyCaseSensitive["Yuml"] = Convert.ToChar(376);
262263
_decodeOnly["fnof"] = Convert.ToChar(402);
263264
_decodeOnly["circ"] = Convert.ToChar(710);
264265
_decodeOnly["tilde"] = Convert.ToChar(732);
@@ -270,13 +271,13 @@ static HtmlUtils()
270271
_decodeOnly["ldquo"] = Convert.ToChar(8220);
271272
_decodeOnly["rdquo"] = Convert.ToChar(8221);
272273
_decodeOnly["bdquo"] = Convert.ToChar(8222);
273-
_decodeOnly["dagger"] = Convert.ToChar(8224);
274-
_decodeOnly["Dagger"] = Convert.ToChar(8225);
274+
_decodeOnlyCaseSensitive["dagger"] = Convert.ToChar(8224);
275+
_decodeOnlyCaseSensitive["Dagger"] = Convert.ToChar(8225);
275276
_decodeOnly["bull"] = Convert.ToChar(8226);
276277
_decodeOnly["hellip"] = Convert.ToChar(8230);
277278
_decodeOnly["permil"] = Convert.ToChar(8240);
278-
_decodeOnly["prime"] = Convert.ToChar(8242);
279-
_decodeOnly["Prime"] = Convert.ToChar(8243);
279+
_decodeOnlyCaseSensitive["prime"] = Convert.ToChar(8242);
280+
_decodeOnlyCaseSensitive["Prime"] = Convert.ToChar(8243);
280281
_decodeOnly["lsaquo"] = Convert.ToChar(8249);
281282
_decodeOnly["rsaquo"] = Convert.ToChar(8250);
282283
_decodeOnly["oline"] = Convert.ToChar(8254);
@@ -397,7 +398,12 @@ private static string DecodeHtmlCharByName(string str)
397398
{
398399
var key = str.Substring(idx + 1, endIdx - idx - 1);
399400
char c;
400-
if (_decodeOnly.TryGetValue(key, out c))
401+
if (_decodeOnlyCaseSensitive.TryGetValue(key, out c))
402+
{
403+
str = str.Remove(idx, endIdx - idx + 1);
404+
str = str.Insert(idx, c.ToString());
405+
}
406+
else if(_decodeOnly.TryGetValue(key, out c))
401407
{
402408
str = str.Remove(idx, endIdx - idx + 1);
403409
str = str.Insert(idx, c.ToString());

0 commit comments

Comments
 (0)