From 2f3a376844440e16909102c525424cbf4025415c Mon Sep 17 00:00:00 2001 From: Oliver Haucke Date: Wed, 14 Feb 2018 21:47:00 +0100 Subject: [PATCH] Support for case sensitive html entities --- Source/HtmlRenderer/Core/Utils/HtmlUtils.cs | 246 ++++++++++---------- 1 file changed, 126 insertions(+), 120 deletions(-) diff --git a/Source/HtmlRenderer/Core/Utils/HtmlUtils.cs b/Source/HtmlRenderer/Core/Utils/HtmlUtils.cs index 960b719aa..d0198d20c 100644 --- a/Source/HtmlRenderer/Core/Utils/HtmlUtils.cs +++ b/Source/HtmlRenderer/Core/Utils/HtmlUtils.cs @@ -46,6 +46,7 @@ internal static class HtmlUtils /// the html decode only pairs /// private static readonly Dictionary _decodeOnly = new Dictionary(StringComparer.InvariantCultureIgnoreCase); + private static readonly Dictionary _decodeOnlyCaseSensitive = new Dictionary(StringComparer.InvariantCulture); #endregion @@ -96,68 +97,68 @@ static HtmlUtils() _decodeOnly["divide"] = Convert.ToChar(247); // ISO 8859-1 Characters - _decodeOnly["Agrave"] = Convert.ToChar(192); - _decodeOnly["Aacute"] = Convert.ToChar(193); - _decodeOnly["Acirc"] = Convert.ToChar(194); - _decodeOnly["Atilde"] = Convert.ToChar(195); - _decodeOnly["Auml"] = Convert.ToChar(196); - _decodeOnly["Aring"] = Convert.ToChar(197); - _decodeOnly["AElig"] = Convert.ToChar(198); - _decodeOnly["Ccedil"] = Convert.ToChar(199); - _decodeOnly["Egrave"] = Convert.ToChar(200); - _decodeOnly["Eacute"] = Convert.ToChar(201); - _decodeOnly["Ecirc"] = Convert.ToChar(202); - _decodeOnly["Euml"] = Convert.ToChar(203); - _decodeOnly["Igrave"] = Convert.ToChar(204); - _decodeOnly["Iacute"] = Convert.ToChar(205); - _decodeOnly["Icirc"] = Convert.ToChar(206); - _decodeOnly["Iuml"] = Convert.ToChar(207); - _decodeOnly["ETH"] = Convert.ToChar(208); - _decodeOnly["Ntilde"] = Convert.ToChar(209); - _decodeOnly["Ograve"] = Convert.ToChar(210); - _decodeOnly["Oacute"] = Convert.ToChar(211); - _decodeOnly["Ocirc"] = Convert.ToChar(212); - _decodeOnly["Otilde"] = Convert.ToChar(213); - _decodeOnly["Ouml"] = Convert.ToChar(214); - _decodeOnly["Oslash"] = Convert.ToChar(216); - _decodeOnly["Ugrave"] = Convert.ToChar(217); - _decodeOnly["Uacute"] = Convert.ToChar(218); - _decodeOnly["Ucirc"] = Convert.ToChar(219); - _decodeOnly["Uuml"] = Convert.ToChar(220); - _decodeOnly["Yacute"] = Convert.ToChar(221); - _decodeOnly["THORN"] = Convert.ToChar(222); + _decodeOnlyCaseSensitive["Agrave"] = Convert.ToChar(192); + _decodeOnlyCaseSensitive["Aacute"] = Convert.ToChar(193); + _decodeOnlyCaseSensitive["Acirc"] = Convert.ToChar(194); + _decodeOnlyCaseSensitive["Atilde"] = Convert.ToChar(195); + _decodeOnlyCaseSensitive["Auml"] = Convert.ToChar(196); + _decodeOnlyCaseSensitive["Aring"] = Convert.ToChar(197); + _decodeOnlyCaseSensitive["AElig"] = Convert.ToChar(198); + _decodeOnlyCaseSensitive["Ccedil"] = Convert.ToChar(199); + _decodeOnlyCaseSensitive["Egrave"] = Convert.ToChar(200); + _decodeOnlyCaseSensitive["Eacute"] = Convert.ToChar(201); + _decodeOnlyCaseSensitive["Ecirc"] = Convert.ToChar(202); + _decodeOnlyCaseSensitive["Euml"] = Convert.ToChar(203); + _decodeOnlyCaseSensitive["Igrave"] = Convert.ToChar(204); + _decodeOnlyCaseSensitive["Iacute"] = Convert.ToChar(205); + _decodeOnlyCaseSensitive["Icirc"] = Convert.ToChar(206); + _decodeOnlyCaseSensitive["Iuml"] = Convert.ToChar(207); + _decodeOnlyCaseSensitive["ETH"] = Convert.ToChar(208); + _decodeOnlyCaseSensitive["Ntilde"] = Convert.ToChar(209); + _decodeOnlyCaseSensitive["Ograve"] = Convert.ToChar(210); + _decodeOnlyCaseSensitive["Oacute"] = Convert.ToChar(211); + _decodeOnlyCaseSensitive["Ocirc"] = Convert.ToChar(212); + _decodeOnlyCaseSensitive["Otilde"] = Convert.ToChar(213); + _decodeOnlyCaseSensitive["Ouml"] = Convert.ToChar(214); + _decodeOnlyCaseSensitive["Oslash"] = Convert.ToChar(216); + _decodeOnlyCaseSensitive["Ugrave"] = Convert.ToChar(217); + _decodeOnlyCaseSensitive["Uacute"] = Convert.ToChar(218); + _decodeOnlyCaseSensitive["Ucirc"] = Convert.ToChar(219); + _decodeOnlyCaseSensitive["Uuml"] = Convert.ToChar(220); + _decodeOnlyCaseSensitive["Yacute"] = Convert.ToChar(221); + _decodeOnlyCaseSensitive["THORN"] = Convert.ToChar(222); _decodeOnly["szlig"] = Convert.ToChar(223); - _decodeOnly["agrave"] = Convert.ToChar(224); - _decodeOnly["aacute"] = Convert.ToChar(225); - _decodeOnly["acirc"] = Convert.ToChar(226); - _decodeOnly["atilde"] = Convert.ToChar(227); - _decodeOnly["auml"] = Convert.ToChar(228); - _decodeOnly["aring"] = Convert.ToChar(229); - _decodeOnly["aelig"] = Convert.ToChar(230); - _decodeOnly["ccedil"] = Convert.ToChar(231); - _decodeOnly["egrave"] = Convert.ToChar(232); - _decodeOnly["eacute"] = Convert.ToChar(233); - _decodeOnly["ecirc"] = Convert.ToChar(234); - _decodeOnly["euml"] = Convert.ToChar(235); - _decodeOnly["igrave"] = Convert.ToChar(236); - _decodeOnly["iacute"] = Convert.ToChar(237); - _decodeOnly["icirc"] = Convert.ToChar(238); - _decodeOnly["iuml"] = Convert.ToChar(239); - _decodeOnly["eth"] = Convert.ToChar(240); - _decodeOnly["ntilde"] = Convert.ToChar(241); - _decodeOnly["ograve"] = Convert.ToChar(242); - _decodeOnly["oacute"] = Convert.ToChar(243); - _decodeOnly["ocirc"] = Convert.ToChar(244); - _decodeOnly["otilde"] = Convert.ToChar(245); - _decodeOnly["ouml"] = Convert.ToChar(246); - _decodeOnly["oslash"] = Convert.ToChar(248); - _decodeOnly["ugrave"] = Convert.ToChar(249); - _decodeOnly["uacute"] = Convert.ToChar(250); - _decodeOnly["ucirc"] = Convert.ToChar(251); - _decodeOnly["uuml"] = Convert.ToChar(252); - _decodeOnly["yacute"] = Convert.ToChar(253); - _decodeOnly["thorn"] = Convert.ToChar(254); - _decodeOnly["yuml"] = Convert.ToChar(255); + _decodeOnlyCaseSensitive["agrave"] = Convert.ToChar(224); + _decodeOnlyCaseSensitive["aacute"] = Convert.ToChar(225); + _decodeOnlyCaseSensitive["acirc"] = Convert.ToChar(226); + _decodeOnlyCaseSensitive["atilde"] = Convert.ToChar(227); + _decodeOnlyCaseSensitive["auml"] = Convert.ToChar(228); + _decodeOnlyCaseSensitive["aring"] = Convert.ToChar(229); + _decodeOnlyCaseSensitive["aelig"] = Convert.ToChar(230); + _decodeOnlyCaseSensitive["ccedil"] = Convert.ToChar(231); + _decodeOnlyCaseSensitive["egrave"] = Convert.ToChar(232); + _decodeOnlyCaseSensitive["eacute"] = Convert.ToChar(233); + _decodeOnlyCaseSensitive["ecirc"] = Convert.ToChar(234); + _decodeOnlyCaseSensitive["euml"] = Convert.ToChar(235); + _decodeOnlyCaseSensitive["igrave"] = Convert.ToChar(236); + _decodeOnlyCaseSensitive["iacute"] = Convert.ToChar(237); + _decodeOnlyCaseSensitive["icirc"] = Convert.ToChar(238); + _decodeOnlyCaseSensitive["iuml"] = Convert.ToChar(239); + _decodeOnlyCaseSensitive["eth"] = Convert.ToChar(240); + _decodeOnlyCaseSensitive["ntilde"] = Convert.ToChar(241); + _decodeOnlyCaseSensitive["ograve"] = Convert.ToChar(242); + _decodeOnlyCaseSensitive["oacute"] = Convert.ToChar(243); + _decodeOnlyCaseSensitive["ocirc"] = Convert.ToChar(244); + _decodeOnlyCaseSensitive["otilde"] = Convert.ToChar(245); + _decodeOnlyCaseSensitive["ouml"] = Convert.ToChar(246); + _decodeOnlyCaseSensitive["oslash"] = Convert.ToChar(248); + _decodeOnlyCaseSensitive["ugrave"] = Convert.ToChar(249); + _decodeOnlyCaseSensitive["uacute"] = Convert.ToChar(250); + _decodeOnlyCaseSensitive["ucirc"] = Convert.ToChar(251); + _decodeOnlyCaseSensitive["uuml"] = Convert.ToChar(252); + _decodeOnlyCaseSensitive["yacute"] = Convert.ToChar(253); + _decodeOnlyCaseSensitive["thorn"] = Convert.ToChar(254); + _decodeOnlyCaseSensitive["yuml"] = Convert.ToChar(255); // Math Symbols Supported by HTML _decodeOnly["forall"] = Convert.ToChar(8704); @@ -200,65 +201,65 @@ static HtmlUtils() _decodeOnly["sdot"] = Convert.ToChar(8901); // Greek Letters Supported by HTML - _decodeOnly["Alpha"] = Convert.ToChar(913); - _decodeOnly["Beta"] = Convert.ToChar(914); - _decodeOnly["Gamma"] = Convert.ToChar(915); - _decodeOnly["Delta"] = Convert.ToChar(916); - _decodeOnly["Epsilon"] = Convert.ToChar(917); - _decodeOnly["Zeta"] = Convert.ToChar(918); - _decodeOnly["Eta"] = Convert.ToChar(919); - _decodeOnly["Theta"] = Convert.ToChar(920); - _decodeOnly["Iota"] = Convert.ToChar(921); - _decodeOnly["Kappa"] = Convert.ToChar(922); - _decodeOnly["Lambda"] = Convert.ToChar(923); - _decodeOnly["Mu"] = Convert.ToChar(924); - _decodeOnly["Nu"] = Convert.ToChar(925); - _decodeOnly["Xi"] = Convert.ToChar(926); - _decodeOnly["Omicron"] = Convert.ToChar(927); - _decodeOnly["Pi"] = Convert.ToChar(928); - _decodeOnly["Rho"] = Convert.ToChar(929); - _decodeOnly["Sigma"] = Convert.ToChar(931); - _decodeOnly["Tau"] = Convert.ToChar(932); - _decodeOnly["Upsilon"] = Convert.ToChar(933); - _decodeOnly["Phi"] = Convert.ToChar(934); - _decodeOnly["Chi"] = Convert.ToChar(935); - _decodeOnly["Psi"] = Convert.ToChar(936); - _decodeOnly["Omega"] = Convert.ToChar(937); - _decodeOnly["alpha"] = Convert.ToChar(945); - _decodeOnly["beta"] = Convert.ToChar(946); - _decodeOnly["gamma"] = Convert.ToChar(947); - _decodeOnly["delta"] = Convert.ToChar(948); - _decodeOnly["epsilon"] = Convert.ToChar(949); - _decodeOnly["zeta"] = Convert.ToChar(950); - _decodeOnly["eta"] = Convert.ToChar(951); - _decodeOnly["theta"] = Convert.ToChar(952); - _decodeOnly["iota"] = Convert.ToChar(953); - _decodeOnly["kappa"] = Convert.ToChar(954); - _decodeOnly["lambda"] = Convert.ToChar(955); - _decodeOnly["mu"] = Convert.ToChar(956); - _decodeOnly["nu"] = Convert.ToChar(957); - _decodeOnly["xi"] = Convert.ToChar(958); - _decodeOnly["omicron"] = Convert.ToChar(959); - _decodeOnly["pi"] = Convert.ToChar(960); - _decodeOnly["rho"] = Convert.ToChar(961); - _decodeOnly["sigmaf"] = Convert.ToChar(962); - _decodeOnly["sigma"] = Convert.ToChar(963); - _decodeOnly["tau"] = Convert.ToChar(964); - _decodeOnly["upsilon"] = Convert.ToChar(965); - _decodeOnly["phi"] = Convert.ToChar(966); - _decodeOnly["chi"] = Convert.ToChar(967); - _decodeOnly["psi"] = Convert.ToChar(968); - _decodeOnly["omega"] = Convert.ToChar(969); + _decodeOnlyCaseSensitive["Alpha"] = Convert.ToChar(913); + _decodeOnlyCaseSensitive["Beta"] = Convert.ToChar(914); + _decodeOnlyCaseSensitive["Gamma"] = Convert.ToChar(915); + _decodeOnlyCaseSensitive["Delta"] = Convert.ToChar(916); + _decodeOnlyCaseSensitive["Epsilon"] = Convert.ToChar(917); + _decodeOnlyCaseSensitive["Zeta"] = Convert.ToChar(918); + _decodeOnlyCaseSensitive["Eta"] = Convert.ToChar(919); + _decodeOnlyCaseSensitive["Theta"] = Convert.ToChar(920); + _decodeOnlyCaseSensitive["Iota"] = Convert.ToChar(921); + _decodeOnlyCaseSensitive["Kappa"] = Convert.ToChar(922); + _decodeOnlyCaseSensitive["Lambda"] = Convert.ToChar(923); + _decodeOnlyCaseSensitive["Mu"] = Convert.ToChar(924); + _decodeOnlyCaseSensitive["Nu"] = Convert.ToChar(925); + _decodeOnlyCaseSensitive["Xi"] = Convert.ToChar(926); + _decodeOnlyCaseSensitive["Omicron"] = Convert.ToChar(927); + _decodeOnlyCaseSensitive["Pi"] = Convert.ToChar(928); + _decodeOnlyCaseSensitive["Rho"] = Convert.ToChar(929); + _decodeOnlyCaseSensitive["Sigma"] = Convert.ToChar(931); + _decodeOnlyCaseSensitive["Tau"] = Convert.ToChar(932); + _decodeOnlyCaseSensitive["Upsilon"] = Convert.ToChar(933); + _decodeOnlyCaseSensitive["Phi"] = Convert.ToChar(934); + _decodeOnlyCaseSensitive["Chi"] = Convert.ToChar(935); + _decodeOnlyCaseSensitive["Psi"] = Convert.ToChar(936); + _decodeOnlyCaseSensitive["Omega"] = Convert.ToChar(937); + _decodeOnlyCaseSensitive["alpha"] = Convert.ToChar(945); + _decodeOnlyCaseSensitive["beta"] = Convert.ToChar(946); + _decodeOnlyCaseSensitive["gamma"] = Convert.ToChar(947); + _decodeOnlyCaseSensitive["delta"] = Convert.ToChar(948); + _decodeOnlyCaseSensitive["epsilon"] = Convert.ToChar(949); + _decodeOnlyCaseSensitive["zeta"] = Convert.ToChar(950); + _decodeOnlyCaseSensitive["eta"] = Convert.ToChar(951); + _decodeOnlyCaseSensitive["theta"] = Convert.ToChar(952); + _decodeOnlyCaseSensitive["iota"] = Convert.ToChar(953); + _decodeOnlyCaseSensitive["kappa"] = Convert.ToChar(954); + _decodeOnlyCaseSensitive["lambda"] = Convert.ToChar(955); + _decodeOnlyCaseSensitive["mu"] = Convert.ToChar(956); + _decodeOnlyCaseSensitive["nu"] = Convert.ToChar(957); + _decodeOnlyCaseSensitive["xi"] = Convert.ToChar(958); + _decodeOnlyCaseSensitive["omicron"] = Convert.ToChar(959); + _decodeOnlyCaseSensitive["pi"] = Convert.ToChar(960); + _decodeOnlyCaseSensitive["rho"] = Convert.ToChar(961); + _decodeOnlyCaseSensitive["sigmaf"] = Convert.ToChar(962); + _decodeOnlyCaseSensitive["sigma"] = Convert.ToChar(963); + _decodeOnlyCaseSensitive["tau"] = Convert.ToChar(964); + _decodeOnlyCaseSensitive["upsilon"] = Convert.ToChar(965); + _decodeOnlyCaseSensitive["phi"] = Convert.ToChar(966); + _decodeOnlyCaseSensitive["chi"] = Convert.ToChar(967); + _decodeOnlyCaseSensitive["psi"] = Convert.ToChar(968); + _decodeOnlyCaseSensitive["omega"] = Convert.ToChar(969); _decodeOnly["thetasym"] = Convert.ToChar(977); _decodeOnly["upsih"] = Convert.ToChar(978); _decodeOnly["piv"] = Convert.ToChar(982); // Other Entities Supported by HTML - _decodeOnly["OElig"] = Convert.ToChar(338); - _decodeOnly["oelig"] = Convert.ToChar(339); - _decodeOnly["Scaron"] = Convert.ToChar(352); - _decodeOnly["scaron"] = Convert.ToChar(353); - _decodeOnly["Yuml"] = Convert.ToChar(376); + _decodeOnlyCaseSensitive["OElig"] = Convert.ToChar(338); + _decodeOnlyCaseSensitive["oelig"] = Convert.ToChar(339); + _decodeOnlyCaseSensitive["Scaron"] = Convert.ToChar(352); + _decodeOnlyCaseSensitive["scaron"] = Convert.ToChar(353); + _decodeOnlyCaseSensitive["Yuml"] = Convert.ToChar(376); _decodeOnly["fnof"] = Convert.ToChar(402); _decodeOnly["circ"] = Convert.ToChar(710); _decodeOnly["tilde"] = Convert.ToChar(732); @@ -270,13 +271,13 @@ static HtmlUtils() _decodeOnly["ldquo"] = Convert.ToChar(8220); _decodeOnly["rdquo"] = Convert.ToChar(8221); _decodeOnly["bdquo"] = Convert.ToChar(8222); - _decodeOnly["dagger"] = Convert.ToChar(8224); - _decodeOnly["Dagger"] = Convert.ToChar(8225); + _decodeOnlyCaseSensitive["dagger"] = Convert.ToChar(8224); + _decodeOnlyCaseSensitive["Dagger"] = Convert.ToChar(8225); _decodeOnly["bull"] = Convert.ToChar(8226); _decodeOnly["hellip"] = Convert.ToChar(8230); _decodeOnly["permil"] = Convert.ToChar(8240); - _decodeOnly["prime"] = Convert.ToChar(8242); - _decodeOnly["Prime"] = Convert.ToChar(8243); + _decodeOnlyCaseSensitive["prime"] = Convert.ToChar(8242); + _decodeOnlyCaseSensitive["Prime"] = Convert.ToChar(8243); _decodeOnly["lsaquo"] = Convert.ToChar(8249); _decodeOnly["rsaquo"] = Convert.ToChar(8250); _decodeOnly["oline"] = Convert.ToChar(8254); @@ -397,7 +398,12 @@ private static string DecodeHtmlCharByName(string str) { var key = str.Substring(idx + 1, endIdx - idx - 1); char c; - if (_decodeOnly.TryGetValue(key, out c)) + if (_decodeOnlyCaseSensitive.TryGetValue(key, out c)) + { + str = str.Remove(idx, endIdx - idx + 1); + str = str.Insert(idx, c.ToString()); + } + else if(_decodeOnly.TryGetValue(key, out c)) { str = str.Remove(idx, endIdx - idx + 1); str = str.Insert(idx, c.ToString());