From 54148023d3591ffd093dced98c0185515ee34c03 Mon Sep 17 00:00:00 2001 From: pavel Date: Fri, 13 Mar 2020 18:55:55 +0100 Subject: [PATCH 1/2] handle invalid encoding --- lib/cgi/util.rb | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb index aab8b00..69a252b 100644 --- a/lib/cgi/util.rb +++ b/lib/cgi/util.rb @@ -49,9 +49,12 @@ def escapeHTML(string) table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}] string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table) string.encode!(origenc) if origenc - return string + string + else + string = string.b + string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__) + string.force_encoding(enc) end - string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__) end begin @@ -90,7 +93,8 @@ def unescapeHTML(string) when Encoding::ISO_8859_1; 256 else 128 end - string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do + string = string.b + string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do match = $1.dup case match when 'apos' then "'" @@ -116,6 +120,7 @@ def unescapeHTML(string) "&#{match};" end end + string.force_encoding enc end # Synonym for CGI.escapeHTML(str) From 69d2ad51c04877f712e0d2d62a61dec3aaa5dea2 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Thu, 25 Feb 2021 09:24:15 -0800 Subject: [PATCH 2/2] Add test for escapeHTML/unescapeHTML invalid encoding fix in pure ruby version Also, remove pointless assert_nothing_raised(ArgumentError) while here. --- test/cgi/test_cgi_util.rb | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb index b7bb7b8..6ce8b42 100644 --- a/test/cgi/test_cgi_util.rb +++ b/test/cgi/test_cgi_util.rb @@ -36,9 +36,7 @@ def test_cgi_escape_with_unreserved_characters end def test_cgi_escape_with_invalid_byte_sequence - assert_nothing_raised(ArgumentError) do - assert_equal('%C0%3C%3C', CGI.escape("\xC0\<\<".dup.force_encoding("UTF-8"))) - end + assert_equal('%C0%3C%3C', CGI.escape("\xC0\<\<".dup.force_encoding("UTF-8"))) end def test_cgi_escape_preserve_encoding @@ -191,3 +189,32 @@ def test_cgi_unescapeElement assert_equal('<BR>', unescape_element(escapeHTML('
'), ["A", "IMG"])) end end + +class CGIUtilPureRubyTest < Test::Unit::TestCase + def setup + CGI::Escape.module_eval do + alias _escapeHTML escapeHTML + remove_method :escapeHTML + alias _unescapeHTML unescapeHTML + remove_method :unescapeHTML + end + end + + def teardown + CGI::Escape.module_eval do + alias escapeHTML _escapeHTML + remove_method :_escapeHTML + alias unescapeHTML _unescapeHTML + remove_method :_unescapeHTML + end + end + + def test_cgi_escapeHTML_with_invalid_byte_sequence + assert_equal("<\xA4??>", CGI.escapeHTML(%[<\xA4??>])) + end + + def test_cgi_unescapeHTML_with_invalid_byte_sequence + input = "\xFF&" + assert_equal(input, CGI.unescapeHTML(input)) + end +end