diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index 495ad83..6b00bc3 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -8,7 +8,7 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[]; #define upper_hexdigits (ruby_hexdigits+16) #define char_to_number(c) ruby_digit36_to_number_table[(unsigned char)(c)] -static VALUE rb_cCGI, rb_mUtil, rb_mEscape; +static VALUE rb_cCGI, rb_mEscape, rb_mEscapeExt; static ID id_accept_charset; #define HTML_ESCAPE_MAX_LEN 6 @@ -471,17 +471,17 @@ Init_escape(void) void InitVM_escape(void) { - rb_cCGI = rb_define_class("CGI", rb_cObject); - rb_mEscape = rb_define_module_under(rb_cCGI, "Escape"); - rb_mUtil = rb_define_module_under(rb_cCGI, "Util"); - rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1); - rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1); - rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1); - rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent"); - rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1); - rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent"); - rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1); - rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1); - rb_prepend_module(rb_mUtil, rb_mEscape); - rb_extend_object(rb_cCGI, rb_mEscape); + rb_cCGI = rb_define_class("CGI", rb_cObject); + rb_mEscapeExt = rb_define_module_under(rb_cCGI, "EscapeExt"); + rb_mEscape = rb_define_module_under(rb_cCGI, "Escape"); + rb_define_method(rb_mEscapeExt, "escapeHTML", cgiesc_escape_html, 1); + rb_define_method(rb_mEscapeExt, "unescapeHTML", cgiesc_unescape_html, 1); + rb_define_method(rb_mEscapeExt, "escapeURIComponent", cgiesc_escape_uri_component, 1); + rb_define_alias(rb_mEscapeExt, "escape_uri_component", "escapeURIComponent"); + rb_define_method(rb_mEscapeExt, "unescapeURIComponent", cgiesc_unescape_uri_component, -1); + rb_define_alias(rb_mEscapeExt, "unescape_uri_component", "unescapeURIComponent"); + rb_define_method(rb_mEscapeExt, "escape", cgiesc_escape, 1); + rb_define_method(rb_mEscapeExt, "unescape", cgiesc_unescape, -1); + rb_prepend_module(rb_mEscape, rb_mEscapeExt); + rb_extend_object(rb_cCGI, rb_mEscapeExt); } diff --git a/lib/cgi.rb b/lib/cgi.rb index 69c3c4f..a35fabb 100644 --- a/lib/cgi.rb +++ b/lib/cgi.rb @@ -288,10 +288,11 @@ # class CGI - VERSION = "0.4.2" + VERSION = "0.5.0.beta1" end require 'cgi/core' require 'cgi/cookie' require 'cgi/util' +require 'cgi/escape' unless defined?(CGI::EscapeExt) CGI.autoload(:HtmlExtension, 'cgi/html') diff --git a/lib/cgi/core.rb b/lib/cgi/core.rb index 62e6068..e6c19bb 100644 --- a/lib/cgi/core.rb +++ b/lib/cgi/core.rb @@ -4,12 +4,12 @@ # generating HTTP responses. #++ class CGI - unless const_defined?(:Util) - module Util + unless const_defined?(:Escape) + module Escape @@accept_charset = "UTF-8" # :nodoc: end - include Util - extend Util + include Escape + extend Escape end $CGI_ENV = ENV # for FCGI support diff --git a/lib/cgi/escape.rb b/lib/cgi/escape.rb new file mode 100644 index 0000000..59a310b --- /dev/null +++ b/lib/cgi/escape.rb @@ -0,0 +1,224 @@ +# frozen_string_literal: true + +class CGI + module Escape; end + include Escape + extend Escape +end + +module CGI::Escape + @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset) + + # URL-encode a string into application/x-www-form-urlencoded. + # Space characters (+" "+) are encoded with plus signs (+"+"+) + # url_encoded_string = CGI.escape("'Stop!' said Fred") + # # => "%27Stop%21%27+said+Fred" + def escape(string) + encoding = string.encoding + buffer = string.b + buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m| + '%' + m.unpack('H2' * m.bytesize).join('%').upcase + end + buffer.tr!(' ', '+') + buffer.force_encoding(encoding) + end + + # URL-decode an application/x-www-form-urlencoded string with encoding(optional). + # string = CGI.unescape("%27Stop%21%27+said+Fred") + # # => "'Stop!' said Fred" + def unescape(string, encoding = @@accept_charset) + str = string.tr('+', ' ') + str = str.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| + [m.delete('%')].pack('H*') + end + str.force_encoding(encoding) + str.valid_encoding? ? str : str.force_encoding(string.encoding) + end + + # URL-encode a string following RFC 3986 + # Space characters (+" "+) are encoded with (+"%20"+) + # url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred") + # # => "%27Stop%21%27%20said%20Fred" + def escapeURIComponent(string) + encoding = string.encoding + buffer = string.b + buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m| + '%' + m.unpack('H2' * m.bytesize).join('%').upcase + end + buffer.force_encoding(encoding) + end + alias escape_uri_component escapeURIComponent + + # URL-decode a string following RFC 3986 with encoding(optional). + # string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred") + # # => "'Stop!'+said Fred" + def unescapeURIComponent(string, encoding = @@accept_charset) + str = string.b + str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| + [m.delete('%')].pack('H*') + end + str.force_encoding(encoding) + str.valid_encoding? ? str : str.force_encoding(string.encoding) + end + + alias unescape_uri_component unescapeURIComponent + + # The set of special characters and their escaped values + TABLE_FOR_ESCAPE_HTML__ = { + "'" => ''', + '&' => '&', + '"' => '"', + '<' => '<', + '>' => '>', + } + + # Escape special characters in HTML, namely '&\"<> + # CGI.escapeHTML('Usage: foo "bar" ') + # # => "Usage: foo "bar" <baz>" + def escapeHTML(string) + enc = string.encoding + unless enc.ascii_compatible? + if enc.dummy? + origenc = enc + enc = Encoding::Converter.asciicompat_encoding(enc) + string = enc ? string.encode(enc) : string.b + end + table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}] + string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table) + string.encode!(origenc) if origenc + string + else + string = string.b + string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__) + string.force_encoding(enc) + end + end + + # Unescape a string that has been HTML-escaped + # CGI.unescapeHTML("Usage: foo "bar" <baz>") + # # => "Usage: foo \"bar\" " + def unescapeHTML(string) + enc = string.encoding + unless enc.ascii_compatible? + if enc.dummy? + origenc = enc + enc = Encoding::Converter.asciicompat_encoding(enc) + string = enc ? string.encode(enc) : string.b + end + string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do + case $1.encode(Encoding::US_ASCII) + when 'apos' then "'".encode(enc) + when 'amp' then '&'.encode(enc) + when 'quot' then '"'.encode(enc) + when 'gt' then '>'.encode(enc) + when 'lt' then '<'.encode(enc) + when /\A#0*(\d+)\z/ then $1.to_i.chr(enc) + when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc) + end + end + string.encode!(origenc) if origenc + return string + end + return string unless string.include? '&' + charlimit = case enc + when Encoding::UTF_8; 0x10ffff + when Encoding::ISO_8859_1; 256 + else 128 + end + string = string.b + string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do + match = $1.dup + case match + when 'apos' then "'" + when 'amp' then '&' + when 'quot' then '"' + when 'gt' then '>' + when 'lt' then '<' + when /\A#0*(\d+)\z/ + n = $1.to_i + if n < charlimit + n.chr(enc) + else + "&##{$1};" + end + when /\A#x([0-9a-f]+)\z/i + n = $1.hex + if n < charlimit + n.chr(enc) + else + "&#x#{$1};" + end + else + "&#{match};" + end + end + string.force_encoding enc + end + + # Synonym for CGI.escapeHTML(str) + alias escape_html escapeHTML + alias h escapeHTML + + # Synonym for CGI.unescapeHTML(str) + alias unescape_html unescapeHTML + + # TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there + unless RUBY_ENGINE == 'truffleruby' + begin + require 'cgi/escape.so' + rescue LoadError + end + end + + # Escape only the tags of certain HTML elements in +string+. + # + # Takes an element or elements or array of elements. Each element + # is specified by the name of the element, without angle brackets. + # This matches both the start and the end tag of that element. + # The attribute list of the open tag will also be escaped (for + # instance, the double-quotes surrounding attribute values). + # + # print CGI.escapeElement('
', "A", "IMG") + # # "
<A HREF="url"></A>" + # + # print CGI.escapeElement('
', ["A", "IMG"]) + # # "
<A HREF="url"></A>" + def escapeElement(string, *elements) + elements = elements[0] if elements[0].kind_of?(Array) + unless elements.empty? + string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do + CGI.escapeHTML($&) + end + else + string + end + end + + # Undo escaping such as that done by CGI.escapeElement() + # + # print CGI.unescapeElement( + # CGI.escapeHTML('
'), "A", "IMG") + # # "<BR>" + # + # print CGI.unescapeElement( + # CGI.escapeHTML('
'), ["A", "IMG"]) + # # "<BR>" + def unescapeElement(string, *elements) + elements = elements[0] if elements[0].kind_of?(Array) + unless elements.empty? + string.gsub(/<\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:>)?/im) do + unescapeHTML($&) + end + else + string + end + end + + # Synonym for CGI.escapeElement(str) + alias escape_element escapeElement + + # Synonym for CGI.unescapeElement(str) + alias unescape_element unescapeElement + +end diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb index 5f12eae..d4a5fbb 100644 --- a/lib/cgi/util.rb +++ b/lib/cgi/util.rb @@ -4,220 +4,8 @@ module Util; end include Util extend Util end -module CGI::Util - @@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset) - - # URL-encode a string into application/x-www-form-urlencoded. - # Space characters (+" "+) are encoded with plus signs (+"+"+) - # url_encoded_string = CGI.escape("'Stop!' said Fred") - # # => "%27Stop%21%27+said+Fred" - def escape(string) - encoding = string.encoding - buffer = string.b - buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m| - '%' + m.unpack('H2' * m.bytesize).join('%').upcase - end - buffer.tr!(' ', '+') - buffer.force_encoding(encoding) - end - - # URL-decode an application/x-www-form-urlencoded string with encoding(optional). - # string = CGI.unescape("%27Stop%21%27+said+Fred") - # # => "'Stop!' said Fred" - def unescape(string, encoding = @@accept_charset) - str = string.tr('+', ' ') - str = str.b - str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| - [m.delete('%')].pack('H*') - end - str.force_encoding(encoding) - str.valid_encoding? ? str : str.force_encoding(string.encoding) - end - - # URL-encode a string following RFC 3986 - # Space characters (+" "+) are encoded with (+"%20"+) - # url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred") - # # => "%27Stop%21%27%20said%20Fred" - def escapeURIComponent(string) - encoding = string.encoding - buffer = string.b - buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m| - '%' + m.unpack('H2' * m.bytesize).join('%').upcase - end - buffer.force_encoding(encoding) - end - alias escape_uri_component escapeURIComponent - - # URL-decode a string following RFC 3986 with encoding(optional). - # string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred") - # # => "'Stop!'+said Fred" - def unescapeURIComponent(string, encoding = @@accept_charset) - str = string.b - str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m| - [m.delete('%')].pack('H*') - end - str.force_encoding(encoding) - str.valid_encoding? ? str : str.force_encoding(string.encoding) - end - - alias unescape_uri_component unescapeURIComponent - - # The set of special characters and their escaped values - TABLE_FOR_ESCAPE_HTML__ = { - "'" => ''', - '&' => '&', - '"' => '"', - '<' => '<', - '>' => '>', - } - - # Escape special characters in HTML, namely '&\"<> - # CGI.escapeHTML('Usage: foo "bar" ') - # # => "Usage: foo "bar" <baz>" - def escapeHTML(string) - enc = string.encoding - unless enc.ascii_compatible? - if enc.dummy? - origenc = enc - enc = Encoding::Converter.asciicompat_encoding(enc) - string = enc ? string.encode(enc) : string.b - end - table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}] - string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table) - string.encode!(origenc) if origenc - string - else - string = string.b - string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__) - string.force_encoding(enc) - end - end - - # TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there - unless RUBY_ENGINE == 'truffleruby' - begin - require 'cgi/escape' - rescue LoadError - end - end - - # Unescape a string that has been HTML-escaped - # CGI.unescapeHTML("Usage: foo "bar" <baz>") - # # => "Usage: foo \"bar\" " - def unescapeHTML(string) - enc = string.encoding - unless enc.ascii_compatible? - if enc.dummy? - origenc = enc - enc = Encoding::Converter.asciicompat_encoding(enc) - string = enc ? string.encode(enc) : string.b - end - string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do - case $1.encode(Encoding::US_ASCII) - when 'apos' then "'".encode(enc) - when 'amp' then '&'.encode(enc) - when 'quot' then '"'.encode(enc) - when 'gt' then '>'.encode(enc) - when 'lt' then '<'.encode(enc) - when /\A#0*(\d+)\z/ then $1.to_i.chr(enc) - when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc) - end - end - string.encode!(origenc) if origenc - return string - end - return string unless string.include? '&' - charlimit = case enc - when Encoding::UTF_8; 0x10ffff - when Encoding::ISO_8859_1; 256 - else 128 - end - string = string.b - string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do - match = $1.dup - case match - when 'apos' then "'" - when 'amp' then '&' - when 'quot' then '"' - when 'gt' then '>' - when 'lt' then '<' - when /\A#0*(\d+)\z/ - n = $1.to_i - if n < charlimit - n.chr(enc) - else - "&##{$1};" - end - when /\A#x([0-9a-f]+)\z/i - n = $1.hex - if n < charlimit - n.chr(enc) - else - "&#x#{$1};" - end - else - "&#{match};" - end - end - string.force_encoding enc - end - - # Synonym for CGI.escapeHTML(str) - alias escape_html escapeHTML - - # Synonym for CGI.unescapeHTML(str) - alias unescape_html unescapeHTML - - # Escape only the tags of certain HTML elements in +string+. - # - # Takes an element or elements or array of elements. Each element - # is specified by the name of the element, without angle brackets. - # This matches both the start and the end tag of that element. - # The attribute list of the open tag will also be escaped (for - # instance, the double-quotes surrounding attribute values). - # - # print CGI.escapeElement('
', "A", "IMG") - # # "
<A HREF="url"></A>" - # - # print CGI.escapeElement('
', ["A", "IMG"]) - # # "
<A HREF="url"></A>" - def escapeElement(string, *elements) - elements = elements[0] if elements[0].kind_of?(Array) - unless elements.empty? - string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do - CGI.escapeHTML($&) - end - else - string - end - end - - # Undo escaping such as that done by CGI.escapeElement() - # - # print CGI.unescapeElement( - # CGI.escapeHTML('
'), "A", "IMG") - # # "<BR>" - # - # print CGI.unescapeElement( - # CGI.escapeHTML('
'), ["A", "IMG"]) - # # "<BR>" - def unescapeElement(string, *elements) - elements = elements[0] if elements[0].kind_of?(Array) - unless elements.empty? - string.gsub(/<\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:>)?/im) do - unescapeHTML($&) - end - else - string - end - end - - # Synonym for CGI.escapeElement(str) - alias escape_element escapeElement - - # Synonym for CGI.unescapeElement(str) - alias unescape_element unescapeElement +module CGI::Util # Format a +Time+ object as a String using the format specified by RFC 1123. # # CGI.rfc1123_date(Time.now) @@ -253,6 +41,7 @@ def pretty(string, shift = " ") end lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1') end - - alias h escapeHTML end + +# For backward compatibility +require 'cgi/escape' unless defined?(CGI::EscapeExt) diff --git a/test/cgi/test_cgi_escape.rb b/test/cgi/test_cgi_escape.rb new file mode 100644 index 0000000..f6ca658 --- /dev/null +++ b/test/cgi/test_cgi_escape.rb @@ -0,0 +1,325 @@ +# frozen_string_literal: true +require 'test/unit' +require 'cgi/escape' +require 'stringio' +require_relative 'update_env' + + +class CGIEscapeTest < Test::Unit::TestCase + include CGI::Escape + include UpdateEnv + + def setup + @environ = {} + update_env( + 'REQUEST_METHOD' => 'GET', + 'SCRIPT_NAME' => nil, + ) + @str1="&<>\" \xE3\x82\x86\xE3\x82\x93\xE3\x82\x86\xE3\x82\x93".dup + @str1.force_encoding("UTF-8") if defined?(::Encoding) + end + + def teardown + ENV.update(@environ) + end + + def test_cgi_escape + assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1)) + assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding) + end + + def test_cgi_escape_with_unreserved_characters + assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", + CGI.escape("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"), + "should not escape any unreserved characters, as per RFC3986 Section 2.3") + end + + def test_cgi_escape_with_invalid_byte_sequence + assert_equal('%C0%3C%3C', CGI.escape("\xC0\<\<".dup.force_encoding("UTF-8"))) + end + + def test_cgi_escape_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.escape("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.escape("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.escape("\xC0\<\<".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescape + str = CGI.unescape('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93') + assert_equal(@str1, str) + return unless defined?(::Encoding) + + assert_equal(@str1.encoding, str.encoding) + assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescape("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) + end + + def test_cgi_unescape_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.unescape("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.unescape("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.unescape("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescape_accept_charset + return unless defined?(::Encoding) + + assert_raise(TypeError) {CGI.unescape('', nil)} + assert_separately(%w[-rcgi/escape], "#{<<-"begin;"}\n#{<<-"end;"}") + begin; + assert_equal("", CGI.unescape('')) + end; + end + + def test_cgi_escapeURIComponent + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escapeURIComponent(@str1)) + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escapeURIComponent(@str1).ascii_only?) if defined?(::Encoding) + end + + def test_cgi_escape_uri_component + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape_uri_component(@str1)) + end + + def test_cgi_escapeURIComponent_with_unreserved_characters + assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", + CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"), + "should not encode any unreserved characters, as per RFC3986 Section 2.3") + end + + def test_cgi_escapeURIComponent_with_invalid_byte_sequence + assert_equal('%C0%3C%3C', CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8"))) + end + + def test_cgi_escapeURIComponent_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescapeURIComponent + str = CGI.unescapeURIComponent('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93') + assert_equal(@str1, str) + return unless defined?(::Encoding) + + assert_equal("foo+bar", CGI.unescapeURIComponent("foo+bar")) + + assert_equal(@str1.encoding, str.encoding) + assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) + end + + def test_cgi_unescape_uri_component + str = CGI.unescape_uri_component('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93') + assert_equal(@str1, str) + end + + def test_cgi_unescapeURIComponent_preserve_encoding + assert_equal(Encoding::US_ASCII, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding) + assert_equal(Encoding::ASCII_8BIT, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding) + assert_equal(Encoding::UTF_8, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding) + end + + def test_cgi_unescapeURIComponent_accept_charset + return unless defined?(::Encoding) + + assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)} + assert_separately(%w[-rcgi/escape], "#{<<-"begin;"}\n#{<<-"end;"}") + begin; + assert_equal("", CGI.unescapeURIComponent('')) + end; + end + + def test_cgi_escapeHTML + assert_equal("'&"><", CGI.escapeHTML("'&\"><")) + end + + def test_cgi_escape_html_duplicated + orig = "Ruby".dup.force_encoding("US-ASCII") + str = CGI.escapeHTML(orig) + assert_equal(orig, str) + assert_not_same(orig, str) + end + + def assert_cgi_escape_html_preserve_encoding(str, encoding) + assert_equal(encoding, CGI.escapeHTML(str.dup.force_encoding(encoding)).encoding) + end + + def test_cgi_escape_html_preserve_encoding + Encoding.list do |enc| + assert_cgi_escape_html_preserve_encoding("'&\"><", enc) + assert_cgi_escape_html_preserve_encoding("Ruby", enc) + end + end + + def test_cgi_escape_html_dont_freeze + assert_not_predicate CGI.escapeHTML("'&\"><".dup), :frozen? + assert_not_predicate CGI.escapeHTML("'&\"><".freeze), :frozen? + assert_not_predicate CGI.escapeHTML("Ruby".dup), :frozen? + assert_not_predicate CGI.escapeHTML("Ruby".freeze), :frozen? + end + + def test_cgi_escape_html_large + return if RUBY_ENGINE == 'jruby' + ulong_max, size_max = RbConfig::LIMITS.values_at("ULONG_MAX", "SIZE_MAX") + return unless ulong_max < size_max # Platforms not concerned + + size = (ulong_max / 6 + 1) + begin + str = '"' * size + escaped = CGI.escapeHTML(str) + rescue NoMemoryError + omit "Not enough memory" + rescue => e + end + assert_raise_with_message(ArgumentError, /overflow/, ->{"length = #{escaped.length}"}) do + raise e if e + end + end + + def test_cgi_unescapeHTML + assert_equal("'&\"><", CGI.unescapeHTML("'&"><")) + end + + def test_cgi_unescapeHTML_invalid + assert_equal('&<&>"&abcdefghijklmn', CGI.unescapeHTML('&<&>"&abcdefghijklmn')) + end + + module UnescapeHTMLTests + def test_cgi_unescapeHTML_following_known_first_letter + assert_equal('&a>&q>&l>&g>', CGI.unescapeHTML('&a>&q>&l>&g>')) + end + + def test_cgi_unescapeHTML_following_number_sign + assert_equal('&#>&#x>', CGI.unescapeHTML('&#>&#x>')) + end + + def test_cgi_unescapeHTML_following_invalid_numeric + assert_equal('�>�>', CGI.unescapeHTML('�>�>')) + end + end + + include UnescapeHTMLTests + + Encoding.list.each do |enc| + begin + escaped = "'&"><".encode(enc) + unescaped = "'&\"><".encode(enc) + rescue Encoding::ConverterNotFoundError + next + else + define_method("test_cgi_escapeHTML:#{enc.name}") do + assert_equal(escaped, CGI.escapeHTML(unescaped)) + end + define_method("test_cgi_unescapeHTML:#{enc.name}") do + assert_equal(unescaped, CGI.unescapeHTML(escaped)) + end + end + end + + Encoding.list.each do |enc| + next unless enc.ascii_compatible? + begin + escaped = "%25+%2B" + unescaped = "% +".encode(enc) + rescue Encoding::ConverterNotFoundError + next + else + define_method("test_cgi_escape:#{enc.name}") do + assert_equal(escaped, CGI.escape(unescaped)) + end + define_method("test_cgi_unescape:#{enc.name}") do + assert_equal(unescaped, CGI.unescape(escaped, enc)) + end + end + end + + def test_cgi_unescapeHTML_uppercasecharacter + assert_equal("\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", CGI.unescapeHTML("あいう")) + end + + def test_cgi_include_escape + assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', escape(@str1)) + end + + def test_cgi_include_escapeHTML + assert_equal("'&"><", escapeHTML("'&\"><")) + end + + def test_cgi_include_h + assert_equal("'&"><", h("'&\"><")) + end + + def test_cgi_include_unescape + str = unescape('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93') + assert_equal(@str1, str) + return unless defined?(::Encoding) + + assert_equal(@str1.encoding, str.encoding) + assert_equal("\u{30E1 30E2 30EA 691C 7D22}", unescape("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) + end + + def test_cgi_include_unescapeHTML + assert_equal("'&\"><", unescapeHTML("'&"><")) + end + + def test_cgi_escapeElement + assert_equal("
<A HREF="url"></A>", escapeElement('
', "A", "IMG")) + assert_equal("
<A HREF="url"></A>", escapeElement('
', ["A", "IMG"])) + assert_equal("
<A HREF="url"></A>", escape_element('
', "A", "IMG")) + assert_equal("
<A HREF="url"></A>", escape_element('
', ["A", "IMG"])) + + assert_equal("<A <A HREF="url"></A>", escapeElement('', "A", "IMG")) + assert_equal("<A <A HREF="url"></A>", escapeElement('', ["A", "IMG"])) + assert_equal("<A <A HREF="url"></A>", escape_element('', "A", "IMG")) + assert_equal("<A <A HREF="url"></A>", escape_element('', ["A", "IMG"])) + + assert_equal("<A <A ", escapeElement('', unescapeElement(escapeHTML('
'), "A", "IMG")) + assert_equal('<BR>', unescapeElement(escapeHTML('
'), ["A", "IMG"])) + assert_equal('<BR>', unescape_element(escapeHTML('
'), "A", "IMG")) + assert_equal('<BR>', unescape_element(escapeHTML('
'), ["A", "IMG"])) + + assert_equal('', unescapeElement(escapeHTML(''), "A", "IMG")) + assert_equal('', unescapeElement(escapeHTML(''), ["A", "IMG"])) + assert_equal('', unescape_element(escapeHTML(''), "A", "IMG")) + assert_equal('', unescape_element(escapeHTML(''), ["A", "IMG"])) + + assert_equal('])) + end + + def test_cgi_unescapeHTML_with_invalid_byte_sequence + input = "\xFF&" + assert_equal(input, CGI.unescapeHTML(input)) + end +end diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb index bff77f7..50f85e3 100644 --- a/test/cgi/test_cgi_util.rb +++ b/test/cgi/test_cgi_util.rb @@ -1,330 +1,12 @@ # frozen_string_literal: true require 'test/unit' -require 'cgi' -require 'stringio' -require_relative 'update_env' - +require 'cgi/util' class CGIUtilTest < Test::Unit::TestCase - include CGI::Util - include UpdateEnv - - def setup - @environ = {} - update_env( - 'REQUEST_METHOD' => 'GET', - 'SCRIPT_NAME' => nil, - ) - @str1="&<>\" \xE3\x82\x86\xE3\x82\x93\xE3\x82\x86\xE3\x82\x93".dup - @str1.force_encoding("UTF-8") if defined?(::Encoding) - end - - def teardown - ENV.update(@environ) - end - - def test_cgi_escape - assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape(@str1)) - assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escape(@str1).ascii_only?) if defined?(::Encoding) - end - - def test_cgi_escape_with_unreserved_characters - assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", - CGI.escape("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"), - "should not escape any unreserved characters, as per RFC3986 Section 2.3") - end - - def test_cgi_escape_with_invalid_byte_sequence - assert_equal('%C0%3C%3C', CGI.escape("\xC0\<\<".dup.force_encoding("UTF-8"))) - end - - def test_cgi_escape_preserve_encoding - assert_equal(Encoding::US_ASCII, CGI.escape("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding) - assert_equal(Encoding::ASCII_8BIT, CGI.escape("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding) - assert_equal(Encoding::UTF_8, CGI.escape("\xC0\<\<".dup.force_encoding("UTF-8")).encoding) - end - - def test_cgi_unescape - str = CGI.unescape('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93') - assert_equal(@str1, str) - return unless defined?(::Encoding) - - assert_equal(@str1.encoding, str.encoding) - assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescape("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) - end - - def test_cgi_unescape_preserve_encoding - assert_equal(Encoding::US_ASCII, CGI.unescape("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding) - assert_equal(Encoding::ASCII_8BIT, CGI.unescape("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding) - assert_equal(Encoding::UTF_8, CGI.unescape("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding) - end - - def test_cgi_unescape_accept_charset - return unless defined?(::Encoding) - - assert_raise(TypeError) {CGI.unescape('', nil)} - assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}") - begin; - assert_equal("", CGI.unescape('')) - end; - end - - def test_cgi_escapeURIComponent - assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escapeURIComponent(@str1)) - assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escapeURIComponent(@str1).ascii_only?) if defined?(::Encoding) - end - - def test_cgi_escape_uri_component - assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape_uri_component(@str1)) - end - - def test_cgi_escapeURIComponent_with_unreserved_characters - assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", - CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"), - "should not encode any unreserved characters, as per RFC3986 Section 2.3") - end - - def test_cgi_escapeURIComponent_with_invalid_byte_sequence - assert_equal('%C0%3C%3C', CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8"))) - end - - def test_cgi_escapeURIComponent_preserve_encoding - assert_equal(Encoding::US_ASCII, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("US-ASCII")).encoding) - assert_equal(Encoding::ASCII_8BIT, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("ASCII-8BIT")).encoding) - assert_equal(Encoding::UTF_8, CGI.escapeURIComponent("\xC0\<\<".dup.force_encoding("UTF-8")).encoding) - end - - def test_cgi_unescapeURIComponent - str = CGI.unescapeURIComponent('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93') - assert_equal(@str1, str) - return unless defined?(::Encoding) - - assert_equal("foo+bar", CGI.unescapeURIComponent("foo+bar")) - - assert_equal(@str1.encoding, str.encoding) - assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) - end - - def test_cgi_unescape_uri_component - str = CGI.unescape_uri_component('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93') - assert_equal(@str1, str) - end - - def test_cgi_unescapeURIComponent_preserve_encoding - assert_equal(Encoding::US_ASCII, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding) - assert_equal(Encoding::ASCII_8BIT, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding) - assert_equal(Encoding::UTF_8, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("UTF-8")).encoding) - end - - def test_cgi_unescapeURIComponent_accept_charset - return unless defined?(::Encoding) - - assert_raise(TypeError) {CGI.unescapeURIComponent('', nil)} - assert_separately(%w[-rcgi/util], "#{<<-"begin;"}\n#{<<-"end;"}") - begin; - assert_equal("", CGI.unescapeURIComponent('')) - end; - end def test_cgi_pretty assert_equal("\n \n \n\n",CGI.pretty("")) assert_equal("\n\t\n\t\n\n",CGI.pretty("","\t")) end - def test_cgi_escapeHTML - assert_equal("'&"><", CGI.escapeHTML("'&\"><")) - end - - def test_cgi_escape_html_duplicated - orig = "Ruby".dup.force_encoding("US-ASCII") - str = CGI.escapeHTML(orig) - assert_equal(orig, str) - assert_not_same(orig, str) - end - - def assert_cgi_escape_html_preserve_encoding(str, encoding) - assert_equal(encoding, CGI.escapeHTML(str.dup.force_encoding(encoding)).encoding) - end - - def test_cgi_escape_html_preserve_encoding - Encoding.list do |enc| - assert_cgi_escape_html_preserve_encoding("'&\"><", enc) - assert_cgi_escape_html_preserve_encoding("Ruby", enc) - end - end - - def test_cgi_escape_html_dont_freeze - assert_not_predicate CGI.escapeHTML("'&\"><".dup), :frozen? - assert_not_predicate CGI.escapeHTML("'&\"><".freeze), :frozen? - assert_not_predicate CGI.escapeHTML("Ruby".dup), :frozen? - assert_not_predicate CGI.escapeHTML("Ruby".freeze), :frozen? - end - - def test_cgi_escape_html_large - return if RUBY_ENGINE == 'jruby' - ulong_max, size_max = RbConfig::LIMITS.values_at("ULONG_MAX", "SIZE_MAX") - return unless ulong_max < size_max # Platforms not concerned - - size = (ulong_max / 6 + 1) - begin - str = '"' * size - escaped = CGI.escapeHTML(str) - rescue NoMemoryError - omit "Not enough memory" - rescue => e - end - assert_raise_with_message(ArgumentError, /overflow/, ->{"length = #{escaped.length}"}) do - raise e if e - end - end - - def test_cgi_unescapeHTML - assert_equal("'&\"><", CGI.unescapeHTML("'&"><")) - end - - def test_cgi_unescapeHTML_invalid - assert_equal('&<&>"&abcdefghijklmn', CGI.unescapeHTML('&<&>"&abcdefghijklmn')) - end - - module UnescapeHTMLTests - def test_cgi_unescapeHTML_following_known_first_letter - assert_equal('&a>&q>&l>&g>', CGI.unescapeHTML('&a>&q>&l>&g>')) - end - - def test_cgi_unescapeHTML_following_number_sign - assert_equal('&#>&#x>', CGI.unescapeHTML('&#>&#x>')) - end - - def test_cgi_unescapeHTML_following_invalid_numeric - assert_equal('�>�>', CGI.unescapeHTML('�>�>')) - end - end - - include UnescapeHTMLTests - - Encoding.list.each do |enc| - begin - escaped = "'&"><".encode(enc) - unescaped = "'&\"><".encode(enc) - rescue Encoding::ConverterNotFoundError - next - else - define_method("test_cgi_escapeHTML:#{enc.name}") do - assert_equal(escaped, CGI.escapeHTML(unescaped)) - end - define_method("test_cgi_unescapeHTML:#{enc.name}") do - assert_equal(unescaped, CGI.unescapeHTML(escaped)) - end - end - end - - Encoding.list.each do |enc| - next unless enc.ascii_compatible? - begin - escaped = "%25+%2B" - unescaped = "% +".encode(enc) - rescue Encoding::ConverterNotFoundError - next - else - define_method("test_cgi_escape:#{enc.name}") do - assert_equal(escaped, CGI.escape(unescaped)) - end - define_method("test_cgi_unescape:#{enc.name}") do - assert_equal(unescaped, CGI.unescape(escaped, enc)) - end - end - end - - def test_cgi_unescapeHTML_uppercasecharacter - assert_equal("\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", CGI.unescapeHTML("あいう")) - end - - def test_cgi_include_escape - assert_equal('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93', escape(@str1)) - end - - def test_cgi_include_escapeHTML - assert_equal("'&"><", escapeHTML("'&\"><")) - end - - def test_cgi_include_h - assert_equal("'&"><", h("'&\"><")) - end - - def test_cgi_include_unescape - str = unescape('%26%3C%3E%22+%E3%82%86%E3%82%93%E3%82%86%E3%82%93') - assert_equal(@str1, str) - return unless defined?(::Encoding) - - assert_equal(@str1.encoding, str.encoding) - assert_equal("\u{30E1 30E2 30EA 691C 7D22}", unescape("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) - end - - def test_cgi_include_unescapeHTML - assert_equal("'&\"><", unescapeHTML("'&"><")) - end - - def test_cgi_escapeElement - assert_equal("
<A HREF="url"></A>", escapeElement('
', "A", "IMG")) - assert_equal("
<A HREF="url"></A>", escapeElement('
', ["A", "IMG"])) - assert_equal("
<A HREF="url"></A>", escape_element('
', "A", "IMG")) - assert_equal("
<A HREF="url"></A>", escape_element('
', ["A", "IMG"])) - - assert_equal("<A <A HREF="url"></A>", escapeElement('', "A", "IMG")) - assert_equal("<A <A HREF="url"></A>", escapeElement('', ["A", "IMG"])) - assert_equal("<A <A HREF="url"></A>", escape_element('', "A", "IMG")) - assert_equal("<A <A HREF="url"></A>", escape_element('', ["A", "IMG"])) - - assert_equal("<A <A ", escapeElement('', unescapeElement(escapeHTML('
'), "A", "IMG")) - assert_equal('<BR>', unescapeElement(escapeHTML('
'), ["A", "IMG"])) - assert_equal('<BR>', unescape_element(escapeHTML('
'), "A", "IMG")) - assert_equal('<BR>', unescape_element(escapeHTML('
'), ["A", "IMG"])) - - assert_equal('', unescapeElement(escapeHTML(''), "A", "IMG")) - assert_equal('', unescapeElement(escapeHTML(''), ["A", "IMG"])) - assert_equal('', unescape_element(escapeHTML(''), "A", "IMG")) - assert_equal('', unescape_element(escapeHTML(''), ["A", "IMG"])) - - assert_equal('])) - end - - def test_cgi_unescapeHTML_with_invalid_byte_sequence - input = "\xFF&" - assert_equal(input, CGI.unescapeHTML(input)) - end end