From e78890e86843e50389edc9b13fb772371512ee2c Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 16 Aug 2022 12:43:36 +0200 Subject: [PATCH 1/2] Add snake case aliases for escapeURIComponent As agreed in [Feature #18822] --- ext/cgi/escape/escape.c | 2 ++ lib/cgi/util.rb | 3 +++ test/cgi/test_cgi_util.rb | 9 +++++++++ 3 files changed, 14 insertions(+) diff --git a/ext/cgi/escape/escape.c b/ext/cgi/escape/escape.c index c5b76de..17a134a 100644 --- a/ext/cgi/escape/escape.c +++ b/ext/cgi/escape/escape.c @@ -458,7 +458,9 @@ InitVM_escape(void) rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1); rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1); rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1); + rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent"); rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1); + rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent"); rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1); rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1); rb_prepend_module(rb_mUtil, rb_mEscape); diff --git a/lib/cgi/util.rb b/lib/cgi/util.rb index 8b129a4..4986e54 100644 --- a/lib/cgi/util.rb +++ b/lib/cgi/util.rb @@ -46,6 +46,7 @@ def escapeURIComponent(string) end buffer.force_encoding(encoding) end + alias escape_uri_component escapeURIComponent # URL-decode a string following RFC 3986 with encoding(optional). # string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred") @@ -59,6 +60,8 @@ def unescapeURIComponent(string, encoding = @@accept_charset) str.valid_encoding? ? 
str : str.force_encoding(string.encoding) end + alias unescape_uri_component unescapeURIComponent + # The set of special characters and their escaped values TABLE_FOR_ESCAPE_HTML__ = { "'" => '&#39;', diff --git a/test/cgi/test_cgi_util.rb b/test/cgi/test_cgi_util.rb index b3a46a1..1bdc0e4 100644 --- a/test/cgi/test_cgi_util.rb +++ b/test/cgi/test_cgi_util.rb @@ -74,6 +74,10 @@ def test_cgi_escapeURIComponent assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93'.ascii_only?, CGI.escapeURIComponent(@str1).ascii_only?) if defined?(::Encoding) end + def test_cgi_escape_uri_component + assert_equal('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93', CGI.escape_uri_component(@str1)) + end + def test_cgi_escapeURIComponent_with_unreserved_characters assert_equal("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~", CGI.escapeURIComponent("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"), @@ -101,6 +105,11 @@ def test_cgi_unescapeURIComponent assert_equal("\u{30E1 30E2 30EA 691C 7D22}", CGI.unescapeURIComponent("\u{30E1 30E2 30EA}%E6%A4%9C%E7%B4%A2")) end + def test_cgi_unescape_uri_component + str = CGI.unescape_uri_component('%26%3C%3E%22%20%E3%82%86%E3%82%93%E3%82%86%E3%82%93') + assert_equal(@str1, str) + end + def test_cgi_unescapeURIComponent_preserve_encoding assert_equal(Encoding::US_ASCII, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("US-ASCII")).encoding) assert_equal(Encoding::ASCII_8BIT, CGI.unescapeURIComponent("%C0%3C%3C".dup.force_encoding("ASCII-8BIT")).encoding) From 11c11ef0e711da87612a87031056b673177de6be Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 7 Nov 2023 08:47:40 +0100 Subject: [PATCH 2/2] Implement escapeURIComponent for JRuby --- .../org/jruby/ext/cgi/escape/CGIEscape.java | 51 ++++++++++++++++--- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/ext/java/org/jruby/ext/cgi/escape/CGIEscape.java b/ext/java/org/jruby/ext/cgi/escape/CGIEscape.java index 
956eebe..c34d09c 100644 --- a/ext/java/org/jruby/ext/cgi/escape/CGIEscape.java +++ b/ext/java/org/jruby/ext/cgi/escape/CGIEscape.java @@ -263,7 +263,7 @@ static boolean url_unreserved_char(int c) { static final byte[] upper_hexdigits = "0123456789ABCDEF".getBytes(RubyEncoding.UTF8); - static IRubyObject optimized_escape(Ruby runtime, RubyString str) { + static IRubyObject optimized_escape(Ruby runtime, RubyString str, boolean escapePlus) { int i, len, beg = 0; RubyString dest = null; byte[] cstrBytes; @@ -285,7 +285,7 @@ static IRubyObject optimized_escape(Ruby runtime, RubyString str) { dest.cat(cstrBytes, cstr + beg, i - beg); beg = i + 1; - if (c == ' ') { + if (escapePlus && c == ' ') { dest.cat('+'); } else { buf[1] = upper_hexdigits[(c >> 4) & 0xf]; @@ -305,7 +305,7 @@ static IRubyObject optimized_escape(Ruby runtime, RubyString str) { } static IRubyObject - optimized_unescape(ThreadContext context, RubyString str, IRubyObject encoding) { + optimized_unescape(ThreadContext context, RubyString str, IRubyObject encoding, boolean unescapePlus) { int i, len, beg = 0; RubyString dest = null; byte[] cstrBytes; @@ -331,7 +331,7 @@ static IRubyObject optimized_escape(Ruby runtime, RubyString str) { buf = ((char_to_number(cstrBytes[cstr + i + 1]) << 4) | char_to_number(cstrBytes[cstr + i + 2])); clen = 2; - } else if (c == '+') { + } else if (unescapePlus && c == '+') { buf = ' '; } else { continue; @@ -416,7 +416,25 @@ public static IRubyObject cgiesc_escape(ThreadContext context, IRubyObject self, RubyString str = _str.convertToString(); if (str.getEncoding().isAsciiCompatible()) { - return optimized_escape(context.runtime, str); + return optimized_escape(context.runtime, str, true); + } else { + return Helpers.invokeSuper(context, self, _str, Block.NULL_BLOCK); + } + } + + /* + * call-seq: + * CGI.escapeURIComponent(string) -> string + * + * Returns URL-escaped string following RFC 3986. 
+ * + */ + @JRubyMethod(name = "escapeURIComponent", alias = { "escape_uri_component" }, module = true, frame = true) + public static IRubyObject cgiesc_escape_uri_component(ThreadContext context, IRubyObject self, IRubyObject _str) { + RubyString str = _str.convertToString(); + + if (str.getEncoding().isAsciiCompatible()) { + return optimized_escape(context.runtime, str, false); } else { return Helpers.invokeSuper(context, self, _str, Block.NULL_BLOCK); } @@ -443,7 +461,28 @@ public static IRubyObject cgiesc_unescape(ThreadContext context, IRubyObject sel if (str.getEncoding().isAsciiCompatible()) { IRubyObject enc = accept_charset(argv, argv.length - 1, 1, self); - return optimized_unescape(context, str, enc); + return optimized_unescape(context, str, enc, true); + } else { + return Helpers.invokeSuper(context, self, argv, Block.NULL_BLOCK); + } + } + + /* + * call-seq: + * CGI.unescapeURIComponent(string, encoding=@@accept_charset) -> string + * + * Returns URL-unescaped string following RFC 3986. + * + */ + @JRubyMethod(name = "unescapeURIComponent", alias = { "unescape_uri_component" }, required = 1, optional = 1, module = true, frame = true) + public static IRubyObject cgiesc_unescape_uri_component(ThreadContext context, IRubyObject self, IRubyObject[] argv) { + IRubyObject _str = argv[0]; + + RubyString str = _str.convertToString(); + + if (str.getEncoding().isAsciiCompatible()) { + IRubyObject enc = accept_charset(argv, argv.length - 1, 1, self); + return optimized_unescape(context, str, enc, false); } else { return Helpers.invokeSuper(context, self, argv, Block.NULL_BLOCK); }