Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions ext/cgi/escape/escape.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RUBY_EXTERN const signed char ruby_digit36_to_number_table[];
#define upper_hexdigits (ruby_hexdigits+16)
#define char_to_number(c) ruby_digit36_to_number_table[(unsigned char)(c)]

static VALUE rb_cCGI, rb_mUtil, rb_mEscape;
static VALUE rb_cCGI, rb_mEscape, rb_mEscapeExt;
static ID id_accept_charset;

#define HTML_ESCAPE_MAX_LEN 6
Expand Down Expand Up @@ -471,17 +471,17 @@ Init_escape(void)
void
InitVM_escape(void)
{
rb_cCGI = rb_define_class("CGI", rb_cObject);
rb_mEscape = rb_define_module_under(rb_cCGI, "Escape");
rb_mUtil = rb_define_module_under(rb_cCGI, "Util");
rb_define_method(rb_mEscape, "escapeHTML", cgiesc_escape_html, 1);
rb_define_method(rb_mEscape, "unescapeHTML", cgiesc_unescape_html, 1);
rb_define_method(rb_mEscape, "escapeURIComponent", cgiesc_escape_uri_component, 1);
rb_define_alias(rb_mEscape, "escape_uri_component", "escapeURIComponent");
rb_define_method(rb_mEscape, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
rb_define_alias(rb_mEscape, "unescape_uri_component", "unescapeURIComponent");
rb_define_method(rb_mEscape, "escape", cgiesc_escape, 1);
rb_define_method(rb_mEscape, "unescape", cgiesc_unescape, -1);
rb_prepend_module(rb_mUtil, rb_mEscape);
rb_extend_object(rb_cCGI, rb_mEscape);
rb_cCGI = rb_define_class("CGI", rb_cObject);
rb_mEscapeExt = rb_define_module_under(rb_cCGI, "EscapeExt");
rb_mEscape = rb_define_module_under(rb_cCGI, "Escape");
rb_define_method(rb_mEscapeExt, "escapeHTML", cgiesc_escape_html, 1);
rb_define_method(rb_mEscapeExt, "unescapeHTML", cgiesc_unescape_html, 1);
rb_define_method(rb_mEscapeExt, "escapeURIComponent", cgiesc_escape_uri_component, 1);
rb_define_alias(rb_mEscapeExt, "escape_uri_component", "escapeURIComponent");
rb_define_method(rb_mEscapeExt, "unescapeURIComponent", cgiesc_unescape_uri_component, -1);
rb_define_alias(rb_mEscapeExt, "unescape_uri_component", "unescapeURIComponent");
rb_define_method(rb_mEscapeExt, "escape", cgiesc_escape, 1);
rb_define_method(rb_mEscapeExt, "unescape", cgiesc_unescape, -1);
rb_prepend_module(rb_mEscape, rb_mEscapeExt);
rb_extend_object(rb_cCGI, rb_mEscapeExt);
}
3 changes: 2 additions & 1 deletion lib/cgi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,11 @@
#

class CGI
VERSION = "0.4.2"
VERSION = "0.5.0.beta1"
end

require 'cgi/core'
require 'cgi/cookie'
require 'cgi/util'
require 'cgi/escape' unless defined?(CGI::EscapeExt)
CGI.autoload(:HtmlExtension, 'cgi/html')
8 changes: 4 additions & 4 deletions lib/cgi/core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
# generating HTTP responses.
#++
class CGI
unless const_defined?(:Util)
module Util
unless const_defined?(:Escape)
module Escape
@@accept_charset = "UTF-8" # :nodoc:
end
include Util
extend Util
include Escape
extend Escape
end

$CGI_ENV = ENV # for FCGI support
Expand Down
224 changes: 224 additions & 0 deletions lib/cgi/escape.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
# frozen_string_literal: true

class CGI
module Escape; end
include Escape
extend Escape
end

module CGI::Escape
@@accept_charset = Encoding::UTF_8 unless defined?(@@accept_charset)

# URL-encode a string into application/x-www-form-urlencoded.
# Space characters (+" "+) are encoded with plus signs (+"+"+)
# url_encoded_string = CGI.escape("'Stop!' said Fred")
# # => "%27Stop%21%27+said+Fred"
def escape(string)
encoding = string.encoding
buffer = string.b
buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
end
buffer.tr!(' ', '+')
buffer.force_encoding(encoding)
end

# URL-decode an application/x-www-form-urlencoded string with encoding(optional).
# string = CGI.unescape("%27Stop%21%27+said+Fred")
# # => "'Stop!' said Fred"
def unescape(string, encoding = @@accept_charset)
str = string.tr('+', ' ')
str = str.b
str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
[m.delete('%')].pack('H*')
end
str.force_encoding(encoding)
str.valid_encoding? ? str : str.force_encoding(string.encoding)
end

# URL-encode a string following RFC 3986
# Space characters (+" "+) are encoded with (+"%20"+)
# url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
# # => "%27Stop%21%27%20said%20Fred"
def escapeURIComponent(string)
encoding = string.encoding
buffer = string.b
buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m|
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
end
buffer.force_encoding(encoding)
end
alias escape_uri_component escapeURIComponent

# URL-decode a string following RFC 3986 with encoding(optional).
# string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
# # => "'Stop!'+said Fred"
def unescapeURIComponent(string, encoding = @@accept_charset)
str = string.b
str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
[m.delete('%')].pack('H*')
end
str.force_encoding(encoding)
str.valid_encoding? ? str : str.force_encoding(string.encoding)
end

alias unescape_uri_component unescapeURIComponent

# The set of special characters and their escaped values
TABLE_FOR_ESCAPE_HTML__ = {
"'" => ''',
'&' => '&',
'"' => '"',
'<' => '&lt;',
'>' => '&gt;',
}

# Escape special characters in HTML, namely '&\"<>
# CGI.escapeHTML('Usage: foo "bar" <baz>')
# # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
def escapeHTML(string)
enc = string.encoding
unless enc.ascii_compatible?
if enc.dummy?
origenc = enc
enc = Encoding::Converter.asciicompat_encoding(enc)
string = enc ? string.encode(enc) : string.b
end
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table)
string.encode!(origenc) if origenc
string
else
string = string.b
string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
string.force_encoding(enc)
end
end

# Unescape a string that has been HTML-escaped
# CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
# # => "Usage: foo \"bar\" <baz>"
def unescapeHTML(string)
enc = string.encoding
unless enc.ascii_compatible?
if enc.dummy?
origenc = enc
enc = Encoding::Converter.asciicompat_encoding(enc)
string = enc ? string.encode(enc) : string.b
end
string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
case $1.encode(Encoding::US_ASCII)
when 'apos' then "'".encode(enc)
when 'amp' then '&'.encode(enc)
when 'quot' then '"'.encode(enc)
when 'gt' then '>'.encode(enc)
when 'lt' then '<'.encode(enc)
when /\A#0*(\d+)\z/ then $1.to_i.chr(enc)
when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
end
end
string.encode!(origenc) if origenc
return string
end
return string unless string.include? '&'
charlimit = case enc
when Encoding::UTF_8; 0x10ffff
when Encoding::ISO_8859_1; 256
else 128
end
string = string.b
string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
match = $1.dup
case match
when 'apos' then "'"
when 'amp' then '&'
when 'quot' then '"'
when 'gt' then '>'
when 'lt' then '<'
when /\A#0*(\d+)\z/
n = $1.to_i
if n < charlimit
n.chr(enc)
else
"&##{$1};"
end
when /\A#x([0-9a-f]+)\z/i
n = $1.hex
if n < charlimit
n.chr(enc)
else
"&#x#{$1};"
end
else
"&#{match};"
end
end
string.force_encoding enc
end

# Synonym for CGI.escapeHTML(str)
alias escape_html escapeHTML
alias h escapeHTML

# Synonym for CGI.unescapeHTML(str)
alias unescape_html unescapeHTML

# TruffleRuby runs the pure-Ruby variant faster, do not use the C extension there
unless RUBY_ENGINE == 'truffleruby'
begin
require 'cgi/escape.so'
rescue LoadError
end
end

# Escape only the tags of certain HTML elements in +string+.
#
# Takes an element or elements or array of elements. Each element
# is specified by the name of the element, without angle brackets.
# This matches both the start and the end tag of that element.
# The attribute list of the open tag will also be escaped (for
# instance, the double-quotes surrounding attribute values).
#
# print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
# # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"
#
# print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
# # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"
def escapeElement(string, *elements)
elements = elements[0] if elements[0].kind_of?(Array)
unless elements.empty?
string.gsub(/<\/?(?:#{elements.join("|")})\b[^<>]*+>?/im) do
CGI.escapeHTML($&)
end
else
string
end
end

# Undo escaping such as that done by CGI.escapeElement()
#
# print CGI.unescapeElement(
# CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
# # "&lt;BR&gt;<A HREF="url"></A>"
#
# print CGI.unescapeElement(
# CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
# # "&lt;BR&gt;<A HREF="url"></A>"
def unescapeElement(string, *elements)
elements = elements[0] if elements[0].kind_of?(Array)
unless elements.empty?
string.gsub(/&lt;\/?(?:#{elements.join("|")})\b(?>[^&]+|&(?![gl]t;)\w+;)*(?:&gt;)?/im) do
unescapeHTML($&)
end
else
string
end
end

# Synonym for CGI.escapeElement(str)
alias escape_element escapeElement

# Synonym for CGI.unescapeElement(str)
alias unescape_element unescapeElement

end
Loading
Loading