From 4c4787b3c9a7734ea6d0913b002c47d7a686c584 Mon Sep 17 00:00:00 2001 From: Daniel Azuma Date: Wed, 14 Jan 2026 18:07:38 -0800 Subject: [PATCH] fix: Fixed default charset behavior for various non-text/plain content types Signed-off-by: Daniel Azuma --- lib/cloud_events/content_type.rb | 33 ++++++++++++++++++++-------- test/test_content_type.rb | 32 +++++++++++++++++++++++---- test/test_http_binding.rb | 37 ++++++++++++++++---------------- 3 files changed, 71 insertions(+), 31 deletions(-) diff --git a/lib/cloud_events/content_type.rb b/lib/cloud_events/content_type.rb index 1faa42b..08026fd 100644 --- a/lib/cloud_events/content_type.rb +++ b/lib/cloud_events/content_type.rb @@ -22,17 +22,15 @@ class ContentType # # @param string [String] Content-Type header value in RFC 2045 format # @param default_charset [String] Optional. The charset to use if none is - # specified. Defaults to `us-ascii`. + # specified. If omitted, defaults to `us-ascii` for `text/plain`, `utf-8` for + # other `text/*` and JSON types, and nil for all other types. # def initialize(string, default_charset: nil) @string = string.to_s - @media_type = "text" - @subtype_base = @subtype = "plain" - @subtype_format = nil + @media_type = @subtype = @subtype_base = @subtype_format = @charset = nil @params = [] - @charset = default_charset || "us-ascii" @error_message = nil - parse(consume_comments(@string.strip)) + parse(consume_comments(@string.strip), default_charset) @canonical_string = "#{@media_type}/#{@subtype}" + @params.map { |k, v| "; #{k}=#{maybe_quote(v)}" }.join full_freeze @@ -86,8 +84,10 @@ def initialize(string, default_charset: nil) attr_reader :params ## - # The charset, defaulting to "us-ascii" if none is explicitly set. - # @return [String] + # Returns the charset, which may be an appropriate default (e.g. `us-ascii` + # for `text/plain`, or `utf-8` for `application/json`), or may be nil for + # non-text cases. 
+ # @return [String,nil] # attr_reader :charset @@ -124,7 +124,7 @@ class ParseError < ::StandardError private - def parse(str) + def parse(str, default_charset) @media_type, str = consume_token(str, downcase: true, error_message: "Failed to parse media type") str = consume_special(str, "/") @subtype, str = consume_token(str, downcase: true, error_message: "Failed to parse subtype") @@ -139,6 +139,13 @@ def parse(str) end rescue ParseError => e @error_message = e.message + ensure + unless @subtype + @media_type = "text" + @subtype_base = @subtype = "plain" + @subtype_format = nil + end + @charset ||= default_charset || choose_default_charset end def consume_token(str, downcase: false, error_message: nil) @@ -210,6 +217,14 @@ def maybe_quote(str) "\"#{str}\"" end + def choose_default_charset + if @media_type == "text" && @subtype == "plain" + "us-ascii" + elsif @media_type == "text" || @subtype_base == "json" || @subtype_format == "json" + "utf-8" + end + end + def full_freeze instance_variables.each do |iv| instance_variable_get(iv).freeze diff --git a/test/test_content_type.rb b/test/test_content_type.rb index f08066a..a690870 100644 --- a/test/test_content_type.rb +++ b/test/test_content_type.rb @@ -29,18 +29,42 @@ assert Ractor.shareable?(content_type) if defined? Ractor end - it "defaults to us-ascii charset" do - content_type = CloudEvents::ContentType.new("application/json") + it "defaults to us-ascii charset for text/plain" do + content_type = CloudEvents::ContentType.new("text/plain") assert_equal "us-ascii", content_type.charset assert Ractor.shareable?(content_type) if defined? Ractor end - it "defaults to a given charset" do - content_type = CloudEvents::ContentType.new("application/json", default_charset: "utf-8") + it "defaults to utf-8 charset for application/json" do + content_type = CloudEvents::ContentType.new("application/json") + assert_equal "utf-8", content_type.charset + assert Ractor.shareable?(content_type) if defined? 
Ractor + end + + it "defaults to utf-8 charset for application/cloudevents+json" do + content_type = CloudEvents::ContentType.new("application/cloudevents+json") assert_equal "utf-8", content_type.charset assert Ractor.shareable?(content_type) if defined? Ractor end + it "defaults to utf-8 charset for text/html" do + content_type = CloudEvents::ContentType.new("text/html") + assert_equal "utf-8", content_type.charset + assert Ractor.shareable?(content_type) if defined? Ractor + end + + it "does not default to a charset for image/png" do + content_type = CloudEvents::ContentType.new("image/png") + assert_nil content_type.charset + assert Ractor.shareable?(content_type) if defined? Ractor + end + + it "defaults to a given charset" do + content_type = CloudEvents::ContentType.new("text/html", default_charset: "utf-16") + assert_equal "utf-16", content_type.charset + assert Ractor.shareable?(content_type) if defined? Ractor + end + it "recognizes charset param" do content_type = CloudEvents::ContentType.new("application/json; charset=utf-8") assert_equal [["charset", "utf-8"]], content_type.params diff --git a/test/test_http_binding.rb b/test/test_http_binding.rb index 6cce2f8..4b9ff62 100644 --- a/test/test_http_binding.rb +++ b/test/test_http_binding.rb @@ -20,10 +20,11 @@ let(:encoded_quoted_type) { "Hello%20\"Ruby%20world\"%20\"this\\\"is\\\\a\\1string\"%20okay" } let(:spec_version) { "1.0" } let(:my_simple_data) { "12345" } - let(:my_json_escaped_simple_data) { '"12345"' } + let(:my_json_object) { {"a" => "รค", "b" => "๐Ÿ˜€"} } + let(:my_json_escaped_data) { '{"a":"รค","b":"๐Ÿ˜€"}' } let(:my_content_type_string) { "text/plain; charset=us-ascii" } let(:my_content_type) { CloudEvents::ContentType.new(my_content_type_string) } - let(:my_json_content_type_string) { "application/json; charset=us-ascii" } + let(:my_json_content_type_string) { "application/json" } let(:my_json_content_type) { CloudEvents::ContentType.new(my_json_content_type_string) } 
let(:my_schema_string) { "/my_schema" } let(:my_schema) { URI.parse(my_schema_string) } @@ -31,7 +32,7 @@ let(:my_time_string) { "2020-01-12T20:52:05-08:00" } let(:my_time) { DateTime.rfc3339(my_time_string) } let(:my_trace_context) { "1234567890;9876543210" } - let :my_json_struct do + let(:my_json_struct) do { "data" => my_simple_data, "datacontenttype" => my_content_type_string, @@ -46,9 +47,9 @@ end let(:my_json_struct_encoded) { JSON.dump(my_json_struct) } let(:my_json_batch_encoded) { JSON.dump([my_json_struct]) } - let :my_json_data_struct do + let(:my_json_data_struct) do { - "data" => my_simple_data, + "data" => my_json_object, "datacontenttype" => my_json_content_type_string, "dataschema" => my_schema_string, "id" => my_id, @@ -60,7 +61,7 @@ } end let(:my_json_data_struct_encoded) { JSON.dump(my_json_data_struct) } - let :my_simple_binary_mode do + let(:my_simple_binary_mode) do { "rack.input" => StringIO.new(my_simple_data), "HTTP_CE_ID" => my_id, @@ -73,9 +74,9 @@ "HTTP_CE_TIME" => my_time_string, } end - let :my_json_binary_mode do + let(:my_json_binary_mode) do { - "rack.input" => StringIO.new(my_json_escaped_simple_data), + "rack.input" => StringIO.new(my_json_escaped_data), "HTTP_CE_ID" => my_id, "HTTP_CE_SOURCE" => my_source_string, "HTTP_CE_TYPE" => my_type, @@ -86,7 +87,7 @@ "HTTP_CE_TIME" => my_time_string, } end - let :my_minimal_binary_mode do + let(:my_minimal_binary_mode) do { "rack.input" => StringIO.new(""), "HTTP_CE_ID" => my_id, @@ -95,7 +96,7 @@ "HTTP_CE_SPECVERSION" => spec_version, } end - let :my_extensions_binary_mode do + let(:my_extensions_binary_mode) do { "rack.input" => StringIO.new(my_simple_data), "HTTP_CE_ID" => my_id, @@ -109,7 +110,7 @@ "HTTP_CE_TRACECONTEXT" => my_trace_context, } end - let :my_nonascii_binary_mode do + let(:my_nonascii_binary_mode) do { "rack.input" => StringIO.new(my_simple_data), "HTTP_CE_ID" => my_id, @@ -122,7 +123,7 @@ "HTTP_CE_TIME" => my_time_string, } end - let :my_simple_event do + 
let(:my_simple_event) do CloudEvents::Event::V1.new(data_encoded: my_simple_data, data: my_simple_data, datacontenttype: my_content_type_string, @@ -134,9 +135,9 @@ time: my_time_string, type: my_type) end - let :my_json_event do - CloudEvents::Event::V1.new(data_encoded: my_json_escaped_simple_data, - data: my_simple_data, + let(:my_json_event) do + CloudEvents::Event::V1.new(data_encoded: my_json_escaped_data, + data: my_json_object, datacontenttype: my_json_content_type_string, dataschema: my_schema_string, id: my_id, @@ -146,7 +147,7 @@ time: my_time_string, type: my_type) end - let :my_minimal_event do + let(:my_minimal_event) do CloudEvents::Event::V1.new(data_encoded: "", data: "", id: my_id, @@ -154,7 +155,7 @@ specversion: spec_version, type: my_type) end - let :my_extensions_event do + let(:my_extensions_event) do CloudEvents::Event::V1.new(data_encoded: my_simple_data, data: my_simple_data, datacontenttype: my_content_type_string, @@ -167,7 +168,7 @@ tracecontext: my_trace_context, type: my_type) end - let :my_nonascii_event do + let(:my_nonascii_event) do CloudEvents::Event::V1.new(data_encoded: my_simple_data, data: my_simple_data, datacontenttype: my_content_type_string,