From 750658bad23be137511703cd853783891bcb641c Mon Sep 17 00:00:00 2001 From: prozolic <42107886+prozolic@users.noreply.github.com> Date: Fri, 17 Apr 2026 14:34:25 +0900 Subject: [PATCH 1/6] Use WriteStringValueSegment for large strings In StringConverter.Write, use chunk-based WriteStringValueSegment instead of WriteStringValue when the input string length exceeds the safe threshold. This prevents IndexOutOfRangeException that could occur when writing extremely large strings. --- .../Converters/Value/StringConverter.cs | 36 +++++++- .../Serialization/Value.WriteTests.cs | 89 +++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs index 075ecef03ae035..d816c513458919 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs @@ -9,6 +9,12 @@ namespace System.Text.Json.Serialization.Converters { internal sealed class StringConverter : JsonPrimitiveConverter { + // Use 1 MB segments as a performance tradeoff when writing strings larger than MaxSafeStringLength: + // large enough to keep the number of WriteStringValueSegment calls low, but small enough to avoid + // pushing extremely large spans through a single segmented write. This is not a correctness or + // protocol limit; it can be tuned if profiling shows a better size for writer throughput/allocation behavior. + private const int ChunkSize = 1024 * 1024; + public override string? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) { return reader.GetString(); @@ -23,10 +29,38 @@ public override void Write(Utf8JsonWriter writer, string? 
value, JsonSerializerO } else { - writer.WriteStringValue(value.AsSpan()); + + ReadOnlySpan remaining = value.AsSpan(); + if (remaining.Length < ComputeMaxSafeStringLength(writer)) + { + writer.WriteStringValue(remaining); + } + else + { + WriteStringValueSegment(writer, remaining); + } } } + private static void WriteStringValueSegment(Utf8JsonWriter writer, ReadOnlySpan value) + { + int chunkSize = ChunkSize; + while (value.Length > chunkSize) + { + ReadOnlySpan chunk = value.Slice(0, chunkSize); + writer.WriteStringValueSegment(chunk, isFinalSegment: false); + value = value.Slice(chunk.Length); + } + + writer.WriteStringValueSegment(value, isFinalSegment: true); + } + + private static int ComputeMaxSafeStringLength(Utf8JsonWriter writer) + { + int indentOverhead = writer.Options.Indented ? writer.CurrentDepth * writer.Options.IndentSize + writer.Options.NewLine.Length : 0; + return (int.MaxValue / (JsonConstants.MaxExpansionFactorWhileEscaping * JsonConstants.MaxExpansionFactorWhileTranscoding)) - (3 + indentOverhead); + } + internal override string ReadAsPropertyNameCore(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) { Debug.Assert(reader.TokenType == JsonTokenType.PropertyName); diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs index 435ad803c3975a..58e23080385c34 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs @@ -22,6 +22,95 @@ public static void WriteStringWithRelaxedEscaper() Assert.NotEqual(expected, JsonSerializer.Serialize(inputString)); } + [Theory] + [OuterLoop] + [InlineData(119_304_643)] // (int.MaxValue / (MaxExpansionFactorWhileEscaping * MaxExpansionFactorWhileTranscoding)) - 4 + [InlineData(119_304_644)] // 
(int.MaxValue / (MaxExpansionFactorWhileEscaping * MaxExpansionFactorWhileTranscoding)) - 3 + [InlineData(120_000_000)] + public static void WriteExtremelyLargeStrings(int strLength) + { + const char InputCharacter = '\u007F'; + const string EscapedCharacter = "\\u007F"; + + string value = new string(InputCharacter, strLength); + string json = JsonSerializer.Serialize(value, JsonSerializerOptions.Default); + + int expectedJsonLength = 2 + (strLength * EscapedCharacter.Length); + int middleSegmentStart = 1 + ((strLength / 2) * EscapedCharacter.Length); + int lastSegmentStart = 1 + ((strLength - 1) * EscapedCharacter.Length); + + Assert.Equal(expectedJsonLength, json.Length); + Assert.Equal('"', json[0]); + Assert.Equal(EscapedCharacter, json.AsSpan(1, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal('"', json[^1]); +#if NET + Assert.False(json.AsSpan(1, json.Length - 2).ContainsAnyExcept(['\\', 'u', '0', '7', 'F'])); +#endif + } + + [Fact] + [OuterLoop] + public static void WriteExtremelyLargeStringsIndentedRootLevel() + { + const int IndentSize = 127; + const string NewLine = "\n"; + const int StrLength = 120_000_000; + const char InputCharacter = '\u007F'; + const string EscapedCharacter = "\\u007F"; + + string value = new string(InputCharacter, StrLength); + var options = new JsonSerializerOptions { WriteIndented = true, IndentSize = IndentSize, NewLine = NewLine }; + string json = JsonSerializer.Serialize(value, options); + + int expectedJsonLength = 2 + StrLength * EscapedCharacter.Length; + int middleSegmentStart = 1 + (StrLength / 2) * EscapedCharacter.Length; + int lastSegmentStart = 1 + (StrLength - 1) * EscapedCharacter.Length; + + Assert.Equal(expectedJsonLength, json.Length); + Assert.Equal('"', json[0]); + Assert.Equal(EscapedCharacter, 
json.AsSpan(1, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal('"', json[^1]); +#if NET + Assert.False(json.AsSpan(1, json.Length - 2).ContainsAnyExcept(['\\', 'u', '0', '7', 'F'])); +#endif + } + + [Fact] + [OuterLoop] + public static void WriteExtremelyLargeStringsIndentedAsArrayElement() + { + const int IndentSize = 127; + const string NewLine = "\n"; + const int StrLength = 120_000_000; + const char InputCharacter = '\u007F'; + const string EscapedCharacter = "\\u007F"; + + string value = new string(InputCharacter, StrLength); + var options = new JsonSerializerOptions { WriteIndented = true, IndentSize = IndentSize, NewLine = NewLine }; + string[] arr = new[] { value }; + string json = JsonSerializer.Serialize(arr, options); + + // Indented single-element array layout: [ newLine indent "escapedStr" newLine ] + int indent = 1 * IndentSize; + int escapedStrLength = 2 + StrLength * EscapedCharacter.Length; + int expectedJsonLength = 1 + NewLine.Length + indent + escapedStrLength + NewLine.Length + 1; + + Assert.Equal(expectedJsonLength, json.Length); + Assert.Equal('[', json[0]); + Assert.Equal('"', json[1 + NewLine.Length + indent]); + Assert.Equal(EscapedCharacter, json.AsSpan(1 + NewLine.Length + indent + 1, EscapedCharacter.Length).ToString()); + Assert.Equal('"', json[1 + NewLine.Length + indent + escapedStrLength - 1]); + Assert.Equal(']', json[^1]); + + string[] result = JsonSerializer.Deserialize(json, options)!; + Assert.Single(result); + Assert.Equal(value, result[0]); + } + [Fact] public static void WritePrimitives() { From 9d432799e9e51515a18eb3b81b7516c0a160f721 Mon Sep 17 00:00:00 2001 From: prozolic <42107886+prozolic@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:17:13 +0900 Subject: [PATCH 2/6] Potential fix for pull request finding 
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../Serialization/Converters/Value/StringConverter.cs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs index d816c513458919..00d30985c174aa 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs @@ -9,10 +9,11 @@ namespace System.Text.Json.Serialization.Converters { internal sealed class StringConverter : JsonPrimitiveConverter { - // Use 1 MB segments as a performance tradeoff when writing strings larger than MaxSafeStringLength: - // large enough to keep the number of WriteStringValueSegment calls low, but small enough to avoid - // pushing extremely large spans through a single segmented write. This is not a correctness or - // protocol limit; it can be tuned if profiling shows a better size for writer throughput/allocation behavior. + // Use 1 MB segments as a performance tradeoff when writing strings larger than the threshold computed by + // ComputeMaxSafeStringLength(writer): large enough to keep the number of WriteStringValueSegment calls low, + // but small enough to avoid pushing extremely large spans through a single segmented write. This is not a + // correctness or protocol limit; it can be tuned if profiling shows a better size for writer throughput/ + // allocation behavior. private const int ChunkSize = 1024 * 1024; public override string? 
Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) From 626a5405645f8ba0a80f218793485aa742fa8366 Mon Sep 17 00:00:00 2001 From: prozolic <42107886+prozolic@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:04:10 +0900 Subject: [PATCH 3/6] Add cheap guard to StringConverter.Write and add ConditionalTheory and ConditionalFact in test case. --- .../Converters/Value/StringConverter.cs | 2 +- .../Serialization/Value.WriteTests.cs | 33 +++++++++---------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs index 00d30985c174aa..19f68dd07fc126 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs @@ -32,7 +32,7 @@ public override void Write(Utf8JsonWriter writer, string? 
value, JsonSerializerO { ReadOnlySpan remaining = value.AsSpan(); - if (remaining.Length < ComputeMaxSafeStringLength(writer)) + if (remaining.Length <= ChunkSize || remaining.Length < ComputeMaxSafeStringLength(writer)) { writer.WriteStringValue(remaining); } diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs index 58e23080385c34..fe4ec43c136db8 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs @@ -22,7 +22,7 @@ public static void WriteStringWithRelaxedEscaper() Assert.NotEqual(expected, JsonSerializer.Serialize(inputString)); } - [Theory] + [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] [OuterLoop] [InlineData(119_304_643)] // (int.MaxValue / (MaxExpansionFactorWhileEscaping * MaxExpansionFactorWhileTranscoding)) - 4 [InlineData(119_304_644)] // (int.MaxValue / (MaxExpansionFactorWhileEscaping * MaxExpansionFactorWhileTranscoding)) - 3 @@ -45,12 +45,9 @@ public static void WriteExtremelyLargeStrings(int strLength) Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); Assert.Equal('"', json[^1]); -#if NET - Assert.False(json.AsSpan(1, json.Length - 2).ContainsAnyExcept(['\\', 'u', '0', '7', 'F'])); -#endif } - [Fact] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] [OuterLoop] public static void WriteExtremelyLargeStringsIndentedRootLevel() { @@ -61,7 +58,7 @@ public static void WriteExtremelyLargeStringsIndentedRootLevel() const string EscapedCharacter = "\\u007F"; string value = new string(InputCharacter, StrLength); - var options = 
new JsonSerializerOptions { WriteIndented = true, IndentSize = IndentSize, NewLine = NewLine }; + JsonSerializerOptions options = new JsonSerializerOptions { WriteIndented = true, IndentSize = IndentSize, NewLine = NewLine }; string json = JsonSerializer.Serialize(value, options); int expectedJsonLength = 2 + StrLength * EscapedCharacter.Length; @@ -74,12 +71,9 @@ public static void WriteExtremelyLargeStringsIndentedRootLevel() Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); Assert.Equal('"', json[^1]); -#if NET - Assert.False(json.AsSpan(1, json.Length - 2).ContainsAnyExcept(['\\', 'u', '0', '7', 'F'])); -#endif } - [Fact] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] [OuterLoop] public static void WriteExtremelyLargeStringsIndentedAsArrayElement() { @@ -90,7 +84,7 @@ public static void WriteExtremelyLargeStringsIndentedAsArrayElement() const string EscapedCharacter = "\\u007F"; string value = new string(InputCharacter, StrLength); - var options = new JsonSerializerOptions { WriteIndented = true, IndentSize = IndentSize, NewLine = NewLine }; + JsonSerializerOptions options = new JsonSerializerOptions { WriteIndented = true, IndentSize = IndentSize, NewLine = NewLine }; string[] arr = new[] { value }; string json = JsonSerializer.Serialize(arr, options); @@ -99,16 +93,19 @@ public static void WriteExtremelyLargeStringsIndentedAsArrayElement() int escapedStrLength = 2 + StrLength * EscapedCharacter.Length; int expectedJsonLength = 1 + NewLine.Length + indent + escapedStrLength + NewLine.Length + 1; + int stringStart = 1 + NewLine.Length + indent; + int stringContentStart = stringStart + 1; + int middleSegmentStart = stringContentStart + (StrLength / 2) * EscapedCharacter.Length; + int lastSegmentStart = stringContentStart + (StrLength - 1) * EscapedCharacter.Length; + 
Assert.Equal(expectedJsonLength, json.Length); Assert.Equal('[', json[0]); - Assert.Equal('"', json[1 + NewLine.Length + indent]); - Assert.Equal(EscapedCharacter, json.AsSpan(1 + NewLine.Length + indent + 1, EscapedCharacter.Length).ToString()); - Assert.Equal('"', json[1 + NewLine.Length + indent + escapedStrLength - 1]); + Assert.Equal('"', json[stringStart]); + Assert.Equal(EscapedCharacter, json.AsSpan(stringContentStart, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal('"', json[stringStart + escapedStrLength - 1]); Assert.Equal(']', json[^1]); - - string[] result = JsonSerializer.Deserialize(json, options)!; - Assert.Single(result); - Assert.Equal(value, result[0]); } [Fact] From b91ca178de4b0180dff18dbdc2f3500ceb7611be Mon Sep 17 00:00:00 2001 From: prozolic <42107886+prozolic@users.noreply.github.com> Date: Thu, 23 Apr 2026 20:00:02 +0900 Subject: [PATCH 4/6] Revert StringConverter to original state --- .../Converters/Value/StringConverter.cs | 37 +------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs index 19f68dd07fc126..075ecef03ae035 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/Converters/Value/StringConverter.cs @@ -9,13 +9,6 @@ namespace System.Text.Json.Serialization.Converters { internal sealed class StringConverter : JsonPrimitiveConverter { - // Use 1 MB segments as a performance tradeoff when writing strings larger than the threshold computed by - // 
ComputeMaxSafeStringLength(writer): large enough to keep the number of WriteStringValueSegment calls low, - // but small enough to avoid pushing extremely large spans through a single segmented write. This is not a - // correctness or protocol limit; it can be tuned if profiling shows a better size for writer throughput/ - // allocation behavior. - private const int ChunkSize = 1024 * 1024; - public override string? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) { return reader.GetString(); @@ -30,38 +23,10 @@ public override void Write(Utf8JsonWriter writer, string? value, JsonSerializerO } else { - - ReadOnlySpan remaining = value.AsSpan(); - if (remaining.Length <= ChunkSize || remaining.Length < ComputeMaxSafeStringLength(writer)) - { - writer.WriteStringValue(remaining); - } - else - { - WriteStringValueSegment(writer, remaining); - } + writer.WriteStringValue(value.AsSpan()); } } - private static void WriteStringValueSegment(Utf8JsonWriter writer, ReadOnlySpan value) - { - int chunkSize = ChunkSize; - while (value.Length > chunkSize) - { - ReadOnlySpan chunk = value.Slice(0, chunkSize); - writer.WriteStringValueSegment(chunk, isFinalSegment: false); - value = value.Slice(chunk.Length); - } - - writer.WriteStringValueSegment(value, isFinalSegment: true); - } - - private static int ComputeMaxSafeStringLength(Utf8JsonWriter writer) - { - int indentOverhead = writer.Options.Indented ? 
writer.CurrentDepth * writer.Options.IndentSize + writer.Options.NewLine.Length : 0; - return (int.MaxValue / (JsonConstants.MaxExpansionFactorWhileEscaping * JsonConstants.MaxExpansionFactorWhileTranscoding)) - (3 + indentOverhead); - } - internal override string ReadAsPropertyNameCore(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) { Debug.Assert(reader.TokenType == JsonTokenType.PropertyName); From 9d41d56644890517447ee1f4f2de3fbf21bc1f9d Mon Sep 17 00:00:00 2001 From: prozolic <42107886+prozolic@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:51:05 +0900 Subject: [PATCH 5/6] Precompute maxRequiredBytes before dispatching to WriteStringMinimized and WriteStringIndented, using the original input length. - With escaping: value.Length * MaxExpansionFactorWhileEscaping - Without escaping: value.Length * MaxExpansionFactorWhileTranscoding The new calculation is safe for inputs up to int.MaxValue / 6 (~357M chars). WriteStringValue enforces a maximum input size of 166_666_666 chars (MaxUnescapedTokenSize), so no overflow occurs in practice. --- .../Utf8JsonWriter.WriteValues.String.cs | 34 ++++---- .../Serialization/Value.WriteTests.cs | 68 ++-------------- .../Utf8JsonWriterTests.cs | 80 ++++++++++++++++++- 3 files changed, 102 insertions(+), 80 deletions(-) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.String.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.String.cs index 5ff4064d2b59bd..fcbc5b6f9e0583 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.String.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.String.cs @@ -91,11 +91,12 @@ private void WriteStringEscape(ReadOnlySpan value) } else { - WriteStringByOptions(value); + // Each input char may transcode to up to 3 bytes.
+ WriteStringByOptions(value, value.Length * JsonConstants.MaxExpansionFactorWhileTranscoding); } } - private void WriteStringByOptions(ReadOnlySpan value) + private void WriteStringByOptions(ReadOnlySpan value, int maxRequiredBytes) { if (!_options.SkipValidation) { @@ -104,22 +105,21 @@ private void WriteStringByOptions(ReadOnlySpan value) if (_options.Indented) { - WriteStringIndented(value); + WriteStringIndented(value, maxRequiredBytes); } else { - WriteStringMinimized(value); + WriteStringMinimized(value, maxRequiredBytes); } } // TODO: https://github.com/dotnet/runtime/issues/29293 - private void WriteStringMinimized(ReadOnlySpan escapedValue) + private void WriteStringMinimized(ReadOnlySpan escapedValue, int maxRequiredBytes) { - Debug.Assert(escapedValue.Length < (int.MaxValue / JsonConstants.MaxExpansionFactorWhileTranscoding) - 3); + Debug.Assert(maxRequiredBytes >= 0 && maxRequiredBytes < int.MaxValue - 3); - // All ASCII, 2 quotes => escapedValue.Length + 2 - // Optionally, 1 list separator, and up to 3x growth when transcoding - int maxRequired = (escapedValue.Length * JsonConstants.MaxExpansionFactorWhileTranscoding) + 3; + // 2 quotes + optional 1 list separator, plus precomputed max bytes for the payload. 
+ int maxRequired = maxRequiredBytes + 3; if (_memory.Length - BytesPending < maxRequired) { @@ -140,16 +140,14 @@ private void WriteStringMinimized(ReadOnlySpan escapedValue) } // TODO: https://github.com/dotnet/runtime/issues/29293 - private void WriteStringIndented(ReadOnlySpan escapedValue) + private void WriteStringIndented(ReadOnlySpan escapedValue, int maxRequiredBytes) { int indent = Indentation; Debug.Assert(indent <= _indentLength * _options.MaxDepth); + Debug.Assert(maxRequiredBytes >= 0 && maxRequiredBytes < int.MaxValue - indent - 3 - _newLineLength); - Debug.Assert(escapedValue.Length < (int.MaxValue / JsonConstants.MaxExpansionFactorWhileTranscoding) - indent - 3 - _newLineLength); - - // All ASCII, 2 quotes => indent + escapedValue.Length + 2 - // Optionally, 1 list separator, 1-2 bytes for new line, and up to 3x growth when transcoding - int maxRequired = indent + (escapedValue.Length * JsonConstants.MaxExpansionFactorWhileTranscoding) + 3 + _newLineLength; + // indent + 2 quotes + optional 1 list separator + 1-2 bytes for new line, plus precomputed max bytes for the payload. + int maxRequired = indent + maxRequiredBytes + 3 + _newLineLength; if (_memory.Length - BytesPending < maxRequired) { @@ -195,7 +193,11 @@ private void WriteStringEscapeValue(ReadOnlySpan value, int firstEscapeInd JsonWriterHelper.EscapeString(value, escapedValue, firstEscapeIndexVal, _options.Encoder, out int written); - WriteStringByOptions(escapedValue.Slice(0, written)); + // Each original input char contributes at most MaxExpansionFactorWhileEscaping (6) bytes to the output: + // an escaped char becomes an ASCII sequence of at most 6 bytes, and an unescaped char transcodes to at most 3 bytes.
+ int requiredBytes = value.Length * JsonConstants.MaxExpansionFactorWhileEscaping; + + WriteStringByOptions(escapedValue.Slice(0, written), requiredBytes); if (valueArray != null) { diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs index fe4ec43c136db8..1394d84e4dcd6b 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs @@ -22,10 +22,13 @@ public static void WriteStringWithRelaxedEscaper() Assert.NotEqual(expected, JsonSerializer.Serialize(inputString)); } + // NOTE: WriteExtremelyLargeStrings test is constrained to run on Windows and MacOSX because it causes + // problems on Linux due to the way deferred memory allocation works. On Linux, the allocation can + // succeed even if there is not enough memory but then the test may get killed by the OOM killer at the + // time the memory is accessed which triggers the full memory allocation. 
+ [PlatformSpecific(TestPlatforms.Windows | TestPlatforms.OSX)] [ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] [OuterLoop] - [InlineData(119_304_643)] // (int.MaxValue / (MaxExpansionFactorWhileEscaping * MaxExpansionFactorWhileTranscoding)) - 4 - [InlineData(119_304_644)] // (int.MaxValue / (MaxExpansionFactorWhileEscaping * MaxExpansionFactorWhileTranscoding)) - 3 [InlineData(120_000_000)] public static void WriteExtremelyLargeStrings(int strLength) { @@ -47,67 +50,6 @@ public static void WriteExtremelyLargeStrings(int strLength) Assert.Equal('"', json[^1]); } - [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] - [OuterLoop] - public static void WriteExtremelyLargeStringsIndentedRootLevel() - { - const int IndentSize = 127; - const string NewLine = "\n"; - const int StrLength = 120_000_000; - const char InputCharacter = '\u007F'; - const string EscapedCharacter = "\\u007F"; - - string value = new string(InputCharacter, StrLength); - JsonSerializerOptions options = new JsonSerializerOptions { WriteIndented = true, IndentSize = IndentSize, NewLine = NewLine }; - string json = JsonSerializer.Serialize(value, options); - - int expectedJsonLength = 2 + StrLength * EscapedCharacter.Length; - int middleSegmentStart = 1 + (StrLength / 2) * EscapedCharacter.Length; - int lastSegmentStart = 1 + (StrLength - 1) * EscapedCharacter.Length; - - Assert.Equal(expectedJsonLength, json.Length); - Assert.Equal('"', json[0]); - Assert.Equal(EscapedCharacter, json.AsSpan(1, EscapedCharacter.Length).ToString()); - Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); - Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); - Assert.Equal('"', json[^1]); - } - - [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] - [OuterLoop] - public static void 
WriteExtremelyLargeStringsIndentedAsArrayElement() - { - const int IndentSize = 127; - const string NewLine = "\n"; - const int StrLength = 120_000_000; - const char InputCharacter = '\u007F'; - const string EscapedCharacter = "\\u007F"; - - string value = new string(InputCharacter, StrLength); - JsonSerializerOptions options = new JsonSerializerOptions { WriteIndented = true, IndentSize = IndentSize, NewLine = NewLine }; - string[] arr = new[] { value }; - string json = JsonSerializer.Serialize(arr, options); - - // Indented single-element array layout: [ newLine indent "escapedStr" newLine ] - int indent = 1 * IndentSize; - int escapedStrLength = 2 + StrLength * EscapedCharacter.Length; - int expectedJsonLength = 1 + NewLine.Length + indent + escapedStrLength + NewLine.Length + 1; - - int stringStart = 1 + NewLine.Length + indent; - int stringContentStart = stringStart + 1; - int middleSegmentStart = stringContentStart + (StrLength / 2) * EscapedCharacter.Length; - int lastSegmentStart = stringContentStart + (StrLength - 1) * EscapedCharacter.Length; - - Assert.Equal(expectedJsonLength, json.Length); - Assert.Equal('[', json[0]); - Assert.Equal('"', json[stringStart]); - Assert.Equal(EscapedCharacter, json.AsSpan(stringContentStart, EscapedCharacter.Length).ToString()); - Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); - Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); - Assert.Equal('"', json[stringStart + escapedStrLength - 1]); - Assert.Equal(']', json[^1]); - } - [Fact] public static void WritePrimitives() { diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs index b545496857b71d..bd8f6508ce1b85 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs +++ 
b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs @@ -3590,7 +3590,7 @@ public void WritingTooLargeBase64Bytes(JsonWriterOptions options) } } - // NOTE: WritingTooLargeProperty test is constrained to run on Windows and MacOSX because it causes + // NOTE: WritingHugeBase64Bytes test is constrained to run on Windows and MacOSX because it causes // problems on Linux due to the way deferred memory allocation works. On Linux, the allocation can // succeed even if there is not enough memory but then the test may get killed by the OOM killer at the // time the memory is accessed which triggers the full memory allocation. @@ -8271,6 +8271,84 @@ public static void WriteValueWithExtremelyLongValue_ThrowsArgumentException() Assert.Throws(() => writer.WriteStringValue(longValue.AsSpan())); } + // NOTE: WriteExtremelyLargeEscapedStringValue_Minimized test is constrained to run on Windows and MacOSX because it causes + // problems on Linux due to the way deferred memory allocation works. On Linux, the allocation can + // succeed even if there is not enough memory but then the test may get killed by the OOM killer at the + // time the memory is accessed which triggers the full memory allocation. 
+ [PlatformSpecific(TestPlatforms.Windows | TestPlatforms.OSX)] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] + [OuterLoop] + public static void WriteExtremelyLargeEscapedStringValue_Minimized() + { + const char InputCharacter = '\u007F'; + const int EscapedCharacterByteLength = 6; + + try + { + char[] value = new char[MaxUnescapedTokenSize]; + value.AsSpan().Fill(InputCharacter); + + var output = new ArrayBufferWriter(); + using var writer = new Utf8JsonWriter(output); + writer.WriteStringValue(value.AsSpan()); + writer.Flush(); + + ReadOnlySpan written = output.WrittenSpan; + int expectedByteLength = 2 + MaxUnescapedTokenSize * EscapedCharacterByteLength; + Assert.Equal(expectedByteLength, written.Length); + Assert.Equal((byte)'"', written[0]); + Assert.Equal((byte)'"', written[^1]); + } + catch (OutOfMemoryException) + { + throw new SkipTestException("Out of memory allocating large objects"); + } + } + + // NOTE: WriteExtremelyLargeEscapedStringValue_Indented test is constrained to run on Windows and MacOSX because it causes + // problems on Linux due to the way deferred memory allocation works. On Linux, the allocation can + // succeed even if there is not enough memory but then the test may get killed by the OOM killer at the + // time the memory is accessed which triggers the full memory allocation. 
+ [PlatformSpecific(TestPlatforms.Windows | TestPlatforms.OSX)] + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] + [OuterLoop] + public static void WriteExtremelyLargeEscapedStringValue_Indented() + { + const char InputCharacter = '\u007F'; + const int EscapedCharacterByteLength = 6; + const int IndentSize = 127; + const string NewLine = "\n"; + + try + { + char[] value = new char[MaxUnescapedTokenSize]; + value.AsSpan().Fill(InputCharacter); + + var options = new JsonWriterOptions { Indented = true, IndentSize = IndentSize, NewLine = NewLine }; + var output = new ArrayBufferWriter(); + using var writer = new Utf8JsonWriter(output, options); + writer.WriteStartArray(); + writer.WriteStringValue(value.AsSpan()); + writer.WriteEndArray(); + writer.Flush(); + + // Layout: [ \n "escapedStr" \n ] + ReadOnlySpan written = output.WrittenSpan; + int escapedStrByteLength = 2 + MaxUnescapedTokenSize * EscapedCharacterByteLength; + int expectedByteLength = 1 + NewLine.Length + IndentSize + escapedStrByteLength + NewLine.Length + 1; + Assert.Equal(expectedByteLength, written.Length); + Assert.Equal((byte)'[', written[0]); + Assert.Equal((byte)']', written[^1]); + int stringStart = 1 + NewLine.Length + IndentSize; + Assert.Equal((byte)'"', written[stringStart]); + Assert.Equal((byte)'"', written[stringStart + escapedStrByteLength - 1]); + } + catch (OutOfMemoryException) + { + throw new SkipTestException("Out of memory allocating large objects"); + } + } + [Fact] public static void WriteRawValueWithInvalidJson_ValidationDisabled() { From 09b4911c43e475a5be37609d23e8c3840c0feb81 Mon Sep 17 00:00:00 2001 From: prozolic <42107886+prozolic@users.noreply.github.com> Date: Sat, 25 Apr 2026 08:38:56 +0900 Subject: [PATCH 6/6] Add comment in MaxExpansionFactorWhileEscaping and fix test --- .../src/System/Text/Json/JsonConstants.cs | 7 +++- .../Serialization/Value.WriteTests.cs | 34 ++++++++++++------- .../Utf8JsonWriterTests.cs | 11 +++--- 3 
files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs b/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs index 23e35a6e70c694..06f67c379b388a 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs @@ -54,9 +54,14 @@ internal static partial class JsonConstants public const int RemoveFlagsBitMask = 0x7FFFFFFF; // In the worst case, an ASCII character represented as a single utf-8 byte could expand 6x when escaped. - // For example: '+' becomes '\u0043' + // For example: '+' becomes '\u002B' // Escaping surrogate pairs (represented by 3 or 4 utf-8 bytes) would expand to 12 bytes (which is still <= 6x). // The same factor applies to utf-16 characters. + // This factor also serves as an upper bound for the combined escaping-and-transcoding pipeline. + // A non-ASCII unicode character is either: + // - escaped into an ASCII sequence (e.g. \uXXXX), so 1 UTF-16 char -> at most 6 UTF-8 bytes, or + // - written directly as UTF-8 (e.g. when using a non-default encoder such as UnsafeRelaxedJsonEscaping), + // expanding at most 3x (MaxExpansionFactorWhileTranscoding), which is <= 6. public const int MaxExpansionFactorWhileEscaping = 6; // In the worst case, a single UTF-16 character could be expanded to 3 UTF-8 bytes. 
diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs index 1394d84e4dcd6b..02fd7b283cc27d 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/Value.WriteTests.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Text.Encodings.Web; +using Microsoft.DotNet.XUnitExtensions; using Newtonsoft.Json; using Xunit; @@ -35,19 +36,26 @@ public static void WriteExtremelyLargeStrings(int strLength) const char InputCharacter = '\u007F'; const string EscapedCharacter = "\\u007F"; - string value = new string(InputCharacter, strLength); - string json = JsonSerializer.Serialize(value, JsonSerializerOptions.Default); - - int expectedJsonLength = 2 + (strLength * EscapedCharacter.Length); - int middleSegmentStart = 1 + ((strLength / 2) * EscapedCharacter.Length); - int lastSegmentStart = 1 + ((strLength - 1) * EscapedCharacter.Length); - - Assert.Equal(expectedJsonLength, json.Length); - Assert.Equal('"', json[0]); - Assert.Equal(EscapedCharacter, json.AsSpan(1, EscapedCharacter.Length).ToString()); - Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); - Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); - Assert.Equal('"', json[^1]); + try + { + string value = new string(InputCharacter, strLength); + string json = JsonSerializer.Serialize(value, JsonSerializerOptions.Default); + + int expectedJsonLength = 2 + (strLength * EscapedCharacter.Length); + int middleSegmentStart = 1 + ((strLength / 2) * EscapedCharacter.Length); + int lastSegmentStart = 1 + ((strLength - 1) * EscapedCharacter.Length); + + Assert.Equal(expectedJsonLength, json.Length); + Assert.Equal('"', json[0]); + 
Assert.Equal(EscapedCharacter, json.AsSpan(1, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(middleSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal(EscapedCharacter, json.AsSpan(lastSegmentStart, EscapedCharacter.Length).ToString()); + Assert.Equal('"', json[^1]); + } + catch (OutOfMemoryException) + { + throw new SkipTestException($"Insufficient memory to run {nameof(WriteExtremelyLargeStrings)} with length {strLength}."); + } } [Fact] diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs index bd8f6508ce1b85..b329547b451223 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Utf8JsonWriterTests.cs @@ -8288,13 +8288,13 @@ public static void WriteExtremelyLargeEscapedStringValue_Minimized() char[] value = new char[MaxUnescapedTokenSize]; value.AsSpan().Fill(InputCharacter); - var output = new ArrayBufferWriter<byte>(); + int expectedByteLength = 2 + MaxUnescapedTokenSize * EscapedCharacterByteLength; + var output = new ArrayBufferWriter<byte>(expectedByteLength); using var writer = new Utf8JsonWriter(output); writer.WriteStringValue(value.AsSpan()); writer.Flush(); ReadOnlySpan<byte> written = output.WrittenSpan; - int expectedByteLength = 2 + MaxUnescapedTokenSize * EscapedCharacterByteLength; Assert.Equal(expectedByteLength, written.Length); Assert.Equal((byte)'"', written[0]); Assert.Equal((byte)'"', written[^1]); @@ -8324,8 +8324,11 @@ public static void WriteExtremelyLargeEscapedStringValue_Indented() char[] value = new char[MaxUnescapedTokenSize]; value.AsSpan().Fill(InputCharacter); + int escapedStrByteLength = 2 + MaxUnescapedTokenSize * EscapedCharacterByteLength; + int expectedByteLength = 1 + NewLine.Length + IndentSize + escapedStrByteLength + NewLine.Length + 1; + var options = new
JsonWriterOptions { Indented = true, IndentSize = IndentSize, NewLine = NewLine }; - var output = new ArrayBufferWriter<byte>(); + var output = new ArrayBufferWriter<byte>(expectedByteLength); using var writer = new Utf8JsonWriter(output, options); writer.WriteStartArray(); writer.WriteStringValue(value.AsSpan()); @@ -8334,8 +8337,6 @@ public static void WriteExtremelyLargeEscapedStringValue_Indented() // Layout: [ \n "escapedStr" \n ] ReadOnlySpan<byte> written = output.WrittenSpan; - int escapedStrByteLength = 2 + MaxUnescapedTokenSize * EscapedCharacterByteLength; - int expectedByteLength = 1 + NewLine.Length + IndentSize + escapedStrByteLength + NewLine.Length + 1; Assert.Equal(expectedByteLength, written.Length); Assert.Equal((byte)'[', written[0]); Assert.Equal((byte)']', written[^1]);