From 4f763ec40c504a1bed75ccf8b3f135940ca20548 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Wed, 29 May 2019 16:59:01 -0700 Subject: [PATCH 1/4] Add read ahead logic for streams. When reading a jaon object or array from a stream into an object we need to TrySkip to ensure that we have all the needed data for the JsonDocument to create a JsonElement. This is only necessary if we haven't already drained the stream. --- .../JsonSerializer.Read.Stream.cs | 5 ++ .../Json/Serialization/JsonSerializer.Read.cs | 68 ++++++++++++++----- .../Serialization/JsonSerializerOptions.cs | 5 ++ .../tests/Serialization/JsonElementTests.cs | 19 ++++++ 4 files changed, 80 insertions(+), 17 deletions(-) diff --git a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs index 19fdc98562a0..17cc138d4c09 100644 --- a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs +++ b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs @@ -195,6 +195,11 @@ private static void ReadCore( { var reader = new Utf8JsonReader(buffer, isFinalBlock, readerState); + // If we haven't read in all the data we need to read ahead when reading a json object + // or array into a single .NET object so the JsonDocument has all of the needed data + // to parse. + options.ReadAhead = !isFinalBlock; + ReadCore( options, ref reader, diff --git a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.cs b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.cs index d31d4e2290f8..2e30ed48a932 100644 --- a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.cs +++ b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.cs @@ -5,6 +5,7 @@ using System.Buffers; using System.Collections; using System.Diagnostics; +using System.Runtime.CompilerServices; namespace System.Text.Json.Serialization { @@ -21,18 +22,30 @@ private static void ReadCore( { try { - while (reader.Read()) + JsonReaderState initialState = default; + + while (true) { + if (options.ReadAhead) + { + // When we're reading ahead we always have to save the state + // as we don't know if the next token is an opening object or + // array brace. + initialState = reader.CurrentState; + } + + if (!reader.Read()) + { + break; + } + JsonTokenType tokenType = reader.TokenType; if (JsonHelpers.IsInRangeInclusive(tokenType, JsonTokenType.String, JsonTokenType.False)) { Debug.Assert(tokenType == JsonTokenType.String || tokenType == JsonTokenType.Number || tokenType == JsonTokenType.True || tokenType == JsonTokenType.False); - if (HandleValue(tokenType, options, ref reader, ref state)) - { - continue; - } + HandleValue(tokenType, options, ref reader, ref state); } else if (tokenType == JsonTokenType.PropertyName) { @@ -47,9 +60,10 @@ private static void ReadCore( } else if (state.Current.IsProcessingValue) { - if (HandleValue(tokenType, options, ref reader, ref state)) + if (!HandleObjectAsValue(tokenType, options, ref reader, ref state, ref initialState)) { - continue; + // Need more data + break; } } else if (state.Current.IsProcessingDictionary) @@ -82,24 +96,19 @@ private static void ReadCore( { HandleStartArray(options, ref reader, ref state); } - else if (HandleValue(tokenType, options, ref reader, ref state)) + else if (!HandleObjectAsValue(tokenType, options, ref reader, ref state, ref initialState)) { - continue; + // Need more data + break; } } else if (tokenType == JsonTokenType.EndArray) { - if (HandleEndArray(options, ref reader, ref state)) - { - continue; - } + HandleEndArray(options, ref reader, ref state); } else if (tokenType == JsonTokenType.Null) { - if (HandleNull(ref reader, ref state, options)) - { - continue; - } + HandleNull(ref reader, ref state, options); } } } @@ -112,6 +121,31 @@ private static void ReadCore( return; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool HandleObjectAsValue( + JsonTokenType tokenType, + JsonSerializerOptions options, + ref Utf8JsonReader reader, + ref ReadStack readStack, + ref JsonReaderState readerState) + { + if (options.ReadAhead) + { + // Attempt to skip to make sure we have all the data we need. + bool complete = reader.TrySkip(); + + // We need to restore the state in all cases as we need to be positioned back before the current token. + reader = new Utf8JsonReader(reader.OriginalSpan, isFinalBlock: reader.IsFinalBlock, state: readerState); + if (!complete) + { + // Couldn't read to the end of the object, exit out to get more data in the buffer. + return false; + } + } + HandleValue(tokenType, options, ref reader, ref readStack); + return true; + } + private static ReadOnlySpan GetUnescapedString(ReadOnlySpan utf8Source, int idx) { // The escaped name is always longer than the unescaped, so it is safe to use escaped name for the buffer length. diff --git a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializerOptions.cs b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializerOptions.cs index 70a1cc9edec9..00e21f572a08 100644 --- a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializerOptions.cs +++ b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializerOptions.cs @@ -314,5 +314,10 @@ private void VerifyMutable() ThrowHelper.ThrowInvalidOperationException_SerializerOptionsImmutable(); } } + + /// + /// Internal flag to let us know that we need to read ahead in the inner read loop. + /// + internal bool ReadAhead { get; set; } } } diff --git a/src/System.Text.Json/tests/Serialization/JsonElementTests.cs b/src/System.Text.Json/tests/Serialization/JsonElementTests.cs index f146734d52f7..d00dd8e1cce7 100644 --- a/src/System.Text.Json/tests/Serialization/JsonElementTests.cs +++ b/src/System.Text.Json/tests/Serialization/JsonElementTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.IO; using System.Linq; using Xunit; @@ -145,5 +146,23 @@ public void Verify() Assert.Equal("Hello", Array[3].ToString()); } } + + [Theory, + InlineData(5), + InlineData(10), + InlineData(20), + InlineData(1024)] + public static void ReadFromStream(int defaultBufferSize) + { + // Streams need to read ahead when they hit objects or arrays that are assigned to JsonElement or object. + + byte[] data = Encoding.UTF8.GetBytes(@"{""Data"":[1,true,{""City"":""MyCity""},null,""foo""]}"); + MemoryStream stream = new MemoryStream(data); + JsonElement obj = JsonSerializer.ReadAsync(stream, new JsonSerializerOptions { DefaultBufferSize = defaultBufferSize }).Result; + + data = Encoding.UTF8.GetBytes(@"[1,true,{""City"":""MyCity""},null,""foo""]"); + stream = new MemoryStream(data); + obj = JsonSerializer.ReadAsync(stream, new JsonSerializerOptions { DefaultBufferSize = defaultBufferSize }).Result; + } } } From 982493d67af91d8917ba1b169f4a3f2363fac25e Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Thu, 30 May 2019 14:41:47 -0700 Subject: [PATCH 2/4] Track state correctly We need to track consumed separately so we can requeue the reader properly after skipping. Add more stream tests. --- .../JsonSerializer.Read.Stream.cs | 18 ++--- .../Json/Serialization/JsonSerializer.Read.cs | 67 ++++++++++++------- .../Text/Json/Serialization/ReadStack.cs | 5 ++ .../tests/Serialization/JsonElementTests.cs | 11 ++- .../tests/Serialization/SpanTests.cs | 22 ++++++ 5 files changed, 88 insertions(+), 35 deletions(-) diff --git a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs index 17cc138d4c09..a9b03113187f 100644 --- a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs +++ b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs @@ -83,8 +83,8 @@ private static async ValueTask ReadAsync( options = JsonSerializerOptions.s_defaultOptions; } - ReadStack state = default; - state.Current.Initialize(returnType, options); + ReadStack readStack = default; + readStack.Current.Initialize(returnType, options); var readerState = new JsonReaderState(options.GetReaderOptions()); @@ -138,10 +138,11 @@ private static async ValueTask ReadAsync( isFinalBlock, new Span(buffer, 0, bytesInBuffer), options, - ref state); + ref readStack); + + Debug.Assert(readStack.BytesConsumed <= bytesInBuffer); + int bytesConsumed = checked((int)readStack.BytesConsumed); - Debug.Assert(readerState.BytesConsumed <= bytesInBuffer); - int bytesConsumed = (int)readerState.BytesConsumed; bytesInBuffer -= bytesConsumed; if (isFinalBlock) @@ -183,7 +184,7 @@ private static async ValueTask ReadAsync( ThrowHelper.ThrowJsonException_DeserializeDataRemaining(totalBytesRead, bytesInBuffer); } - return (TValue)state.Current.ReturnValue; + return (TValue)readStack.Current.ReturnValue; } private static void ReadCore( @@ -191,7 +192,7 @@ private static void ReadCore( bool isFinalBlock, Span buffer, JsonSerializerOptions options, - ref ReadStack state) + ref ReadStack readStack) { var reader = new Utf8JsonReader(buffer, isFinalBlock, readerState); @@ -199,11 +200,12 @@ private static void ReadCore( // or array into a single .NET object so the JsonDocument has all of the needed data // to parse. options.ReadAhead = !isFinalBlock; + readStack.BytesConsumed = 0; ReadCore( options, ref reader, - ref state); + ref readStack); readerState = reader.CurrentState; } diff --git a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.cs b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.cs index 2e30ed48a932..4d96348b0394 100644 --- a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.cs +++ b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System.Buffers; -using System.Collections; using System.Diagnostics; using System.Runtime.CompilerServices; @@ -18,7 +17,7 @@ public static partial class JsonSerializer private static void ReadCore( JsonSerializerOptions options, ref Utf8JsonReader reader, - ref ReadStack state) + ref ReadStack readStack) { try { @@ -36,6 +35,7 @@ private static void ReadCore( if (!reader.Read()) { + // Need more data break; } @@ -45,58 +45,58 @@ private static void ReadCore( { Debug.Assert(tokenType == JsonTokenType.String || tokenType == JsonTokenType.Number || tokenType == JsonTokenType.True || tokenType == JsonTokenType.False); - HandleValue(tokenType, options, ref reader, ref state); + HandleValue(tokenType, options, ref reader, ref readStack); } else if (tokenType == JsonTokenType.PropertyName) { - HandlePropertyName(options, ref reader, ref state); + HandlePropertyName(options, ref reader, ref readStack); } else if (tokenType == JsonTokenType.StartObject) { - if (state.Current.SkipProperty) + if (readStack.Current.SkipProperty) { - state.Push(); - state.Current.Drain = true; + readStack.Push(); + readStack.Current.Drain = true; } - else if (state.Current.IsProcessingValue) + else if (readStack.Current.IsProcessingValue) { - if (!HandleObjectAsValue(tokenType, options, ref reader, ref state, ref initialState)) + if (!HandleObjectAsValue(tokenType, options, ref reader, ref readStack, ref initialState)) { // Need more data break; } } - else if (state.Current.IsProcessingDictionary) + else if (readStack.Current.IsProcessingDictionary) { - HandleStartDictionary(options, ref reader, ref state); + HandleStartDictionary(options, ref reader, ref readStack); } else { - HandleStartObject(options, ref reader, ref state); + HandleStartObject(options, ref reader, ref readStack); } } else if (tokenType == JsonTokenType.EndObject) { - if (state.Current.Drain) + if (readStack.Current.Drain) { - state.Pop(); + readStack.Pop(); } - else if (state.Current.IsProcessingDictionary) + else if (readStack.Current.IsProcessingDictionary) { - HandleEndDictionary(options, ref reader, ref state); + HandleEndDictionary(options, ref reader, ref readStack); } else { - HandleEndObject(options, ref reader, ref state); + HandleEndObject(options, ref reader, ref readStack); } } else if (tokenType == JsonTokenType.StartArray) { - if (!state.Current.IsProcessingValue) + if (!readStack.Current.IsProcessingValue) { - HandleStartArray(options, ref reader, ref state); + HandleStartArray(options, ref reader, ref readStack); } - else if (!HandleObjectAsValue(tokenType, options, ref reader, ref state, ref initialState)) + else if (!HandleObjectAsValue(tokenType, options, ref reader, ref readStack, ref initialState)) { // Need more data break; @@ -104,20 +104,21 @@ private static void ReadCore( } else if (tokenType == JsonTokenType.EndArray) { - HandleEndArray(options, ref reader, ref state); + HandleEndArray(options, ref reader, ref readStack); } else if (tokenType == JsonTokenType.Null) { - HandleNull(ref reader, ref state, options); + HandleNull(ref reader, ref readStack, options); } } } catch (JsonReaderException e) { // Re-throw with Path information. - ThrowHelper.ReThrowWithPath(e, state.JsonPath); + ThrowHelper.ReThrowWithPath(e, readStack.JsonPath); } + readStack.BytesConsumed += reader.BytesConsumed; return; } @@ -127,21 +128,35 @@ private static bool HandleObjectAsValue( JsonSerializerOptions options, ref Utf8JsonReader reader, ref ReadStack readStack, - ref JsonReaderState readerState) + ref JsonReaderState initialState) { if (options.ReadAhead) { // Attempt to skip to make sure we have all the data we need. bool complete = reader.TrySkip(); - // We need to restore the state in all cases as we need to be positioned back before the current token. - reader = new Utf8JsonReader(reader.OriginalSpan, isFinalBlock: reader.IsFinalBlock, state: readerState); + // We need to restore the state in all cases as we need to be positioned back before + // the current token to either attempt to skip again or to actually read the value in + // HandleValue below. + + reader = new Utf8JsonReader( + reader.OriginalSpan.Slice(checked((int)initialState.BytesConsumed)), + isFinalBlock: reader.IsFinalBlock, + state: initialState); + Debug.Assert(reader.BytesConsumed == 0); + readStack.BytesConsumed += initialState.BytesConsumed; + if (!complete) { // Couldn't read to the end of the object, exit out to get more data in the buffer. return false; } + + // Success, requeue the reader to the token for HandleValue. + reader.Read(); + Debug.Assert(tokenType == reader.TokenType); } + HandleValue(tokenType, options, ref reader, ref readStack); return true; } diff --git a/src/System.Text.Json/src/System/Text/Json/Serialization/ReadStack.cs b/src/System.Text.Json/src/System/Text/Json/Serialization/ReadStack.cs index c394bacb0a70..be6d645b600f 100644 --- a/src/System.Text.Json/src/System/Text/Json/Serialization/ReadStack.cs +++ b/src/System.Text.Json/src/System/Text/Json/Serialization/ReadStack.cs @@ -145,5 +145,10 @@ private string GetPropertyName(in ReadStackFrame frame) return propertyName; } + + /// + /// Bytes consumed in the current loop + /// + public long BytesConsumed; } } diff --git a/src/System.Text.Json/tests/Serialization/JsonElementTests.cs b/src/System.Text.Json/tests/Serialization/JsonElementTests.cs index d00dd8e1cce7..544c02146048 100644 --- a/src/System.Text.Json/tests/Serialization/JsonElementTests.cs +++ b/src/System.Text.Json/tests/Serialization/JsonElementTests.cs @@ -152,7 +152,7 @@ public void Verify() InlineData(10), InlineData(20), InlineData(1024)] - public static void ReadFromStream(int defaultBufferSize) + public void ReadJsonElementFromStream(int defaultBufferSize) { // Streams need to read ahead when they hit objects or arrays that are assigned to JsonElement or object. @@ -163,6 +163,15 @@ public static void ReadFromStream(int defaultBufferSize) data = Encoding.UTF8.GetBytes(@"[1,true,{""City"":""MyCity""},null,""foo""]"); stream = new MemoryStream(data); obj = JsonSerializer.ReadAsync(stream, new JsonSerializerOptions { DefaultBufferSize = defaultBufferSize }).Result; + + // Ensure we fail with incomplete data + data = Encoding.UTF8.GetBytes(@"{""Data"":[1,true,{""City"":""MyCity""},null,""foo""]"); + stream = new MemoryStream(data); + Assert.Throws(() => JsonSerializer.ReadAsync(stream, new JsonSerializerOptions { DefaultBufferSize = defaultBufferSize }).Result); + + data = Encoding.UTF8.GetBytes(@"[1,true,{""City"":""MyCity""},null,""foo"""); + stream = new MemoryStream(data); + Assert.Throws(() => JsonSerializer.ReadAsync(stream, new JsonSerializerOptions { DefaultBufferSize = defaultBufferSize }).Result); } } } diff --git a/src/System.Text.Json/tests/Serialization/SpanTests.cs b/src/System.Text.Json/tests/Serialization/SpanTests.cs index 2dd50b7ed05b..55b56e257e8c 100644 --- a/src/System.Text.Json/tests/Serialization/SpanTests.cs +++ b/src/System.Text.Json/tests/Serialization/SpanTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Collections.Generic; +using System.IO; using Xunit; namespace System.Text.Json.Serialization.Tests @@ -24,6 +25,27 @@ public static void Read(Type classType, byte[] data) ((ITestClass)obj).Verify(); } + [Theory] + [MemberData(nameof(ReadSuccessCases))] + public static void ReadFromStream(Type classType, byte[] data) + { + MemoryStream stream = new MemoryStream(data); + object obj = JsonSerializer.ReadAsync( + stream, + classType).Result; + Assert.IsAssignableFrom(typeof(ITestClass), obj); + ((ITestClass)obj).Verify(); + + // Try again with a smaller initial buffer size to ensure we handle incomplete data + stream = new MemoryStream(data); + obj = JsonSerializer.ReadAsync( + stream, + classType, + new JsonSerializerOptions { DefaultBufferSize = 5 }).Result; + Assert.IsAssignableFrom(typeof(ITestClass), obj); + ((ITestClass)obj).Verify(); + } + [Fact] public static void ReadGenericApi() { From 4375aa311d99ad788e5fdf61b2b7113f307cddd4 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Thu, 30 May 2019 19:03:10 -0700 Subject: [PATCH 3/4] Clarify comments and other feedback. --- .../Text/Json/Serialization/JsonSerializer.Read.Stream.cs | 7 ++++--- src/System.Text.Json/tests/Serialization/SpanTests.cs | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs index a9b03113187f..5903ca233c81 100644 --- a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs +++ b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs @@ -196,9 +196,10 @@ private static void ReadCore( { var reader = new Utf8JsonReader(buffer, isFinalBlock, readerState); - // If we haven't read in all the data we need to read ahead when reading a json object - // or array into a single .NET object so the JsonDocument has all of the needed data - // to parse. + // If we haven't read in the entire stream's payload we'll need signify that we want + // to enable read ahead behaviors to ensure we have complete json objects and arrays + // ({}, []) when needed. (Notably to successfully parse JsonElement via JsonDocument + // to assign to object and JsonElement properties in the constructed .NET object.) options.ReadAhead = !isFinalBlock; readStack.BytesConsumed = 0; diff --git a/src/System.Text.Json/tests/Serialization/SpanTests.cs b/src/System.Text.Json/tests/Serialization/SpanTests.cs index 55b56e257e8c..c54dced58ee1 100644 --- a/src/System.Text.Json/tests/Serialization/SpanTests.cs +++ b/src/System.Text.Json/tests/Serialization/SpanTests.cs @@ -33,6 +33,7 @@ public static void ReadFromStream(Type classType, byte[] data) object obj = JsonSerializer.ReadAsync( stream, classType).Result; + Assert.IsAssignableFrom(typeof(ITestClass), obj); ((ITestClass)obj).Verify(); @@ -42,6 +43,7 @@ public static void ReadFromStream(Type classType, byte[] data) stream, classType, new JsonSerializerOptions { DefaultBufferSize = 5 }).Result; + Assert.IsAssignableFrom(typeof(ITestClass), obj); ((ITestClass)obj).Verify(); } From a975ad42e69e95d0bdd1eeb1d804d0f164d29e23 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Thu, 30 May 2019 19:48:02 -0700 Subject: [PATCH 4/4] Fix comment --- .../Text/Json/Serialization/JsonSerializer.Read.Stream.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs index 5903ca233c81..61dc05e5856f 100644 --- a/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs +++ b/src/System.Text.Json/src/System/Text/Json/Serialization/JsonSerializer.Read.Stream.cs @@ -196,7 +196,7 @@ private static void ReadCore( { var reader = new Utf8JsonReader(buffer, isFinalBlock, readerState); - // If we haven't read in the entire stream's payload we'll need signify that we want + // If we haven't read in the entire stream's payload we'll need to signify that we want // to enable read ahead behaviors to ensure we have complete json objects and arrays // ({}, []) when needed. (Notably to successfully parse JsonElement via JsonDocument // to assign to object and JsonElement properties in the constructed .NET object.)