From f14b50267ae77f6271047bc192f97e17ba8d2ffe Mon Sep 17 00:00:00 2001 From: Lennart Schlegge Date: Sun, 15 Jan 2023 22:09:01 +0100 Subject: [PATCH 1/4] Fix reading CSV from non seekable network stream --- src/Csv/CsvRuntime.fs | 7 ++--- tests/FSharp.Data.Core.Tests/CsvReader.fs | 36 +++++++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/src/Csv/CsvRuntime.fs b/src/Csv/CsvRuntime.fs index e62249ee0..dc37b6fe1 100644 --- a/src/Csv/CsvRuntime.fs +++ b/src/Csv/CsvRuntime.fs @@ -65,15 +65,14 @@ module internal CsvReader = /// Reads multiple lines from the input, skipping newline characters let rec readLines lineNumber = seq { - match reader.Peek() with + match reader.Read() with | -1 -> () | Char '\r' | Char '\n' -> - reader.Read() |> ignore yield! readLines lineNumber - | _ -> + | Char c -> yield - readLine [] (StringBuilder()) + readLine [] (StringBuilder(string c)) |> List.rev |> Array.ofList, lineNumber diff --git a/tests/FSharp.Data.Core.Tests/CsvReader.fs b/tests/FSharp.Data.Core.Tests/CsvReader.fs index 2c714357c..8a11ac6a2 100644 --- a/tests/FSharp.Data.Core.Tests/CsvReader.fs +++ b/tests/FSharp.Data.Core.Tests/CsvReader.fs @@ -72,3 +72,39 @@ let ``Quoted strings parsed correctly`` () = let expected = [|[|"12"; "a\n\rb"|]; [|"123"; "\"hello\" world"|]|] actual |> should equal expected + +[] +let ``Read all rows from non seekable slow network stream`` () = + + let data = """ABC +DEF +GHI""" + + let encoding = System.Text.Encoding.UTF8 + let bytes = data |> encoding.GetBytes + use memoryStream = new MemoryStream(bytes) + + use fakeNetworkStream = + { new System.IO.Stream () with + override _.CanRead: bool = memoryStream.CanRead + override _.CanSeek: bool = false + override _.CanWrite: bool = false + override _.Flush (): unit = memoryStream.Flush( ) + override _.Length: int64 = raise (System.NotSupportedException()) + override _.Position + with get (): int64 = memoryStream.Position + and set (v: int64): unit = raise (System.NotSupportedException ()) + override _.Read(buffer: byte[], offset: int, _: int): int = + memoryStream.Read (buffer, offset, 1 (* Ignores the count parameter and reads one byte only to simulate a slow network stream *)) + override _.ReadByte(): int = memoryStream.ReadByte () + override _.Seek(offset: int64, origin: SeekOrigin): int64 = raise (System.NotSupportedException ()) + override _.SetLength(value: int64): unit = raise (System.NotSupportedException ()) + override _.Write(buffer: byte[], offset: int, count: int): unit = raise (System.NotSupportedException ()) + override _.WriteByte(value: byte): unit = raise (System.NotSupportedException ()) } + + let reader = new StreamReader(fakeNetworkStream, encoding) + + let actual = readCsvFile reader ";" '"' |> Seq.map fst |> Array.ofSeq + let expected = [| [| "ABC" |]; [| "DEF" |]; [| "GHI" |] |] + + actual |> should equal expected \ No newline at end of file From d4b06aaf841eea5e9f1e7f1cdcee5a25943c7c50 Mon Sep 17 00:00:00 2001 From: Lennart Schlegge Date: Sun, 15 Jan 2023 23:16:28 +0100 Subject: [PATCH 2/4] Extend test with quoted strings and separators --- src/Csv/CsvRuntime.fs | 9 ++++++--- tests/FSharp.Data.Core.Tests/CsvReader.fs | 15 +++++++++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/Csv/CsvRuntime.fs b/src/Csv/CsvRuntime.fs index dc37b6fe1..7c9b3c9ef 100644 --- a/src/Csv/CsvRuntime.fs +++ b/src/Csv/CsvRuntime.fs @@ -50,7 +50,10 @@ module internal CsvReader = /// Reads a line with data that are separated using specified separators /// and may be quoted. Ends with newline or end of input. let rec readLine data (chars: StringBuilder) = - match reader.Read() with + reader.Read() |> readLineWithChar data chars + + and readLineWithChar data (chars: StringBuilder) i = + match i with | -1 | Char '\r' | Char '\n' -> @@ -70,9 +73,9 @@ module internal CsvReader = | Char '\r' | Char '\n' -> yield! readLines lineNumber - | Char c -> + | i -> yield - readLine [] (StringBuilder(string c)) + readLineWithChar [] (StringBuilder()) i |> List.rev |> Array.ofList, lineNumber diff --git a/tests/FSharp.Data.Core.Tests/CsvReader.fs b/tests/FSharp.Data.Core.Tests/CsvReader.fs index 8a11ac6a2..bbd1760a1 100644 --- a/tests/FSharp.Data.Core.Tests/CsvReader.fs +++ b/tests/FSharp.Data.Core.Tests/CsvReader.fs @@ -76,9 +76,11 @@ let ``Quoted strings parsed correctly`` () = [] let ``Read all rows from non seekable slow network stream`` () = - let data = """ABC -DEF -GHI""" + let data = """ABC;1 +DEF;2 +GHI;3 +"QUOTED";4 +;5""" let encoding = System.Text.Encoding.UTF8 let bytes = data |> encoding.GetBytes @@ -105,6 +107,11 @@ GHI""" let reader = new StreamReader(fakeNetworkStream, encoding) let actual = readCsvFile reader ";" '"' |> Seq.map fst |> Array.ofSeq - let expected = [| [| "ABC" |]; [| "DEF" |]; [| "GHI" |] |] + let expected = + [| [| "ABC"; "1" |] + [| "DEF"; "2" |] + [| "GHI"; "3" |] + [| "QUOTED"; "4" |] + [| ""; "5" |] |] actual |> should equal expected \ No newline at end of file From a3b3f284d17a52e57e138e00e52c0f05a59c293c Mon Sep 17 00:00:00 2001 From: Lennart Schlegge Date: Mon, 16 Jan 2023 09:05:45 +0100 Subject: [PATCH 3/4] Refactor readLine function --- src/Csv/CsvRuntime.fs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Csv/CsvRuntime.fs b/src/Csv/CsvRuntime.fs index 7c9b3c9ef..a39dc2e4c 100644 --- a/src/Csv/CsvRuntime.fs +++ b/src/Csv/CsvRuntime.fs @@ -49,11 +49,8 @@ module internal CsvReader = /// Reads a line with data that are separated using specified separators /// and may be quoted. Ends with newline or end of input. - let rec readLine data (chars: StringBuilder) = - reader.Read() |> readLineWithChar data chars - - and readLineWithChar data (chars: StringBuilder) i = - match i with + let rec readLine data (chars: StringBuilder) current = + match current with | -1 | Char '\r' | Char '\n' -> @@ -61,9 +58,9 @@ module internal CsvReader = item :: data | Separator -> let item = chars.ToString() - readLine (item :: data) (StringBuilder()) - | Quote -> readLine data (readString chars) - | Char c -> readLine data (chars.Append c) + readLine (item :: data) (StringBuilder()) (reader.Read()) + | Quote -> readLine data (readString chars) (reader.Read()) + | Char c -> readLine data (chars.Append c) (reader.Read()) /// Reads multiple lines from the input, skipping newline characters let rec readLines lineNumber = @@ -73,9 +70,9 @@ module internal CsvReader = | Char '\r' | Char '\n' -> yield! readLines lineNumber - | i -> + | current -> yield - readLineWithChar [] (StringBuilder()) i + readLine [] (StringBuilder()) current |> List.rev |> Array.ofList, lineNumber From e52790ffb38c15bfe64c4775bf90c24abd36258e Mon Sep 17 00:00:00 2001 From: Lennart Schlegge Date: Tue, 17 Jan 2023 08:42:45 +0100 Subject: [PATCH 4/4] Format code --- src/Csv/CsvRuntime.fs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Csv/CsvRuntime.fs b/src/Csv/CsvRuntime.fs index a39dc2e4c..489251cc8 100644 --- a/src/Csv/CsvRuntime.fs +++ b/src/Csv/CsvRuntime.fs @@ -68,8 +68,7 @@ module internal CsvReader = match reader.Read() with | -1 -> () | Char '\r' - | Char '\n' -> - yield! readLines lineNumber + | Char '\n' -> yield! readLines lineNumber | current -> yield readLine [] (StringBuilder()) current