From 54a26b91129500968544e5c72e9778f9961b61ff Mon Sep 17 00:00:00 2001 From: Bongjun Jang Date: Tue, 2 May 2023 11:05:14 +0900 Subject: [PATCH 1/3] test more --- tests/FSharp.Data.Tests/CsvProvider.fs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/FSharp.Data.Tests/CsvProvider.fs b/tests/FSharp.Data.Tests/CsvProvider.fs index 6a5e87571..0d11a2c4f 100644 --- a/tests/FSharp.Data.Tests/CsvProvider.fs +++ b/tests/FSharp.Data.Tests/CsvProvider.fs @@ -669,5 +669,6 @@ let ``Can infer from a multiline schema`` () = ProductQuantity (string), ProductPrice (string)">.GetSample () let firstRow = csv.Rows |> Seq.head + csv.NumberOfColumns |> should equal 16 firstRow.OrderCreated |> should equal "2022-01-01 10:00:00" - + firstRow.FioFull |> should equal "John Smith" From 2911ab3148d0f98d8a03d9893134683ab9442d66 Mon Sep 17 00:00:00 2001 From: Bongjun Jang Date: Tue, 2 May 2023 11:08:15 +0900 Subject: [PATCH 2/3] Skips newline characters reading the string --- src/FSharp.Data.Csv.Core/CsvInference.fs | 44 +++++++++++++++++++----- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/FSharp.Data.Csv.Core/CsvInference.fs b/src/FSharp.Data.Csv.Core/CsvInference.fs index aee791ff1..914085ba8 100644 --- a/src/FSharp.Data.Csv.Core/CsvInference.fs +++ b/src/FSharp.Data.Csv.Core/CsvInference.fs @@ -3,6 +3,7 @@ module FSharp.Data.Runtime.CsvInference open System open System.IO +open System.Text open System.Text.RegularExpressions open FSharp.Data open FSharp.Data.Runtime @@ -145,28 +146,55 @@ let internal parseHeaders headers numberOfColumns schema unitsOfMeasureProvider header) | None -> Array.init numberOfColumns (fun i -> "Column" + (i + 1).ToString()) + let readSchema (reader: StringReader) = + let schemas = ResizeArray() + let chars = StringBuilder() + + let (|Comma|_|) chr = if char chr = ',' then Some () else None + let (|Quote|_|) chr = if char chr = '"' then Some () else None + let (|Char|) c = char c + + let rec iter () = + match reader.Read() with + | -1 -> + schemas.Add(chars.ToString()) + () + // Skips quote character ('"') + | Quote -> iter () + // At comma(,), commits the current characters in the builder + | Comma -> + schemas.Add(chars.ToString()) + chars.Clear() |> ignore + iter () + // Skips CR/LF characters + | Char '\r' | Char '\n' -> + iter () + | Char c -> + chars.Append(c) |> ignore + iter () + + iter () + schemas + // If the schema is specified explicitly, then parse the schema // (This can specify just types, names of columns or a mix of both) let schema = if String.IsNullOrWhiteSpace schema then Array.zeroCreate headers.Length else - use reader = new StringReader(schema.Replace("\n", "")) + use reader = new StringReader(schema) - let schemaStr = - CsvReader.readCsvFile reader "," '"' - |> Seq.exactlyOne - |> fst + let schemaStr = readSchema reader - if schemaStr.Length > headers.Length then + if schemaStr.Count > headers.Length then failwithf "The provided schema contains %d columns, the inference found %d columns - please check the number of columns and the separator " - schemaStr.Length + schemaStr.Count headers.Length let schema = Array.zeroCreate headers.Length - for index = 0 to schemaStr.Length - 1 do + for index = 0 to schemaStr.Count - 1 do let item = schemaStr.[index].Trim() match item with From 21c5f26b188686bd2ae686fd54e47888f08c4b35 Mon Sep 17 00:00:00 2001 From: Bongjun Jang Date: Tue, 2 May 2023 11:22:25 +0900 Subject: [PATCH 3/3] check format --- src/FSharp.Data.Csv.Core/CsvInference.fs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/FSharp.Data.Csv.Core/CsvInference.fs b/src/FSharp.Data.Csv.Core/CsvInference.fs index 914085ba8..19358dd95 100644 --- a/src/FSharp.Data.Csv.Core/CsvInference.fs +++ b/src/FSharp.Data.Csv.Core/CsvInference.fs @@ -150,8 +150,8 @@ let internal parseHeaders headers numberOfColumns schema unitsOfMeasureProvider let schemas = ResizeArray() let chars = StringBuilder() - let (|Comma|_|) chr = if char chr = ',' then Some () else None - let (|Quote|_|) chr = if char chr = '"' then Some () else None + let (|Comma|_|) chr = if char chr = ',' then Some() else None + let (|Quote|_|) chr = if char chr = '"' then Some() else None let (|Char|) c = char c let rec iter () = @@ -167,8 +167,8 @@ let internal parseHeaders headers numberOfColumns schema unitsOfMeasureProvider chars.Clear() |> ignore iter () // Skips CR/LF characters - | Char '\r' | Char '\n' -> - iter () + | Char '\r' + | Char '\n' -> iter () | Char c -> chars.Append(c) |> ignore iter ()