diff --git a/src/FSharp.Data.Csv.Core/CsvInference.fs b/src/FSharp.Data.Csv.Core/CsvInference.fs
index aee791ff1..19358dd95 100644
--- a/src/FSharp.Data.Csv.Core/CsvInference.fs
+++ b/src/FSharp.Data.Csv.Core/CsvInference.fs
@@ -3,6 +3,7 @@ module FSharp.Data.Runtime.CsvInference
 
 open System
 open System.IO
+open System.Text
 open System.Text.RegularExpressions
 open FSharp.Data
 open FSharp.Data.Runtime
@@ -145,28 +146,86 @@ let internal parseHeaders headers numberOfColumns schema unitsOfMeasureProvider
                     header)
         | None -> Array.init numberOfColumns (fun i -> "Column" + (i + 1).ToString())
 
+    /// Splits an explicit schema string into its comma-separated column entries.
+    /// Double quotes delimit literal sections: a comma inside quotes does not end
+    /// the entry, and a doubled quote ("") inside quotes yields one quote character.
+    /// CR and LF are dropped everywhere so a schema may span several lines.
+    /// Entries are returned untrimmed; the final entry is always committed, so the
+    /// result has one more element than the number of unquoted commas.
+    let readSchema (reader: StringReader) =
+        let schemas = ResizeArray<string>()
+        let chars = StringBuilder()
+
+        let (|Comma|_|) chr = if char chr = ',' then Some() else None
+        let (|Quote|_|) chr = if char chr = '"' then Some() else None
+        let (|Char|) c = char c
+
+        // Moves the characters collected so far into the result and resets the builder
+        let commit () =
+            schemas.Add(chars.ToString())
+            chars.Clear() |> ignore
+
+        // Consumes a quoted section, up to and including its closing quote
+        let rec readQuoted () =
+            match reader.Read() with
+            // Unterminated quote: stop here, the caller commits what was collected
+            | -1 -> ()
+            // A doubled quote is an escaped quote character
+            | Quote when reader.Peek() = int '"' ->
+                reader.Read() |> ignore
+                chars.Append('"') |> ignore
+                readQuoted ()
+            // Closing quote
+            | Quote -> ()
+            // Skips CR/LF characters
+            | Char '\r'
+            | Char '\n' -> readQuoted ()
+            // Anything else, commas included, is part of the entry
+            | Char c ->
+                chars.Append(c) |> ignore
+                readQuoted ()
+
+        let rec iter () =
+            match reader.Read() with
+            // End of input, commits the last entry
+            | -1 -> commit ()
+            // Opening quote ('"'), reads the quoted section literally
+            | Quote ->
+                readQuoted ()
+                iter ()
+            // At comma(,), commits the current characters in the builder
+            | Comma ->
+                commit ()
+                iter ()
+            // Skips CR/LF characters
+            | Char '\r'
+            | Char '\n' -> iter ()
+            | Char c ->
+                chars.Append(c) |> ignore
+                iter ()
+
+        iter ()
+        schemas
+
     // If the schema is specified explicitly, then parse the schema
     // (This can specify just types, names of columns or a mix of both)
     let schema =
         if String.IsNullOrWhiteSpace schema then
             Array.zeroCreate headers.Length
         else
-            use reader = new StringReader(schema.Replace("\n", ""))
+            use reader = new StringReader(schema)
 
-            let schemaStr =
-                CsvReader.readCsvFile reader "," '"'
-                |> Seq.exactlyOne
-                |> fst
+            let schemaStr = readSchema reader
 
-            if schemaStr.Length > headers.Length then
+            if schemaStr.Count > headers.Length then
                 failwithf
                     "The provided schema contains %d columns, the inference found %d columns - please check the number of columns and the separator "
-                    schemaStr.Length
+                    schemaStr.Count
                     headers.Length
 
             let schema = Array.zeroCreate headers.Length
 
-            for index = 0 to schemaStr.Length - 1 do
+            for index = 0 to schemaStr.Count - 1 do
                 let item = schemaStr.[index].Trim()
 
                 match item with
diff --git a/tests/FSharp.Data.Tests/CsvProvider.fs b/tests/FSharp.Data.Tests/CsvProvider.fs
index 6a5e87571..0d11a2c4f 100644
--- a/tests/FSharp.Data.Tests/CsvProvider.fs
+++ b/tests/FSharp.Data.Tests/CsvProvider.fs
@@ -669,5 +669,6 @@ let ``Can infer from a multiline schema`` () =
         ProductQuantity (string), ProductPrice (string)">.GetSample ()
     let firstRow = csv.Rows |> Seq.head
 
+    csv.NumberOfColumns |> should equal 16
     firstRow.OrderCreated |> should equal "2022-01-01 10:00:00"
-
+    firstRow.FioFull |> should equal "John Smith"