Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions src/FSharp.Data.Csv.Core/CsvInference.fs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module FSharp.Data.Runtime.CsvInference

open System
open System.IO
open System.Text
open System.Text.RegularExpressions
open FSharp.Data
open FSharp.Data.Runtime
Expand Down Expand Up @@ -145,28 +146,55 @@ let internal parseHeaders headers numberOfColumns schema unitsOfMeasureProvider
header)
| None -> Array.init numberOfColumns (fun i -> "Column" + (i + 1).ToString())

/// Splits an explicit schema string into its comma-separated column entries.
///
/// Reads `reader` to the end and returns one entry per column, in order:
///  - double quotes (`"`) are never copied to the output; they only toggle
///    "quoted" mode, in which a comma is kept as ordinary text instead of
///    ending the current entry (so `"Last, First" (string)` is one column);
///  - CR and LF characters are dropped, so a schema may span several lines;
///  - every other character is copied verbatim (entries are not trimmed here).
///
/// The entry being built when the input ends is always added, so empty input
/// yields a single empty entry.
let readSchema (reader: StringReader) =
    let schemas = ResizeArray<string>()
    let chars = StringBuilder()

    // Moves the text accumulated so far into the result and resets the builder.
    let commit () =
        schemas.Add(chars.ToString())
        chars.Clear() |> ignore

    // `inQuotes` is true while the reader is between an opening and a closing quote.
    let rec iter inQuotes =
        match reader.Read() with
        // End of input: commit whatever is pending (possibly the empty string).
        | -1 -> commit ()
        | code ->
            match char code with
            // Quotes only switch mode; a doubled quote inside a quoted entry
            // toggles twice and therefore leaves the mode unchanged.
            | '"' -> iter (not inQuotes)
            // Only a comma outside quotes separates two entries.
            | ',' when not inQuotes ->
                commit ()
                iter false
            // Line breaks are skipped regardless of quoting.
            | '\r'
            | '\n' -> iter inQuotes
            | ch ->
                chars.Append(ch) |> ignore
                iter inQuotes

    iter false
    schemas

// If the schema is specified explicitly, then parse the schema
// (This can specify just types, names of columns or a mix of both)
let schema =
if String.IsNullOrWhiteSpace schema then
Array.zeroCreate headers.Length
else
use reader = new StringReader(schema.Replace("\n", ""))
use reader = new StringReader(schema)

let schemaStr =
CsvReader.readCsvFile reader "," '"'
|> Seq.exactlyOne
|> fst
let schemaStr = readSchema reader

if schemaStr.Length > headers.Length then
if schemaStr.Count > headers.Length then
failwithf
"The provided schema contains %d columns, the inference found %d columns - please check the number of columns and the separator "
schemaStr.Length
schemaStr.Count
headers.Length

let schema = Array.zeroCreate headers.Length

for index = 0 to schemaStr.Length - 1 do
for index = 0 to schemaStr.Count - 1 do
let item = schemaStr.[index].Trim()

match item with
Expand Down
3 changes: 2 additions & 1 deletion tests/FSharp.Data.Tests/CsvProvider.fs
Original file line number Diff line number Diff line change
Expand Up @@ -669,5 +669,6 @@ let ``Can infer from a multiline schema`` () =
ProductQuantity (string),
ProductPrice (string)">.GetSample ()
let firstRow = csv.Rows |> Seq.head
csv.NumberOfColumns |> should equal 16
firstRow.OrderCreated |> should equal "2022-01-01 10:00:00"

firstRow.FioFull |> should equal "John Smith"