diff --git a/reader.go b/reader.go index 32aa4bc..7a86c19 100644 --- a/reader.go +++ b/reader.go @@ -67,6 +67,9 @@ type CsvReader struct { // fileBaseName is the base name of the file extracted from filePath. // Is used in logging. fileBaseName string + // LazyQuotes, when true, allows a quote to appear in an unquoted field and a non-doubled quote + // to appear in a quoted field. It is forwarded to the LazyQuotes option of the underlying csv reader. + LazyQuotes bool } // New instantiates a new CsvReader object with some default fields preset. @@ -202,6 +205,7 @@ func (cr *CsvReader) readBetweenOffsetsAsync( csvReader := csv.NewReader(bytesReader) csvReader.Comma = cr.ColumnsDelimiter csvReader.FieldsPerRecord = cr.ColumnsCount + csvReader.LazyQuotes = cr.LazyQuotes ForLoop: for { diff --git a/reader_test.go b/reader_test.go index 08830c7..d6dcfe5 100644 --- a/reader_test.go +++ b/reader_test.go @@ -36,6 +36,7 @@ func TestCsvReader(t *testing.T) { t.Run("context is canceled", testCsvReaderWithContextCanceled) t.Run("invalid row", testCsvReaderWithInvalidRow) t.Run("small buffer size", testCsvReaderWithSmallBufferSize) + t.Run("quotes in unquoted field", testCsvReaderWithLazyQuotes) } func testCsvReaderByHeader(withHeader bool) func(t *testing.T) { @@ -180,6 +181,49 @@ func testCsvReaderWithDifferentFileSizesAndMaxGoroutines(rowsCount int64) func(t } } +func testCsvReaderWithLazyQuotes(t *testing.T) { + t.Parallel() + + // arrange: reader over a header-less, 3-column fixture whose first row has quotes inside an unquoted field; LazyQuotes is enabled + subject := bigcsvreader.New() + subject.SetFilePath("testdata/file_with_quote_in_unquoted_field.csv") + subject.ColumnsCount = 3 + subject.FileHasHeader = false + subject.LazyQuotes = true + + expectedRecords := [][]string{ + {"1", "John \"The Bomb\" Miguel", "33"}, + {"2", "Jane", "30"}, + {"3", "Mike", "18"}, + {"4", "Ronaldinho", "23"}, + {"5", "Elisabeth", "45"}, + } + + ctx, cancelCtx := context.WithTimeout(context.Background(), 15*time.Second) + defer cancelCtx() + + // act: start the read and collect its records and error via gatherRecords + rowsChans, errsChan := subject.Read(ctx) + records, err := gatherRecords(rowsChans, errsChan) + + // assert: no error, matching record count, and every expected record present regardless of order + assertNil(t, err) + assertEqual(t, 
len(expectedRecords), len(records)) + for _, expectedRecord := range expectedRecords { + found := false + for _, record := range records { + if reflect.DeepEqual(expectedRecord, record) { + found = true + + break + } + } + if !found { + t.Errorf("record '%v' was expected to be found, but was not", expectedRecord) + } + } +} + func testCsvReaderWithInvalidRow(t *testing.T) { t.Parallel() diff --git a/testdata/file_with_quote_in_unquoted_field.csv b/testdata/file_with_quote_in_unquoted_field.csv new file mode 100644 index 0000000..16bc9a9 --- /dev/null +++ b/testdata/file_with_quote_in_unquoted_field.csv @@ -0,0 +1,5 @@ +1,John "The Bomb" Miguel,33 +2,"Jane",30 +3,"Mike",18 +4,"Ronaldinho",23 +5,Elisabeth,45 \ No newline at end of file