Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/rotten-avocados-play.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'@flatfile/plugin-delimiter-extractor': patch
'@flatfile/plugin-xlsx-extractor': patch
---

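Update to documentation for the `skipEmptyLines` option. The delimiter extractor now honors the caller's `skipEmptyLines` setting (`true`, `'greedy'`, or `false`) and defaults to `false` when the option is not provided.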
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
header1,header2,header3
column1,column2,column3
,,
, ,
,,
column4,column5,column6
column7,column8,column9
Expand Down
47 changes: 38 additions & 9 deletions plugins/delimiter-extractor/src/parser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ describe('parser', () => {
path.join(__dirname, '../ref/test-complex.pound')
)

const csvBuffer: Buffer = fs.readFileSync(
path.join(__dirname, '../ref/csv.txt')
const emptyLinesBuffer: Buffer = fs.readFileSync(
path.join(__dirname, '../ref/test-empty-lines.txt')
)

test('colon to WorkbookCapture', async () => {
Expand Down Expand Up @@ -139,12 +139,41 @@ describe('parser', () => {
parseBuffer(colonBasicBuffer, { delimiter: '#' })
)
})
test('skip empty lines', async () => {
const parsedBuffer = await parseBuffer(csvBuffer, {
// Verifies parseBuffer with `skipEmptyLines: true` against the
// test-empty-lines fixture: the expected output still contains the
// whitespace-only row (' ' in every column) but no row whose cells are all ''.
test('skip empty lines: true', async () => {
const parsedBuffer = await parseBuffer(emptyLinesBuffer, {
delimiter: ',',
skipEmptyLines: true,
})
const data = parsedBuffer.Sheet1.data
expect(data).toEqual([
{
header1: { value: 'column1' },
header2: { value: 'column2' },
header3: { value: 'column3' },
},
// Whitespace-only cells are expected to survive in `true` mode.
{
header1: { value: ' ' },
header2: { value: ' ' },
header3: { value: ' ' },
},
{
header1: { value: 'column4' },
header2: { value: 'column5' },
header3: { value: 'column6' },
},
{
header1: { value: 'column7' },
header2: { value: 'column8' },
header3: { value: 'column9' },
},
])
})
test('skip empty lines: greedy', async () => {
const parsedBuffer = await parseBuffer(emptyLinesBuffer, {
delimiter: ',',
skipEmptyLines: 'greedy',
})
const data = parsedBuffer.Sheet1.data
expect(data).toEqual([
{
header1: { value: 'column1' },
Expand All @@ -163,8 +192,8 @@ describe('parser', () => {
},
])
})
test("don'tskip empty lines", async () => {
const parsedBuffer = await parseBuffer(csvBuffer, {
test('skip empty lines: false', async () => {
const parsedBuffer = await parseBuffer(emptyLinesBuffer, {
delimiter: ',',
skipEmptyLines: false,
})
Expand All @@ -176,9 +205,9 @@ describe('parser', () => {
header3: { value: 'column3' },
},
{
header1: { value: '' },
header2: { value: '' },
header3: { value: '' },
header1: { value: ' ' },
header2: { value: ' ' },
header3: { value: ' ' },
},
{
header1: { value: '' },
Expand Down
26 changes: 17 additions & 9 deletions plugins/delimiter-extractor/src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ export async function parseBuffer(
try {
const skipEmptyLines = options?.headerSelectionEnabled
? false
: options?.skipEmptyLines === false
? false
: 'greedy'
: options?.skipEmptyLines ?? false
const fileContents = buffer.toString('utf8')
const results: ParseResult<Record<string, string>> = Papa.parse(
fileContents,
Expand Down Expand Up @@ -76,12 +74,22 @@ export async function parseBuffer(

const headers = prependNonUniqueHeaderColumns(columnHeaders)

const data: Flatfile.RecordData[] = rows.map((row) => {
const mappedRow = mapKeys(row, (key) => headers[key])
return mapValues(mappedRow, (value) => ({
value: transform(value),
})) as Flatfile.RecordData
})
const data: Flatfile.RecordData[] = rows
.filter((row) => {
if (!skipEmptyLines) return true
const isEmpty = Object.values(row).every(
skipEmptyLines === 'greedy'
? isNullOrWhitespace
: (value) => value === ''
)
return !isEmpty
})
.map((row) => {
const mappedRow = mapKeys(row, (key) => headers[key])
return mapValues(mappedRow, (value) => ({
value: transform(value),
})) as Flatfile.RecordData
})

let metadata: { rowHeaders: number[] } | null

Expand Down
4 changes: 4 additions & 0 deletions plugins/xlsx-extractor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ detecting headers in the file. By default, the first 10 rows are scanned for
the row with the most non-empty cells.


#### `skipEmptyLines` - `default: "false"` - `boolean` - (optional)
The `skipEmptyLines` parameter specifies whether empty lines are skipped
during extraction. By default (`false`), empty lines are included.

#### `debug` - `default: "false"` - `boolean` - (optional)
The `debug` parameter lets you toggle on/off helpful debugging messages for
development purposes.
Expand Down
1 change: 1 addition & 0 deletions plugins/xlsx-extractor/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { parseBuffer } from './parser'
* @property {number} chunkSize - the size of chunk to process when inserting records.
* @property {number} parallel - the quantity of parallel process when inserting records.
* @property {GetHeadersOptions} headerDetectionOptions - the options for header detection.
* @property {boolean} skipEmptyLines - if true, skip empty lines; if false, include empty lines.
* @property {boolean} debug - if true, display helpful console logs.
*/
export interface ExcelExtractorOptions {
Expand Down