diff --git a/src/helpers/appUtils.js b/src/helpers/appUtils.js index f5438cad..7ab38a6c 100644 --- a/src/helpers/appUtils.js +++ b/src/helpers/appUtils.js @@ -1,6 +1,6 @@ const fs = require('fs'); const path = require('path'); -const parse = require('csv-parse/lib/sync'); +const { csvParse } = require('./csvParsingUtils'); /** * Parses a provided CSV with MRN column into string array of IDs @@ -11,10 +11,7 @@ const parse = require('csv-parse/lib/sync'); function parsePatientIds(pathToCSV) { // Parse CSV for list of patient IDs const patientIdsCsvPath = path.resolve(pathToCSV); - const patientIds = parse(fs.readFileSync(patientIdsCsvPath, 'utf8'), { - columns: (header) => header.map((column) => column.toLowerCase()), - bom: true, - }).map((row) => { + const patientIds = csvParse(fs.readFileSync(patientIdsCsvPath, 'utf8')).map((row) => { if (!row.mrn) { throw new Error(`${pathToCSV} has no "mrn" column`); } diff --git a/src/helpers/csvParsingUtils.js b/src/helpers/csvParsingUtils.js index 8670e5f2..ca3b9f0c 100644 --- a/src/helpers/csvParsingUtils.js +++ b/src/helpers/csvParsingUtils.js @@ -1,3 +1,4 @@ +const parse = require('csv-parse/lib/sync'); const logger = require('./logger'); // The standard string normalizer function @@ -38,7 +39,27 @@ function normalizeEmptyValues(data, unalterableColumns = []) { return newData; } +// Default options for CSV parsing +const DEFAULT_OPTIONS = { + columns: (header) => header.map((column) => stringNormalizer(column)), + // https://csv.js.org/parse/options/bom/ + bom: true, + // https://csv.js.org/parse/options/skip_empty_lines/ + skip_empty_lines: true, + // NOTE: This will skip any records with empty values, not just skip the empty values themselves + // NOTE-2: The name of the flag changed from v4 (what we use) to v5 (what is documented) + // https://csv.js.org/parse/options/skip_records_with_empty_values/ + skip_lines_with_empty_values: true, +}; + +// Common utility for parsing CSV files +function csvParse(csvData, options = {}) { + return parse(csvData, { ...DEFAULT_OPTIONS, ...options }); +} + + module.exports = { stringNormalizer, normalizeEmptyValues, + csvParse, }; diff --git a/src/modules/CSVFileModule.js b/src/modules/CSVFileModule.js index d098fb32..66f181d6 100644 --- a/src/modules/CSVFileModule.js +++ b/src/modules/CSVFileModule.js @@ -1,19 +1,14 @@ const fs = require('fs'); const moment = require('moment'); -const parse = require('csv-parse/lib/sync'); const logger = require('../helpers/logger'); const { validateCSV } = require('../helpers/csvValidator'); -const { stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils'); +const { csvParse, stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils'); class CSVFileModule { constructor(csvFilePath, unalterableColumns) { // Parse then normalize the data - const parsedData = parse(fs.readFileSync(csvFilePath), { - columns: (header) => header.map((column) => stringNormalizer(column)), - bom: true, - }); + const parsedData = csvParse(fs.readFileSync(csvFilePath)); this.filePath = csvFilePath; - this.data = normalizeEmptyValues(parsedData, unalterableColumns); } diff --git a/src/modules/CSVURLModule.js b/src/modules/CSVURLModule.js index 8beba716..00afb6aa 100644 --- a/src/modules/CSVURLModule.js +++ b/src/modules/CSVURLModule.js @@ -1,9 +1,8 @@ const axios = require('axios'); const moment = require('moment'); -const parse = require('csv-parse/lib/sync'); const logger = require('../helpers/logger'); const { validateCSV } = require('../helpers/csvValidator'); -const { stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils'); +const { csvParse, stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils'); class CSVURLModule { constructor(url, unalterableColumns) { @@ -25,10 +24,7 @@ class CSVURLModule { }); logger.debug('Web request successful'); // Parse then normalize the data - const parsedData = parse(csvData, { - columns: (header) => header.map((column) => stringNormalizer(column)), - bom: true, - }); + const parsedData = csvParse(csvData); logger.debug('CSV Data parsing successful'); this.data = normalizeEmptyValues(parsedData, this.unalterableColumns); } diff --git a/test/modules/CSVFileModule.test.js b/test/modules/CSVFileModule.test.js index 7e62b287..248f7363 100644 --- a/test/modules/CSVFileModule.test.js +++ b/test/modules/CSVFileModule.test.js @@ -4,7 +4,6 @@ const exampleResponse = require('./fixtures/csv-response.json'); const INVALID_MRN = 'INVALID MRN'; const csvFileModule = new CSVFileModule(path.join(__dirname, './fixtures/example-csv.csv')); -const csvFileModuleWithBOMs = new CSVFileModule(path.join(__dirname, './fixtures/example-csv-bom.csv')); describe('CSVFileModule', () => { @@ -15,10 +14,36 @@ describe('CSVFileModule', () => { }); test('Reads data from CSV with a Byte Order Mark', async () => { + const csvFileModuleWithBOMs = new CSVFileModule( + path.join(__dirname, './fixtures/example-csv-bom.csv'), + ); + const data = await csvFileModuleWithBOMs.get('mrn', 'example-mrn-1'); expect(data).toEqual(exampleResponse); }); + test('Reads data from CSV with Empty Values', async () => { + // Five row file, with three rows of empty values + // Should be just two rows of data after ingestion + const csvFileModuleWithEmptyValues = new CSVFileModule( + path.join(__dirname, './fixtures/example-csv-empty-values.csv'), + ); + const data = await csvFileModuleWithEmptyValues.get('mrn', 'example-mrn-1'); + expect(data).toEqual(exampleResponse); + const data2 = await csvFileModuleWithEmptyValues.get('mrn', 'example-mrn-not-ignored'); + expect(data2).toHaveLength(1); + // Should be just two rows of data after ingestion + expect(csvFileModuleWithEmptyValues.data).toHaveLength(2); + }); + + test('Reads data from CSV with Empty Lines', async () => { + const csvFileModuleWithEmptyLines = new CSVFileModule( + path.join(__dirname, './fixtures/example-csv-empty-line.csv'), + ); + const data = await csvFileModuleWithEmptyLines.get('mrn', 'example-mrn-1'); + expect(data).toEqual(exampleResponse); + }); + test('Returns multiple rows', async () => { const data = await csvFileModule.get('mrn', 'example-mrn-2'); expect(data).toHaveLength(2); diff --git a/test/modules/fixtures/example-csv-empty-line.csv b/test/modules/fixtures/example-csv-empty-line.csv new file mode 100644 index 00000000..d2613bfe --- /dev/null +++ b/test/modules/fixtures/example-csv-empty-line.csv @@ -0,0 +1,4 @@ +mrn,trialSubjectID,enrollmentStatus,trialResearchID,trialStatus,dateRecorded +example-mrn-1,subjectId-1,status-1,researchId-1,trialStatus-1,2020-01-10 + +example-mrn-2,subjectId-3,status-3,researchId-3,trialStatus-3,2020-06-10 \ No newline at end of file diff --git a/test/modules/fixtures/example-csv-empty-values.csv b/test/modules/fixtures/example-csv-empty-values.csv new file mode 100644 index 00000000..761b3678 --- /dev/null +++ b/test/modules/fixtures/example-csv-empty-values.csv @@ -0,0 +1,6 @@ +mrn,trialSubjectID,enrollmentStatus,trialResearchID,trialStatus,dateRecorded +example-mrn-1,subjectId-1,status-1,researchId-1,trialStatus-1,2020-01-10 +, , , , , +, , , ,, +,,, , , +example-mrn-not-ignored,,,,, \ No newline at end of file