diff --git a/README.md b/README.md index fdd5a1cd..dad7d828 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ A Node.js framework for extracting mCODE FHIR resources. All resources are profi - [Extraction Date Range](#extraction-date-range) - [CLI From-Date and To-Date (NOT recommended use)](#cli-from-date-and-to-date-not-recommended-use) - [Troubleshooting](#troubleshooting) + - [NULL/NIL values found and replaced with empty-strings](#nullnil-values-found-and-replaced-with-empty-strings) - [Byte Order Markers in CSV Files](#byte-order-markers-in-csv-files) - [Terminology and Architecture](#terminology-and-architecture) - [Glossary](#glossary) @@ -165,6 +166,10 @@ npm start -- --entries-filter --from-date --to-date -- ### Troubleshooting +#### NULL/NIL values found and replaced with empty-strings + +When CSV files are provided containing NULL/NIL values (matched case-insensitively), those values are treated as empty and are replaced with empty strings (`''`). Each CSV extractor, however, may define a set of `unalterableColumns` whose values are immune from this NULL/NIL correction. Every corrected value produces a `debug`-level message, which can be seen by running the extractor with the debug flag set. + #### Byte Order Markers in CSV Files The extraction client has built-in handling of byte order markers for CSV files in UTF-8 and UTF-16LE encodings. When using CSV files in other encodings, if you experience unexpected errors be sure to check for a byte order marker at the beginning of the file. 
/**
 * The standard string normalizer: lower-cases a string so that CSV headers,
 * lookup keys and cell values can be compared case-insensitively.
 *
 * @param {string} str - The string to normalize.
 * @returns {string} The lower-cased string.
 */
function stringNormalizer(str) {
  return str.toLowerCase();
}

/**
 * Translates null/nil-like cell values into empty strings.
 *
 * Every row is shallow-copied, so the rows passed in are never mutated. A cell
 * is replaced with '' when its value, compared case-insensitively, is 'null'
 * or 'nil' and its column is not listed in `unalterableColumns` (column names
 * are also compared case-insensitively). Each replacement is logged at debug
 * level; a single warning is logged if at least one replacement happened.
 *
 * @param {Object[]} data - Rows keyed by column name.
 * @param {string[]} [unalterableColumns=[]] - Columns whose values must never be altered.
 * @returns {Object[]} New rows with null/nil-like values replaced by ''.
 */
function normalizeEmptyValues(data, unalterableColumns = []) {
  const emptyValues = new Set(['null', 'nil'].map(stringNormalizer));
  // `|| []` also tolerates an explicit null, which a default parameter alone does not
  const protectedColumns = new Set((unalterableColumns || []).map(stringNormalizer));
  // Flag tracking if empty values were normalized or not.
  let wasEmptyNormalized = false;

  const newData = data.map((row, i) => {
    const newRow = { ...row };
    Object.keys(row).forEach((col) => {
      if (protectedColumns.has(stringNormalizer(col))) return;
      const value = newRow[col];
      // Only strings can spell NULL/NIL; skipping other types avoids a TypeError
      // from calling toLowerCase on undefined, null or numeric cells.
      if (typeof value !== 'string' || !emptyValues.has(stringNormalizer(value))) return;
      logger.debug(`NULL/NIL values '${value}' found in row-${i}, col-${col}`);
      wasEmptyNormalized = true;
      newRow[col] = '';
    });
    return newRow;
  });

  if (wasEmptyNormalized) {
    logger.warn('NULL/NIL values found and replaced with empty-strings');
  }
  return newData;
}
// Tests for CSVModule: the public `get` lookup and the module-private
// `normalizeEmptyValues` helper.
const path = require('path');
const rewire = require('rewire');
const { CSVModule } = require('../../src/modules');
const exampleResponse = require('./fixtures/csv-response.json');

// normalizeEmptyValues is fetched through rewire's __get__ rather than from the
// module's exports, so the helper can be tested directly.
const CSVModuleRewired = rewire('../../src/modules/CSVModule.js');
const normalizeEmptyValues = CSVModuleRewired.__get__('normalizeEmptyValues');

const INVALID_MRN = 'INVALID MRN';
const csvModule = new CSVModule(path.join(__dirname, './fixtures/example-csv.csv'));
const csvModuleWithBOMs = new CSVModule(path.join(__dirname, './fixtures/example-csv-bom.csv'));

describe('CSVModule', () => {
  describe('get', () => {
    test('Reads data from CSV', async () => {
      const data = await csvModule.get('mrn', 'example-mrn-1');
      expect(data).toEqual(exampleResponse);
    });

    test('Reads data from CSV with a Byte Order Mark', async () => {
      const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1');
      expect(data).toEqual(exampleResponse);
    });

    test('Returns multiple rows', async () => {
      const data = await csvModule.get('mrn', 'example-mrn-2');
      expect(data).toHaveLength(2);
    });

    test('Returns all rows when both key and value are undefined', async () => {
      const data = await csvModule.get();
      expect(data).toHaveLength(csvModule.data.length);
      expect(data).toEqual(csvModule.data);
    });

    test('Returns data with recordedDate after specified from date', async () => {
      const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01');
      expect(data).toHaveLength(1);
    });

    test('Returns data with recordedDate before specified to date', async () => {
      const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01');
      expect(data).toHaveLength(1);
    });

    test('Should return an empty array when key-value pair does not exist', async () => {
      const data = await csvModule.get('mrn', INVALID_MRN);
      expect(data).toEqual([]);
    });

    test('Should return proper value regardless of key casing', async () => {
      const data = await csvModule.get('mRN', 'example-mrn-1');
      expect(data).toEqual(exampleResponse);
    });
  });

  describe('normalizeEmptyValues', () => {
    it('Should turn "null" values into empty strings, regardless of case', () => {
      const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }];
      const normalizedData = normalizeEmptyValues(data);
      normalizedData.forEach((d) => {
        expect(d.key).toBe('');
      });
    });

    it('Should turn "nil" values into empty strings, regardless of case', () => {
      const data = [{ key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
      const normalizedData = normalizeEmptyValues(data);
      normalizedData.forEach((d) => {
        expect(d.key).toBe('');
      });
    });

    it('Should not modify unalterableColumns, regardless of their value', () => {
      const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }, { key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }];
      const normalizedData = normalizeEmptyValues(data, ['key']);
      // Compare against the input so any alteration fails, not just blanking
      expect(normalizedData).toEqual(data);
    });

    it('Should leave all other values unaffected, regardless of case', () => {
      const data = [{ key: 'anything' }, { key: 'any' }, { key: 'thing' }];
      const normalizedData = normalizeEmptyValues(data);
      // Compare against the input so any alteration fails, not just blanking
      expect(normalizedData).toEqual(data);
    });
  });
});