From 3b18e7c591daee051ede46d1f7dffbbb8335ddc3 Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Thu, 1 Jul 2021 16:37:39 -0400 Subject: [PATCH 1/9] add axios for web requests --- package-lock.json | 6 +++--- package.json | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0d28857c..7150fe2b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4190,9 +4190,9 @@ "dev": true }, "follow-redirects": { - "version": "1.13.1", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.13.1.tgz", - "integrity": "sha512-SSG5xmZh1mkPGyKzjZP8zLjltIfpW32Y5QpdNJyjcfGxK3qo3NDDkZOZSFiGn1A6SclQxY9GzEwAHQ3dmYRWpg==" + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", + "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==" }, "for-in": { "version": "1.0.2", diff --git a/package.json b/package.json index 84961513..d55a8b6a 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "dependencies": { "ajv": "^6.12.6", "antlr4": "4.8.0", + "axios": "^0.21.1", "commander": "^6.2.0", "csv-parse": "^4.8.8", "fhir-crud-client": "^1.2.2", From 7b894b098a16891fe8d04d944cf666372c3d0562 Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Fri, 2 Jul 2021 12:48:36 -0400 Subject: [PATCH 2/9] implemented CSVURLModule; updated CSVextractors to support new arg --- src/client/BaseClient.js | 35 ++++--- src/extractors/BaseCSVExtractor.js | 28 +++--- src/extractors/CSVAdverseEventExtractor.js | 4 +- .../CSVCancerDiseaseStatusExtractor.js | 4 +- .../CSVCancerRelatedMedicationExtractor.js | 4 +- .../CSVClinicalTrialInformationExtractor.js | 4 +- src/extractors/CSVConditionExtractor.js | 4 +- src/extractors/CSVObservationExtractor.js | 4 +- src/extractors/CSVPatientExtractor.js | 9 +- src/extractors/CSVProcedureExtractor.js | 4 +- src/extractors/CSVStagingExtractor.js | 4 +- .../CSVTreatmentPlanChangeExtractor.js | 4 +- src/helpers/csvParsingUtils.js | 43 +++++++++ src/helpers/csvValidator.js | 12 ++- src/index.js | 5 +- .../{CSVModule.js => CSVFileModule.js} | 53 ++++------- src/modules/CSVURLModule.js | 71 +++++++++++++++ src/modules/index.js | 6 +- test/client/BaseClient.test.js | 12 +-- test/helpers/csvParsingUtils.test.js | 37 ++++++++ test/modules/CSVFileModule.test.js | 53 +++++++++++ test/modules/CSVModule.test.js | 91 ------------------- 22 files changed, 300 insertions(+), 191 deletions(-) create mode 100644 src/helpers/csvParsingUtils.js rename src/modules/{CSVModule.js => CSVFileModule.js} (50%) create mode 100644 src/modules/CSVURLModule.js create mode 100644 test/helpers/csvParsingUtils.test.js create mode 100644 test/modules/CSVFileModule.test.js delete mode 100644 test/modules/CSVModule.test.js diff --git a/src/client/BaseClient.js b/src/client/BaseClient.js index 9f20faf6..b86da8a3 100644 --- a/src/client/BaseClient.js +++ b/src/client/BaseClient.js @@ -25,32 +25,37 @@ class BaseClient { } // Given an extractor configuration, initialize all the necessary extractors - initializeExtractors(extractorConfig, commonExtractorArgs) { - let allExtractorsValid = true; - + async initializeExtractors(extractorConfig, commonExtractorArgs) { + // Loop to initialize the extractors extractorConfig.forEach((curExtractorConfig) => { const { label, type, constructorArgs } = curExtractorConfig; logger.debug(`Initializing ${label} extractor with type ${type}`); const ExtractorClass = this.extractorClasses[type]; - try { const newExtractor = new ExtractorClass({ ...commonExtractorArgs, ...constructorArgs }); - - if (newExtractor.validate) { - const isExtractorValid = newExtractor.validate(); - allExtractorsValid = (allExtractorsValid && isExtractorValid); - if (isExtractorValid) { - logger.debug(`Extractor ${label} PASSED CSV validation`); - } else { - logger.debug(`Extractor ${label} FAILED CSV validation`); - } - } - this.extractors.push(newExtractor); } catch (e) { throw new Error(`Unable to initialize ${label} extractor with type ${type}: ${e.message}`); } }); + // For validation, we are looping over extractors and performing an async operation on each. + // We need to loop without forEach (since forEach is sequential). + // Using Reduce to compute the validity of all extractors + const allExtractorsValid = await this.extractors.reduce(async (curExtractorsValid, curExtractor) => { + const { name } = curExtractor.constructor; + + if (curExtractor.validate) { + logger.debug(`Validating ${name}`); + const isExtractorValid = await curExtractor.validate(); + if (isExtractorValid) { + logger.debug(`Extractor ${name} PASSED CSV validation`); + } else { + logger.warn(`Extractor ${name} FAILED CSV validation`); + } + return (curExtractorsValid && isExtractorValid); + } + return curExtractorsValid; + }, true); if (allExtractorsValid) { logger.info('Validation succeeded'); diff --git a/src/extractors/BaseCSVExtractor.js b/src/extractors/BaseCSVExtractor.js index c78abdfa..601add07 100644 --- a/src/extractors/BaseCSVExtractor.js +++ b/src/extractors/BaseCSVExtractor.js @@ -1,25 +1,25 @@ -const path = require('path'); -const { Extractor } = require('./Extractor'); -const { CSVModule } = require('../modules'); -const { validateCSV } = require('../helpers/csvValidator'); const logger = require('../helpers/logger'); +const { Extractor } = require('./Extractor'); +const { CSVFileModule, CSVURLModule } = require('../modules'); class BaseCSVExtractor extends Extractor { - constructor({ filePath, csvSchema, unalterableColumns }) { + constructor({ filePath, url, csvSchema, unalterableColumns }) { super(); this.unalterableColumns = unalterableColumns || []; this.csvSchema = csvSchema; - this.filePath = path.resolve(filePath); - this.csvModule = new CSVModule(this.filePath, this.unalterableColumns); + if (filePath) { + this.filePath = filePath; + this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns); + } else if (url) { + this.url = url; + this.csvModule = new CSVURLModule(this.url, this.unalterableColumns); + } else { + throw new Error('Trying to instantiate a CSVExtractor without a filePath or url'); + } } - validate() { - if (this.csvSchema) { - logger.info(`Validating CSV file for ${this.filePath}`); - return validateCSV(this.filePath, this.csvSchema, this.csvModule.data); - } - logger.warn(`No CSV schema provided for ${this.filePath}`); - return true; + async validate() { + return this.csvModule.validate(this.csvSchema); } } diff --git a/src/extractors/CSVAdverseEventExtractor.js b/src/extractors/CSVAdverseEventExtractor.js index bdbdecbe..b0031d1d 100644 --- a/src/extractors/CSVAdverseEventExtractor.js +++ b/src/extractors/CSVAdverseEventExtractor.js @@ -69,8 +69,8 @@ function formatData(adverseEventData, patientId) { } class CSVAdverseEventExtractor extends BaseCSVExtractor { - constructor({ filePath }) { - super({ filePath }); + constructor({ filePath, url }) { + super({ filePath, url }); } async getAdverseEventData(mrn) { diff --git a/src/extractors/CSVCancerDiseaseStatusExtractor.js b/src/extractors/CSVCancerDiseaseStatusExtractor.js index 69e0dc39..4535d519 100644 --- a/src/extractors/CSVCancerDiseaseStatusExtractor.js +++ b/src/extractors/CSVCancerDiseaseStatusExtractor.js @@ -8,8 +8,8 @@ const logger = require('../helpers/logger'); const { CSVCancerDiseaseStatusSchema } = require('../helpers/schemas/csv'); class CSVCancerDiseaseStatusExtractor extends BaseCSVExtractor { - constructor({ filePath, implementation }) { - super({ filePath, csvSchema: CSVCancerDiseaseStatusSchema }); + constructor({ filePath, url, implementation }) { + super({ filePath, url, csvSchema: CSVCancerDiseaseStatusSchema }); this.implementation = implementation; } diff --git a/src/extractors/CSVCancerRelatedMedicationExtractor.js b/src/extractors/CSVCancerRelatedMedicationExtractor.js index 15765695..a2780096 100644 --- a/src/extractors/CSVCancerRelatedMedicationExtractor.js +++ b/src/extractors/CSVCancerRelatedMedicationExtractor.js @@ -46,8 +46,8 @@ function formatData(medicationData, patientId) { } class CSVCancerRelatedMedicationExtractor extends BaseCSVExtractor { - constructor({ filePath }) { - super({ filePath }); + constructor({ filePath, url }) { + super({ filePath, url }); } async getMedicationData(mrn) { diff --git a/src/extractors/CSVClinicalTrialInformationExtractor.js b/src/extractors/CSVClinicalTrialInformationExtractor.js index f215eb98..db5a7f21 100644 --- a/src/extractors/CSVClinicalTrialInformationExtractor.js +++ b/src/extractors/CSVClinicalTrialInformationExtractor.js @@ -8,8 +8,8 @@ const { CSVClinicalTrialInformationSchema } = require('../helpers/schemas/csv'); class CSVClinicalTrialInformationExtractor extends BaseCSVExtractor { - constructor({ filePath, clinicalSiteID, clinicalSiteSystem }) { - super({ filePath, csvSchema: CSVClinicalTrialInformationSchema }); + constructor({ filePath, url, clinicalSiteID, clinicalSiteSystem }) { + super({ filePath, url, csvSchema: CSVClinicalTrialInformationSchema }); if (!clinicalSiteID) logger.warn(`${this.constructor.name} expects a value for clinicalSiteID but got ${clinicalSiteID}`); this.clinicalSiteID = clinicalSiteID; this.clinicalSiteSystem = clinicalSiteSystem; diff --git a/src/extractors/CSVConditionExtractor.js b/src/extractors/CSVConditionExtractor.js index 56a2775a..f5778cc3 100644 --- a/src/extractors/CSVConditionExtractor.js +++ b/src/extractors/CSVConditionExtractor.js @@ -49,8 +49,8 @@ function formatData(conditionData, patientId) { } class CSVConditionExtractor extends BaseCSVExtractor { - constructor({ filePath }) { - super({ filePath, csvSchema: CSVConditionSchema }); + constructor({ filePath, url }) { + super({ filePath, url, csvSchema: CSVConditionSchema }); } async getConditionData(mrn) { diff --git a/src/extractors/CSVObservationExtractor.js b/src/extractors/CSVObservationExtractor.js index 8a2c8cfe..52c97721 100644 --- a/src/extractors/CSVObservationExtractor.js +++ b/src/extractors/CSVObservationExtractor.js @@ -42,8 +42,8 @@ function formatData(observationData, patientId) { } class CSVObservationExtractor extends BaseCSVExtractor { - constructor({ filePath }) { - super({ filePath }); + constructor({ filePath, url }) { + super({ filePath, url }); } async getObservationData(mrn) { diff --git a/src/extractors/CSVPatientExtractor.js b/src/extractors/CSVPatientExtractor.js index 7cfb5577..0ebe4c3b 100644 --- a/src/extractors/CSVPatientExtractor.js +++ b/src/extractors/CSVPatientExtractor.js @@ -54,10 +54,15 @@ function joinAndReformatData(patientData) { } class CSVPatientExtractor extends BaseCSVExtractor { - constructor({ filePath, mask = [] }) { + constructor({ filePath, url, mask = [] }) { // Define CSV Columns whose values should never be altered const unalterableColumns = ['familyName', 'givenName']; - super({ filePath, csvSchema: CSVPatientSchema, unalterableColumns }); + super({ + filePath, + url, + // csvSchema: CSVPatientSchema, + unalterableColumns, + }); this.mask = mask; } diff --git a/src/extractors/CSVProcedureExtractor.js b/src/extractors/CSVProcedureExtractor.js index 590bf7fc..e61e49db 100644 --- a/src/extractors/CSVProcedureExtractor.js +++ b/src/extractors/CSVProcedureExtractor.js @@ -48,8 +48,8 @@ function formatData(procedureData, patientId) { } class CSVProcedureExtractor extends BaseCSVExtractor { - constructor({ filePath }) { - super({ filePath }); + constructor({ filePath, url }) { + super({ filePath, url }); } async getProcedureData(mrn) { diff --git a/src/extractors/CSVStagingExtractor.js b/src/extractors/CSVStagingExtractor.js index 3cd73bd2..31823c17 100644 --- a/src/extractors/CSVStagingExtractor.js +++ b/src/extractors/CSVStagingExtractor.js @@ -63,8 +63,8 @@ function formatStagingData(stagingData, categoryIds, patientId) { } class CSVStagingExtractor extends BaseCSVExtractor { - constructor({ filePath }) { - super({ filePath }); + constructor({ filePath, url }) { + super({ filePath, url }); } async getStagingData(mrn) { diff --git a/src/extractors/CSVTreatmentPlanChangeExtractor.js b/src/extractors/CSVTreatmentPlanChangeExtractor.js index b24d580b..fb64c393 100644 --- a/src/extractors/CSVTreatmentPlanChangeExtractor.js +++ b/src/extractors/CSVTreatmentPlanChangeExtractor.js @@ -70,8 +70,8 @@ function formatData(tpcData, patientId) { } class CSVTreatmentPlanChangeExtractor extends BaseCSVExtractor { - constructor({ filePath }) { - super({ filePath, csvSchema: CSVTreatmentPlanChangeSchema }); + constructor({ filePath, url }) { + super({ filePath, url, csvSchema: CSVTreatmentPlanChangeSchema }); } async getTPCData(mrn, fromDate, toDate) { diff --git a/src/helpers/csvParsingUtils.js b/src/helpers/csvParsingUtils.js new file mode 100644 index 00000000..f831a122 --- /dev/null +++ b/src/helpers/csvParsingUtils.js @@ -0,0 +1,43 @@ +const logger = require('./logger'); + +// The standard string normalizer function +function stringNormalizer(str) { + return str.toLowerCase(); +} + +// For translating null/nil-like values into empty strings +function normalizeEmptyValues(data, unalterableColumns = []) { + const EMPTY_VALUES = ['null', 'nil'].map(stringNormalizer); + const normalizedUnalterableColumns = unalterableColumns.map(stringNormalizer); + // Flag tracking if empty values were normalized or not. + let wasEmptyNormalized = false; + const newData = data.map((row, i) => { + const newRow = { ...row }; + // Filter out unalterable columns + const columnsToNormalize = Object.keys(row).filter( + (col) => !normalizedUnalterableColumns.includes(stringNormalizer(col)), + ); + columnsToNormalize.forEach((col) => { + const value = newRow[col]; + // If the value for this row-col combo is a value that should be empty, replace it + if (EMPTY_VALUES.includes(stringNormalizer(value))) { + logger.debug( + `NULL/NIL values '${value}' found in row-${i}, col-${col}`, + ); + wasEmptyNormalized = true; + newRow[col] = ''; + } + }); + return newRow; + }); + + if (wasEmptyNormalized) { + logger.warn('NULL/NIL values found and replaced with empty-strings'); + } + return newData; +} + +module.exports = { + stringNormalizer, + normalizeEmptyValues, +}; diff --git a/src/helpers/csvValidator.js b/src/helpers/csvValidator.js index eba51eae..b764e9af 100644 --- a/src/helpers/csvValidator.js +++ b/src/helpers/csvValidator.js @@ -1,7 +1,9 @@ const _ = require('lodash'); const logger = require('./logger'); -function validateCSV(pathToCSVFile, csvSchema, csvData) { +// Validates csvData against the csvSchema +// Uses the csvFileIdentifier in logs for readability +function validateCSV(csvFileIdentifier, csvSchema, csvData) { let isValid = true; // Check headers @@ -10,17 +12,17 @@ function validateCSV(pathToCSVFile, csvSchema, csvData) { const fileDiff = _.difference(headers, csvSchema.headers.map((h) => h.name.toLowerCase())); if (fileDiff.length > 0) { - logger.warn(`Found extra column(s) in CSV ${pathToCSVFile}: "${fileDiff.join(',')}"`); + logger.warn(`Found extra column(s) in CSV ${csvFileIdentifier}: "${fileDiff.join(',')}"`); } if (schemaDiff.length > 0) { schemaDiff.forEach((sd) => { const headerSchema = csvSchema.headers.find((h) => h.name.toLowerCase() === sd); if (headerSchema.required) { - logger.error(`Column ${sd} is marked as required but is missing in CSV ${pathToCSVFile}`); + logger.error(`Column ${sd} is marked as required but is missing in CSV ${csvFileIdentifier}`); isValid = false; } else { - logger.warn(`Column ${sd} is missing in CSV ${pathToCSVFile}`); + logger.warn(`Column ${sd} is missing in CSV ${csvFileIdentifier}`); } }); } @@ -31,7 +33,7 @@ function validateCSV(pathToCSVFile, csvSchema, csvData) { const schema = csvSchema.headers.find((h) => h.name === key); if (schema && schema.required && !value) { - logger.error(`Column ${key} marked as required but missing value in row ${i + 1} column ${j + 1} in CSV ${pathToCSVFile}`); + logger.error(`Column ${key} marked as required but missing value in row ${i + 1} column ${j + 1} in CSV ${csvFileIdentifier}`); isValid = false; } }); diff --git a/src/index.js b/src/index.js index 53ebd3a9..db3791d1 100644 --- a/src/index.js +++ b/src/index.js @@ -35,7 +35,7 @@ const { MCODERadiationProcedureExtractor, MCODESurgicalProcedureExtractor, } = require('./extractors'); -const { BaseFHIRModule, CSVModule } = require('./modules'); +const { BaseFHIRModule, CSVFileModule, CSVURLModule } = require('./modules'); const { getEthnicityDisplay, getPatientName, getRaceCodesystem, @@ -85,7 +85,8 @@ module.exports = { CSVCancerRelatedMedicationExtractor, CSVClinicalTrialInformationExtractor, CSVConditionExtractor, - CSVModule, + CSVFileModule, + CSVURLModule, CSVPatientExtractor, CSVObservationExtractor, CSVProcedureExtractor, diff --git a/src/modules/CSVModule.js b/src/modules/CSVFileModule.js similarity index 50% rename from src/modules/CSVModule.js rename to src/modules/CSVFileModule.js index 286a8f67..d098fb32 100644 --- a/src/modules/CSVModule.js +++ b/src/modules/CSVFileModule.js @@ -2,52 +2,23 @@ const fs = require('fs'); const moment = require('moment'); const parse = require('csv-parse/lib/sync'); const logger = require('../helpers/logger'); +const { validateCSV } = require('../helpers/csvValidator'); +const { stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils'); -// The standard string normalizer function -function stringNormalizer(str) { - return str.toLowerCase(); -} - -// For translating null/nil-like values into empty strings -function normalizeEmptyValues(data, unalterableColumns = []) { - const EMPTY_VALUES = ['null', 'nil'].map(stringNormalizer); - const normalizedUnalterableColumns = unalterableColumns.map(stringNormalizer); - // Flag tracking if empty values were normalized or not. - let wasEmptyNormalized = false; - const newData = data.map((row, i) => { - const newRow = { ...row }; - // Filter out unalterable columns - const columnsToNormalize = Object.keys(row).filter((col) => !normalizedUnalterableColumns.includes(stringNormalizer(col))); - columnsToNormalize.forEach((col) => { - const value = newRow[col]; - // If the value for this row-col combo is a value that should be empty, replace it - if (EMPTY_VALUES.includes(stringNormalizer(value))) { - logger.debug(`NULL/NIL values '${value}' found in row-${i}, col-${col}`); - wasEmptyNormalized = true; - newRow[col] = ''; - } - }); - return newRow; - }); - - if (wasEmptyNormalized) { - logger.warn('NULL/NIL values found and replaced with empty-strings'); - } - return newData; -} - -class CSVModule { +class CSVFileModule { constructor(csvFilePath, unalterableColumns) { // Parse then normalize the data const parsedData = parse(fs.readFileSync(csvFilePath), { columns: (header) => header.map((column) => stringNormalizer(column)), bom: true, }); + this.filePath = csvFilePath; + this.data = normalizeEmptyValues(parsedData, unalterableColumns); } async get(key, value, fromDate, toDate) { - logger.debug(`Get csvModule info by key '${key}'`); + logger.debug(`Get csvFileModule info by key '${key}'`); // return all rows if key and value aren't provided if (!key && !value) return this.data; let result = this.data.filter((d) => d[stringNormalizer(key)] === value); @@ -62,8 +33,18 @@ class CSVModule { if (result.length === 0) logger.warn('No data for patient within specified time range'); return result; } + + async validate(csvSchema) { + if (csvSchema) { + logger.info(`Validating CSV file for ${this.filePath}`); + return validateCSV(this.filePath, csvSchema, this.data); + } + logger.warn(`No CSV schema provided for ${this.filePath}`); + return true; + } } + module.exports = { - CSVModule, + CSVFileModule, }; diff --git a/src/modules/CSVURLModule.js b/src/modules/CSVURLModule.js new file mode 100644 index 00000000..178e88a0 --- /dev/null +++ b/src/modules/CSVURLModule.js @@ -0,0 +1,71 @@ +const axios = require('axios'); +const moment = require('moment'); +const parse = require('csv-parse/lib/sync'); +const logger = require('../helpers/logger'); +const { validateCSV } = require('../helpers/csvValidator'); +const { stringNormalizer, normalizeEmptyValues } = require('../helpers/csvParsingUtils'); + +class CSVURLModule { + constructor(url, unalterableColumns) { + this.unalterableColumns = unalterableColumns; + this.url = url; + this.data = undefined; + } + + // Ensures that this.data contains normalized CSV data fetched from the module's url + // If data is already cached, this function does nothing + async fillDataCache() { + if (!this.data) { + const csvData = await axios(this.url).then((res) => res.data); + // Parse then normalize the data + const parsedData = parse(csvData, { + columns: (header) => header.map((column) => stringNormalizer(column)), + bom: true, + }); + this.data = normalizeEmptyValues(parsedData, this.unalterableColumns); + } + } + + async get(key, value, fromDate, toDate) { + await this.fillDataCache(); + + logger.debug(`Get csvURLModule info by key '${key}'`); + // return all rows if key and value aren't provided + if (!key && !value) return this.data; + let result = this.data.filter((d) => d[stringNormalizer(key)] === value); + if (result.length === 0) { + logger.warn(`CSV Record with provided key '${key}' and value was not found`); + return result; + } + + // If fromDate and toDate is provided, filter out all results that fall outside that timespan + if (fromDate && moment(fromDate).isValid()) result = result.filter((r) => !(r.daterecorded && moment(fromDate).isAfter(r.daterecorded))); + if (toDate && moment(toDate).isValid()) result = result.filter((r) => !(r.daterecorded && moment(toDate).isBefore(r.daterecorded))); + if (result.length === 0) logger.warn('No data for patient within specified time range'); + return result; + } + + async validate(csvSchema) { + await this.fillDataCache(); + + if (csvSchema) { + logger.info(`Validating CSV data for ${this.url}`); + if (!this.data) { + const csvData = await axios(this.url); + // Parse then normalize the data + const parsedData = parse(csvData, { + columns: (header) => header.map((column) => stringNormalizer(column)), + bom: true, + }); + this.data = normalizeEmptyValues(parsedData, this.unalterableColumns); + } + return validateCSV(this.url, csvSchema, this.csvModule.data); + } + logger.warn(`No CSV schema provided for data found at ${this.url}`); + return true; + } +} + +module.exports = { + CSVURLModule, +}; diff --git a/src/modules/index.js b/src/modules/index.js index 8a6de6b9..6934fc70 100644 --- a/src/modules/index.js +++ b/src/modules/index.js @@ -1,7 +1,9 @@ const { BaseFHIRModule } = require('./BaseFHIRModule'); -const { CSVModule } = require('./CSVModule'); +const { CSVFileModule } = require('./CSVFileModule'); +const { CSVURLModule } = require('./CSVURLModule'); module.exports = { BaseFHIRModule, - CSVModule, + CSVFileModule, + CSVURLModule, }; diff --git a/test/client/BaseClient.test.js b/test/client/BaseClient.test.js index 6d316ac2..c2d68690 100644 --- a/test/client/BaseClient.test.js +++ b/test/client/BaseClient.test.js @@ -29,16 +29,16 @@ describe('BaseClient', () => { }); describe('initializeExtractors', () => { - it('should fail if extractors are missing a type', () => { + it('should fail if extractors are missing a type', async () => { const extractorsWithoutType = [ { label: 'Broken extractor', type: undefined, }, ]; - expect(() => engine.initializeExtractors(extractorsWithoutType)).toThrowError(); + await expect(engine.initializeExtractors(extractorsWithoutType)).rejects.toThrowError(); }); - it('should fail on un-registered extractors', () => { + it('should fail on un-registered extractors', async () => { // No extractors are registered by default const unregisteredExtractors = [ { @@ -46,9 +46,9 @@ describe('BaseClient', () => { type: 'UnregisteredExtractor', }, ]; - expect(() => engine.initializeExtractors(unregisteredExtractors)).toThrowError(); + await expect(engine.initializeExtractors(unregisteredExtractors)).rejects.toThrowError(); }); - it('should add extractors to engine if they are registered', () => { + it('should add extractors to engine if they are registered', async () => { // Register classes const extractorClasses = [ class Extractor {}, @@ -61,7 +61,7 @@ describe('BaseClient', () => { type: 'Extractor', }, ]; - engine.initializeExtractors(registeredExtractors); + await engine.initializeExtractors(registeredExtractors); expect(engine.extractors).toHaveLength(registeredExtractors.length); expect(engine.extractors[0]).toBeInstanceOf(extractorClasses[0]); }); diff --git a/test/helpers/csvParsingUtils.test.js b/test/helpers/csvParsingUtils.test.js new file mode 100644 index 00000000..76f0896f --- /dev/null +++ b/test/helpers/csvParsingUtils.test.js @@ -0,0 +1,37 @@ +const { normalizeEmptyValues, stringNormalizer } = require('../../src/helpers/csvParsingUtils.js'); + +describe('csvParsingUtils', () => { + describe('normalizeEmptyValues', () => { + it('Should turn "null" values into empty strings, regardless of case', () => { + const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }]; + const normalizedData = normalizeEmptyValues(data); + normalizedData.forEach((d) => { + expect(d.key).toBe(''); + }); + }); + + it('Should turn "nil" values into empty strings, regardless of case', () => { + const data = [{ key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }]; + const normalizedData = normalizeEmptyValues(data); + normalizedData.forEach((d) => { + expect(d.key).toBe(''); + }); + }); + + it('Should not modify unalterableColumns, regardless of their value', () => { + const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }, { key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }]; + const normalizedData = normalizeEmptyValues(data, ['key']); + normalizedData.forEach((d) => { + expect(d.key).not.toBe(''); + }); + }); + + it('Should leave all other values uneffected, regardless of case', () => { + const data = [{ key: 'anything' }, { key: 'any' }, { key: 'thing' }]; + const normalizedData = normalizeEmptyValues(data); + normalizedData.forEach((d) => { + expect(d.key).not.toBe(''); + }); + }); + }); +}); diff --git a/test/modules/CSVFileModule.test.js b/test/modules/CSVFileModule.test.js new file mode 100644 index 00000000..7e62b287 --- /dev/null +++ b/test/modules/CSVFileModule.test.js @@ -0,0 +1,53 @@ +const path = require('path'); +const { CSVFileModule } = require('../../src/modules'); +const exampleResponse = require('./fixtures/csv-response.json'); + +const INVALID_MRN = 'INVALID MRN'; +const csvFileModule = new CSVFileModule(path.join(__dirname, './fixtures/example-csv.csv')); +const csvFileModuleWithBOMs = new CSVFileModule(path.join(__dirname, './fixtures/example-csv-bom.csv')); + + +describe('CSVFileModule', () => { + describe('get', () => { + test('Reads data from CSV', async () => { + const data = await csvFileModule.get('mrn', 'example-mrn-1'); + expect(data).toEqual(exampleResponse); + }); + + test('Reads data from CSV with a Byte Order Mark', async () => { + const data = await csvFileModuleWithBOMs.get('mrn', 'example-mrn-1'); + expect(data).toEqual(exampleResponse); + }); + + test('Returns multiple rows', async () => { + const data = await csvFileModule.get('mrn', 'example-mrn-2'); + expect(data).toHaveLength(2); + }); + + test('Returns all rows when both key and value are undefined', async () => { + const data = await csvFileModule.get(); + expect(data).toHaveLength(csvFileModule.data.length); + expect(data).toEqual(csvFileModule.data); + }); + + test('Returns data with recordedDate after specified from date', async () => { + const data = await csvFileModule.get('mrn', 'example-mrn-2', '2020-05-01'); + expect(data).toHaveLength(1); + }); + + test('Returns data with recordedDate before specified to date', async () => { + const data = await csvFileModule.get('mrn', 'example-mrn-2', null, '2020-05-01'); + expect(data).toHaveLength(1); + }); + + test('Should return an empty array when key-value pair does not exist', async () => { + const data = await csvFileModule.get('mrn', INVALID_MRN); + expect(data).toEqual([]); + }); + + test('Should return proper value regardless of key casing', async () => { + const data = await csvFileModule.get('mRN', 'example-mrn-1'); + expect(data).toEqual(exampleResponse); + }); + }); +}); diff --git a/test/modules/CSVModule.test.js b/test/modules/CSVModule.test.js deleted file mode 100644 index 3ce0f814..00000000 --- a/test/modules/CSVModule.test.js +++ /dev/null @@ -1,91 +0,0 @@ -const path = require('path'); -const rewire = require('rewire'); -const { CSVModule } = require('../../src/modules'); -const exampleResponse = require('./fixtures/csv-response.json'); - -const CSVModuleRewired = rewire('../../src/modules/CSVModule.js'); -const normalizeEmptyValues = CSVModuleRewired.__get__('normalizeEmptyValues'); - -const INVALID_MRN = 'INVALID MRN'; -const csvModule = new CSVModule(path.join(__dirname, './fixtures/example-csv.csv')); -const csvModuleWithBOMs = new CSVModule(path.join(__dirname, './fixtures/example-csv-bom.csv')); - - -describe('CSVModule', () => { - describe('get', () => { - test('Reads data from CSV', async () => { - const data = await csvModule.get('mrn', 'example-mrn-1'); - expect(data).toEqual(exampleResponse); - }); - - test('Reads data from CSV with a Byte Order Mark', async () => { - const data = await csvModuleWithBOMs.get('mrn', 'example-mrn-1'); - expect(data).toEqual(exampleResponse); - }); - - test('Returns multiple rows', async () => { - const data = await csvModule.get('mrn', 'example-mrn-2'); - expect(data).toHaveLength(2); - }); - - test('Returns all rows when both key and value are undefined', async () => { - const data = await csvModule.get(); - expect(data).toHaveLength(csvModule.data.length); - expect(data).toEqual(csvModule.data); - }); - - test('Returns data with recordedDate after specified from date', async () => { - const data = await csvModule.get('mrn', 'example-mrn-2', '2020-05-01'); - expect(data).toHaveLength(1); - }); - - test('Returns data with recordedDate before specified to date', async () => { - const data = await csvModule.get('mrn', 'example-mrn-2', null, '2020-05-01'); - expect(data).toHaveLength(1); - }); - - test('Should return an empty array when key-value pair does not exist', async () => { - const data = await csvModule.get('mrn', INVALID_MRN); - expect(data).toEqual([]); - }); - - test('Should return proper value regardless of key casing', async () => { - const data = await csvModule.get('mRN', 'example-mrn-1'); - expect(data).toEqual(exampleResponse); - }); - }); - - describe('normalizeEmptyValues', () => { - it('Should turn "null" values into empty strings, regardless of case', () => { - const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }]; - const normalizedData = normalizeEmptyValues(data); - normalizedData.forEach((d) => { - expect(d.key).toBe(''); - }); - }); - - it('Should turn "nil" values into empty strings, regardless of case', () => { - const data = [{ key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }]; - const normalizedData = normalizeEmptyValues(data); - normalizedData.forEach((d) => { - expect(d.key).toBe(''); - }); - }); - - it('Should not modify unalterableColumns, regardless of their value', () => { - const data = [{ key: 'null' }, { key: 'NULL' }, { key: 'nuLL' }, { key: 'nil' }, { key: 'NIL' }, { key: 'NIl' }]; - const normalizedData = normalizeEmptyValues(data, ['key']); - normalizedData.forEach((d) => { - expect(d.key).not.toBe(''); - }); - }); - - it('Should leave all other values uneffected, regardless of case', () => { - const data = [{ key: 'anything' }, { key: 'any' }, { key: 'thing' }]; - const normalizedData = normalizeEmptyValues(data); - normalizedData.forEach((d) => { - expect(d.key).not.toBe(''); - }); - }); - }); -}); From 3659dfc8201ed2bcd80b858e1339497d3677e4c2 Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Fri, 2 Jul 2021 13:18:43 -0400 Subject: [PATCH 3/9] lint fix --- src/extractors/BaseCSVExtractor.js | 1 - src/extractors/CSVPatientExtractor.js | 2 +- test/helpers/csvParsingUtils.test.js | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/extractors/BaseCSVExtractor.js b/src/extractors/BaseCSVExtractor.js index 601add07..b8ff6dfb 100644 --- a/src/extractors/BaseCSVExtractor.js +++ b/src/extractors/BaseCSVExtractor.js @@ -1,4 +1,3 @@ -const logger = require('../helpers/logger'); const { Extractor } = require('./Extractor'); const { CSVFileModule, CSVURLModule } = require('../modules'); diff --git a/src/extractors/CSVPatientExtractor.js b/src/extractors/CSVPatientExtractor.js index 0ebe4c3b..14315f4b 100644 --- a/src/extractors/CSVPatientExtractor.js +++ b/src/extractors/CSVPatientExtractor.js @@ -60,7 +60,7 @@ class CSVPatientExtractor extends BaseCSVExtractor { super({ filePath, url, - // csvSchema: CSVPatientSchema, + csvSchema: CSVPatientSchema, unalterableColumns, }); this.mask = mask; diff --git a/test/helpers/csvParsingUtils.test.js b/test/helpers/csvParsingUtils.test.js index 76f0896f..86ffc9ff 100644 --- a/test/helpers/csvParsingUtils.test.js +++ b/test/helpers/csvParsingUtils.test.js @@ -1,4 +1,4 @@ -const { normalizeEmptyValues, stringNormalizer } = require('../../src/helpers/csvParsingUtils.js'); +const { normalizeEmptyValues } = require('../../src/helpers/csvParsingUtils.js'); describe('csvParsingUtils', () => { describe('normalizeEmptyValues', () => { From 599bd72c25fa2c49e87ec6215f07be055c01e4e7 Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Fri, 2 Jul 2021 14:17:09 -0400 Subject: [PATCH 4/9] url takes precedence over filepath --- src/extractors/BaseCSVExtractor.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/extractors/BaseCSVExtractor.js b/src/extractors/BaseCSVExtractor.js index b8ff6dfb..1cff5602 100644 --- a/src/extractors/BaseCSVExtractor.js +++ b/src/extractors/BaseCSVExtractor.js @@ -4,14 +4,14 @@ const { CSVFileModule, CSVURLModule } = require('../modules'); class BaseCSVExtractor extends Extractor { constructor({ filePath, url, csvSchema, unalterableColumns }) { super(); - this.unalterableColumns = unalterableColumns || []; - this.csvSchema = csvSchema; - if (filePath) { - this.filePath = filePath; - this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns); - } else if (url) { + if (url) { + this.unalterableColumns = unalterableColumns || []; + this.csvSchema = csvSchema; this.url = url; this.csvModule = new CSVURLModule(this.url, this.unalterableColumns); + } else if (filePath) { + this.filePath = filePath; + this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns); } else { throw new Error('Trying to instantiate a CSVExtractor without a filePath or url'); } From 3fe2e98fadaa3ba19cb2de42ce39499a805f8896 Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Fri, 2 Jul 2021 15:23:16 -0400 Subject: [PATCH 5/9] Fix AXIOS call; added test of the CSVURLModule --- src/modules/CSVURLModule.js | 2 +- test/modules/CSVURLModule.test.js | 93 +++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 test/modules/CSVURLModule.test.js diff --git a/src/modules/CSVURLModule.js b/src/modules/CSVURLModule.js index 178e88a0..c4a27fce 100644 --- a/src/modules/CSVURLModule.js +++ b/src/modules/CSVURLModule.js @@ -16,7 +16,7 @@ class CSVURLModule { // If data is already cached, this function does nothing async fillDataCache() { if (!this.data) { - const csvData = await axios(this.url).then((res) => res.data); + const csvData = await axios.get(this.url).then((res) => res.data); // Parse then normalize the data const parsedData = parse(csvData, { columns: (header) => header.map((column) => stringNormalizer(column)), diff --git a/test/modules/CSVURLModule.test.js b/test/modules/CSVURLModule.test.js new file mode 100644 index 00000000..5cf690ec --- /dev/null +++ b/test/modules/CSVURLModule.test.js @@ -0,0 +1,93 @@ +const axios = require('axios'); +const fs = require('fs'); +const path = require('path'); +const { CSVURLModule } = require('../../src/modules'); +const exampleResponse = require('./fixtures/csv-response.json'); + +const exampleCSV = fs.readFileSync(path.join(__dirname, './fixtures/example-csv.csv')); + +// Instantiate module with mock parameters +const INVALID_MRN = 'INVALID MRN'; +const MOCK_URL = 'http://example.com/some/data.csv'; +const csvURLModule = new CSVURLModule(MOCK_URL); +jest.mock('axios'); + +describe('CSVURLModule', () => { + describe('fillDataCache', () => { + it('should make an axios-request when there is no data cached', async () => { + // Mock response from axios before call + axios.get.mockReset(); + axios.get.mockResolvedValue({ data: exampleCSV }); + expect(csvURLModule.data).toBeUndefined(); + await csvURLModule.fillDataCache(); + expect(axios.get).toHaveBeenCalled(); + expect(csvURLModule.data).not.toBeUndefined(); + }); + it('should make no requests when there is data cached', async () => { + axios.get.mockReset(); + const exampleData = ['anything']; + // Fix the data stored on the module + csvURLModule.data = exampleData; + expect(axios.get).not.toHaveBeenCalled(); + expect(csvURLModule.data).toBe(exampleData); + // Since data is defined, this function call should do nothing + await csvURLModule.fillDataCache(); + expect(axios.get).not.toHaveBeenCalled(); + expect(csvURLModule.data).toBe(exampleData); + // Reset the data stored on the module + csvURLModule.data = undefined; + }); + }); + + describe('get', () => { + test('Reads data from CSV', async () => { + axios.get.mockReset(); + axios.get.mockResolvedValue({ data: exampleCSV }); + const data = await csvURLModule.get('mrn', 'example-mrn-1'); + expect(data).toEqual(exampleResponse); + }); + + test('Returns multiple rows', async () => { + axios.get.mockReset(); + axios.get.mockResolvedValue({ data: exampleCSV }); + const data = await csvURLModule.get('mrn', 'example-mrn-2'); + expect(data).toHaveLength(2); + }); + + test('Returns all rows when both key and value are undefined', async () => { + axios.get.mockReset(); + axios.get.mockResolvedValue({ data: exampleCSV }); + const data = await csvURLModule.get(); + expect(data).toHaveLength(csvURLModule.data.length); + expect(data).toEqual(csvURLModule.data); + }); + + test('Returns data with recordedDate after specified from date', async () => { + axios.get.mockReset(); + axios.get.mockResolvedValue({ data: exampleCSV }); + const data = await csvURLModule.get('mrn', 'example-mrn-2', '2020-05-01'); + expect(data).toHaveLength(1); + }); + + test('Returns data with recordedDate before specified to date', async () => { + axios.get.mockReset(); + axios.get.mockResolvedValue({ data: exampleCSV }); + const data = await csvURLModule.get('mrn', 'example-mrn-2', null, '2020-05-01'); + expect(data).toHaveLength(1); + }); + + test('Should return an empty array when key-value pair does not exist', async () => { + axios.get.mockReset(); + axios.get.mockResolvedValue({ data: exampleCSV }); + const data = await csvURLModule.get('mrn', INVALID_MRN); + expect(data).toEqual([]); + }); + + test('Should return proper value regardless of key casing', async () => { + axios.get.mockReset(); + axios.get.mockResolvedValue({ data: exampleCSV }); + const data = await csvURLModule.get('mRN', 'example-mrn-1'); + expect(data).toEqual(exampleResponse); + }); + }); +}); From 5d1a17930334656bcc1da5382a0d8b4805e53956 Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Tue, 6 Jul 2021 11:27:11 -0400 Subject: [PATCH 6/9] Minor typo --- src/modules/CSVURLModule.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/CSVURLModule.js b/src/modules/CSVURLModule.js index c4a27fce..c499a620 100644 --- a/src/modules/CSVURLModule.js +++ b/src/modules/CSVURLModule.js @@ -59,7 +59,7 @@ class CSVURLModule { }); this.data = normalizeEmptyValues(parsedData, this.unalterableColumns); } - return validateCSV(this.url, csvSchema, this.csvModule.data); + return validateCSV(this.url, csvSchema, this.data); } logger.warn(`No CSV schema provided for data found at ${this.url}`); return true; From f502726f42af72b4caf4310b4b28179d7b94c51b Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Mon, 12 Jul 2021 09:40:11 -0400 Subject: [PATCH 7/9] dm1 - moved baseCSVExtractor out of conditional block in constructor --- src/extractors/BaseCSVExtractor.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/extractors/BaseCSVExtractor.js b/src/extractors/BaseCSVExtractor.js index 1cff5602..aec07774 100644 --- a/src/extractors/BaseCSVExtractor.js +++ b/src/extractors/BaseCSVExtractor.js @@ -4,9 +4,9 @@ const { CSVFileModule, CSVURLModule } = require('../modules'); class BaseCSVExtractor extends Extractor { constructor({ filePath, url, csvSchema, unalterableColumns }) { super(); + this.unalterableColumns = unalterableColumns || []; + this.csvSchema = csvSchema; if (url) { - this.unalterableColumns = unalterableColumns || []; - this.csvSchema = csvSchema; this.url = url; this.csvModule = new CSVURLModule(this.url, this.unalterableColumns); } else if (filePath) { From ec46ce89d61e005ad77beda0504cf937703bc4ea Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Mon, 12 Jul 2021 13:50:09 -0400 Subject: [PATCH 8/9] Julian was right as always --- src/modules/CSVURLModule.js | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/modules/CSVURLModule.js b/src/modules/CSVURLModule.js index c499a620..32e37d58 100644 --- a/src/modules/CSVURLModule.js +++ b/src/modules/CSVURLModule.js @@ -49,16 +49,7 @@ class CSVURLModule { await this.fillDataCache(); if (csvSchema) { - logger.info(`Validating CSV data for ${this.url}`); - if (!this.data) { - const csvData = await axios(this.url); - // Parse then normalize the data - const parsedData = parse(csvData, { - columns: (header) => header.map((column) => stringNormalizer(column)), - bom: true, - }); - this.data = normalizeEmptyValues(parsedData, this.unalterableColumns); - } + this.data = normalizeEmptyValues(this.data, this.unalterableColumns); return validateCSV(this.url, csvSchema, this.data); } logger.warn(`No CSV schema provided for data found at ${this.url}`); From f8e89de2ecc84149372a364e428bc758fa83e511 Mon Sep 17 00:00:00 2001 From: Dylan Phelan Date: Mon, 12 Jul 2021 13:59:05 -0400 Subject: [PATCH 9/9] added to config schema --- src/helpers/schemas/config.schema.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/helpers/schemas/config.schema.json b/src/helpers/schemas/config.schema.json index fd66a709..d2164247 100644 --- a/src/helpers/schemas/config.schema.json +++ b/src/helpers/schemas/config.schema.json @@ -90,6 +90,10 @@ "filePath": { "type": "string" }, + "url": { + "type": "string", + "format": "uri" + }, "clinicalSiteID": { "type": "string" },