diff --git a/docs/CSV_Templates.xlsx b/docs/CSV_Templates.xlsx index c9a97fd6..e591143f 100644 Binary files a/docs/CSV_Templates.xlsx and b/docs/CSV_Templates.xlsx differ diff --git a/src/client/BaseClient.js b/src/client/BaseClient.js index 9b8d5eea..da848882 100644 --- a/src/client/BaseClient.js +++ b/src/client/BaseClient.js @@ -26,17 +26,37 @@ class BaseClient { // Given an extractor configuration, initialize all the necessary extractors initializeExtractors(extractorConfig, commonExtractorArgs) { + let allExtractorsValid = true; + extractorConfig.forEach((curExtractorConfig) => { const { label, type, constructorArgs } = curExtractorConfig; logger.debug(`Initializing ${label} extractor with type ${type}`); const ExtractorClass = this.extractorClasses[type]; + try { const newExtractor = new ExtractorClass({ ...commonExtractorArgs, ...constructorArgs }); + + if (newExtractor.validate) { + const isExtractorValid = newExtractor.validate(); + allExtractorsValid = (allExtractorsValid && isExtractorValid); + if (isExtractorValid) { + logger.debug(`Extractor ${label} PASSED CSV validation`); + } else { + logger.debug(`Extractor ${label} FAILED CSV validation`); + } + } + this.extractors.push(newExtractor); } catch (e) { - throw new Error(`Unable to initialize ${label} extractor with type ${type}`); + throw new Error(`Unable to initialize ${label} extractor with type ${type}: ${e.message}`); } }); + + if (allExtractorsValid) { + logger.info('Validation succeeded'); + } else { + throw new Error('Error occurred during CSV validation'); + } } // NOTE: Async because in other clients that extend this, we need async helper functions (ex. 
/**
 * Common base class for extractors that read their data from a CSV file.
 *
 * The constructor resolves the given path, keeps the optional schema and
 * creates the CSVModule for the resolved path. `validate()` checks the
 * module's data against the schema when one was supplied.
 */
class BaseCSVExtractor extends Extractor {
  /**
   * @param {Object} args
   * @param {string} args.filePath - Path to the CSV file; stored in resolved form.
   * @param {Object} [args.csvSchema] - Schema handed to validateCSV; validation is skipped when absent.
   */
  constructor({ filePath, csvSchema }) {
    super();
    const resolvedPath = path.resolve(filePath);
    this.csvSchema = csvSchema;
    this.filePath = resolvedPath;
    this.csvModule = new CSVModule(resolvedPath);
  }

  /**
   * Validates the CSV data against the configured schema.
   *
   * @returns {boolean} The result of validateCSV, or `true` when no schema was provided.
   */
  validate() {
    // Extractors without a schema have nothing to check and count as valid.
    if (!this.csvSchema) {
      logger.warn(`No CSV schema provided for ${this.filePath}`);
      return true;
    }

    logger.info(`Validating CSV file for ${this.filePath}`);
    const { filePath, csvSchema, csvModule } = this;
    return validateCSV(filePath, csvSchema, csvModule.data);
  }
}
this.csvModule.get('mrn', mrn); } async get({ mrn }) { diff --git a/src/extractors/CSVCancerDiseaseStatusExtractor.js b/src/extractors/CSVCancerDiseaseStatusExtractor.js index 7afc09b4..69685241 100644 --- a/src/extractors/CSVCancerDiseaseStatusExtractor.js +++ b/src/extractors/CSVCancerDiseaseStatusExtractor.js @@ -1,14 +1,14 @@ -const path = require('path'); -const { CSVModule } = require('../modules'); +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { formatDateTime } = require('../helpers/dateUtils'); const { getDiseaseStatusDisplay, getDiseaseStatusEvidenceDisplay } = require('../helpers/diseaseStatusUtils'); const { generateMcodeResources } = require('../templates'); const { getEmptyBundle } = require('../helpers/fhirUtils'); const logger = require('../helpers/logger'); +const { CSVCancerDiseaseStatusSchema } = require('../helpers/schemas/csv'); -class CSVCancerDiseaseStatusExtractor { +class CSVCancerDiseaseStatusExtractor extends BaseCSVExtractor { constructor({ filePath, implementation }) { - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath, csvSchema: CSVCancerDiseaseStatusSchema }); this.implementation = implementation; } diff --git a/src/extractors/CSVCancerRelatedMedicationExtractor.js b/src/extractors/CSVCancerRelatedMedicationExtractor.js index 3d05b694..259f9075 100644 --- a/src/extractors/CSVCancerRelatedMedicationExtractor.js +++ b/src/extractors/CSVCancerRelatedMedicationExtractor.js @@ -1,7 +1,5 @@ -const path = require('path'); -const { CSVModule } = require('../modules'); +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { generateMcodeResources } = require('../templates'); -const { Extractor } = require('./Extractor'); const logger = require('../helpers/logger'); const { formatDateTime } = require('../helpers/dateUtils'); @@ -35,10 +33,9 @@ function formatData(medicationData) { }); } -class CSVCancerRelatedMedicationExtractor extends Extractor { +class 
CSVCancerRelatedMedicationExtractor extends BaseCSVExtractor { constructor({ filePath }) { - super(); - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath }); } async getMedicationData(mrn) { diff --git a/src/extractors/CSVClinicalTrialInformationExtractor.js b/src/extractors/CSVClinicalTrialInformationExtractor.js index cc7cf787..f78cc1a0 100644 --- a/src/extractors/CSVClinicalTrialInformationExtractor.js +++ b/src/extractors/CSVClinicalTrialInformationExtractor.js @@ -1,9 +1,8 @@ -const path = require('path'); -const { Extractor } = require('./Extractor'); -const { CSVModule } = require('../modules'); +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { firstEntryInBundle, getBundleResourcesByType } = require('../helpers/fhirUtils'); const { generateMcodeResources } = require('../templates'); const logger = require('../helpers/logger'); +const { CSVClinicalTrialInformationSchema } = require('../helpers/schemas/csv'); function getPatientId(context) { const patientInContext = getBundleResourcesByType(context, 'Patient', {}, true); @@ -16,10 +15,9 @@ function getPatientId(context) { return undefined; } -class CSVClinicalTrialInformationExtractor extends Extractor { +class CSVClinicalTrialInformationExtractor extends BaseCSVExtractor { constructor({ filePath, clinicalSiteID }) { - super(); - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath, csvSchema: CSVClinicalTrialInformationSchema }); if (!clinicalSiteID) logger.warn(`${this.constructor.name} expects a value for clinicalSiteID but got ${clinicalSiteID}`); this.clinicalSiteID = clinicalSiteID; } diff --git a/src/extractors/CSVConditionExtractor.js b/src/extractors/CSVConditionExtractor.js index 086bbecb..4249a3c0 100644 --- a/src/extractors/CSVConditionExtractor.js +++ b/src/extractors/CSVConditionExtractor.js @@ -1,9 +1,8 @@ -const path = require('path'); -const { CSVModule } = require('../modules'); +const { BaseCSVExtractor } = 
require('./BaseCSVExtractor'); const { generateMcodeResources } = require('../templates'); -const { Extractor } = require('./Extractor'); const logger = require('../helpers/logger'); const { formatDateTime } = require('../helpers/dateUtils'); +const { CSVConditionSchema } = require('../helpers/schemas/csv'); // Formats data to be passed into template-friendly format function formatData(conditionData) { @@ -37,10 +36,9 @@ function formatData(conditionData) { }); } -class CSVConditionExtractor extends Extractor { +class CSVConditionExtractor extends BaseCSVExtractor { constructor({ filePath }) { - super(); - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath, csvSchema: CSVConditionSchema }); } async getConditionData(mrn) { diff --git a/src/extractors/CSVObservationExtractor.js b/src/extractors/CSVObservationExtractor.js index 0a55aed3..c27b8d19 100644 --- a/src/extractors/CSVObservationExtractor.js +++ b/src/extractors/CSVObservationExtractor.js @@ -1,7 +1,5 @@ -const path = require('path'); -const { CSVModule } = require('../modules'); +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { generateMcodeResources } = require('../templates'); -const { Extractor } = require('./Extractor'); const logger = require('../helpers/logger'); const { formatDateTime } = require('../helpers/dateUtils'); @@ -32,10 +30,9 @@ function formatData(observationData) { }); } -class CSVObservationExtractor extends Extractor { +class CSVObservationExtractor extends BaseCSVExtractor { constructor({ filePath }) { - super(); - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath }); } async getObservationData(mrn) { diff --git a/src/extractors/CSVPatientExtractor.js b/src/extractors/CSVPatientExtractor.js index 0c23c3e0..ddaaeab0 100644 --- a/src/extractors/CSVPatientExtractor.js +++ b/src/extractors/CSVPatientExtractor.js @@ -1,11 +1,10 @@ -const path = require('path'); -const { CSVModule } = require('../modules'); const { 
generateMcodeResources } = require('../templates'); -const { Extractor } = require('./Extractor'); +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { getEthnicityDisplay, getRaceCodesystem, getRaceDisplay } = require('../helpers/patientUtils'); const logger = require('../helpers/logger'); +const { CSVPatientSchema } = require('../helpers/schemas/csv'); function joinAndReformatData(patientData) { logger.debug('Reformatting patient data from CSV into template format'); @@ -39,10 +38,9 @@ function joinAndReformatData(patientData) { }; } -class CSVPatientExtractor extends Extractor { +class CSVPatientExtractor extends BaseCSVExtractor { constructor({ filePath }) { - super(); - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath, csvSchema: CSVPatientSchema }); } async getPatientData(mrn) { diff --git a/src/extractors/CSVProcedureExtractor.js b/src/extractors/CSVProcedureExtractor.js index 21efca9c..ae3f2116 100644 --- a/src/extractors/CSVProcedureExtractor.js +++ b/src/extractors/CSVProcedureExtractor.js @@ -1,7 +1,5 @@ -const path = require('path'); -const { CSVModule } = require('../modules'); +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { generateMcodeResources } = require('../templates'); -const { Extractor } = require('./Extractor'); const logger = require('../helpers/logger'); const { formatDateTime } = require('../helpers/dateUtils'); @@ -35,10 +33,9 @@ function formatData(procedureData) { }); } -class CSVProcedureExtractor extends Extractor { +class CSVProcedureExtractor extends BaseCSVExtractor { constructor({ filePath }) { - super(); - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath }); } async getProcedureData(mrn) { diff --git a/src/extractors/CSVStagingExtractor.js b/src/extractors/CSVStagingExtractor.js index 1bf93015..5dd77c2d 100644 --- a/src/extractors/CSVStagingExtractor.js +++ b/src/extractors/CSVStagingExtractor.js @@ -1,6 +1,4 @@ -const path = require('path'); 
-const { Extractor } = require('./Extractor'); -const { CSVModule } = require('../modules'); +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { firstEntryInBundle } = require('../helpers/fhirUtils'); const { generateMcodeResources } = require('../templates'); const logger = require('../helpers/logger'); @@ -51,10 +49,9 @@ function formatStagingData(stagingData, categoryIds) { }; } -class CSVStagingExtractor extends Extractor { +class CSVStagingExtractor extends BaseCSVExtractor { constructor({ filePath }) { - super(); - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath }); } async getStagingData(mrn) { diff --git a/src/extractors/CSVTreatmentPlanChangeExtractor.js b/src/extractors/CSVTreatmentPlanChangeExtractor.js index 32cf14f6..0e1d3bef 100644 --- a/src/extractors/CSVTreatmentPlanChangeExtractor.js +++ b/src/extractors/CSVTreatmentPlanChangeExtractor.js @@ -1,11 +1,10 @@ -const path = require('path'); const _ = require('lodash'); -const { Extractor } = require('./Extractor'); -const { CSVModule } = require('../modules'); +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { formatDate } = require('../helpers/dateUtils'); const { generateMcodeResources } = require('../templates'); const { getEmptyBundle } = require('../helpers/fhirUtils'); const logger = require('../helpers/logger'); +const { CSVTreatmentPlanChangeSchema } = require('../helpers/schemas/csv'); // Formats data to be passed into template-friendly format function formatData(tpcData) { @@ -68,10 +67,9 @@ function formatData(tpcData) { return [combinedData]; } -class CSVTreatmentPlanChangeExtractor extends Extractor { +class CSVTreatmentPlanChangeExtractor extends BaseCSVExtractor { constructor({ filePath }) { - super(); - this.csvModule = new CSVModule(path.resolve(filePath)); + super({ filePath, csvSchema: CSVTreatmentPlanChangeSchema }); } async getTPCData(mrn, fromDate, toDate) { diff --git a/src/helpers/csvValidator.js 
/**
 * Validates parsed CSV rows against a schema describing the expected columns.
 *
 * Header check (based on the keys of the first row):
 *  - columns present in the file but not in the schema only produce a warning;
 *  - schema columns absent from the file produce an error (and fail validation)
 *    when marked `required`, otherwise a warning.
 *
 * Value check: every row must carry a non-empty value for each required column
 * that is present in the file. A value counts as missing when it is
 * `undefined`, `null` or the empty string, so a row that lacks the key
 * entirely is reported too, while non-string values such as `0` are accepted.
 *
 * @param {string} pathToCSVFile - Path of the CSV file; only used in log messages.
 * @param {{headers: Array<{name: string, required?: boolean}>}} csvSchema - Expected columns.
 * @param {Object[]} csvData - Parsed rows, each an object keyed by column header.
 * @returns {boolean} `false` if a required column or required value is missing,
 *   `true` otherwise (including when there are no rows to validate).
 */
function validateCSV(pathToCSVFile, csvSchema, csvData) {
  // With no rows there is no header row to inspect; reading csvData[0] would
  // make Object.keys throw. Nothing can violate the schema, so report and pass.
  if (!Array.isArray(csvData) || csvData.length === 0) {
    logger.warn(`CSV ${pathToCSVFile} contains no data rows; skipping validation`);
    return true;
  }

  let isValid = true;

  // Index the schema once instead of searching it for every cell.
  // The first entry wins on duplicate names, like Array.prototype.find.
  const schemaByName = new Map();
  csvSchema.headers.forEach((headerSchema) => {
    if (!schemaByName.has(headerSchema.name)) {
      schemaByName.set(headerSchema.name, headerSchema);
    }
  });

  // Check headers
  const headers = Object.keys(csvData[0]);
  const fileColumns = new Set(headers);

  const extraColumns = headers.filter((name) => !schemaByName.has(name));
  if (extraColumns.length > 0) {
    logger.warn(`Found extra column(s) in CSV ${pathToCSVFile}: "${extraColumns.join(',')}"`);
  }

  csvSchema.headers
    .map((headerSchema) => headerSchema.name)
    .filter((name) => !fileColumns.has(name))
    .forEach((name) => {
      if (schemaByName.get(name).required) {
        logger.error(`Column ${name} is marked as required but is missing in CSV ${pathToCSVFile}`);
        isValid = false;
      } else {
        logger.warn(`Column ${name} is missing in CSV ${pathToCSVFile}`);
      }
    });

  // Check values: only required columns that exist in the file need a per-row
  // check (entirely missing columns were already reported above).
  const requiredColumns = headers
    .map((name, index) => ({ name, position: index + 1 }))
    .filter(({ name }) => {
      const headerSchema = schemaByName.get(name);
      return Boolean(headerSchema && headerSchema.required);
    });

  csvData.forEach((row, i) => {
    requiredColumns.forEach(({ name, position }) => {
      const value = row[name];
      if (value === undefined || value === null || value === '') {
        logger.error(`Column ${name} marked as required but missing value in row ${i + 1} column ${position} in CSV ${pathToCSVFile}`);
        isValid = false;
      }
    });
  });

  return isValid;
}
CSVConditionSchema = { + headers: [ + { name: 'mrn', required: true }, + { name: 'conditionId', required: true }, + { name: 'codeSystem', required: true }, + { name: 'code', required: true }, + { name: 'displayName' }, + { name: 'category', required: true }, + { name: 'dateOfDiagnosis' }, + { name: 'clinicalStatus' }, + { name: 'verificationStatus' }, + { name: 'bodySite' }, + { name: 'laterality' }, + { name: 'histology' }, + ], +}; + +const CSVPatientSchema = { + headers: [ + { name: 'mrn', required: true }, + { name: 'familyName', required: true }, + { name: 'givenName', required: true }, + { name: 'gender', required: true }, + { name: 'birthsex' }, + { name: 'dateOfBirth' }, + { name: 'race' }, + { name: 'ethnicity' }, + { name: 'language' }, + { name: 'addressLine' }, + { name: 'city' }, + { name: 'state' }, + { name: 'zip' }, + ], +}; + +const CSVClinicalTrialInformationSchema = { + headers: [ + { name: 'mrn', required: true }, + { name: 'trialSubjectID', required: true }, + { name: 'enrollmentStatus', required: true }, + { name: 'trialResearchID', required: true }, + { name: 'trialStatus', required: true }, + { name: 'trialResearchSystem' }, + ], +}; + +const CSVTreatmentPlanChangeSchema = { + headers: [ + { name: 'mrn', required: true }, + { name: 'reasonCode' }, + { name: 'changed', required: true }, + { name: 'dateOfCarePlan', required: true }, + { name: 'dateRecorded' }, + ], +}; + +module.exports = { CSVCancerDiseaseStatusSchema, CSVConditionSchema, CSVPatientSchema, CSVClinicalTrialInformationSchema, CSVTreatmentPlanChangeSchema }; diff --git a/test/cli/mcodeExtraction.test.js b/test/cli/mcodeExtraction.test.js index fe77a7d0..9bcdedf4 100644 --- a/test/cli/mcodeExtraction.test.js +++ b/test/cli/mcodeExtraction.test.js @@ -65,7 +65,7 @@ describe('mcodeExtraction', () => { }; const testClient = new MCODEClient(testConfig); - testClient.init(); + await testClient.init(); const { extractedData, successfulExtraction, totalExtractionErrors } = await 
extractDataForPatients(testPatientIds, testClient, testFromDate, testToDate); expect(successfulExtraction).toEqual(true); diff --git a/test/extractors/CSVAdverseEventExtractor.test.js b/test/extractors/CSVAdverseEventExtractor.test.js index 76e4d1f6..4abc75e6 100644 --- a/test/extractors/CSVAdverseEventExtractor.test.js +++ b/test/extractors/CSVAdverseEventExtractor.test.js @@ -18,10 +18,10 @@ const csvAdverseEventExtractor = new CSVAdverseEventExtractor({ }); // Destructure all modules -const { CSVModule } = csvAdverseEventExtractor; +const { csvModule } = csvAdverseEventExtractor; // Spy on csvModule -const csvModuleSpy = jest.spyOn(CSVModule, 'get'); +const csvModuleSpy = jest.spyOn(csvModule, 'get'); const formatData = CSVAdverseEventExtractorRewired.__get__('formatData'); diff --git a/test/helpers/csvValidator.test.js b/test/helpers/csvValidator.test.js new file mode 100644 index 00000000..1b554fa7 --- /dev/null +++ b/test/helpers/csvValidator.test.js @@ -0,0 +1,96 @@ +const { validateCSV } = require('../../src/helpers/csvValidator'); + +const SIMPLE_DATA = [ + { + header1: '1', + header2: '2', + header3: '3', + }, + { + header1: '4', + header2: '', + header3: '6', + }, +]; + +const SIMPLE_DATA_MISSING_REQUIRED_VALUE = [ + { + header1: '', + header2: '2', + header3: '3', + }, + { + header1: '4', + header2: '2', + header3: '3', + }, +]; + +const SIMPLE_DATA_MISSING_HEADER = [ + { + wrongHeader1: '1', + header2: '2', + header3: '3', + }, + { + wrongHeader1: '4', + header2: '2', + header3: '3', + }, +]; + +const SIMPLE_DATA_EXTRA_COLUMNS = [ + { + header1: '1', + header2: '2', + header3: '3', + header4: '4', + }, + { + header1: '5', + header2: '6', + header3: '7', + header4: '', + }, +]; + +const SIMPLE_DATA_MISSING_OPTIONAL_COLUMN = [ + { + header1: '1', + header2: '2', + }, + { + header1: '3', + header2: '4', + }, +]; + +const schema = { + headers: [ + { name: 'header1', required: true }, + { name: 'header2' }, + { name: 'header3' }, + ], +}; + 
+describe('csvValidator', () => { + test('simple data validates', () => { + expect(validateCSV('', schema, SIMPLE_DATA)).toBe(true); + }); + + test('data missing required value does not validate', () => { + expect(validateCSV('', schema, SIMPLE_DATA_MISSING_REQUIRED_VALUE)).toBe(false); + }); + + test('data missing required header does not validate', () => { + expect(validateCSV('', schema, SIMPLE_DATA_MISSING_HEADER)).toBe(false); + }); + + test('data with erroneous column should still validate', () => { + expect(validateCSV('', schema, SIMPLE_DATA_EXTRA_COLUMNS)).toBe(true); + }); + + test('data missing an optional column should still validate', () => { + expect(validateCSV('', schema, SIMPLE_DATA_MISSING_OPTIONAL_COLUMN)).toBe(true); + }); +});