diff --git a/README.md b/README.md index 6bb8fd09..4a43acc9 100644 --- a/README.md +++ b/README.md @@ -92,8 +92,11 @@ Examples files for these extractor can be found in the [`test/sample-client-data After exporting your CSV files to the `data` directory, kickstart the creation of a configuration file by renaming the provided `csv.config.example.json` to `csv.config.json`. Then, ensure the following configuration parameters are properly set: -1. `patientIdCsvPath` should provide a file path to a CSV file containing MRN's for relevant patients; -2. For each extractor, `filePath:` should provide a file path to a CSV file containing that corresponding extractor's data; +1. `patientIdCsvPath` should correspond to an absolute file path to a CSV file containing MRNs for relevant patients; +2. `commonExtractorArgs.dataDirectory` should correspond to an absolute path to the directory containing all your exported CSV files; +3. For each extractor, `fileName` should correspond to the name of the file this extractor should read from. Note: combining the `dataDirectory` above and `fileName` should resolve to a file on disk containing the corresponding extractor's data; + +**Note**: Previous versions of the MEF suggested using a `filePath` property for each extractor; while this property should still work without issue, the recommended approach is to use a common `dataDirectory` for all CSV files and to have each extractor call out the name of the CSV file it needs. For instructions on setting up an email notification trigger whenever an error is encountered in extraction, see the [Email Notification](#Email-Notification) section below. 
@@ -143,7 +146,7 @@ To mask a property, provide an array of the properties to mask in the `construct "label": "patient", "type": "CSVPatientExtractor", "constructorArgs": { - "filePath": "./data/patient-information.csv" + "fileName": "patient-information.csv", "mask": ["address", "birthDate"] } } @@ -156,7 +159,7 @@ Alternatively, providing a string with a value of `all` in the `constructorArgs` "label": "patient", "type": "CSVPatientExtractor", "constructorArgs": { - "filePath": "./data/patient-information.csv" + "fileName": "patient-information.csv", "mask": "all" } } diff --git a/config/csv.config.example.json b/config/csv.config.example.json index 6547fd4e..6e6bf39a 100644 --- a/config/csv.config.example.json +++ b/config/csv.config.example.json @@ -1,6 +1,8 @@ { - "patientIdCsvPath": "./data/patient-mrns.csv", - "commonExtractorArgs": {}, + "patientIdCsvPath": "/Users/YourAccount/absolute/path/to/patient-mrns.csv", + "commonExtractorArgs": { + "dataDirectory": "/Users/YourAccount/absolute/path/to/data/directory" + }, "notificationInfo": { "host": "smtp.example.com", "port": 587, @@ -16,28 +18,28 @@ "label": "patient", "type": "CSVPatientExtractor", "constructorArgs": { - "filePath": "./data/patient-information.csv" + "fileName": "patient-information.csv" } }, { "label": "condition", "type": "CSVConditionExtractor", "constructorArgs": { - "filePath": "./data/condition-information.csv" + "fileName": "condition-information.csv" } }, { "label": "cancerDiseaseStatus", "type": "CSVCancerDiseaseStatusExtractor", "constructorArgs": { - "filePath": "./data/cancer-disease-status-information.csv" + "fileName": "cancer-disease-status-information.csv" } }, { "label": "clinicalTrialInformation", "type": "CSVClinicalTrialInformationExtractor", "constructorArgs": { - "filePath": "./data/clinical-trial-information.csv", + "fileName": "clinical-trial-information.csv", "clinicalSiteID": "example-site-id" } }, @@ -45,56 +47,56 @@ "label": "treatmentPlanChange", "type": 
"CSVTreatmentPlanChangeExtractor", "constructorArgs": { - "filePath": "./data/treatment-plan-change-information.csv" + "fileName": "treatment-plan-change-information.csv" } }, { "label": "staging", "type": "CSVStagingExtractor", "constructorArgs": { - "filePath": "./data/staging-information.csv" + "fileName": "staging-information.csv" } }, { "label": "cancerRelatedMedicationAdministration", "type": "CSVCancerRelatedMedicationAdministrationExtractor", "constructorArgs": { - "filePath": "./data/cancer-related-medication-administration-information.csv" + "fileName": "cancer-related-medication-administration-information.csv" } }, { "label": "cancerRelatedMedicationRequest", "type": "CSVCancerRelatedMedicationRequestExtractor", "constructorArgs": { - "filePath": "./data/cancer-related-medication-request-information.csv" + "fileName": "cancer-related-medication-request-information.csv" } }, { "label": "genericObservations", "type": "CSVObservationExtractor", "constructorArgs": { - "filePath": "./data/observation-information.csv" + "fileName": "observation-information.csv" } }, { "label": "genericProcedures", "type": "CSVProcedureExtractor", "constructorArgs": { - "filePath": "./data/procedure-information.csv" + "fileName": "procedure-information.csv" } }, { "label": "adverseEvent", "type": "CSVAdverseEventExtractor", "constructorArgs": { - "filePath": "./test/sample-client-data/adverse-event-information.csv" + "fileName": "adverse-event-information.csv" } }, { "label": "ctcAdverseEvent", "type": "CSVCTCAdverseEventExtractor", "constructorArgs": { - "filePath": "./test/sample-client-data/ctc-adverse-event-information.csv" + "fileName": "ctc-adverse-event-information.csv" } } ] diff --git a/src/extractors/BaseCSVExtractor.js b/src/extractors/BaseCSVExtractor.js index aec07774..88a23e05 100644 --- a/src/extractors/BaseCSVExtractor.js +++ b/src/extractors/BaseCSVExtractor.js @@ -1,19 +1,36 @@ +const path = require('path'); const { Extractor } = require('./Extractor'); const 
{ CSVFileModule, CSVURLModule } = require('../modules'); +const logger = require('../helpers/logger'); + class BaseCSVExtractor extends Extractor { - constructor({ filePath, url, csvSchema, unalterableColumns }) { + constructor({ + filePath, url, fileName, dataDirectory, csvSchema, unalterableColumns, + }) { super(); this.unalterableColumns = unalterableColumns || []; this.csvSchema = csvSchema; if (url) { + logger.debug('Found url argument; creating a CSVURLModule with the provided url'); this.url = url; this.csvModule = new CSVURLModule(this.url, this.unalterableColumns); + } else if (fileName && dataDirectory) { + if (!path.isAbsolute(dataDirectory)) throw new Error('dataDirectory is not an absolutePath, it needs to be.'); + this.filePath = path.join(dataDirectory, fileName); + logger.debug( + 'Found fileName and dataDirectory arguments; creating a CSVFileModule with the provided dataDirectory and fileName', + ); + this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns); } else if (filePath) { + logger.debug('Found filePath argument; creating a CSVFileModule with the provided filePath'); this.filePath = filePath; this.csvModule = new CSVFileModule(this.filePath, this.unalterableColumns); } else { - throw new Error('Trying to instantiate a CSVExtractor without a filePath or url'); + logger.debug( + 'Could not instantiate a CSVExtractor with the provided constructor args', + ); + throw new Error('Trying to instantiate a CSVExtractor without a valid filePath, url, or fileName+dataDirectory combination'); } } diff --git a/src/extractors/CSVAdverseEventExtractor.js b/src/extractors/CSVAdverseEventExtractor.js index b0031d1d..817880cc 100644 --- a/src/extractors/CSVAdverseEventExtractor.js +++ b/src/extractors/CSVAdverseEventExtractor.js @@ -69,8 +69,8 @@ function formatData(adverseEventData, patientId) { } class CSVAdverseEventExtractor extends BaseCSVExtractor { - constructor({ filePath, url }) { - super({ filePath, url }); + constructor({ 
filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory }); } async getAdverseEventData(mrn) { diff --git a/src/extractors/CSVCTCAdverseEventExtractor.js b/src/extractors/CSVCTCAdverseEventExtractor.js index 6657a709..4710c1eb 100644 --- a/src/extractors/CSVCTCAdverseEventExtractor.js +++ b/src/extractors/CSVCTCAdverseEventExtractor.js @@ -95,8 +95,8 @@ function formatData(adverseEventData, patientId) { } class CSVCTCAdverseEventExtractor extends BaseCSVExtractor { - constructor({ filePath, url }) { - super({ filePath, url }); + constructor({ filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory }); } async getAdverseEventData(mrn) { diff --git a/src/extractors/CSVCancerDiseaseStatusExtractor.js b/src/extractors/CSVCancerDiseaseStatusExtractor.js index 4535d519..0aac0d92 100644 --- a/src/extractors/CSVCancerDiseaseStatusExtractor.js +++ b/src/extractors/CSVCancerDiseaseStatusExtractor.js @@ -8,8 +8,10 @@ const logger = require('../helpers/logger'); const { CSVCancerDiseaseStatusSchema } = require('../helpers/schemas/csv'); class CSVCancerDiseaseStatusExtractor extends BaseCSVExtractor { - constructor({ filePath, url, implementation }) { - super({ filePath, url, csvSchema: CSVCancerDiseaseStatusSchema }); + constructor({ + filePath, url, fileName, dataDirectory, implementation, + }) { + super({ filePath, url, fileName, dataDirectory, csvSchema: CSVCancerDiseaseStatusSchema }); this.implementation = implementation; } diff --git a/src/extractors/CSVCancerRelatedMedicationAdministrationExtractor.js b/src/extractors/CSVCancerRelatedMedicationAdministrationExtractor.js index 4763e216..6e59b607 100644 --- a/src/extractors/CSVCancerRelatedMedicationAdministrationExtractor.js +++ b/src/extractors/CSVCancerRelatedMedicationAdministrationExtractor.js @@ -46,8 +46,8 @@ function formatData(medicationData, patientId) { } class CSVCancerRelatedMedicationAdministrationExtractor extends BaseCSVExtractor { - 
constructor({ filePath, url }) { - super({ filePath, url }); + constructor({ filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory }); } async getMedicationData(mrn) { diff --git a/src/extractors/CSVCancerRelatedMedicationRequestExtractor.js b/src/extractors/CSVCancerRelatedMedicationRequestExtractor.js index 74e66fdf..bde51da2 100644 --- a/src/extractors/CSVCancerRelatedMedicationRequestExtractor.js +++ b/src/extractors/CSVCancerRelatedMedicationRequestExtractor.js @@ -48,8 +48,8 @@ function formatData(medicationData, patientId) { } class CSVCancerRelatedMedicationRequestExtractor extends BaseCSVExtractor { - constructor({ filePath, url }) { - super({ filePath, url }); + constructor({ filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory }); } async getMedicationData(mrn) { diff --git a/src/extractors/CSVClinicalTrialInformationExtractor.js b/src/extractors/CSVClinicalTrialInformationExtractor.js index db5a7f21..6af23903 100644 --- a/src/extractors/CSVClinicalTrialInformationExtractor.js +++ b/src/extractors/CSVClinicalTrialInformationExtractor.js @@ -8,8 +8,10 @@ const { CSVClinicalTrialInformationSchema } = require('../helpers/schemas/csv'); class CSVClinicalTrialInformationExtractor extends BaseCSVExtractor { - constructor({ filePath, url, clinicalSiteID, clinicalSiteSystem }) { - super({ filePath, url, csvSchema: CSVClinicalTrialInformationSchema }); + constructor({ + filePath, url, fileName, dataDirectory, clinicalSiteID, clinicalSiteSystem, + }) { + super({ filePath, url, fileName, dataDirectory, csvSchema: CSVClinicalTrialInformationSchema }); if (!clinicalSiteID) logger.warn(`${this.constructor.name} expects a value for clinicalSiteID but got ${clinicalSiteID}`); this.clinicalSiteID = clinicalSiteID; this.clinicalSiteSystem = clinicalSiteSystem; diff --git a/src/extractors/CSVConditionExtractor.js b/src/extractors/CSVConditionExtractor.js index f5778cc3..eda3831e 100644 --- 
a/src/extractors/CSVConditionExtractor.js +++ b/src/extractors/CSVConditionExtractor.js @@ -49,8 +49,8 @@ function formatData(conditionData, patientId) { } class CSVConditionExtractor extends BaseCSVExtractor { - constructor({ filePath, url }) { - super({ filePath, url, csvSchema: CSVConditionSchema }); + constructor({ filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory, csvSchema: CSVConditionSchema }); } async getConditionData(mrn) { diff --git a/src/extractors/CSVObservationExtractor.js b/src/extractors/CSVObservationExtractor.js index 52c97721..8c7ee509 100644 --- a/src/extractors/CSVObservationExtractor.js +++ b/src/extractors/CSVObservationExtractor.js @@ -42,8 +42,8 @@ function formatData(observationData, patientId) { } class CSVObservationExtractor extends BaseCSVExtractor { - constructor({ filePath, url }) { - super({ filePath, url }); + constructor({ filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory }); } async getObservationData(mrn) { diff --git a/src/extractors/CSVPatientExtractor.js b/src/extractors/CSVPatientExtractor.js index f2e66e8a..7ebf82e5 100644 --- a/src/extractors/CSVPatientExtractor.js +++ b/src/extractors/CSVPatientExtractor.js @@ -54,12 +54,16 @@ function joinAndReformatData(patientData) { } class CSVPatientExtractor extends BaseCSVExtractor { - constructor({ filePath, url, mask = [] }) { + constructor({ + filePath, url, fileName, dataDirectory, mask = [], + }) { // Define CSV Columns whose values should never be altered const unalterableColumns = ['familyName', 'givenName']; super({ filePath, url, + fileName, + dataDirectory, csvSchema: CSVPatientSchema, unalterableColumns, }); diff --git a/src/extractors/CSVProcedureExtractor.js b/src/extractors/CSVProcedureExtractor.js index e61e49db..507cc10d 100644 --- a/src/extractors/CSVProcedureExtractor.js +++ b/src/extractors/CSVProcedureExtractor.js @@ -48,8 +48,8 @@ function formatData(procedureData, patientId) 
{ } class CSVProcedureExtractor extends BaseCSVExtractor { - constructor({ filePath, url }) { - super({ filePath, url }); + constructor({ filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory }); } async getProcedureData(mrn) { diff --git a/src/extractors/CSVStagingExtractor.js b/src/extractors/CSVStagingExtractor.js index 31823c17..96a62088 100644 --- a/src/extractors/CSVStagingExtractor.js +++ b/src/extractors/CSVStagingExtractor.js @@ -63,8 +63,8 @@ function formatStagingData(stagingData, categoryIds, patientId) { } class CSVStagingExtractor extends BaseCSVExtractor { - constructor({ filePath, url }) { - super({ filePath, url }); + constructor({ filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory }); } async getStagingData(mrn) { diff --git a/src/extractors/CSVTreatmentPlanChangeExtractor.js b/src/extractors/CSVTreatmentPlanChangeExtractor.js index fb64c393..e17dd75e 100644 --- a/src/extractors/CSVTreatmentPlanChangeExtractor.js +++ b/src/extractors/CSVTreatmentPlanChangeExtractor.js @@ -70,8 +70,8 @@ function formatData(tpcData, patientId) { } class CSVTreatmentPlanChangeExtractor extends BaseCSVExtractor { - constructor({ filePath, url }) { - super({ filePath, url, csvSchema: CSVTreatmentPlanChangeSchema }); + constructor({ filePath, url, fileName, dataDirectory }) { + super({ filePath, url, fileName, dataDirectory, csvSchema: CSVTreatmentPlanChangeSchema }); } async getTPCData(mrn, fromDate, toDate) { diff --git a/src/extractors/index.js b/src/extractors/index.js index 49c5aefb..6ecf5253 100644 --- a/src/extractors/index.js +++ b/src/extractors/index.js @@ -1,3 +1,4 @@ +const { BaseCSVExtractor } = require('./BaseCSVExtractor'); const { BaseFHIRExtractor } = require('./BaseFHIRExtractor'); const { CSVAdverseEventExtractor } = require('./CSVAdverseEventExtractor'); const { CSVCancerDiseaseStatusExtractor } = require('./CSVCancerDiseaseStatusExtractor'); @@ -26,6 +27,7 @@ const { 
FHIRProcedureExtractor } = require('./FHIRProcedureExtractor'); const { MCODESurgicalProcedureExtractor } = require('./MCODESurgicalProcedureExtractor'); module.exports = { + BaseCSVExtractor, BaseFHIRExtractor, CSVAdverseEventExtractor, CSVCancerDiseaseStatusExtractor, diff --git a/src/helpers/schemas/config.schema.json b/src/helpers/schemas/config.schema.json index ccbdf751..5ebdfd1e 100644 --- a/src/helpers/schemas/config.schema.json +++ b/src/helpers/schemas/config.schema.json @@ -32,6 +32,11 @@ "type": "string", "format": "uri" }, + "dataDirectory": { + "title": "Data Directory", + "description": "Absolute path for the directory containing any data files used by extractors.", + "type": "string" + }, "requestHeaders": { "title": "Request Headers", "type": "object" @@ -110,6 +115,17 @@ "properties": { "filePath": { "title": "File Path", + "description": "An absolute path to a data file from which data is to be extracted", + "type": "string" + }, + "fileName": { + "title": "File Name", + "description": "The name of a file, with the appropriate file suffix, from which data is to be extracted. This fileName is combined with the extractor's dataDirectory to create a valid path.", + "type": "string" + }, + "dataDirectory": { + "title": "Data Directory", + "description": "Absolute path for the directory containing any data files used by extractors. 
Extractors can infer this value from the global commonExtractorArgs or define their own.", "type": "string" }, "url": { diff --git a/test/extractors/BaseCSVExtractor.test.js b/test/extractors/BaseCSVExtractor.test.js new file mode 100644 index 00000000..125edd05 --- /dev/null +++ b/test/extractors/BaseCSVExtractor.test.js @@ -0,0 +1,42 @@ +const path = require('path'); +const { BaseCSVExtractor } = require('../../src/extractors'); + +// Some global variables +const fixturesPath = path.join(__dirname, 'fixtures'); +const csvFileName = 'example.csv'; + +// Tests +describe('BaseCSVExtractor', () => { + test('Should create a CSVFileModule when provided a filePath that is an absolute path', () => { + const absolutePath = path.join(fixturesPath, csvFileName); + const filePathExtractor = new BaseCSVExtractor({ filePath: absolutePath }); + expect(filePathExtractor.csvModule).not.toBeUndefined(); + expect(filePathExtractor.csvModule.constructor.name).toEqual('CSVFileModule'); + }); + test('Should create a CSVURLModule when provided a URL', () => { + const urlExtractor = new BaseCSVExtractor({ url: 'http://example.com' }); + expect(urlExtractor.csvModule).not.toBeUndefined(); + expect(urlExtractor.csvModule.constructor.name).toEqual('CSVURLModule'); + }); + test('Should create a CSVFileModule when provided a fileName and a dataDirectory', () => { + const fileNameDataDirectoryExtractor = new BaseCSVExtractor({ fileName: csvFileName, dataDirectory: fixturesPath }); + expect(fileNameDataDirectoryExtractor.csvModule).not.toBeUndefined(); + expect(fileNameDataDirectoryExtractor.csvModule.constructor.name).toEqual('CSVFileModule'); + }); + test('Should fail when the provided dataDirectory is not an absolute path', () => { + expect(() => new BaseCSVExtractor({ fileName: csvFileName, dataDirectory: './fixtures/' })) + .toThrowError('dataDirectory is not an absolutePath, it needs to be.'); + }); + test('Should fail when provided only a fileName and no dataDirectory', () => { 
+ expect(() => new BaseCSVExtractor({ fileName: csvFileName })) + .toThrowError('Trying to instantiate a CSVExtractor without a valid filePath, url, or fileName+dataDirectory combination'); + }); + test('Should fail when provided only a dataDirectory and no fileName', () => { + expect(() => new BaseCSVExtractor({ dataDirectory: fixturesPath })) + .toThrowError('Trying to instantiate a CSVExtractor without a valid filePath, url, or fileName+dataDirectory combination'); + }); + test('Should fail when provided none of the three options above', () => { + expect(() => new BaseCSVExtractor({})) + .toThrowError('Trying to instantiate a CSVExtractor without a valid filePath, url, or fileName+dataDirectory combination'); + }); +});