diff --git a/src/main/java/org/breedinginsight/brapps/importer/model/ImportUpload.java b/src/main/java/org/breedinginsight/brapps/importer/model/ImportUpload.java index ab0272ba8..1ed883a7d 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/model/ImportUpload.java +++ b/src/main/java/org/breedinginsight/brapps/importer/model/ImportUpload.java @@ -17,6 +17,7 @@ package org.breedinginsight.brapps.importer.model; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -34,6 +35,7 @@ import org.jooq.Record; import tech.tablesaw.api.Table; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -69,6 +71,15 @@ public void updateProgress(Integer finished, Integer inProgress) { progress.setInProgress((long) inProgress); } + public void setDynamicColumnNames(List dynamicColumnNames) { + super.setDynamicColumnNames(dynamicColumnNames.toArray(new String[0])); + } + + @JsonIgnore + public List getDynamicColumnNamesList(){ + return Arrays.asList(super.getDynamicColumnNames()); + } + public static ImportUpload parseSQLRecord(Record record) { return ImportUpload.uploadBuilder() @@ -85,6 +96,7 @@ public static ImportUpload parseSQLRecord(Record record) { .updatedAt(record.getValue(IMPORTER_IMPORT.UPDATED_AT)) .createdBy(record.getValue(IMPORTER_IMPORT.CREATED_BY)) .updatedBy(record.getValue(IMPORTER_IMPORT.UPDATED_BY)) + .dynamicColumnNames(record.getValue(IMPORTER_IMPORT.DYNAMIC_COLUMN_NAMES)) .build(); } } diff --git a/src/main/java/org/breedinginsight/brapps/importer/model/response/ImportPreviewResponse.java b/src/main/java/org/breedinginsight/brapps/importer/model/response/ImportPreviewResponse.java index 0d5fe4e5d..b46fc6335 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/model/response/ImportPreviewResponse.java +++ 
b/src/main/java/org/breedinginsight/brapps/importer/model/response/ImportPreviewResponse.java @@ -29,4 +29,5 @@ public class ImportPreviewResponse { private Map statistics; private List rows; + private List dynamicColumnNames; } diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java b/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java index cf9c6b916..4aaf4a743 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java @@ -299,6 +299,7 @@ public ImportResponse uploadData(UUID programId, UUID mappingId, AuthenticatedUs newUpload.setUserId(actingUser.getId()); newUpload.setCreatedBy(actingUser.getId()); newUpload.setUpdatedBy(actingUser.getId()); + newUpload = setDynamicColumns(newUpload, data, importMapping); // Create a progress object ImportProgress importProgress = new ImportProgress(); @@ -397,6 +398,26 @@ public ImportResponse updateUpload(UUID programId, UUID uploadId, AuthenticatedU return importResponse; } + /** + * If mapping has experiment structure, retrieve dynamic columns + * Experiment and germplasm mapping presently have different structures + * @param newUpload + * @param data + * @param importMapping + * @return updated newUpload with dynamic columns set + */ + public ImportUpload setDynamicColumns(ImportUpload newUpload, Table data, ImportMapping importMapping) { + if (importMapping.getMappingConfig().get(0).getValue() != null) { + List mappingCols = importMapping.getMappingConfig().stream().map(field -> field.getValue().getFileFieldName()).collect(Collectors.toList()); + List dynamicCols = data.columnNames().stream() + .filter(column -> !mappingCols.contains(column)).collect(Collectors.toList()); + newUpload.setDynamicColumnNames(dynamicCols); + } else { + newUpload.setDynamicColumnNames(new ArrayList<>()); + } + return newUpload; + } + private void 
processFile(List finalBrAPIImportList, Table data, Program program, ImportUpload upload, User user, Boolean commit, BrAPIImportService importService, AuthenticatedUser actingUser) { diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java b/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java index b87c2a62c..b70c11f30 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java @@ -32,10 +32,7 @@ import javax.inject.Inject; import javax.inject.Singleton; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; @Singleton diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index b0adb1d9e..a32668134 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -64,6 +64,7 @@ import javax.inject.Inject; import java.math.BigDecimal; import java.math.BigInteger; +import java.time.OffsetDateTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.*; @@ -115,6 +116,9 @@ public class ExperimentProcessor implements Processor { // existingGermplasmByGID is populated by getExistingBrapiData(), but not updated by the getNewBrapiData() method private Map> existingGermplasmByGID = null; + // Associates timestamp columns to associated phenotype column name for ease of storage + private Map timeStampColByPheno = new HashMap<>(); + @Inject public ExperimentProcessor(DSLContext dsl, BrAPITrialDAO brapiTrialDAO, @@ -180,8 +184,20 @@ public Map 
process( ValidationErrors validationErrors = new ValidationErrors(); // Get dynamic phenotype columns for processing - List> phenotypeCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME); + List> dynamicCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME); + List> phenotypeCols = new ArrayList<>(); + List> timestampCols = new ArrayList<>(); + for (Column dynamicCol: dynamicCols) { + //Distinguish between phenotype and timestamp columns + if (dynamicCol.name().startsWith("TS:")) { + timestampCols.add(dynamicCol); + } else { + phenotypeCols.add(dynamicCol); + } + } + List varNames = phenotypeCols.stream().map(Column::name).collect(Collectors.toList()); + List tsNames = timestampCols.stream().map(Column::name).collect(Collectors.toList()); // Lookup all traits in system for program, maybe eventually add a variable search in ontology service List traits = null; @@ -208,6 +224,20 @@ public Map process( "Ontology term(s) not found: " + String.join(", ", differences)); } + // Check that each ts column corresponds to a phenotype column + List unmatchedTimestamps = tsNames.stream() + .filter(e -> !(varNames.contains(e.replaceFirst("^TS:\\s*","")))) + .collect(Collectors.toList()); + if (unmatchedTimestamps.size() > 0) { + throw new HttpStatusException(HttpStatus.UNPROCESSABLE_ENTITY, + "Timestamp column(s) lack corresponding phenotype column(s): " + String.join(", ", unmatchedTimestamps)); + } + + //Now know timestamps all valid phenotypes, can associate with phenotype column name for easy retrieval + for (Column tsColumn: timestampCols) { + timeStampColByPheno.put(tsColumn.name().replaceFirst("^TS:\\s*",""), tsColumn); + } + // Perform ontology validations on each observation value in phenotype column Map colVarMap = filteredTraits.stream() .collect(Collectors.toMap(Trait::getObservationVariableName, Function.identity())); @@ -220,6 +250,15 @@ public Map process( } } + //Timestamp validation + for (Column column : timestampCols) { + 
for (int i=0; i < column.size(); i++) { + String value = column.getString(i); + String colName = column.name(); + validateTimeStampValue(value, colName, validationErrors, i); + } + } + // add "New" pending data to the BrapiData objects getNewBrapiData(importRows, phenotypeCols, program, user, commit); @@ -319,7 +358,16 @@ private void getNewBrapiData(List importRows, List> pheno this.observationUnitByNameNoScope.put(key, obsUnitPIO); for (Column column : phenotypeCols) { - PendingImportObject obsPIO = createObservationPIO(importRow, column.name(), column.getString(i)); + //If associated timestamp column, add + String dateTimeValue = null; + if (timeStampColByPheno.get(column.name()) != null) { + dateTimeValue = timeStampColByPheno.get(column.name()).getString(i); + //If date-only (no time portion), append midnight UTC so it parses as a datetime + if (validDateValue(dateTimeValue) && !validDateTimeValue(dateTimeValue)){ + dateTimeValue+="T00:00:00-00:00"; + } + } + PendingImportObject obsPIO = createObservationPIO(importRow, column.name(), column.getString(i), dateTimeValue); this.observationByHash.put(getImportObservationHash(importRow, getVariableNameFromColumn(column)), obsPIO); } } @@ -331,11 +379,9 @@ private String createObservationUnitKey(ExperimentObservation importRow) { } private String getImportObservationHash(ExperimentObservation importRow, String variableName) { - // TODO: handle timestamps once we support them return getObservationHash(createObservationUnitKey(importRow), variableName, importRow.getEnv()); } - //TODO: Add timestamp parameter once we support them private String getObservationHash(String observationUnitName, String variableName, String studyName) { String concat = DigestUtils.sha256Hex(observationUnitName) + DigestUtils.sha256Hex(variableName) + @@ -526,13 +572,18 @@ private PendingImportObject createObsUnitPIO(Program progr } - private PendingImportObject createObservationPIO(ExperimentObservation importRow, String variableName, String value) { + private PendingImportObject 
createObservationPIO(ExperimentObservation importRow, String variableName, String value, String timeStampValue) { PendingImportObject pio = null; if (this.observationByHash.containsKey(getImportObservationHash(importRow, variableName))) { pio = observationByHash.get(getImportObservationHash(importRow, variableName)); } else { BrAPIObservation newObservation = importRow.constructBrAPIObservation(value, variableName); + //NOTE: only parse values in full datetime form; a date-only value would make OffsetDateTime.parse throw. + // A validation error for any offending value is raised later downstream, so invalid values are skipped here + if (timeStampValue != null && !timeStampValue.isBlank() && validDateTimeValue(timeStampValue)) { + newObservation.setObservationTimeStamp(OffsetDateTime.parse(timeStampValue)); + } pio = new PendingImportObject<>(ImportObjectState.NEW, newObservation); } return pio; } @@ -837,6 +888,17 @@ private String simpleStudyName(String scopedName){ return scopedName.replaceFirst(" \\[.*\\]", ""); } + private void validateTimeStampValue(String value, + String columnHeader, ValidationErrors validationErrors, int row){ + if(StringUtils.isBlank(value)) { + log.debug(String.format("skipping validation of observation timestamp because there is no value.\n\tvariable: %s\n\trow: %d", columnHeader, row)); + return; + } + if (!validDateValue(value) && !validDateTimeValue(value)) { + addRowError(columnHeader, "Incorrect datetime format detected. 
Expected YYYY-MM-DD or YYYY-MM-DDThh:mm:ss+hh:mm", validationErrors, row); + } + + } private void validateObservationValue(Trait variable, String value, String columnHeader, ValidationErrors validationErrors, int row) { if(StringUtils.isBlank(value)) { @@ -900,6 +962,16 @@ private boolean validDateValue(String value) { return true; } + private boolean validDateTimeValue(String value) { + DateTimeFormatter formatter = DateTimeFormatter.ISO_DATE_TIME; + try { + formatter.parse(value); + } catch (DateTimeParseException e) { + return false; + } + return true; + } + private boolean validCategory(List categories, String value) { Set categoryValues = categories.stream().map(category -> category.getValue().toLowerCase()).collect(Collectors.toSet()); return categoryValues.contains(value.toLowerCase()); diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ProcessorManager.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ProcessorManager.java index cc281df34..c6bc2ec3e 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ProcessorManager.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ProcessorManager.java @@ -69,6 +69,7 @@ public ImportPreviewResponse process(List importRows, List mappedBrAPIImportList = new ArrayList<>(mappedBrAPIImport.values()); response.setRows(mappedBrAPIImportList); + response.setDynamicColumnNames(upload.getDynamicColumnNamesList()); statusService.updateMappedData(upload, response, "Finished mapping data to brapi objects"); diff --git a/src/main/java/org/breedinginsight/utilities/FileUtil.java b/src/main/java/org/breedinginsight/utilities/FileUtil.java index 6427bca49..a52ff1ee5 100644 --- a/src/main/java/org/breedinginsight/utilities/FileUtil.java +++ b/src/main/java/org/breedinginsight/utilities/FileUtil.java @@ -25,6 +25,7 @@ import tech.tablesaw.api.ColumnType; import tech.tablesaw.api.StringColumn; import 
tech.tablesaw.api.Table; +import tech.tablesaw.io.csv.CsvReadOptions; import tech.tablesaw.io.json.JsonReadOptions; import java.io.*; @@ -69,8 +70,14 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR if (cell == null) { columns.get(header).add(null); } else if (cell.getCellType() == CellType.NUMERIC) { - double cellValue = cell.getNumericCellValue(); - String stringValue = BigDecimal.valueOf(cellValue).stripTrailingZeros().toPlainString(); + //Distinguish between date and numeric + DataFormatter dataFormatter = new DataFormatter(); + String stringValue = dataFormatter.formatCellValue(cell); + if (!stringValue.contains("-")) { + //No dashes, assume cell is numeric and not date + double cellValue = cell.getNumericCellValue(); + stringValue = BigDecimal.valueOf(cellValue).stripTrailingZeros().toPlainString(); + } columns.get(header).add(stringValue); } else { columns.get(header).add(formatter.formatCellValue(cell)); @@ -101,7 +108,15 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR public static Table parseTableFromCsv(InputStream inputStream) throws ParsingException { //TODO: See if this has the windows BOM issue try { - Table df = Table.read().csv(inputStream); + //Jackson used downstream messily converts LOCAL_DATE/LOCAL_DATETIME, so need to interpret date input as strings + //Note that if another type is needed later this is what needs to be updated + ArrayList acceptedTypes = new ArrayList<>(Arrays.asList(ColumnType.STRING, ColumnType.INTEGER, ColumnType.DOUBLE, ColumnType.FLOAT)); + Table df = Table.read().usingOptions( + CsvReadOptions + .builder(inputStream) + .columnTypesToDetect(acceptedTypes) + .separator(',') + ); return removeNullRows(df); } catch (IOException e) { log.error(e.getMessage()); diff --git a/src/main/resources/db/migration/V1.0.9__add_dynamic_column_storage.sql b/src/main/resources/db/migration/V1.0.9__add_dynamic_column_storage.sql new file mode 100644 index 
000000000..5f65addf6 --- /dev/null +++ b/src/main/resources/db/migration/V1.0.9__add_dynamic_column_storage.sql @@ -0,0 +1,18 @@ +/* + * See the NOTICE file distributed with this work for additional information + * regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +alter table importer_import add column dynamic_column_names text[];