From 97c8e957b82ce05d24141683b00fe2faa8db3901 Mon Sep 17 00:00:00 2001 From: hms243 Date: Thu, 10 Nov 2022 12:54:30 -0500 Subject: [PATCH 01/12] [BI-1194] - Upload with timestamps --- .../processors/ExperimentProcessor.java | 79 ++++++++++++++++++- .../breedinginsight/utilities/FileUtil.java | 21 ++++- 2 files changed, 94 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index b0adb1d9e..152c0acd9 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -64,6 +64,7 @@ import javax.inject.Inject; import java.math.BigDecimal; import java.math.BigInteger; +import java.time.OffsetDateTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.*; @@ -115,6 +116,9 @@ public class ExperimentProcessor implements Processor { // existingGermplasmByGID is populated by getExistingBrapiData(), but not updated by the getNewBrapiData() method private Map> existingGermplasmByGID = null; + // Associates timestamp columns to associated phenotype column name for ease of storage + private Map timeStampColByPheno = new HashMap<>(); + @Inject public ExperimentProcessor(DSLContext dsl, BrAPITrialDAO brapiTrialDAO, @@ -180,8 +184,21 @@ public Map process( ValidationErrors validationErrors = new ValidationErrors(); // Get dynamic phenotype columns for processing - List> phenotypeCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME); + List> dynamicCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME); + List> phenotypeCols = new ArrayList<>(); + List> timestampCols = new ArrayList<>(); + //todo can maybe be clever later with filter and differences + for (Column dynamicCol: dynamicCols) { + //Distinguish between phenotype and timestamp columns + if (dynamicCol.name().startsWith("TS: ")) { + timestampCols.add(dynamicCol); + } else { + phenotypeCols.add(dynamicCol); + } + } + List varNames = phenotypeCols.stream().map(Column::name).collect(Collectors.toList()); + List tsNames = timestampCols.stream().map(Column::name).collect(Collectors.toList()); // Lookup all traits in system for program, maybe eventually add a variable search in ontology service List traits = null; @@ -208,6 +225,20 @@ public Map process( "Ontology term(s) not found: " + String.join(", ", differences)); } + // Check that each ts column corresponds to a phenotype column + List unmatchedTimestamps = tsNames.stream() + .filter(e -> !(varNames.contains(e.replaceFirst("TS: ","")))) + .collect(Collectors.toList()); + if (unmatchedTimestamps.size() > 0) { + throw new HttpStatusException(HttpStatus.UNPROCESSABLE_ENTITY, + "Timestamp column(s) lack corresponding phenotype column(s): " + String.join(", ", unmatchedTimestamps)); + } + + //Now know timestamps all valid phenotypes, can associate with phenotype column name for easy retrieval + for (Column tsColumn: timestampCols) { + timeStampColByPheno.put(tsColumn.name().replaceFirst("TS: ",""), tsColumn); + } + // Perform ontology validations on each observation value in phenotype column Map colVarMap = filteredTraits.stream() .collect(Collectors.toMap(Trait::getObservationVariableName, Function.identity())); @@ -220,6 +251,15 @@ public Map process( } } + //Timestamp validation + for (Column column : timestampCols) { + for (int i=0; i < column.size(); i++) { + String value = column.getString(i); + String colName = column.name(); + validateTimeStampValue(colVarMap.get(colName), value, colName, validationErrors, i); + } + } + // add "New" pending data to the BrapiData objects getNewBrapiData(importRows, phenotypeCols, program, user, commit); @@ -319,7 +359,16 @@ private void getNewBrapiData(List importRows, List> pheno this.observationUnitByNameNoScope.put(key, obsUnitPIO); for (Column column : phenotypeCols) { - PendingImportObject obsPIO = createObservationPIO(importRow, column.name(), column.getString(i)); + //If associated timestamp column, add + String dateTimeValue = null; + if (timeStampColByPheno.get(column.name())!=null) { + dateTimeValue = timeStampColByPheno.get(column.name()).getString(i); + //If no timestamp, set to midnight + if (!validDateTimeValue(dateTimeValue)){ + dateTimeValue+="T00:00:00-00:00"; + } + } + PendingImportObject obsPIO = createObservationPIO(importRow, column.name(), column.getString(i), dateTimeValue); this.observationByHash.put(getImportObservationHash(importRow, getVariableNameFromColumn(column)), obsPIO); } } @@ -526,13 +575,16 @@ private PendingImportObject createObsUnitPIO(Program progr } - private PendingImportObject createObservationPIO(ExperimentObservation importRow, String variableName, String value) { + private PendingImportObject createObservationPIO(ExperimentObservation importRow, String variableName, String value, String timeStampValue) { PendingImportObject pio = null; if (this.observationByHash.containsKey(getImportObservationHash(importRow, variableName))) { pio = observationByHash.get(getImportObservationHash(importRow, variableName)); } else { BrAPIObservation newObservation = importRow.constructBrAPIObservation(value, variableName); + if (timeStampValue != null) { + newObservation.setObservationTimeStamp(OffsetDateTime.parse(timeStampValue)); + } pio = new PendingImportObject<>(ImportObjectState.NEW, newObservation); } return pio; @@ -837,6 +889,17 @@ private String simpleStudyName(String scopedName){ return scopedName.replaceFirst(" \\[.*\\]", ""); } + private void validateTimeStampValue(Trait variable, String value, + String columnHeader, ValidationErrors validationErrors, int row){ + if(StringUtils.isBlank(value)) { + log.debug(String.format("skipping validation of observation timestamp because there is no value.\n\tvariable: %s\n\trow: %d", variable.getObservationVariableName(), row)); + return; + } + if (!validDateValue(value) && !validDateTimeValue(value)) { + addRowError(columnHeader, "Incorrect datetime format detected. Expected YYYY-MM-DD or YYYY-MM-DDThh:mm:ss+hh:mm", validationErrors, row); + } + + } private void validateObservationValue(Trait variable, String value, String columnHeader, ValidationErrors validationErrors, int row) { if(StringUtils.isBlank(value)) { @@ -900,6 +963,16 @@ private boolean validDateValue(String value) { return true; } + private boolean validDateTimeValue(String value) { + DateTimeFormatter formatter = DateTimeFormatter.ISO_DATE_TIME; + try { + formatter.parse(value); + } catch (DateTimeParseException e) { + return false; + } + return true; + } + private boolean validCategory(List categories, String value) { Set categoryValues = categories.stream().map(category -> category.getValue().toLowerCase()).collect(Collectors.toSet()); return categoryValues.contains(value.toLowerCase()); diff --git a/src/main/java/org/breedinginsight/utilities/FileUtil.java b/src/main/java/org/breedinginsight/utilities/FileUtil.java index 6427bca49..a52ff1ee5 100644 --- a/src/main/java/org/breedinginsight/utilities/FileUtil.java +++ b/src/main/java/org/breedinginsight/utilities/FileUtil.java @@ -25,6 +25,7 @@ import tech.tablesaw.api.ColumnType; import tech.tablesaw.api.StringColumn; import tech.tablesaw.api.Table; +import tech.tablesaw.io.csv.CsvReadOptions; import tech.tablesaw.io.json.JsonReadOptions; import java.io.*; @@ -69,8 +70,14 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR if (cell == null) { columns.get(header).add(null); } else if (cell.getCellType() == CellType.NUMERIC) { - double cellValue = cell.getNumericCellValue(); - String stringValue = BigDecimal.valueOf(cellValue).stripTrailingZeros().toPlainString(); + //Distinguish between date and numeric + DataFormatter dataFormatter = new DataFormatter(); + String stringValue = dataFormatter.formatCellValue(cell); + if (!stringValue.contains("-")) { + //No dashes, assume cell is numeric and not date + double cellValue = cell.getNumericCellValue(); + stringValue = BigDecimal.valueOf(cellValue).stripTrailingZeros().toPlainString(); + } columns.get(header).add(stringValue); } else { columns.get(header).add(formatter.formatCellValue(cell)); @@ -101,7 +108,15 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR public static Table parseTableFromCsv(InputStream inputStream) throws ParsingException { //TODO: See if this has the windows BOM issue try { - Table df = Table.read().csv(inputStream); + //Jackson used downstream messily converts LOCAL_DATE/LOCAL_DATETIME, so need to interpret date input as strings + //Note that if another type is needed later this is what needs to be updated + ArrayList acceptedTypes = new ArrayList<>(Arrays.asList(ColumnType.STRING, ColumnType.INTEGER, ColumnType.DOUBLE, ColumnType.FLOAT)); + Table df = Table.read().usingOptions( + CsvReadOptions + .builder(inputStream) + .columnTypesToDetect(acceptedTypes) + .separator(',') + ); return removeNullRows(df); } catch (IOException e) { log.error(e.getMessage()); From ace3b78da2b9620bf64315880e03a9254f664d19 Mon Sep 17 00:00:00 2001 From: Nick Palladino Date: Thu, 10 Nov 2022 16:21:03 -0500 Subject: [PATCH 02/12] Reverse column names order for now --- .../brapps/importer/services/FileMappingUtil.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java b/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java index b87c2a62c..6a1a8e15a 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java @@ -32,10 +32,7 @@ import javax.inject.Inject; import javax.inject.Singleton; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; @Singleton @@ -97,7 +94,10 @@ public List> getDynamicColumns(Table data, String templateName) { } } - List differences = data.columnNames().stream() + List names = data.columnNames(); + Collections.reverse(names); + + List differences = names.stream() .filter(col -> !columnNames.contains(col)) .collect(Collectors.toList()); From 3424d9fa3c2e11d87c6e99ec472c90cd21837c38 Mon Sep 17 00:00:00 2001 From: hms243 Date: Fri, 11 Nov 2022 13:09:33 -0500 Subject: [PATCH 03/12] [BI-1194] - Handle blank timestamps --- .../services/processors/ExperimentProcessor.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index 152c0acd9..f00102f41 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -256,7 +256,7 @@ public Map process( for (int i=0; i < column.size(); i++) { String value = column.getString(i); String colName = column.name(); - validateTimeStampValue(colVarMap.get(colName), value, colName, validationErrors, i); + validateTimeStampValue(value, colName, validationErrors, i); } } @@ -364,7 +364,7 @@ private void getNewBrapiData(List importRows, List> pheno if (timeStampColByPheno.get(column.name())!=null) { dateTimeValue = timeStampColByPheno.get(column.name()).getString(i); //If no timestamp, set to midnight - if (!validDateTimeValue(dateTimeValue)){ + if (!validDateTimeValue(dateTimeValue) && !dateTimeValue.isBlank()){ dateTimeValue+="T00:00:00-00:00"; } } @@ -380,11 +380,9 @@ private String createObservationUnitKey(ExperimentObservation importRow) { } private String getImportObservationHash(ExperimentObservation importRow, String variableName) { - // TODO: handle timestamps once we support them return getObservationHash(createObservationUnitKey(importRow), variableName, importRow.getEnv()); } - //TODO: Add timestamp parameter once we support them private String getObservationHash(String observationUnitName, String variableName, String studyName) { String concat = DigestUtils.sha256Hex(observationUnitName) + DigestUtils.sha256Hex(variableName) + @@ -582,7 +580,7 @@ private PendingImportObject createObservationPIO(ExperimentObs } else { BrAPIObservation newObservation = importRow.constructBrAPIObservation(value, variableName); - if (timeStampValue != null) { + if (timeStampValue != null && !timeStampValue.isBlank()) { newObservation.setObservationTimeStamp(OffsetDateTime.parse(timeStampValue)); } pio = new PendingImportObject<>(ImportObjectState.NEW, newObservation); @@ -889,10 +887,10 @@ private String simpleStudyName(String scopedName){ return scopedName.replaceFirst(" \\[.*\\]", ""); } - private void validateTimeStampValue(Trait variable, String value, + private void validateTimeStampValue(String value, String columnHeader, ValidationErrors validationErrors, int row){ if(StringUtils.isBlank(value)) { - log.debug(String.format("skipping validation of observation timestamp because there is no value.\n\tvariable: %s\n\trow: %d", variable.getObservationVariableName(), row)); + log.debug(String.format("skipping validation of observation timestamp because there is no value.\n\tvariable: %s\n\trow: %d", columnHeader, row)); return; } if (!validDateValue(value) && !validDateTimeValue(value)) { From bc7f3e1d5c28245044fc2979c2a09774c00afa57 Mon Sep 17 00:00:00 2001 From: hms243 Date: Thu, 10 Nov 2022 12:54:30 -0500 Subject: [PATCH 04/12] [BI-1194] - Upload with timestamps --- .../processors/ExperimentProcessor.java | 79 ++++++++++++++++++- .../breedinginsight/utilities/FileUtil.java | 21 ++++- 2 files changed, 94 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index b0adb1d9e..152c0acd9 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -64,6 +64,7 @@ import javax.inject.Inject; import java.math.BigDecimal; import java.math.BigInteger; +import java.time.OffsetDateTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.*; @@ -115,6 +116,9 @@ public class ExperimentProcessor implements Processor { // existingGermplasmByGID is populated by getExistingBrapiData(), but not updated by the getNewBrapiData() method private Map> existingGermplasmByGID = null; + // Associates timestamp columns to associated phenotype column name for ease of storage + private Map timeStampColByPheno = new HashMap<>(); + @Inject public ExperimentProcessor(DSLContext dsl, BrAPITrialDAO brapiTrialDAO, @@ -180,8 +184,21 @@ public Map process( ValidationErrors validationErrors = new ValidationErrors(); // Get dynamic phenotype columns for processing - List> phenotypeCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME); + List> dynamicCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME); + List> phenotypeCols = new ArrayList<>(); + List> timestampCols = new ArrayList<>(); + //todo can maybe be clever later with filter and differences + for (Column dynamicCol: dynamicCols) { + //Distinguish between phenotype and timestamp columns + if (dynamicCol.name().startsWith("TS: ")) { + timestampCols.add(dynamicCol); + } else { + phenotypeCols.add(dynamicCol); + } + } + List varNames = phenotypeCols.stream().map(Column::name).collect(Collectors.toList()); + List tsNames = timestampCols.stream().map(Column::name).collect(Collectors.toList()); // Lookup all traits in system for program, maybe eventually add a variable search in ontology service List traits = null; @@ -208,6 +225,20 @@ public Map process( "Ontology term(s) not found: " + String.join(", ", differences)); } + // Check that each ts column corresponds to a phenotype column + List unmatchedTimestamps = tsNames.stream() + .filter(e -> !(varNames.contains(e.replaceFirst("TS: ","")))) + .collect(Collectors.toList()); + if (unmatchedTimestamps.size() > 0) { + throw new HttpStatusException(HttpStatus.UNPROCESSABLE_ENTITY, + "Timestamp column(s) lack corresponding phenotype column(s): " + String.join(", ", unmatchedTimestamps)); + } + + //Now know timestamps all valid phenotypes, can associate with phenotype column name for easy retrieval + for (Column tsColumn: timestampCols) { + timeStampColByPheno.put(tsColumn.name().replaceFirst("TS: ",""), tsColumn); + } + // Perform ontology validations on each observation value in phenotype column Map colVarMap = filteredTraits.stream() .collect(Collectors.toMap(Trait::getObservationVariableName, Function.identity())); @@ -220,6 +251,15 @@ public Map process( } } + //Timestamp validation + for (Column column : timestampCols) { + for (int i=0; i < column.size(); i++) { + String value = column.getString(i); + String colName = column.name(); + validateTimeStampValue(colVarMap.get(colName), value, colName, validationErrors, i); + } + } + // add "New" pending data to the BrapiData objects getNewBrapiData(importRows, phenotypeCols, program, user, commit); @@ -319,7 +359,16 @@ private void getNewBrapiData(List importRows, List> pheno this.observationUnitByNameNoScope.put(key, obsUnitPIO); for (Column column : phenotypeCols) { - PendingImportObject obsPIO = createObservationPIO(importRow, column.name(), column.getString(i)); + //If associated timestamp column, add + String dateTimeValue = null; + if (timeStampColByPheno.get(column.name())!=null) { + dateTimeValue = timeStampColByPheno.get(column.name()).getString(i); + //If no timestamp, set to midnight + if (!validDateTimeValue(dateTimeValue)){ + dateTimeValue+="T00:00:00-00:00"; + } + } + PendingImportObject obsPIO = createObservationPIO(importRow, column.name(), column.getString(i), dateTimeValue); this.observationByHash.put(getImportObservationHash(importRow, getVariableNameFromColumn(column)), obsPIO); } } @@ -526,13 +575,16 @@ private PendingImportObject createObsUnitPIO(Program progr } - private PendingImportObject createObservationPIO(ExperimentObservation importRow, String variableName, String value) { + private PendingImportObject createObservationPIO(ExperimentObservation importRow, String variableName, String value, String timeStampValue) { PendingImportObject pio = null; if (this.observationByHash.containsKey(getImportObservationHash(importRow, variableName))) { pio = observationByHash.get(getImportObservationHash(importRow, variableName)); } else { BrAPIObservation newObservation = importRow.constructBrAPIObservation(value, variableName); + if (timeStampValue != null) { + newObservation.setObservationTimeStamp(OffsetDateTime.parse(timeStampValue)); + } pio = new PendingImportObject<>(ImportObjectState.NEW, newObservation); } return pio; @@ -837,6 +889,17 @@ private String simpleStudyName(String scopedName){ return scopedName.replaceFirst(" \\[.*\\]", ""); } + private void validateTimeStampValue(Trait variable, String value, + String columnHeader, ValidationErrors validationErrors, int row){ + if(StringUtils.isBlank(value)) { + log.debug(String.format("skipping validation of observation timestamp because there is no value.\n\tvariable: %s\n\trow: %d", variable.getObservationVariableName(), row)); + return; + } + if (!validDateValue(value) && !validDateTimeValue(value)) { + addRowError(columnHeader, "Incorrect datetime format detected. Expected YYYY-MM-DD or YYYY-MM-DDThh:mm:ss+hh:mm", validationErrors, row); + } + + } private void validateObservationValue(Trait variable, String value, String columnHeader, ValidationErrors validationErrors, int row) { if(StringUtils.isBlank(value)) { @@ -900,6 +963,16 @@ private boolean validDateValue(String value) { return true; } + private boolean validDateTimeValue(String value) { + DateTimeFormatter formatter = DateTimeFormatter.ISO_DATE_TIME; + try { + formatter.parse(value); + } catch (DateTimeParseException e) { + return false; + } + return true; + } + private boolean validCategory(List categories, String value) { Set categoryValues = categories.stream().map(category -> category.getValue().toLowerCase()).collect(Collectors.toSet()); return categoryValues.contains(value.toLowerCase()); diff --git a/src/main/java/org/breedinginsight/utilities/FileUtil.java b/src/main/java/org/breedinginsight/utilities/FileUtil.java index 6427bca49..a52ff1ee5 100644 --- a/src/main/java/org/breedinginsight/utilities/FileUtil.java +++ b/src/main/java/org/breedinginsight/utilities/FileUtil.java @@ -25,6 +25,7 @@ import tech.tablesaw.api.ColumnType; import tech.tablesaw.api.StringColumn; import tech.tablesaw.api.Table; +import tech.tablesaw.io.csv.CsvReadOptions; import tech.tablesaw.io.json.JsonReadOptions; import java.io.*; @@ -69,8 +70,14 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR if (cell == null) { columns.get(header).add(null); } else if (cell.getCellType() == CellType.NUMERIC) { - double cellValue = cell.getNumericCellValue(); - String stringValue = BigDecimal.valueOf(cellValue).stripTrailingZeros().toPlainString(); + //Distinguish between date and numeric + DataFormatter dataFormatter = new DataFormatter(); + String stringValue = dataFormatter.formatCellValue(cell); + if (!stringValue.contains("-")) { + //No dashes, assume cell is numeric and not date + double cellValue = cell.getNumericCellValue(); + stringValue = BigDecimal.valueOf(cellValue).stripTrailingZeros().toPlainString(); + } columns.get(header).add(stringValue); } else { columns.get(header).add(formatter.formatCellValue(cell)); @@ -101,7 +108,15 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR public static Table parseTableFromCsv(InputStream inputStream) throws ParsingException { //TODO: See if this has the windows BOM issue try { - Table df = Table.read().csv(inputStream); + //Jackson used downstream messily converts LOCAL_DATE/LOCAL_DATETIME, so need to interpret date input as strings + //Note that if another type is needed later this is what needs to be updated + ArrayList acceptedTypes = new ArrayList<>(Arrays.asList(ColumnType.STRING, ColumnType.INTEGER, ColumnType.DOUBLE, ColumnType.FLOAT)); + Table df = Table.read().usingOptions( + CsvReadOptions + .builder(inputStream) + .columnTypesToDetect(acceptedTypes) + .separator(',') + ); return removeNullRows(df); } catch (IOException e) { log.error(e.getMessage()); From 49b0de47666fb6dd3b996b5da186e6e7ab1d058e Mon Sep 17 00:00:00 2001 From: hms243 Date: Fri, 11 Nov 2022 13:09:33 -0500 Subject: [PATCH 05/12] [BI-1194] - Handle blank timestamps --- .../services/processors/ExperimentProcessor.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index 152c0acd9..f00102f41 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -256,7 +256,7 @@ public Map process( for (int i=0; i < column.size(); i++) { String value = column.getString(i); String colName = column.name(); - validateTimeStampValue(colVarMap.get(colName), value, colName, validationErrors, i); + validateTimeStampValue(value, colName, validationErrors, i); } } @@ -364,7 +364,7 @@ private void getNewBrapiData(List importRows, List> pheno if (timeStampColByPheno.get(column.name())!=null) { dateTimeValue = timeStampColByPheno.get(column.name()).getString(i); //If no timestamp, set to midnight - if (!validDateTimeValue(dateTimeValue)){ + if (!validDateTimeValue(dateTimeValue) && !dateTimeValue.isBlank()){ dateTimeValue+="T00:00:00-00:00"; } } @@ -380,11 +380,9 @@ private String createObservationUnitKey(ExperimentObservation importRow) { } private String getImportObservationHash(ExperimentObservation importRow, String variableName) { - // TODO: handle timestamps once we support them return getObservationHash(createObservationUnitKey(importRow), variableName, importRow.getEnv()); } - //TODO: Add timestamp parameter once we support them private String getObservationHash(String observationUnitName, String variableName, String studyName) { String concat = DigestUtils.sha256Hex(observationUnitName) + DigestUtils.sha256Hex(variableName) + @@ -582,7 +580,7 @@ private PendingImportObject createObservationPIO(ExperimentObs } else { BrAPIObservation newObservation = importRow.constructBrAPIObservation(value, variableName); - if (timeStampValue != null) { + if (timeStampValue != null && !timeStampValue.isBlank()) { newObservation.setObservationTimeStamp(OffsetDateTime.parse(timeStampValue)); } pio = new PendingImportObject<>(ImportObjectState.NEW, newObservation); @@ -889,10 +887,10 @@ private String simpleStudyName(String scopedName){ return scopedName.replaceFirst(" \\[.*\\]", ""); } - private void validateTimeStampValue(Trait variable, String value, + private void validateTimeStampValue(String value, String columnHeader, ValidationErrors validationErrors, int row){ if(StringUtils.isBlank(value)) { - log.debug(String.format("skipping validation of observation timestamp because there is no value.\n\tvariable: %s\n\trow: %d", variable.getObservationVariableName(), row)); + log.debug(String.format("skipping validation of observation timestamp because there is no value.\n\tvariable: %s\n\trow: %d", columnHeader, row)); return; } if (!validDateValue(value) && !validDateTimeValue(value)) { From d5b4c87d143a0afd816785322518c1fb4c40994f Mon Sep 17 00:00:00 2001 From: hms243 Date: Tue, 15 Nov 2022 16:14:12 -0500 Subject: [PATCH 06/12] [BI-1194] - Preview display and ordering --- .../brapps/importer/model/ImportUpload.java | 12 ++++++++++++ .../model/response/ImportPreviewResponse.java | 1 + .../importer/services/FileImportService.java | 5 +++++ .../services/processors/ProcessorManager.java | 1 + .../V1.0.9__add_dynamic_column_storage.sql | 18 ++++++++++++++++++ 5 files changed, 37 insertions(+) create mode 100644 src/main/resources/db/migration/V1.0.9__add_dynamic_column_storage.sql diff --git a/src/main/java/org/breedinginsight/brapps/importer/model/ImportUpload.java b/src/main/java/org/breedinginsight/brapps/importer/model/ImportUpload.java index ab0272ba8..1ed883a7d 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/model/ImportUpload.java +++ b/src/main/java/org/breedinginsight/brapps/importer/model/ImportUpload.java @@ -17,6 +17,7 @@ package org.breedinginsight.brapps.importer.model; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -34,6 +35,7 @@ import org.jooq.Record; import tech.tablesaw.api.Table; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -69,6 +71,15 @@ public void updateProgress(Integer finished, Integer inProgress) { progress.setInProgress((long) inProgress); } + public void setDynamicColumnNames(List dynamicColumnNames) { + super.setDynamicColumnNames(dynamicColumnNames.toArray(new String[0])); + } + + @JsonIgnore + public List getDynamicColumnNamesList(){ + return Arrays.asList(super.getDynamicColumnNames()); + } + public static ImportUpload parseSQLRecord(Record record) { return ImportUpload.uploadBuilder() @@ -85,6 +96,7 @@ public static ImportUpload parseSQLRecord(Record record) { .updatedAt(record.getValue(IMPORTER_IMPORT.UPDATED_AT)) .createdBy(record.getValue(IMPORTER_IMPORT.CREATED_BY)) .updatedBy(record.getValue(IMPORTER_IMPORT.UPDATED_BY)) + .dynamicColumnNames(record.getValue(IMPORTER_IMPORT.DYNAMIC_COLUMN_NAMES)) .build(); } } diff --git a/src/main/java/org/breedinginsight/brapps/importer/model/response/ImportPreviewResponse.java b/src/main/java/org/breedinginsight/brapps/importer/model/response/ImportPreviewResponse.java index 0d5fe4e5d..b46fc6335 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/model/response/ImportPreviewResponse.java +++ b/src/main/java/org/breedinginsight/brapps/importer/model/response/ImportPreviewResponse.java @@ -29,4 +29,5 @@ public class ImportPreviewResponse { private Map statistics; private List rows; + private List dynamicColumnNames; } diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java b/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java index cf9c6b916..1acbcbf58 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java @@ -300,6 +300,11 @@ public ImportResponse uploadData(UUID programId, UUID mappingId, AuthenticatedUs newUpload.setCreatedBy(actingUser.getId()); newUpload.setUpdatedBy(actingUser.getId()); + List mappingCols = importMapping.getMappingConfig().stream().map(field -> field.getValue().getFileFieldName()).collect(Collectors.toList()); + List dynamicCols = data.columnNames().stream() + .filter(column -> !mappingCols.contains(column)).collect(Collectors.toList()); + newUpload.setDynamicColumnNames(dynamicCols); + // Create a progress object ImportProgress importProgress = new ImportProgress(); importProgress.setCreatedBy(actingUser.getId()); diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ProcessorManager.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ProcessorManager.java index cc281df34..c6bc2ec3e 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ProcessorManager.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ProcessorManager.java @@ -69,6 +69,7 @@ public ImportPreviewResponse process(List importRows, List mappedBrAPIImportList = new ArrayList<>(mappedBrAPIImport.values()); response.setRows(mappedBrAPIImportList); + response.setDynamicColumnNames(upload.getDynamicColumnNamesList()); statusService.updateMappedData(upload, response, "Finished mapping data to brapi objects"); diff --git a/src/main/resources/db/migration/V1.0.9__add_dynamic_column_storage.sql b/src/main/resources/db/migration/V1.0.9__add_dynamic_column_storage.sql new file mode 100644 index 000000000..5f65addf6 --- /dev/null +++ b/src/main/resources/db/migration/V1.0.9__add_dynamic_column_storage.sql @@ -0,0 +1,18 @@ +/* + * See the NOTICE file distributed with this work for additional information + * regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +alter table importer_import add column dynamic_column_names text[]; \ No newline at end of file From 8e7bb96cc35b0e88f20b47936686ce99d2a387c5 Mon Sep 17 00:00:00 2001 From: hms243 Date: Wed, 16 Nov 2022 13:50:22 -0500 Subject: [PATCH 07/12] [BI-1194] - Whitespace flexibility --- .../brapps/importer/services/FileMappingUtil.java | 5 +---- .../importer/services/processors/ExperimentProcessor.java | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java b/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java index 6a1a8e15a..b70c11f30 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/FileMappingUtil.java @@ -94,10 +94,7 @@ public List> getDynamicColumns(Table data, String templateName) { } } - List names = data.columnNames(); - Collections.reverse(names); - - List differences = names.stream() + List differences = data.columnNames().stream() .filter(col -> !columnNames.contains(col)) .collect(Collectors.toList()); diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index f00102f41..0c1553592 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -190,7 +190,7 @@ public Map process( //todo can maybe be clever later with filter and differences for (Column dynamicCol: dynamicCols) { //Distinguish between phenotype and timestamp columns - if (dynamicCol.name().startsWith("TS: ")) { + if (dynamicCol.name().startsWith("TS:")) { timestampCols.add(dynamicCol); } else { phenotypeCols.add(dynamicCol); @@ -227,7 +227,7 @@ public Map process( // Check that each ts column corresponds to a phenotype column List unmatchedTimestamps = tsNames.stream() - .filter(e -> !(varNames.contains(e.replaceFirst("TS: ","")))) + .filter(e -> !(varNames.contains(e.replaceFirst("^TS:\\s*","")))) .collect(Collectors.toList()); if (unmatchedTimestamps.size() > 0) { throw new HttpStatusException(HttpStatus.UNPROCESSABLE_ENTITY, @@ -236,7 +236,7 @@ public Map process( //Now know timestamps all valid phenotypes, can associate with phenotype column name for easy retrieval for (Column tsColumn: timestampCols) { - timeStampColByPheno.put(tsColumn.name().replaceFirst("TS: ",""), tsColumn); + timeStampColByPheno.put(tsColumn.name().replaceFirst("^TS:\\s*",""), tsColumn); } // Perform ontology validations on each observation value in phenotype column From 6eea2a631863c5191ac6d1cfd1f2d6ccdacdc998 Mon Sep 17 00:00:00 2001 From: hms243 Date: Thu, 17 Nov 2022 12:03:57 -0500 Subject: [PATCH 08/12] [BI-1194] - Handle different mapping structures --- .../importer/services/FileImportService.java | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java b/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java index 1acbcbf58..4aaf4a743 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/FileImportService.java @@ -299,11 +299,7 @@ public ImportResponse uploadData(UUID programId, UUID mappingId, AuthenticatedUs newUpload.setUserId(actingUser.getId()); newUpload.setCreatedBy(actingUser.getId()); newUpload.setUpdatedBy(actingUser.getId()); - - List mappingCols = importMapping.getMappingConfig().stream().map(field -> field.getValue().getFileFieldName()).collect(Collectors.toList()); - List dynamicCols = data.columnNames().stream() - .filter(column -> !mappingCols.contains(column)).collect(Collectors.toList()); - newUpload.setDynamicColumnNames(dynamicCols); + newUpload = setDynamicColumns(newUpload, data, importMapping); // Create a progress object ImportProgress importProgress = new ImportProgress(); @@ -402,6 +398,26 @@ public ImportResponse updateUpload(UUID programId, UUID uploadId, AuthenticatedU return importResponse; } + /** + * If mapping has experiment structure, retrieve dynamic columns + * Experiment and germplasm mapping presently have different structures + * @param newUpload + * @param data + * @param importMapping + * @return updated newUpload with dynamic columns set + */ + public ImportUpload setDynamicColumns(ImportUpload newUpload, Table data, ImportMapping importMapping) { + if (importMapping.getMappingConfig().get(0).getValue() != null) { + List mappingCols = importMapping.getMappingConfig().stream().map(field -> field.getValue().getFileFieldName()).collect(Collectors.toList()); + List dynamicCols = data.columnNames().stream() + .filter(column -> !mappingCols.contains(column)).collect(Collectors.toList()); + newUpload.setDynamicColumnNames(dynamicCols); + } else { + newUpload.setDynamicColumnNames(new ArrayList<>()); + } + return newUpload; + } + private void processFile(List finalBrAPIImportList, Table data, Program program, ImportUpload upload, User user, Boolean commit, BrAPIImportService importService, AuthenticatedUser actingUser) { From a6a0b9ba1a77e66af1456178b43183dfa9435351 Mon Sep 17 00:00:00 2001 From: hms243 Date: Thu, 17 Nov 2022 15:26:02 -0500 Subject: [PATCH 09/12] [BI-1194] - Cleanup --- .../brapps/importer/services/processors/ExperimentProcessor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index 0c1553592..86a6fdfbe 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -187,7 +187,6 @@ public Map process( List> dynamicCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME); List> phenotypeCols = new ArrayList<>(); List> timestampCols = new ArrayList<>(); - //todo can maybe be clever later with filter and differences for (Column dynamicCol: dynamicCols) { //Distinguish between phenotype and timestamp columns if (dynamicCol.name().startsWith("TS:")) { From 7faee933e341af5b9e75780daf95aed77157ddcc Mon Sep 17 00:00:00 2001 From: hms243 Date: Thu, 17 Nov 2022 16:54:51 -0500 Subject: [PATCH 10/12] [BI-1194] - Timestamp validation fix --- .../importer/services/processors/ExperimentProcessor.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index 86a6fdfbe..b649d2630 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -579,7 +579,9 @@ private PendingImportObject createObservationPIO(ExperimentObs } else { BrAPIObservation newObservation = importRow.constructBrAPIObservation(value, variableName); - if (timeStampValue != null && !timeStampValue.isBlank()) { + //NOTE: Can't parse invalid timestamp value, so have to skip if invalid. + // Validation error should be thrown for offending value, but that doesn't happen until later downstream + if (timeStampValue != null && !timeStampValue.isBlank() && (validDateValue(value) || validDateTimeValue(value))) { newObservation.setObservationTimeStamp(OffsetDateTime.parse(timeStampValue)); } pio = new PendingImportObject<>(ImportObjectState.NEW, newObservation); From e8fad5c83ce49ac71e487e686c8ead95703130cf Mon Sep 17 00:00:00 2001 From: hms243 Date: Mon, 28 Nov 2022 12:45:27 -0500 Subject: [PATCH 11/12] [BI-1194] - Fix timestamp display --- .../importer/services/processors/ExperimentProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index b649d2630..2bd0b50f8 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -581,7 +581,7 @@ private PendingImportObject createObservationPIO(ExperimentObs BrAPIObservation newObservation = importRow.constructBrAPIObservation(value, variableName); //NOTE: Can't parse invalid timestamp value, so have to skip if invalid. // Validation error should be thrown for offending value, but that doesn't happen until later downstream - if (timeStampValue != null && !timeStampValue.isBlank() && (validDateValue(value) || validDateTimeValue(value))) { + if (timeStampValue != null && !timeStampValue.isBlank() && (validDateValue(timeStampValue) || validDateTimeValue(timeStampValue))) { newObservation.setObservationTimeStamp(OffsetDateTime.parse(timeStampValue)); } pio = new PendingImportObject<>(ImportObjectState.NEW, newObservation); From e75dc3f835a2641ac1ff6e780eda0309db2c117e Mon Sep 17 00:00:00 2001 From: hms243 Date: Tue, 29 Nov 2022 14:22:14 -0500 Subject: [PATCH 12/12] [BI-1194] - Code review improvements --- .../importer/services/processors/ExperimentProcessor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java index 2bd0b50f8..a32668134 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/ExperimentProcessor.java @@ -360,10 +360,10 @@ private void getNewBrapiData(List importRows, List> pheno for (Column column : phenotypeCols) { //If associated timestamp column, add String dateTimeValue = null; - if (timeStampColByPheno.get(column.name())!=null) { + if (timeStampColByPheno.get(column.name()) != null) { dateTimeValue = timeStampColByPheno.get(column.name()).getString(i); //If no timestamp, set to midnight - if (!validDateTimeValue(dateTimeValue) && !dateTimeValue.isBlank()){ + if (!dateTimeValue.isBlank() && !validDateTimeValue(dateTimeValue)){ dateTimeValue+="T00:00:00-00:00"; } }