Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.breedinginsight.brapps.importer.model;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
Expand All @@ -34,6 +35,7 @@
import org.jooq.Record;
import tech.tablesaw.api.Table;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -69,6 +71,15 @@ public void updateProgress(Integer finished, Integer inProgress) {
progress.setInProgress((long) inProgress);
}

/**
 * Stores the dynamic column names on the underlying record, converting the
 * list form used by callers into the array form the parent class expects.
 * @param dynamicColumnNames names of file columns not mapped by the import template
 */
public void setDynamicColumnNames(List<String> dynamicColumnNames) {
    String[] asArray = dynamicColumnNames.toArray(new String[0]);
    super.setDynamicColumnNames(asArray);
}

/**
 * Returns the dynamic column names as a list for JSON-friendly consumers.
 * Null-safe: rows created before the dynamic_column_names column existed store
 * null, and Arrays.asList(null) would throw a NullPointerException — return an
 * empty list in that case instead.
 * @return dynamic column names, or an empty list if none were recorded
 */
@JsonIgnore
public List<String> getDynamicColumnNamesList(){
    String[] names = super.getDynamicColumnNames();
    // Arrays.asList() with no args yields an empty list without needing a new import
    return names == null ? Arrays.asList() : Arrays.asList(names);
}

public static ImportUpload parseSQLRecord(Record record) {

return ImportUpload.uploadBuilder()
Expand All @@ -85,6 +96,7 @@ public static ImportUpload parseSQLRecord(Record record) {
.updatedAt(record.getValue(IMPORTER_IMPORT.UPDATED_AT))
.createdBy(record.getValue(IMPORTER_IMPORT.CREATED_BY))
.updatedBy(record.getValue(IMPORTER_IMPORT.UPDATED_BY))
.dynamicColumnNames(record.getValue(IMPORTER_IMPORT.DYNAMIC_COLUMN_NAMES))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@
public class ImportPreviewResponse {
// Preview statistics keyed by category name — TODO confirm key semantics against producer
private Map<String, ImportPreviewStatistics> statistics;
// Mapped rows pending import
private List<PendingImport> rows;
// Headers of file columns not mapped by the import template (e.g. phenotype/timestamp columns)
private List<String> dynamicColumnNames;
}
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ public ImportResponse uploadData(UUID programId, UUID mappingId, AuthenticatedUs
newUpload.setUserId(actingUser.getId());
newUpload.setCreatedBy(actingUser.getId());
newUpload.setUpdatedBy(actingUser.getId());
newUpload = setDynamicColumns(newUpload, data, importMapping);

// Create a progress object
ImportProgress importProgress = new ImportProgress();
Expand Down Expand Up @@ -397,6 +398,26 @@ public ImportResponse updateUpload(UUID programId, UUID uploadId, AuthenticatedU
return importResponse;
}

/**
 * If the mapping has an experiment-style structure (mapping fields carry a value),
 * record the file columns that are not claimed by the mapping as dynamic columns.
 * Experiment and germplasm mappings presently have different structures, so a
 * mapping whose fields have no value gets an empty dynamic-column list.
 * @param newUpload upload record to update
 * @param data parsed file contents
 * @param importMapping mapping configuration for this import
 * @return updated newUpload with dynamic columns set
 */
public ImportUpload setDynamicColumns(ImportUpload newUpload, Table data, ImportMapping importMapping) {
    // Guard against an empty mapping config: the original get(0) would throw
    // IndexOutOfBoundsException before the null check could run
    if (importMapping.getMappingConfig() != null
            && !importMapping.getMappingConfig().isEmpty()
            && importMapping.getMappingConfig().get(0).getValue() != null) {
        // Filter null values defensively — only the first field was checked above
        List<String> mappingCols = importMapping.getMappingConfig().stream()
                .filter(field -> field.getValue() != null)
                .map(field -> field.getValue().getFileFieldName())
                .collect(Collectors.toList());
        // Dynamic columns are whatever the file has that the mapping does not claim
        List<String> dynamicCols = data.columnNames().stream()
                .filter(column -> !mappingCols.contains(column))
                .collect(Collectors.toList());
        newUpload.setDynamicColumnNames(dynamicCols);
    } else {
        newUpload.setDynamicColumnNames(new ArrayList<>());
    }
    return newUpload;
}

private void processFile(List<BrAPIImport> finalBrAPIImportList, Table data, Program program,
ImportUpload upload, User user, Boolean commit, BrAPIImportService importService,
AuthenticatedUser actingUser) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@

import javax.inject.Inject;
import javax.inject.Singleton;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

@Singleton
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import javax.inject.Inject;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
Expand Down Expand Up @@ -115,6 +116,9 @@ public class ExperimentProcessor implements Processor {
// existingGermplasmByGID is populated by getExistingBrapiData(), but not updated by the getNewBrapiData() method
private Map<String, PendingImportObject<BrAPIGermplasm>> existingGermplasmByGID = null;

// Associates timestamp columns to associated phenotype column name for ease of storage
private Map<String, Column> timeStampColByPheno = new HashMap<>();

@Inject
public ExperimentProcessor(DSLContext dsl,
BrAPITrialDAO brapiTrialDAO,
Expand Down Expand Up @@ -180,8 +184,20 @@ public Map<String, ImportPreviewStatistics> process(
ValidationErrors validationErrors = new ValidationErrors();

// Get dynamic phenotype columns for processing
List<Column<?>> phenotypeCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME);
List<Column<?>> dynamicCols = fileMappingUtil.getDynamicColumns(data, EXPERIMENT_TEMPLATE_NAME);
List<Column<?>> phenotypeCols = new ArrayList<>();
List<Column<?>> timestampCols = new ArrayList<>();
for (Column dynamicCol: dynamicCols) {
//Distinguish between phenotype and timestamp columns
if (dynamicCol.name().startsWith("TS:")) {
timestampCols.add(dynamicCol);
} else {
phenotypeCols.add(dynamicCol);
}
}

List<String> varNames = phenotypeCols.stream().map(Column::name).collect(Collectors.toList());
List<String> tsNames = timestampCols.stream().map(Column::name).collect(Collectors.toList());

// Lookup all traits in system for program, maybe eventually add a variable search in ontology service
List<Trait> traits = null;
Expand All @@ -208,6 +224,20 @@ public Map<String, ImportPreviewStatistics> process(
"Ontology term(s) not found: " + String.join(", ", differences));
}

// Check that each ts column corresponds to a phenotype column
List<String> unmatchedTimestamps = tsNames.stream()
.filter(e -> !(varNames.contains(e.replaceFirst("^TS:\\s*",""))))
.collect(Collectors.toList());
if (unmatchedTimestamps.size() > 0) {
throw new HttpStatusException(HttpStatus.UNPROCESSABLE_ENTITY,
"Timestamp column(s) lack corresponding phenotype column(s): " + String.join(", ", unmatchedTimestamps));
}

//Now know timestamps all valid phenotypes, can associate with phenotype column name for easy retrieval
for (Column tsColumn: timestampCols) {
timeStampColByPheno.put(tsColumn.name().replaceFirst("^TS:\\s*",""), tsColumn);
}

// Perform ontology validations on each observation value in phenotype column
Map<String, Trait> colVarMap = filteredTraits.stream()
.collect(Collectors.toMap(Trait::getObservationVariableName, Function.identity()));
Expand All @@ -220,6 +250,15 @@ public Map<String, ImportPreviewStatistics> process(
}
}

//Timestamp validation
for (Column<?> column : timestampCols) {
for (int i=0; i < column.size(); i++) {
String value = column.getString(i);
String colName = column.name();
validateTimeStampValue(value, colName, validationErrors, i);
}
}

// add "New" pending data to the BrapiData objects
getNewBrapiData(importRows, phenotypeCols, program, user, commit);

Expand Down Expand Up @@ -319,7 +358,16 @@ private void getNewBrapiData(List<BrAPIImport> importRows, List<Column<?>> pheno
this.observationUnitByNameNoScope.put(key, obsUnitPIO);

for (Column<?> column : phenotypeCols) {
PendingImportObject<BrAPIObservation> obsPIO = createObservationPIO(importRow, column.name(), column.getString(i));
//If associated timestamp column, add
String dateTimeValue = null;
if (timeStampColByPheno.get(column.name()) != null) {
dateTimeValue = timeStampColByPheno.get(column.name()).getString(i);
//If no timestamp, set to midnight
if (!dateTimeValue.isBlank() && !validDateTimeValue(dateTimeValue)){
dateTimeValue+="T00:00:00-00:00";
}
}
PendingImportObject<BrAPIObservation> obsPIO = createObservationPIO(importRow, column.name(), column.getString(i), dateTimeValue);
this.observationByHash.put(getImportObservationHash(importRow, getVariableNameFromColumn(column)), obsPIO);
}
}
Expand All @@ -331,11 +379,9 @@ private String createObservationUnitKey(ExperimentObservation importRow) {
}

// Builds the lookup hash for an observation from its observation unit, variable, and environment.
// NOTE(review): timestamps are now supported for observations, but are deliberately not part of
// this hash — confirm that two observations differing only by timestamp should collide here.
private String getImportObservationHash(ExperimentObservation importRow, String variableName) {
return getObservationHash(createObservationUnitKey(importRow), variableName, importRow.getEnv());
}

//TODO: Add timestamp parameter once we support them
private String getObservationHash(String observationUnitName, String variableName, String studyName) {
String concat = DigestUtils.sha256Hex(observationUnitName) +
DigestUtils.sha256Hex(variableName) +
Expand Down Expand Up @@ -526,13 +572,18 @@ private PendingImportObject<BrAPIObservationUnit> createObsUnitPIO(Program progr
}


private PendingImportObject<BrAPIObservation> createObservationPIO(ExperimentObservation importRow, String variableName, String value) {
private PendingImportObject<BrAPIObservation> createObservationPIO(ExperimentObservation importRow, String variableName, String value, String timeStampValue) {
PendingImportObject<BrAPIObservation> pio = null;
if (this.observationByHash.containsKey(getImportObservationHash(importRow, variableName))) {
pio = observationByHash.get(getImportObservationHash(importRow, variableName));
}
else {
BrAPIObservation newObservation = importRow.constructBrAPIObservation(value, variableName);
//NOTE: Can't parse invalid timestamp value, so have to skip if invalid.
// Validation error should be thrown for offending value, but that doesn't happen until later downstream
if (timeStampValue != null && !timeStampValue.isBlank() && (validDateValue(timeStampValue) || validDateTimeValue(timeStampValue))) {
newObservation.setObservationTimeStamp(OffsetDateTime.parse(timeStampValue));
}
pio = new PendingImportObject<>(ImportObjectState.NEW, newObservation);
}
return pio;
Expand Down Expand Up @@ -837,6 +888,17 @@ private String simpleStudyName(String scopedName){
return scopedName.replaceFirst(" \\[.*\\]", "");
}

/**
 * Validates a single timestamp cell, adding a row error when the value is
 * neither a date (YYYY-MM-DD) nor an ISO date-time.
 * Blank cells are allowed and skipped with a debug log entry.
 * @param value raw cell contents
 * @param columnHeader timestamp column name, used in the error report
 * @param validationErrors accumulator for row-level errors
 * @param row zero-based row index of the value
 */
private void validateTimeStampValue(String value,
String columnHeader, ValidationErrors validationErrors, int row){
    // Blank timestamps are permitted; nothing to validate.
    if (StringUtils.isBlank(value)) {
        log.debug(String.format("skipping validation of observation timestamp because there is no value.\n\tvariable: %s\n\trow: %d", columnHeader, row));
        return;
    }
    boolean acceptable = validDateValue(value) || validDateTimeValue(value);
    if (!acceptable) {
        addRowError(columnHeader, "Incorrect datetime format detected. Expected YYYY-MM-DD or YYYY-MM-DDThh:mm:ss+hh:mm", validationErrors, row);
    }
}
private void validateObservationValue(Trait variable, String value,
String columnHeader, ValidationErrors validationErrors, int row) {
if(StringUtils.isBlank(value)) {
Expand Down Expand Up @@ -900,6 +962,16 @@ private boolean validDateValue(String value) {
return true;
}

/**
 * Checks whether a value parses as an ISO-8601 date-time WITH a zone offset
 * (e.g. 2022-03-15T10:15:30-05:00), matching the format the error message
 * advertises (YYYY-MM-DDThh:mm:ss+hh:mm).
 * Bug fix: the previous DateTimeFormatter.ISO_DATE_TIME check also accepted
 * local date-times with no offset (e.g. "2021-01-01T10:00"), which then blew
 * up later in OffsetDateTime.parse when the timestamp was stored on the
 * observation. Parsing with OffsetDateTime.parse directly keeps validation
 * consistent with how the value is actually consumed.
 * @param value candidate timestamp string
 * @return true if value is a parseable offset date-time
 */
private boolean validDateTimeValue(String value) {
    try {
        OffsetDateTime.parse(value);
    } catch (DateTimeParseException e) {
        return false;
    }
    return true;
}

private boolean validCategory(List<BrAPIScaleValidValuesCategories> categories, String value) {
Set<String> categoryValues = categories.stream().map(category -> category.getValue().toLowerCase()).collect(Collectors.toSet());
return categoryValues.contains(value.toLowerCase());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public ImportPreviewResponse process(List<BrAPIImport> importRows, List<Processo
response.setStatistics(statistics);
List<PendingImport> mappedBrAPIImportList = new ArrayList<>(mappedBrAPIImport.values());
response.setRows(mappedBrAPIImportList);
response.setDynamicColumnNames(upload.getDynamicColumnNamesList());

statusService.updateMappedData(upload, response, "Finished mapping data to brapi objects");

Expand Down
21 changes: 18 additions & 3 deletions src/main/java/org/breedinginsight/utilities/FileUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import tech.tablesaw.api.ColumnType;
import tech.tablesaw.api.StringColumn;
import tech.tablesaw.api.Table;
import tech.tablesaw.io.csv.CsvReadOptions;
import tech.tablesaw.io.json.JsonReadOptions;

import java.io.*;
Expand Down Expand Up @@ -69,8 +70,14 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR
if (cell == null) {
columns.get(header).add(null);
} else if (cell.getCellType() == CellType.NUMERIC) {
double cellValue = cell.getNumericCellValue();
String stringValue = BigDecimal.valueOf(cellValue).stripTrailingZeros().toPlainString();
//Distinguish between date and numeric
DataFormatter dataFormatter = new DataFormatter();
String stringValue = dataFormatter.formatCellValue(cell);
if (!stringValue.contains("-")) {
//No dashes, assume cell is numeric and not date
double cellValue = cell.getNumericCellValue();
stringValue = BigDecimal.valueOf(cellValue).stripTrailingZeros().toPlainString();
}
columns.get(header).add(stringValue);
} else {
columns.get(header).add(formatter.formatCellValue(cell));
Expand Down Expand Up @@ -101,7 +108,15 @@ public static Table parseTableFromExcel(InputStream inputStream, Integer headerR
public static Table parseTableFromCsv(InputStream inputStream) throws ParsingException {
//TODO: See if this has the windows BOM issue
try {
Table df = Table.read().csv(inputStream);
//Jackson used downstream messily converts LOCAL_DATE/LOCAL_DATETIME, so need to interpret date input as strings
//Note that if another type is needed later this is what needs to be updated
ArrayList<ColumnType> acceptedTypes = new ArrayList<>(Arrays.asList(ColumnType.STRING, ColumnType.INTEGER, ColumnType.DOUBLE, ColumnType.FLOAT));
Table df = Table.read().usingOptions(
CsvReadOptions
.builder(inputStream)
.columnTypesToDetect(acceptedTypes)
.separator(',')
);
return removeNullRows(df);
} catch (IOException e) {
log.error(e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

-- Stores the headers of file columns not claimed by the import template
-- (e.g. phenotype and timestamp columns in experiment imports).
-- Nullable: rows created before this migration have no value recorded.
alter table importer_import add column dynamic_column_names text[];