diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
index eb1b2b658d5..ccc9468f89f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
@@ -325,6 +325,10 @@ from a local InputStream (or a readChannel) into the
             } else {
                 logger.fine("Failed to extract indexable metadata from file " + fileName);
             }
+        } else if (FileUtil.MIME_TYPE_INGESTED_FILE.equals(dataFile.getContentType())) {
+            // Make sure no *uningested* tab-delimited files are saved with the type "text/tab-separated-values"!
+            // "text/tsv" should be used instead:
+            dataFile.setContentType(FileUtil.MIME_TYPE_TSV);
         }
         // temp dbug line
         //System.out.println("ADDING FILE: " + fileName + "; for dataset: " + dataset.getGlobalId());
@@ -423,12 +427,12 @@ public String startIngestJobs(List<DataFile> dataFiles, AuthenticatedUser user)
         List<DataFile> scheduledFiles = new ArrayList<>();
 
         for (DataFile dataFile : dataFiles) {
+            // refresh the copy of the DataFile:
+            dataFile = fileService.find(dataFile.getId());
+
             if (dataFile.isIngestScheduled()) {
-                // refresh the copy of the DataFile:
-                dataFile = fileService.find(dataFile.getId());
-
-                long ingestSizeLimit = -1;
+                long ingestSizeLimit = -1;
                 try {
                     ingestSizeLimit = systemConfig.getTabularIngestSizeLimit(getTabDataReaderByMimeType(dataFile.getContentType()).getFormatName());
                 } catch (IOException ioex) {
@@ -731,6 +735,15 @@ public boolean ingestAsTabular(Long datafile_id) {
         boolean ingestSuccessful = false;
         boolean forceTypeCheck = false;
 
+        // Never attempt to ingest a file that's already ingested!
+        if (dataFile.isTabularData()) {
+            FileUtil.createIngestFailureReport(dataFile, "Repeated ingest attempted on a tabular data file! (status flag was: "+dataFile.getIngestStatus());
+            dataFile.setIngestDone();
+            dataFile = fileService.save(dataFile);
+            logger.warning("Repeated ingest attempted on a tabular data file (datafile id "+datafile_id+"); exiting.");
+            return false;
+        }
+
         IngestRequest ingestRequest = dataFile.getIngestRequest();
         if (ingestRequest != null) {
             forceTypeCheck = ingestRequest.isForceTypeCheck();
@@ -1068,7 +1081,7 @@ public static TabularDataFileReader getTabDataReaderByMimeType(String mimeType)
             ingestPlugin = new RDATAFileReader(new RDATAFileReaderSpi());
         } else if (mimeType.equals(FileUtil.MIME_TYPE_CSV) || mimeType.equals(FileUtil.MIME_TYPE_CSV_ALT)) {
             ingestPlugin = new CSVFileReader(new CSVFileReaderSpi(), ',');
-        } else if (mimeType.equals(FileUtil.MIME_TYPE_TSV) || mimeType.equals(FileUtil.MIME_TYPE_TSV_ALT)) {
+        } else if (mimeType.equals(FileUtil.MIME_TYPE_TSV) /*|| mimeType.equals(FileUtil.MIME_TYPE_TSV_ALT)*/) {
             ingestPlugin = new CSVFileReader(new CSVFileReaderSpi(), '\t');
         } else if (mimeType.equals(FileUtil.MIME_TYPE_XLSX)) {
             ingestPlugin = new XLSXFileReader(new XLSXFileReaderSpi());
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
index 45ce6949127..832043530fc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
@@ -1246,7 +1246,7 @@ public static boolean canIngestAsTabular(String mimeType) {
             case MIME_TYPE_CSV:
             case MIME_TYPE_CSV_ALT:
             case MIME_TYPE_TSV:
-            case MIME_TYPE_TSV_ALT:
+            //case MIME_TYPE_TSV_ALT:
             case MIME_TYPE_XLSX:
             case MIME_TYPE_SPSS_SAV:
             case MIME_TYPE_SPSS_POR: