From f7e7e4aed8e2e089ac7ce55bb583795230d6849e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 18:22:08 +0200 Subject: [PATCH 1/6] refactor(settings): replace lookups of dataverse.files.directory with MPCONFIG #7000 - Adding dataverse.files.directory equivalent to JvmSettings - Remove all System.getProperty("dataverse.files.directory") or similar - Add default with /tmp/dataverse via microprofile-config.properties as formerly seen at FileUtil and Dataset only - Refactor SwordConfigurationImpl to reuse the NoSuchElementException thrown by MPCONFIG - Refactor GoogleCloudSubmitToArchiveCommand to use the JvmSettings.lookup and create file stream in try-with-resources --- .../edu/harvard/iq/dataverse/Dataset.java | 9 ++-- .../iq/dataverse/EditDatafilesPage.java | 7 ++- .../datadeposit/SwordConfigurationImpl.java | 52 +++++++++---------- .../filesystem/FileRecordJobListener.java | 7 ++- .../importer/filesystem/FileRecordReader.java | 9 ++-- .../GoogleCloudSubmitToArchiveCommand.java | 31 +++++------ .../impl/ImportFromFileSystemCommand.java | 48 +++++++++-------- .../iq/dataverse/settings/JvmSettings.java | 4 ++ .../harvard/iq/dataverse/util/FileUtil.java | 8 ++- .../iq/dataverse/util/SystemConfig.java | 5 -- .../META-INF/microprofile-config.properties | 3 ++ 11 files changed, 94 insertions(+), 89 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a4f82d41bac..e2f00d0b54b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -33,6 +33,8 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -528,11 +530,8 @@ private Collection getCategoryNames() { @Deprecated public Path 
getFileSystemDirectory() { Path studyDir = null; - - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); if (this.getAlternativePersistentIndentifiers() != null && !this.getAlternativePersistentIndentifiers().isEmpty()) { for (AlternativePersistentIdentifier api : this.getAlternativePersistentIndentifiers()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index f53e2377a69..a895c90dabe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.ingest.IngestUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -2425,10 +2426,8 @@ public boolean isTemporaryPreviewAvailable(String fileSystemId, String mimeType) return false; } - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.isEmpty()) { - filesRootDirectory = "/tmp/files"; - } + // Retrieve via MPCONFIG. 
Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String fileSystemName = filesRootDirectory + "/temp/" + fileSystemId; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java index ce5f9415fcc..1e506c6a0b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api.datadeposit; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.util.Arrays; @@ -86,37 +87,32 @@ public boolean storeAndCheckBinary() { @Override public String getTempDirectory() { - String tmpFileDir = System.getProperty(SystemConfig.FILES_DIRECTORY); - if (tmpFileDir != null) { - String swordDirString = tmpFileDir + File.separator + "sword"; - File swordDirFile = new File(swordDirString); - /** - * @todo Do we really need this check? It seems like we do because - * if you create a dataset via the native API and then later try to - * upload a file via SWORD, the directory defined by - * dataverse.files.directory may not exist and we get errors deep in - * the SWORD library code. Could maybe use a try catch in the doPost - * method of our SWORDv2MediaResourceServlet. - */ - if (swordDirFile.exists()) { + // will throw a runtime exception when not found + String tmpFileDir = JvmSettings.FILES_DIRECTORY.lookup(); + + String swordDirString = tmpFileDir + File.separator + "sword"; + File swordDirFile = new File(swordDirString); + /** + * @todo Do we really need this check? 
It seems like we do because + * if you create a dataset via the native API and then later try to + * upload a file via SWORD, the directory defined by + * dataverse.files.directory may not exist and we get errors deep in + * the SWORD library code. Could maybe use a try catch in the doPost + * method of our SWORDv2MediaResourceServlet. + */ + if (swordDirFile.exists()) { + return swordDirString; + } else { + boolean mkdirSuccess = swordDirFile.mkdirs(); + if (mkdirSuccess) { + logger.info("Created directory " + swordDirString); return swordDirString; } else { - boolean mkdirSuccess = swordDirFile.mkdirs(); - if (mkdirSuccess) { - logger.info("Created directory " + swordDirString); - return swordDirString; - } else { - String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); - logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); - } + String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); + logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); + // sadly, must throw RunTimeException to communicate with SWORD user + throw new RuntimeException(msgForSwordUsers); } - } else { - String msgForSwordUsers = ("JVM option \"" + SystemConfig.FILES_DIRECTORY + "\" not defined. 
Check logs for details."); - logger.severe(msgForSwordUsers); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 6b82a665c17..ecb998c66af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -57,6 +57,7 @@ import javax.inject.Named; import javax.servlet.http.HttpServletRequest; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import java.io.FileReader; @@ -433,8 +434,10 @@ private void loadChecksumManifest() { manifest = checksumManifest; getJobLogger().log(Level.INFO, "Checksum manifest = " + manifest + " (FileSystemImportJob.xml property)"); } - // construct full path - String manifestAbsolutePath = System.getProperty("dataverse.files.directory") + + // Construct full path - retrieve base dir via MPCONFIG. 
+ // (Has sane default /tmp/dataverse from META-INF/microprofile-config.properties) + String manifestAbsolutePath = JvmSettings.FILES_DIRECTORY.lookup() + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index b3d3a7107a6..e3b67e9b0d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.filefilter.NotFileFilter; import org.apache.commons.io.filefilter.WildcardFileFilter; @@ -96,9 +97,11 @@ public void init() { @Override public void open(Serializable checkpoint) throws Exception { - - directory = new File(System.getProperty("dataverse.files.directory") - + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); + + // Retrieve via MPCONFIG. 
Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String baseDir = JvmSettings.FILES_DIRECTORY.lookup(); + + directory = new File(baseDir + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 5d017173685..da2701a41e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -1,16 +1,27 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageException; +import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import org.apache.commons.codec.binary.Hex; +import javax.json.Json; +import javax.json.JsonObjectBuilder; 
+import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PipedInputStream; @@ -21,17 +32,6 @@ import java.util.Map; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonObjectBuilder; - -import org.apache.commons.codec.binary.Hex; -import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageException; -import com.google.cloud.storage.StorageOptions; - @RequiredPermissions(Permission.PublishDataset) public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -56,10 +56,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); - try { - FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + System.getProperty("file.separator") + "googlecloudkey.json"); + String cloudKeyFile = JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "googlecloudkey.json"; + + try (FileInputStream cloudKeyStream = new FileInputStream(cloudKeyFile)) { storage = StorageOptions.newBuilder() - .setCredentials(ServiceAccountCredentials.fromStream(fis)) + .setCredentials(ServiceAccountCredentials.fromStream(cloudKeyStream)) .setProjectId(projectName) .build() .getService(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index 64beba82450..5f31ea756eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -12,17 +12,20 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import java.io.File; -import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; + import javax.batch.operations.JobOperator; import javax.batch.operations.JobSecurityException; import javax.batch.operations.JobStartException; import javax.batch.runtime.BatchRuntime; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import java.io.File; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @RequiredPermissions(Permission.EditDataset) public class ImportFromFileSystemCommand extends AbstractCommand { @@ -69,18 +72,20 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { logger.info(error); throw new IllegalCommandException(error, this); } - File directory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier()); - // TODO: - // The above goes directly to the filesystem directory configured by the - // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementation (#6488). - // We probably want package files to be able to use specific stores instead. 
- // More importantly perhaps, the approach above does not take into account - // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer - // needs to be used to name the storage directory, instead of the main/current - // persistent identifier above. + + File directory = new File( + String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier())); + + // TODO: The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementation (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. if (!isValidDirectory(directory)) { String error = "Dataset directory is invalid. " + directory; logger.info(error); @@ -93,11 +98,10 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(error, this); } - File uploadDirectory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier() - + File.separator + uploadFolder); - // TODO: - // see the comment above. + File uploadDirectory = new File(String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier(), uploadFolder)); + + // TODO: see the comment above. 
if (!isValidDirectory(uploadDirectory)) { String error = "Upload folder is not a valid directory."; logger.info(error); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..12e5e311278 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,10 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // FILES SETTINGS + SCOPE_FILES(PREFIX, "files"), + FILES_DIRECTORY(SCOPE_FILES, "directory"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 893c62b3cb0..a2c55d41613 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; @@ -1389,11 +1390,8 @@ public static boolean canIngestAsTabular(String mimeType) { } public static String getFilesTempDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String filesTempDirectory = filesRootDirectory + "/temp"; if (!Files.exists(Paths.get(filesTempDirectory))) { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..e9313e70218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -78,11 +78,6 @@ public class SystemConfig { */ public static final String SITE_URL = "dataverse.siteUrl"; - /** - * A JVM option for where files are stored on the file system. - */ - public static final String FILES_DIRECTORY = "dataverse.files.directory"; - /** * Some installations may not want download URLs to their files to be * available in Schema.org JSON-LD output. diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..ab219071767 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,9 @@ dataverse.version=${project.version} dataverse.build= +# FILES +dataverse.files.directory=/tmp/dataverse + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 From 5c2c7022ad9f11234b0e33ddaf3a0aa2696ab154 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 22:27:30 +0200 Subject: [PATCH 2/6] docs(settings): provide more detail for dataverse.files.directory --- doc/sphinx-guides/source/api/native-api.rst | 2 ++ doc/sphinx-guides/source/installation/config.rst | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 339a291bf4d..6dd1bbab728 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -552,6 +552,8 @@ You should expect an HTTP 200 ("OK") response and JSON indicating the database I .. 
note:: Only a Dataverse installation account with superuser permissions is allowed to include files when creating a dataset via this API. Adding files this way only adds their file metadata to the database, you will need to manually add the physical files to the file system. +.. _api-import-dataset: + Import a Dataset into a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..89329ea3821 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -274,6 +274,8 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. +.. _storage-files-dir: + File Storage ++++++++++++ @@ -1404,7 +1406,19 @@ dataverse.siteUrl dataverse.files.directory +++++++++++++++++++++++++ -This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\.directory options set the permanent data storage locations.) +Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number +of purposes: + +1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before + shipping to the final storage destination. +2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer + to final storage location and/or ingest. +3. ``//`` data location for file system imports, see + :ref:`api-import-dataset`. +4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. + +This directory might also be used for permanent storage of data, but this setting is independent from +:ref:`storage-files-dir` configuration. 
dataverse.auth.password-reset-timeout-in-minutes ++++++++++++++++++++++++++++++++++++++++++++++++ From d7ab9f6e5359356db3b01ab9e6f87347cf117fe7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 15:11:01 +0200 Subject: [PATCH 3/6] style: replace system prop 'file.separator' with File.separator --- .../batch/jobs/importer/filesystem/FileRecordJobListener.java | 3 ++- .../batch/jobs/importer/filesystem/FileRecordReader.java | 2 +- .../java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index ecb998c66af..7837474fc27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -60,6 +60,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; +import java.io.File; import java.io.FileReader; import java.io.IOException; import java.sql.Timestamp; @@ -80,7 +81,7 @@ @Dependent public class FileRecordJobListener implements ItemReadListener, StepListener, JobListener { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; private static final UserNotification.Type notifyType = UserNotification.Type.FILESYSTEMIMPORT; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index e3b67e9b0d2..a4f8ffd2378 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -55,7 +55,7 @@ @Dependent public class FileRecordReader extends AbstractItemReader { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; @Inject JobContext jobContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java index 4a778dc7abb..a2f76ca953d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java @@ -154,8 +154,8 @@ public static Logger getJobLogger(String jobId) { try { Logger jobLogger = Logger.getLogger("job-"+jobId); FileHandler fh; - String logDir = System.getProperty("com.sun.aas.instanceRoot") + System.getProperty("file.separator") - + "logs" + System.getProperty("file.separator") + "batch-jobs" + System.getProperty("file.separator"); + String logDir = System.getProperty("com.sun.aas.instanceRoot") + File.separator + + "logs" + File.separator + "batch-jobs" + File.separator; checkCreateLogDirectory( logDir ); fh = new FileHandler(logDir + "job-" + jobId + ".log"); logger.log(Level.INFO, "JOB LOG: " + logDir + "job-" + jobId + ".log"); From 3b6d17c8e5cebe16e4623364f439f05374138fef Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 9 Jan 2023 15:25:36 +0100 Subject: [PATCH 4/6] docs(config): adapt wording and order for dataverse.files.directory as per review --- .../source/installation/config.rst | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index d46ec3ca3d5..946d580bbde 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -263,7 +263,9 @@ To support multiple stores, a Dataverse 
installation now requires an id, type, a Out of the box, a Dataverse installation is configured to use local file storage in the 'file' store by default. You can add additional stores and, as a superuser, configure specific Dataverse collections to use them (by editing the 'General Information' for the Dataverse collection as described in the :doc:`/admin/dataverses-datasets` section). -Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored (in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored +(in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +(See also the option reference: :ref:`dataverse.files.directory`) If you wish to change which store is used by default, you'll need to delete the existing default storage driver and set a new one using jvm options. @@ -1495,6 +1497,8 @@ protocol, host, and port number and should not include a trailing slash. https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. +.. _dataverse.files.directory: + .. _dataverse.files.directory: dataverse.files.directory @@ -1503,16 +1507,15 @@ dataverse.files.directory Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number of purposes: -1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before +1. ``//`` is the subdirectory layout when using the target + directory as a :ref:`permanent file storage `. The DCM feature for :doc:`../developers/big-data-support` + is able to trigger imports for externally uploaded files from this area under certain conditions. +2. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before shipping to the final storage destination. 2. 
``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer to final storage location and/or ingest. -3. ``//`` data location for file system imports, see - :ref:`api-import-dataset`. 4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. - -This directory might also be used for permanent storage of data, but this setting is independent from -:ref:`storage-files-dir` configuration. + This location is deprecated and might be refactored into a distinct setting in the future. .. _dataverse.files.uploads: @@ -3116,7 +3119,7 @@ For example: ``curl -X PUT -d "This content needs to go through an additional review by the Curation Team before it can be published." http://localhost:8080/api/admin/settings/:DatasetMetadataValidationFailureMsg`` - + :ExternalValidationAdminOverride ++++++++++++++++++++++++++++++++ From 8750cfe0cda0a6db9a356719f42dc24e87581b41 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 9 Jan 2023 15:36:53 +0100 Subject: [PATCH 5/6] doc(config): remove some merge artifact and fix enumeration --- doc/sphinx-guides/source/installation/config.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index c0de6e43a98..03dcbb51624 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1521,9 +1521,6 @@ protocol, host, and port number and should not include a trailing slash. - We are absolutely aware that it's confusing to have both ``dataverse.fqdn`` and ``dataverse.siteUrl``. https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. - -.. _dataverse.files.directory: - .. _dataverse.files.directory: dataverse.files.directory @@ -1537,7 +1534,7 @@ of purposes: is able to trigger imports for externally uploaded files from this area under certain conditions. 2. 
``/temp`` after uploading, data is temporarily stored here for ingest and/or before shipping to the final storage destination. -2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer +3. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer to final storage location and/or ingest. 4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. This location is deprecated and might be refactored into a distinct setting in the future. From b7aecf56e67f13515150a7d5d3b344450d5ce7c3 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 10 Jan 2023 10:38:54 +0100 Subject: [PATCH 6/6] docs(config): change wording for dataverse.files.directory option as per review --- doc/sphinx-guides/source/installation/config.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 03dcbb51624..5f6a3a9aee8 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1529,15 +1529,18 @@ dataverse.files.directory Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number of purposes: -1. ``//`` is the subdirectory layout when using the target - directory as a :ref:`permanent file storage `. The DCM feature for :doc:`../developers/big-data-support` - is able to trigger imports for externally uploaded files from this area under certain conditions. -2. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before +1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before shipping to the final storage destination. -3. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer +2. 
``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer to final storage location and/or ingest. -4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. +3. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. This location is deprecated and might be refactored into a distinct setting in the future. +4. The experimental DCM feature for :doc:`../developers/big-data-support` is able to trigger imports for externally + uploaded files in a directory tree at ``//`` + under certain conditions. This directory may also be used by file stores for :ref:`permanent file storage `, + but this is controlled by other, store-specific settings. + +Defaults to ``/tmp/dataverse``. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_DIRECTORY``. .. _dataverse.files.uploads: