diff --git a/conf/docker-aio/c7.dockerfile b/conf/docker-aio/c7.dockerfile
index 649fe74f673..ed73ab02d2a 100644
--- a/conf/docker-aio/c7.dockerfile
+++ b/conf/docker-aio/c7.dockerfile
@@ -3,7 +3,7 @@ FROM centos:7
RUN yum install -y https://download.postgresql.org/pub/repos/yum/9.6/redhat/rhel-7-x86_64/pgdg-centos96-9.6-3.noarch.rpm
#RUN yum install -y java-1.8.0-openjdk-headless postgresql-server sudo epel-release unzip perl curl httpd
RUN yum install -y java-1.8.0-openjdk-devel postgresql96-server sudo epel-release unzip perl curl httpd
-RUN yum install -y jq lsof
+RUN yum install -y jq lsof awscli
# copy and unpack dependencies (solr, glassfish)
COPY dv /tmp/dv
diff --git a/conf/docker-dcm/0prep.sh b/conf/docker-dcm/0prep.sh
index a77f8775495..98cff3c805a 100755
--- a/conf/docker-dcm/0prep.sh
+++ b/conf/docker-dcm/0prep.sh
@@ -1,4 +1,3 @@
#!/bin/sh
-
-wget https://github.com/sbgrid/data-capture-module/releases/download/0.3/dcm-0.3-0.noarch.rpm
+wget https://github.com/sbgrid/data-capture-module/releases/download/0.5/dcm-0.5-0.noarch.rpm
diff --git a/conf/docker-dcm/dcmsrv.dockerfile b/conf/docker-dcm/dcmsrv.dockerfile
index 02d88b6c3a2..4ec6fb86c06 100644
--- a/conf/docker-dcm/dcmsrv.dockerfile
+++ b/conf/docker-dcm/dcmsrv.dockerfile
@@ -1,13 +1,14 @@
# build from repo root
FROM centos:6
RUN yum install -y epel-release
-ARG RPMFILE=dcm-0.3-0.noarch.rpm
+ARG RPMFILE=dcm-0.5-0.noarch.rpm
COPY ${RPMFILE} /tmp/
COPY bashrc /root/.bashrc
COPY test_install.sh /root/
RUN yum localinstall -y /tmp/${RPMFILE}
RUN pip install -r /opt/dcm/requirements.txt
RUN pip install awscli==1.15.75
+ENV PATH="/root/.local/bin:${PATH}"
RUN /root/test_install.sh
COPY rq-init-d /etc/init.d/rq
RUN useradd glassfish
diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst
index 567fb99ec7a..e0d0b4ffd25 100644
--- a/doc/sphinx-guides/source/developers/big-data-support.rst
+++ b/doc/sphinx-guides/source/developers/big-data-support.rst
@@ -18,9 +18,7 @@ Install a DCM
Installation instructions can be found at https://github.com/sbgrid/data-capture-module . Note that a shared filesystem (posix or AWS S3) between Dataverse and your DCM is required. You cannot use a DCM with Swift at this point in time.
-Please note that S3 support for DCM is highly experimental. Files can be uploaded to S3 but they cannot be downloaded until https://github.com/IQSS/dataverse/issues/4949 is worked on. If you want to play around with S3 support for DCM, you must configure a JVM option called ``dataverse.files.dcm-s3-bucket-name`` which is a holding area for uploaded files that have not yet passed checksum validation. Search for that JVM option at https://github.com/IQSS/dataverse/issues/4703 for commands on setting that JVM option and related setup. Note that because that GitHub issue has so many comments you will need to click "Load more" where it says "hidden items". FIXME: Document all of this properly.
-
-. FIXME: Explain what ``dataverse.files.dcm-s3-bucket-name`` is for and what it has to do with ``dataverse.files.s3-bucket-name``.
+.. FIXME: Explain what ``dataverse.files.dcm-s3-bucket-name`` is for and what it has to do with ``dataverse.files.s3-bucket-name``.
Once you have installed a DCM, you will need to configure two database settings on the Dataverse side. These settings are documented in the :doc:`/installation/config` section of the Installation Guide:
@@ -61,7 +59,6 @@ Steps to set up a DCM mock for Development
Install Flask.
-
Download and run the mock. You will be cloning the https://github.com/sbgrid/data-capture-module repo.
- ``git clone git://github.com/sbgrid/data-capture-module.git``
@@ -108,6 +105,123 @@ The following low level command should only be used when troubleshooting the "im
``curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$DV_BASE_URL/api/batch/jobs/import/datasets/files/$DATASET_DB_ID?uploadFolder=$UPLOAD_FOLDER&totalSize=$TOTAL_SIZE"``
+Steps to set up a DCM via Docker for Development
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you need a fully operating DCM client for development purposes, these steps will guide you to setting one up. This includes steps to set up the DCM on S3 variant.
+
+Docker Image Set-up
+^^^^^^^^^^^^^^^^^^^
+
+- Install docker if you do not have it
+- Follow these steps (extracted from ``docker-aio/readme.md`` & ``docker-dcm/readme.txt``) :
+
+ - ``cd conf/docker-aio`` and run ``./0prep_deps.sh`` to create Glassfish and Solr tarballs in conf/docker-aio/dv/deps.
+ - Run ``./1prep.sh``
+ - Build the docker image: ``docker build -t dv0 -f c7.dockerfile .``
+ - ``cd ../docker-dcm`` and run ``./0prep.sh``
+ - Build dcm/dv0dcm images with docker-compose: ``docker-compose -f docker-compose.yml build``
+ - Start containers: ``docker-compose -f docker-compose.yml up -d``
+ - Wait for container to show "healthy" (aka - ``docker ps``), then wait another 5 minutes (even though it shows healthy, glassfish is still standing itself up). Then run Dataverse app installation: ``docker exec -it dvsrv /opt/dv/install.bash``
+ - Configure Dataverse application to use DCM (run from outside the container): ``docker exec -it dvsrv /opt/dv/configure_dcm.sh``
+ - The Dataverse installation is accessible at ``http://localhost:8084``.
+ - You may need to change the DoiProvider inside dvsrv (ezid does not work):
+
+ - ``curl -X DELETE -d EZID "localhost:8080/api/admin/settings/:DoiProvider"``
+ - ``curl -X PUT -d DataCite "localhost:8080/api/admin/settings/:DoiProvider"``
+ - Also change the doi.baseUrlString, doi.username, doi.password
+
+Optional steps for setting up the S3 Docker DCM Variant
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Before: the default bucket for DCM to hold files in S3 is named test-dcm. It is coded into ``post_upload_s3.bash`` (line 30). Change to a different bucket if needed.
+- Add AWS bucket info to dcmsrv
+
+  - You need a credentials file in ~/.aws
+
+ - ``mkdir ~/.aws``
+ - ``yum install nano`` (or use a different editor below)
+ - ``nano ~/.aws/credentials`` and add these contents with your keys:
+
+ - ``[default]``
+ - ``aws_access_key_id =``
+ - ``aws_secret_access_key =``
+
+- Dataverse configuration (on dvsrv):
+
+ - Set S3 as the storage driver
+
+ - ``cd /opt/glassfish4/bin/``
+ - ``./asadmin delete-jvm-options "\-Ddataverse.files.storage-driver-id=file"``
+ - ``./asadmin create-jvm-options "\-Ddataverse.files.storage-driver-id=s3"``
+
+ - Add AWS bucket info to Dataverse
+
+ - ``mkdir ~/.aws``
+ - ``yum install nano`` (or use a different editor below)
+ - ``nano ~/.aws/credentials`` and add these contents with your keys:
+
+ - ``[default]``
+ - ``aws_access_key_id =``
+ - ``aws_secret_access_key =``
+
+ - Also: ``nano ~/.aws/config`` to create a region file. Add these contents:
+
+ - ``[default]``
+ - ``region = us-east-1``
+
+ - Add the S3 bucket names to Dataverse
+
+ - S3 bucket for Dataverse
+
+ - ``/usr/local/glassfish4/glassfish/bin/asadmin create-jvm-options "-Ddataverse.files.s3-bucket-name=iqsstestdcmbucket"``
+
+ - S3 bucket for DCM (as Dataverse needs to do the copy over)
+
+ - ``/usr/local/glassfish4/glassfish/bin/asadmin create-jvm-options "-Ddataverse.files.dcm-s3-bucket-name=test-dcm"``
+
+ - Set download method to be HTTP, as DCM downloads through S3 are over this protocol ``curl -X PUT "http://localhost:8080/api/admin/settings/:DownloadMethods" -d "native/http"``
+
+Using the DCM Docker Containers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For using these commands, you will need to connect to the shell prompt inside various containers (e.g. ``docker exec -it dvsrv /bin/bash``)
+
+- Create a dataset and download rsync upload script
+- Upload script to dcm_client (if needed, you can probably do all the actions for create/download inside dcm_client)
+
+ - ``docker cp ~/Downloads/upload-FK2_NN49YM.bash dcm_client:/tmp``
+
+- Create a folder of files to upload (files can be empty)
+- Run script
+
+ - e.g. ``bash ./upload-FK2_NN49YM.bash``
+
+- Manually run post upload script on dcmsrv
+
+  - for posix implementation: ``bash /opt/dcm/scn/post_upload.bash``
+  - for S3 implementation: ``bash /opt/dcm/scn/post_upload_s3.bash``
+
+Additional DCM docker development tips
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- You can completely blow away all the docker images with these commands (including non DCM ones!)
+
+ - ``docker stop dvsrv``
+ - ``docker stop dcm_client``
+ - ``docker stop dcmsrv``
+ - ``docker rm $(docker ps -a -q)``
+ - ``docker rmi $(docker images -q)``
+
+- There are a few logs to tail
+
+ - dvsrv : ``tail -n 2000 -f /opt/glassfish4/glassfish/domains/domain1/logs/server.log``
+ - dcmsrv : ``tail -n 2000 -f /var/log/lighttpd/breakage.log``
+ - dcmsrv : ``tail -n 2000 -f /var/log/lighttpd/access.log``
+
+- Note that by default the docker container will stop running if the process it is following is turned off. For example flask with dcmsrv. You can get around this by having the script being followed never close (e.g. sleep infinity) https://stackoverflow.com/questions/31870222/how-can-i-keep-container-running-on-kubernetes
+- You may have to restart the glassfish domain occasionally to deal with memory filling up. If deployment is getting really slow, it's a good time.
+
Repository Storage Abstraction Layer (RSAL)
-------------------------------------------
@@ -221,7 +335,7 @@ Available Steps
Dataverse has an internal step provider, whose id is ``:internal``. It offers the following steps:
log
-+++
+^^^
A step that writes data about the current workflow invocation to the instance log. It also writes the messages in its ``parameters`` map.
@@ -238,7 +352,7 @@ A step that writes data about the current workflow invocation to the instance lo
pause
-+++++
+^^^^^
A step that pauses the workflow. The workflow is paused until a POST request is sent to ``/api/workflows/{invocation-id}``.
@@ -251,7 +365,7 @@ A step that pauses the workflow. The workflow is paused until a POST request is
http/sr
-+++++++
+^^^^^^^
A step that sends a HTTP request to an external system, and then waits for a response. The response has to match a regular expression specified in the step parameters. The url, content type, and message body can use data from the workflow context, using a simple markup language. This step has specific parameters for rollback.
diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst
index 4d57afa11ec..6c59ebc2987 100755
--- a/doc/sphinx-guides/source/user/find-use-data.rst
+++ b/doc/sphinx-guides/source/user/find-use-data.rst
@@ -17,7 +17,7 @@ Basic Search
You can search the entire contents of the Dataverse installation, including dataverses, datasets, and files. You can access the search through the search bar on the homepage, or by clicking the magnifying glass icon in the header of every page. The search bar accepts search terms, queries, or exact phrases (in quotations).
Sorting and Viewing Search Results
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Facets: to the left of the search results, there are several facets a user can click on to narrow the number of results displayed.
- Choosing a facet: to choose a facet to narrow your results by, click on that facet.
@@ -81,7 +81,7 @@ You may also download a file from its file page by clicking the Download button
Tabular data files offer additional options: You can explore using the TwoRavens data visualization tool (or other :doc:`/installation/external-tools` if they have been enabled) by clicking the Explore button, or choose from a number of tabular-data-specific download options available as a dropdown under the Download button.
Tabular Data
-~~~~~~~~~~~~
+^^^^^^^^^^^^
Ingested files can be downloaded in several different ways.
@@ -99,7 +99,7 @@ Ingested files can be downloaded in several different ways.
.. _rsync_download:
Downloading a Dataverse Package via rsync
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
rsync is typically used for synchronizing files and directories between two different systems. Some Dataverse installations allow downloads using rsync, to facilitate large file transfers in a reliable and secure manner.
@@ -110,6 +110,13 @@ In a dataset containing a Dataverse Package, at the bottom of the dataset page,
After you've downloaded the Dataverse Package, you may want to double-check that your download went perfectly. Under **Verify Data**, you'll find a command that you can run in your terminal that will initiate a checksum to ensure that the data you downloaded matches the data in Dataverse precisely. This way, you can ensure the integrity of the data you're working with.
+.. _package_download_url:
+
+Downloading a Dataverse Package via URL
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Dataverse Packages are typically used to represent extremely large files or bundles containing a large number of files. Dataverse Packages are often too large to be reliably downloaded using a web browser. When you click to download a Dataverse Package, instead of automatically initiating the download in your web browser, Dataverse displays a plaintext URL for the location of the file. To ensure a reliable, resumable download, we recommend using `GNU Wget <https://www.gnu.org/software/wget/>`_ in a command line terminal or using a download manager software of your choice. If you try to simply paste the URL into your web browser then the download may overwhelm your browser, resulting in an interrupted, timed out, or otherwise failed download.
+
Explore Data
------------
diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties
index e14063961ee..0e750c1910c 100755
--- a/src/main/java/Bundle.properties
+++ b/src/main/java/Bundle.properties
@@ -1622,6 +1622,7 @@ file.downloadDialog.header=Dataset Terms
file.downloadDialog.tip=Please confirm and/or complete the information needed below in order to continue.
file.requestAccessTermsDialog.tip=Please confirm and/or complete the information needed below in order to request access to files in this dataset.
file.requestAccess.notAllowed=Requests for access are not accepted on the Dataset.
+
file.search.placeholder=Search this dataset...
file.results.btn.sort=Sort
file.results.btn.sort.option.nameAZ=Name (A-Z)
@@ -2145,6 +2146,10 @@ permission.ViewUnpublishedDataset.desc=View an unpublished dataset and its files
permission.ViewUnpublishedDataverse.desc=View an unpublished dataverse
permission.AddDataset.desc=Add a dataset to a dataverse
+packageDownload.title=Package File Download
+packageDownload.instructions=Use the Download URL in a Wget command or a download manager to download this package file. Download via web browser is not recommended. User Guide - Downloading a Dataverse Package via URL
+packageDownload.urlHeader=Download URL
+
#mydata_fragment.xhtml
Published=Published
Unpublished=Unpublished
@@ -2243,4 +2248,4 @@ rtabfileparser.ioexception.mismatch=Reading mismatch, line {0} of the Data file:
rtabfileparser.ioexception.boolean=Unexpected value for the Boolean variable ({0}):
rtabfileparser.ioexception.read=Couldn't read Boolean variable ({0})!
rtabfileparser.ioexception.parser1=R Tab File Parser: Could not obtain varQnty from the dataset metadata.
-rtabfileparser.ioexception.parser2=R Tab File Parser: varQnty=0 in the dataset metadata!
\ No newline at end of file
+rtabfileparser.ioexception.parser2=R Tab File Parser: varQnty=0 in the dataset metadata!
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index e8c6b95b5f0..bb4b6efc506 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -2,6 +2,7 @@
import com.amazonaws.services.lightsail.model.Bundle;
import edu.harvard.iq.dataverse.provenance.ProvPopupFragmentBean;
+import edu.harvard.iq.dataverse.PackagePopupFragmentBean;
import edu.harvard.iq.dataverse.api.AbstractApiBean;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.Permission;
@@ -198,7 +199,6 @@ public enum DisplayMode {
@Inject
ProvPopupFragmentBean provPopupFragmentBean;
-
private Dataset dataset = new Dataset();
private EditMode editMode;
private boolean bulkFileDeleteInProgress = false;
@@ -1469,7 +1469,8 @@ private String init(boolean initFull) {
this.guestbookResponse = guestbookResponseService.initGuestbookResponseForFragment(workingVersion, null, session);
this.getFileDownloadHelper().setGuestbookResponse(guestbookResponse);
logger.fine("Checking if rsync support is enabled.");
- if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods))) {
+ if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods))
+ && dataset.getFiles().isEmpty()) { //only check for rsync if no files exist
try {
ScriptRequestResponse scriptRequestResponse = commandEngine.submit(new RequestRsyncScriptCommand(dvRequestService.getDataverseRequest(), dataset));
logger.fine("script: " + scriptRequestResponse.getScript());
diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
index 2f3cfc72f66..e4cd115bccb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
@@ -1728,7 +1728,8 @@ public void setHasRsyncScript(Boolean hasRsyncScript) {
private void setUpRsync() {
logger.fine("setUpRsync called...");
- if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods))) {
+ if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods))
+ && dataset.getFiles().isEmpty()) { //only check for rsync if no files exist
try {
ScriptRequestResponse scriptRequestResponse = commandEngine.submit(new RequestRsyncScriptCommand(dvRequestService.getDataverseRequest(), dataset));
logger.fine("script: " + scriptRequestResponse.getScript());
diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java
index d60a302279c..e93385ae7a7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java
@@ -297,7 +297,22 @@ public void writeGuestbookAndLaunchExploreTool(GuestbookResponse guestbookRespon
}
fileDownloadService.explore(guestbookResponse, fmd, externalTool);
requestContext.execute("PF('downloadPopup').hide()");
- }
+ }
+
+ public void writeGuestbookAndLaunchPackagePopup(GuestbookResponse guestbookResponse) {
+ RequestContext requestContext = RequestContext.getCurrentInstance();
+ boolean valid = validateGuestbookResponse(guestbookResponse);
+
+ if (!valid) {
+ JH.addMessage(FacesMessage.SEVERITY_ERROR, JH.localize("dataset.message.validationError"));
+ } else {
+ requestContext.execute("PF('downloadPopup').hide()");
+ requestContext.execute("PF('downloadPackagePopup').show()");
+ requestContext.execute("handleResizeDialog('downloadPackagePopup')");
+
+ fileDownloadService.writeGuestbookResponseRecord(guestbookResponse);
+ }
+ }
public String startWorldMapDownloadLink(GuestbookResponse guestbookResponse, FileMetadata fmd){
@@ -336,10 +351,8 @@ public void clearRequestAccessFiles(){
public void addMultipleFilesForRequestAccess(DataFile dataFile) {
this.filesForRequestAccess.add(dataFile);
- }
+ }
-
-
private String selectedFileId = null;
public String getSelectedFileId() {
diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java
index a56ea335788..7cd6ba32a52 100644
--- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java
@@ -145,14 +145,19 @@ public void writeGuestbookAndStartFileDownload(GuestbookResponse guestbookRespon
logger.fine("issued file download redirect for datafile "+guestbookResponse.getDataFile().getId());
}
+ public void writeGuestbookResponseRecord(GuestbookResponse guestbookResponse, FileMetadata fileMetadata, String format) {
+ if(!fileMetadata.getDatasetVersion().isDraft()){
+ guestbookResponse = guestbookResponseService.modifyDatafileAndFormat(guestbookResponse, fileMetadata, format);
+ writeGuestbookResponseRecord(guestbookResponse);
+ }
+ }
+
public void writeGuestbookResponseRecord(GuestbookResponse guestbookResponse) {
-
try {
CreateGuestbookResponseCommand cmd = new CreateGuestbookResponseCommand(dvRequestService.getDataverseRequest(), guestbookResponse, guestbookResponse.getDataset());
commandEngine.submit(cmd);
} catch (CommandException e) {
//if an error occurs here then download won't happen no need for response recs...
-
}
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/PackagePopupFragmentBean.java b/src/main/java/edu/harvard/iq/dataverse/PackagePopupFragmentBean.java
new file mode 100644
index 00000000000..fac2abeddb8
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/PackagePopupFragmentBean.java
@@ -0,0 +1,32 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package edu.harvard.iq.dataverse;
+
+import javax.faces.view.ViewScoped;
+import javax.inject.Named;
+
+/**
+ *
+ * @author matthew
+ */
+
+@ViewScoped
+@Named
+public class PackagePopupFragmentBean implements java.io.Serializable {
+
+ FileMetadata fm;
+
+ public void setFileMetadata(FileMetadata fileMetadata) {
+ fm = fileMetadata;
+ }
+
+ public FileMetadata getFileMetadata() {
+ return fm;
+ }
+
+}
+
\ No newline at end of file
diff --git a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java
index f5310119150..953fbeeb820 100644
--- a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java
@@ -21,10 +21,10 @@
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
-import edu.harvard.iq.dataverse.util.FileUtil;
-import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
+import edu.harvard.iq.dataverse.util.FileUtil;
+import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStream;
+import java.io.InputStreamReader;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
@@ -34,8 +34,6 @@
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.inject.Named;
-import javax.json.JsonObject;
-import javax.json.JsonObjectBuilder;
/**
* This class is for importing files added to s3 outside of dataverse.
@@ -59,6 +57,7 @@ public class S3PackageImporter extends AbstractApiBean implements java.io.Serial
@EJB
EjbDataverseEngine commandEngine;
+ //Copies from another s3 bucket to our own
public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException {
try {
s3 = AmazonS3ClientBuilder.standard().defaultClient();
@@ -67,11 +66,7 @@ public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException
"Cannot instantiate a S3 client using; check your AWS credentials and region",
e);
}
-
- JsonObjectBuilder bld = jsonObjectBuilder();
-
- String fileMode = FileRecordWriter.FILE_MODE_PACKAGE_FILE;
-
+
String dcmBucketName = System.getProperty("dataverse.files.dcm-s3-bucket-name");
String dcmDatasetKey = s3ImportPath;
String dvBucketName = System.getProperty("dataverse.files.s3-bucket-name");
@@ -133,80 +128,118 @@ public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException
throw new IOException("Failed to delete object" + new Object[]{item});
}
}
-
}
- public DataFile createPackageDataFile(Dataset dataset, String folderName, long totalSize) {
- DataFile packageFile = new DataFile(DataFileServiceBean.MIME_TYPE_PACKAGE_FILE);
- packageFile.setChecksumType(DataFile.ChecksumType.SHA1);
-
- FileUtil.generateStorageIdentifier(packageFile);
-
-
- String dvBucketName = System.getProperty("dataverse.files.s3-bucket-name");
- String dvDatasetKey = getS3DatasetKey(dataset);
- S3Object s3object = null;
-
- s3object = s3.getObject(new GetObjectRequest(dvBucketName, dvDatasetKey+"/files.sha"));
-
- InputStream in = s3object.getObjectContent();
- String checksumVal = FileUtil.CalculateChecksum(in, packageFile.getChecksumType());
+ public DataFile createPackageDataFile(Dataset dataset, String folderName, long totalSize) throws IOException {
+ DataFile packageFile = new DataFile(DataFileServiceBean.MIME_TYPE_PACKAGE_FILE);
+ packageFile.setChecksumType(DataFile.ChecksumType.SHA1);
- packageFile.setChecksumValue(checksumVal);
+ //This is a brittle calculation, changes of the dcm post_upload script will blow this up
+ String rootPackageName = "package_" + folderName.replace("/", "");
- packageFile.setFilesize(totalSize);
- packageFile.setModificationTime(new Timestamp(new Date().getTime()));
- packageFile.setCreateDate(new Timestamp(new Date().getTime()));
- packageFile.setPermissionModificationTime(new Timestamp(new Date().getTime()));
- packageFile.setOwner(dataset);
- dataset.getFiles().add(packageFile);
+ String dvBucketName = System.getProperty("dataverse.files.s3-bucket-name");
+ String dvDatasetKey = getS3DatasetKey(dataset);
- packageFile.setIngestDone();
+ //getting the name of the .sha file via substring, ${packageName}.sha
+ logger.log(Level.INFO, "shaname {0}", new Object[]{rootPackageName + ".sha"});
- // set metadata and add to latest version
- FileMetadata fmd = new FileMetadata();
- fmd.setLabel(folderName.substring(folderName.lastIndexOf('/') + 1));
-
- fmd.setDataFile(packageFile);
- packageFile.getFileMetadatas().add(fmd);
- if (dataset.getLatestVersion().getFileMetadatas() == null) dataset.getLatestVersion().setFileMetadatas(new ArrayList<>());
+ if(!s3.doesObjectExist(dvBucketName, dvDatasetKey + "/" + rootPackageName + ".zip")) {
+ throw new IOException ("S3 Package data file could not be found after copy from dcm. Name: " + dvDatasetKey + "/" + rootPackageName + ".zip");
+ }
- dataset.getLatestVersion().getFileMetadatas().add(fmd);
- fmd.setDatasetVersion(dataset.getLatestVersion());
-
- GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(packageFile.getProtocol(), commandEngine.getContext());
- if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) {
- String packageIdentifier = dataFileServiceBean.generateDataFileIdentifier(packageFile, idServiceBean);
- packageFile.setIdentifier(packageIdentifier);
- }
-
- String nonNullDefaultIfKeyNotFound = "";
- String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound);
- String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound);
+ S3Object s3FilesSha = s3.getObject(new GetObjectRequest(dvBucketName, dvDatasetKey + "/" + rootPackageName + ".sha"));
- if (packageFile.getProtocol() == null) {
- packageFile.setProtocol(protocol);
+ InputStreamReader str = new InputStreamReader(s3FilesSha.getObjectContent());
+ BufferedReader reader = new BufferedReader(str);
+ String checksumVal = "";
+ try {
+ String line;
+ while((line = reader.readLine()) != null && checksumVal.isEmpty()) {
+ logger.log(Level.FINE, "line {0}", new Object[]{line});
+ String[] splitLine = line.split(" ");
+
+ //the sha file should only contain one entry, but incase it doesn't we will check for the one for our zip
+ if(splitLine[1].contains(rootPackageName + ".zip")) {
+ checksumVal = splitLine[0];
+ logger.log(Level.FINE, "checksumVal found {0}", new Object[]{checksumVal});
+ }
}
- if (packageFile.getAuthority() == null) {
- packageFile.setAuthority(authority);
+ if(checksumVal.isEmpty()) {
+ logger.log(Level.SEVERE, "No checksum found for uploaded DCM S3 zip on dataset {0}", new Object[]{dataset.getIdentifier()});
+ }
+ } catch (IOException ex){
+ logger.log(Level.SEVERE, "Error parsing DCM s3 checksum file on dataset {0} . Error: {1} ", new Object[]{dataset.getIdentifier(), ex});
+ } finally {
+ try {
+ str.close();
+ reader.close();
+ } catch (IOException ex) {
+ logger.log(Level.WARNING, "errors closing s3 DCM object reader stream: {0}", new Object[]{ex});
}
- if (!packageFile.isIdentifierRegistered()) {
- String doiRetString = "";
- idServiceBean = GlobalIdServiceBean.getBean(commandEngine.getContext());
- try {
- doiRetString = idServiceBean.createIdentifier(packageFile);
- } catch (Throwable e) {
-
- }
+ }
+
+ logger.log(Level.FINE, "Checksum value for the package in Dataset {0} is: {1}",
+ new Object[]{dataset.getIdentifier(), checksumVal});
+
+ packageFile.setChecksumValue(checksumVal);
+
+ packageFile.setFilesize(totalSize);
+ packageFile.setModificationTime(new Timestamp(new Date().getTime()));
+ packageFile.setCreateDate(new Timestamp(new Date().getTime()));
+ packageFile.setPermissionModificationTime(new Timestamp(new Date().getTime()));
+ packageFile.setOwner(dataset);
+ dataset.getFiles().add(packageFile);
+
+ packageFile.setIngestDone();
+
+ // set metadata and add to latest version
+ // Set early so we can generate the storage id with the info
+ FileMetadata fmd = new FileMetadata();
+ fmd.setLabel(rootPackageName + ".zip");
+
+ fmd.setDataFile(packageFile);
+ packageFile.getFileMetadatas().add(fmd);
+ if (dataset.getLatestVersion().getFileMetadatas() == null) dataset.getLatestVersion().setFileMetadatas(new ArrayList<>());
+
+ dataset.getLatestVersion().getFileMetadatas().add(fmd);
+ fmd.setDatasetVersion(dataset.getLatestVersion());
+
+ FileUtil.generateS3PackageStorageIdentifier(packageFile);
+
+ GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(packageFile.getProtocol(), commandEngine.getContext());
+ if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) {
+ String packageIdentifier = dataFileServiceBean.generateDataFileIdentifier(packageFile, idServiceBean);
+ packageFile.setIdentifier(packageIdentifier);
+ }
+
+ String nonNullDefaultIfKeyNotFound = "";
+ String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound);
+ String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound);
+
+ if (packageFile.getProtocol() == null) {
+ packageFile.setProtocol(protocol);
+ }
+ if (packageFile.getAuthority() == null) {
+ packageFile.setAuthority(authority);
+ }
+
+ if (!packageFile.isIdentifierRegistered()) {
+ String doiRetString = "";
+ idServiceBean = GlobalIdServiceBean.getBean(commandEngine.getContext());
+ try {
+ doiRetString = idServiceBean.createIdentifier(packageFile);
+ } catch (Throwable e) {
- // Check return value to make sure registration succeeded
- if (!idServiceBean.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) {
- packageFile.setIdentifierRegistered(true);
- packageFile.setGlobalIdCreateTime(new Date());
- }
}
+ // Check return value to make sure registration succeeded
+ if (!idServiceBean.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) {
+ packageFile.setIdentifierRegistered(true);
+ packageFile.setGlobalIdCreateTime(new Date());
+ }
+ }
+
return packageFile;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index fab2538832e..399e9a081df 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -33,6 +33,7 @@
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
+import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ws.rs.NotFoundException;
import javax.ws.rs.RedirectionException;
@@ -75,6 +76,7 @@ public void writeTo(DownloadInstance di, Class> clazz, Type type, Annotation[]
storageIO.open();
} catch (IOException ioex) {
//throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE);
+ logger.log(Level.INFO, "Datafile {0}: Failed to locate and/or open physical file. Error message: {1}", new Object[]{dataFile.getId(), ioex.getLocalizedMessage()});
throw new NotFoundException("Datafile "+dataFile.getId()+": Failed to locate and/or open physical file.");
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java
index 92f8252f45c..1aa384d205e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java
@@ -77,7 +77,7 @@ public static String getMessageFromException(DataCaptureModuleException ex) {
}
public static String getScriptName(DatasetVersion datasetVersion) {
- return "upload-" + datasetVersion.getDataset().getIdentifier() + ".bash";
+ return "upload-" + datasetVersion.getDataset().getIdentifier().replace("/", "_") + ".bash";
}
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
index 48fbfb5e37d..38f17e78b9b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
@@ -28,6 +28,7 @@
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
+import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX;
import edu.harvard.iq.dataverse.dataset.DatasetThumbnail;
import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException;
import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable;
@@ -91,7 +92,7 @@ public class FileUtil implements java.io.Serializable {
private static final String[] TABULAR_DATA_FORMAT_SET = {"POR", "SAV", "DTA", "RDA"};
private static Map STATISTICAL_FILE_EXTENSION = new HashMap();
-
+
/*
* The following are Stata, SAS and SPSS syntax/control cards:
* These are recognized as text files (because they are!) so
@@ -1132,6 +1133,12 @@ public static String getFilesTempDirectory() {
return filesTempDirectory;
}
+ public static void generateS3PackageStorageIdentifier(DataFile dataFile) {
+ String bucketName = System.getProperty("dataverse.files.s3-bucket-name");
+ String storageId = S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel();
+ dataFile.setStorageIdentifier(storageId);
+ }
+
public static void generateStorageIdentifier(DataFile dataFile) {
dataFile.setStorageIdentifier(generateStorageIdentifier());
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
index ccf5bbef19e..fd059640780 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
@@ -893,7 +893,7 @@ public enum FileDownloadMethods {
* go through Glassfish.
*/
RSYNC("rsal/rsync"),
- NATIVE("NATIVE");
+ NATIVE("native/http");
private final String text;
private FileDownloadMethods(final String text) {
@@ -1005,12 +1005,17 @@ public boolean isRsyncOnly(){
}
}
- public boolean isRsyncDownload()
- {
+ public boolean isRsyncDownload() {
String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods);
return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.RSYNC.toString());
}
+ public boolean isHTTPDownload() {
+ String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods);
+ logger.warning("Download Methods:" + downloadMethods);
+ return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.NATIVE.toString());
+ }
+
private Boolean getUploadMethodAvailable(String method){
String uploadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.UploadMethods);
if (uploadMethods==null){
diff --git a/src/main/webapp/WEB-INF/web.xml b/src/main/webapp/WEB-INF/web.xml
index 5ef5faf85f6..24d19cbd909 100644
--- a/src/main/webapp/WEB-INF/web.xml
+++ b/src/main/webapp/WEB-INF/web.xml
@@ -22,6 +22,13 @@
org.jboss.weld.context.conversation.lazy
false
+
+
+ javax.faces.PROJECT_STAGE
+
+ Production
+
primefaces.THEME
bootstrap
diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml
index 9bd399e89e5..b5c9321ccff 100755
--- a/src/main/webapp/dataset.xhtml
+++ b/src/main/webapp/dataset.xhtml
@@ -1165,7 +1165,6 @@
-
+
+