diff --git a/conf/docker-aio/c7.dockerfile b/conf/docker-aio/c7.dockerfile index 649fe74f673..ed73ab02d2a 100644 --- a/conf/docker-aio/c7.dockerfile +++ b/conf/docker-aio/c7.dockerfile @@ -3,7 +3,7 @@ FROM centos:7 RUN yum install -y https://download.postgresql.org/pub/repos/yum/9.6/redhat/rhel-7-x86_64/pgdg-centos96-9.6-3.noarch.rpm #RUN yum install -y java-1.8.0-openjdk-headless postgresql-server sudo epel-release unzip perl curl httpd RUN yum install -y java-1.8.0-openjdk-devel postgresql96-server sudo epel-release unzip perl curl httpd -RUN yum install -y jq lsof +RUN yum install -y jq lsof awscli # copy and unpack dependencies (solr, glassfish) COPY dv /tmp/dv diff --git a/conf/docker-dcm/0prep.sh b/conf/docker-dcm/0prep.sh index a77f8775495..98cff3c805a 100755 --- a/conf/docker-dcm/0prep.sh +++ b/conf/docker-dcm/0prep.sh @@ -1,4 +1,3 @@ #!/bin/sh - -wget https://github.com/sbgrid/data-capture-module/releases/download/0.3/dcm-0.3-0.noarch.rpm +wget https://github.com/sbgrid/data-capture-module/releases/download/0.5/dcm-0.5-0.noarch.rpm diff --git a/conf/docker-dcm/dcmsrv.dockerfile b/conf/docker-dcm/dcmsrv.dockerfile index 02d88b6c3a2..4ec6fb86c06 100644 --- a/conf/docker-dcm/dcmsrv.dockerfile +++ b/conf/docker-dcm/dcmsrv.dockerfile @@ -1,13 +1,14 @@ # build from repo root FROM centos:6 RUN yum install -y epel-release -ARG RPMFILE=dcm-0.3-0.noarch.rpm +ARG RPMFILE=dcm-0.5-0.noarch.rpm COPY ${RPMFILE} /tmp/ COPY bashrc /root/.bashrc COPY test_install.sh /root/ RUN yum localinstall -y /tmp/${RPMFILE} RUN pip install -r /opt/dcm/requirements.txt RUN pip install awscli==1.15.75 +run export PATH=~/.local/bin:$PATH RUN /root/test_install.sh COPY rq-init-d /etc/init.d/rq RUN useradd glassfish diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 567fb99ec7a..e0d0b4ffd25 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ 
b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -18,9 +18,7 @@ Install a DCM Installation instructions can be found at https://github.com/sbgrid/data-capture-module . Note that a shared filesystem (posix or AWS S3) between Dataverse and your DCM is required. You cannot use a DCM with Swift at this point in time. -Please note that S3 support for DCM is highly experimental. Files can be uploaded to S3 but they cannot be downloaded until https://github.com/IQSS/dataverse/issues/4949 is worked on. If you want to play around with S3 support for DCM, you must configure a JVM option called ``dataverse.files.dcm-s3-bucket-name`` which is a holding area for uploaded files that have not yet passed checksum validation. Search for that JVM option at https://github.com/IQSS/dataverse/issues/4703 for commands on setting that JVM option and related setup. Note that because that GitHub issue has so many comments you will need to click "Load more" where it says "hidden items". FIXME: Document all of this properly. - -. FIXME: Explain what ``dataverse.files.dcm-s3-bucket-name`` is for and what it has to do with ``dataverse.files.s3-bucket-name``. +.. FIXME: Explain what ``dataverse.files.dcm-s3-bucket-name`` is for and what it has to do with ``dataverse.files.s3-bucket-name``. Once you have installed a DCM, you will need to configure two database settings on the Dataverse side. These settings are documented in the :doc:`/installation/config` section of the Installation Guide: @@ -61,7 +59,6 @@ Steps to set up a DCM mock for Development Install Flask. - Download and run the mock. You will be cloning the https://github.com/sbgrid/data-capture-module repo. 
- ``git clone git://github.com/sbgrid/data-capture-module.git`` @@ -108,6 +105,123 @@ The following low level command should only be used when troubleshooting the "im ``curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$DV_BASE_URL/api/batch/jobs/import/datasets/files/$DATASET_DB_ID?uploadFolder=$UPLOAD_FOLDER&totalSize=$TOTAL_SIZE"`` +Steps to set up a DCM via Docker for Development +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you need a fully operating DCM client for development purposes, these steps will guide you to setting one up. This includes steps to set up the DCM on S3 variant. + +Docker Image Set-up +^^^^^^^^^^^^^^^^^^^ + +- Install docker if you do not have it +- Follow these steps (extracted from ``docker-aio/readme.md`` & ``docker-dcm/readme.txt``) : + + - ``cd conf/docker-aio`` and run ``./0prep_deps.sh`` to create Glassfish and Solr tarballs in conf/docker-aio/dv/deps. + - Run ``./1prep.sh`` + - Build the docker image: ``docker build -t dv0 -f c7.dockerfile .`` + - ``cd ../docker-dcm`` and run ``./0prep.sh`` + - Build dcm/dv0dcm images with docker-compose: ``docker-compose -f docker-compose.yml build`` + - Start containers: ``docker-compose -f docker-compose.yml up -d`` + - Wait for container to show "healthy" (aka - ``docker ps``), then wait another 5 minutes (even though it shows healthy, glassfish is still standing itself up). Then run Dataverse app installation: ``docker exec -it dvsrv /opt/dv/install.bash`` + - Configure Dataverse application to use DCM (run from outside the container): ``docker exec -it dvsrv /opt/dv/configure_dcm.sh`` + - The Dataverse installation is accessible at ``http://localhost:8084``. 
+ - You may need to change the DoiProvider inside dvsrv (ezid does not work): + + - ``curl -X DELETE -d EZID "localhost:8080/api/admin/settings/:DoiProvider"`` + - ``curl -X PUT -d DataCite "localhost:8080/api/admin/settings/:DoiProvider"`` + - Also change the doi.baseUrlString, doi.username, doi.password + +Optional steps for setting up the S3 Docker DCM Variant +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Before: the default bucket for DCM to hold files in S3 is named test-dcm. It is coded into `post_upload_s3.bash` (line 30). Change to a different bucket if needed. +- Add AWS bucket info to dcmsrv + + - You need a credentials file in ~/.aws + + - ``mkdir ~/.aws`` + - ``yum install nano`` (or use a different editor below) + - ``nano ~/.aws/credentials`` and add these contents with your keys: + + - ``[default]`` + - ``aws_access_key_id =`` + - ``aws_secret_access_key =`` + +- Dataverse configuration (on dvsrv): + + - Set S3 as the storage driver + + - ``cd /opt/glassfish4/bin/`` + - ``./asadmin delete-jvm-options "\-Ddataverse.files.storage-driver-id=file"`` + - ``./asadmin create-jvm-options "\-Ddataverse.files.storage-driver-id=s3"`` + + - Add AWS bucket info to Dataverse + + - ``mkdir ~/.aws`` + - ``yum install nano`` (or use a different editor below) + - ``nano ~/.aws/credentials`` and add these contents with your keys: + + - ``[default]`` + - ``aws_access_key_id =`` + - ``aws_secret_access_key =`` + + - Also: ``nano ~/.aws/config`` to create a region file. 
Add these contents: + + - ``[default]`` + - ``region = us-east-1`` + + - Add the S3 bucket names to Dataverse + + - S3 bucket for Dataverse + + - ``/usr/local/glassfish4/glassfish/bin/asadmin create-jvm-options "-Ddataverse.files.s3-bucket-name=iqsstestdcmbucket"`` + + - S3 bucket for DCM (as Dataverse needs to do the copy over) + + - ``/usr/local/glassfish4/glassfish/bin/asadmin create-jvm-options "-Ddataverse.files.dcm-s3-bucket-name=test-dcm"`` + + - Set download method to be HTTP, as DCM downloads through S3 are over this protocol ``curl -X PUT "http://localhost:8080/api/admin/settings/:DownloadMethods" -d "native/http"`` + +Using the DCM Docker Containers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For using these commands, you will need to connect to the shell prompt inside various containers (e.g. ``docker exec -it dvsrv /bin/bash``) + +- Create a dataset and download rsync upload script +- Upload script to dcm_client (if needed, you can probably do all the actions for create/download inside dcm_client) + + - ``docker cp ~/Downloads/upload-FK2_NN49YM.bash dcm_client:/tmp`` + +- Create a folder of files to upload (files can be empty) +- Run script + + - e.g. ``bash ./upload-FK2_NN49YM.bash`` + +- Manually run post upload script on dcmsrv + + - for posix implementation: ``bash ./opt/dcm/scn/post_upload.bash`` + - for S3 implementation: ``bash ./opt/dcm/scn/post_upload_s3.bash`` + +Additional DCM docker development tips +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- You can completely blow away all the docker images with these commands (including non DCM ones!) 
+ + - ``docker stop dvsrv`` + - ``docker stop dcm_client`` + - ``docker stop dcmsrv`` + - ``docker rm $(docker ps -a -q)`` + - ``docker rmi $(docker images -q)`` + +- There are a few logs to tail + + - dvsrv : ``tail -n 2000 -f /opt/glassfish4/glassfish/domains/domain1/logs/server.log`` + - dcmsrv : ``tail -n 2000 -f /var/log/lighttpd/breakage.log`` + - dcmsrv : ``tail -n 2000 -f /var/log/lighttpd/access.log`` + +- Note that by default the docker container will stop running if the process it is following is turned off. For example flask with dcmsrv. You can get around this by having the script being followed never close (e.g. sleep infinity) https://stackoverflow.com/questions/31870222/how-can-i-keep-container-running-on-kubernetes +- You may have to restart the glassfish domain occasionally to deal with memory filling up. If deployment is getting reallllllly slow, it's a good time. + Repository Storage Abstraction Layer (RSAL) ------------------------------------------- @@ -221,7 +335,7 @@ Available Steps Dataverse has an internal step provider, whose id is ``:internal``. It offers the following steps: log -+++ +^^^ A step that writes data about the current workflow invocation to the instance log. It also writes the messages in its ``parameters`` map. @@ -238,7 +352,7 @@ A step that writes data about the current workflow invocation to the instance lo pause -+++++ +^^^^^ A step that pauses the workflow. The workflow is paused until a POST request is sent to ``/api/workflows/{invocation-id}``. @@ -251,7 +365,7 @@ A step that pauses the workflow. The workflow is paused until a POST request is http/sr -+++++++ +^^^^^^^ A step that sends a HTTP request to an external system, and then waits for a response. The response has to match a regular expression specified in the step parameters. The url, content type, and message body can use data from the workflow context, using a simple markup language. This step has specific parameters for rollback. 
diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst index 4d57afa11ec..6c59ebc2987 100755 --- a/doc/sphinx-guides/source/user/find-use-data.rst +++ b/doc/sphinx-guides/source/user/find-use-data.rst @@ -17,7 +17,7 @@ Basic Search You can search the entire contents of the Dataverse installation, including dataverses, datasets, and files. You can access the search through the search bar on the homepage, or by clicking the magnifying glass icon in the header of every page. The search bar accepts search terms, queries, or exact phrases (in quotations). Sorting and Viewing Search Results -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Facets: to the left of the search results, there are several facets a user can click on to narrow the number of results displayed. - Choosing a facet: to choose a facet to narrow your results by, click on that facet. @@ -81,7 +81,7 @@ You may also download a file from its file page by clicking the Download button Tabular data files offer additional options: You can explore using the TwoRavens data visualization tool (or other :doc:`/installation/external-tools` if they have been enabled) by clicking the Explore button, or choose from a number of tabular-data-specific download options available as a dropdown under the Download button. Tabular Data -~~~~~~~~~~~~ +^^^^^^^^^^^^ Ingested files can be downloaded in several different ways. @@ -99,7 +99,7 @@ Ingested files can be downloaded in several different ways. .. _rsync_download: Downloading a Dataverse Package via rsync -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ rsync is typically used for synchronizing files and directories between two different systems. Some Dataverse installations allow downloads using rsync, to facilitate large file transfers in a reliable and secure manner. 
@@ -110,6 +110,13 @@ In a dataset containing a Dataverse Package, at the bottom of the dataset page, After you've downloaded the Dataverse Package, you may want to double-check that your download went perfectly. Under **Verify Data**, you'll find a command that you can run in your terminal that will initiate a checksum to ensure that the data you downloaded matches the data in Dataverse precisely. This way, you can ensure the integrity of the data you're working with. +.. _package_download_url: + +Downloading a Dataverse Package via URL +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Dataverse Packages are typically used to represent extremely large files or bundles containing a large number of files. Dataverse Packages are often too large to be reliably downloaded using a web browser. When you click to download a Dataverse Package, instead of automatically initiating the download in your web browser, Dataverse displays a plaintext URL for the location of the file. To ensure a reliable, resumable download, we recommend using `GNU Wget <https://www.gnu.org/software/wget/>`_ in a command line terminal or using a download manager software of your choice. If you try to simply paste the URL into your web browser then the download may overwhelm your browser, resulting in an interrupted, timed out, or otherwise failed download. + Explore Data ------------ diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index e14063961ee..0e750c1910c 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -1622,6 +1622,7 @@ file.downloadDialog.header=Dataset Terms file.downloadDialog.tip=Please confirm and/or complete the information needed below in order to continue. file.requestAccessTermsDialog.tip=Please confirm and/or complete the information needed below in order to request access to files in this dataset. file.requestAccess.notAllowed=Requests for access are not accepted on the Dataset. + file.search.placeholder=Search this dataset... 
file.results.btn.sort=Sort file.results.btn.sort.option.nameAZ=Name (A-Z) @@ -2145,6 +2146,10 @@ permission.ViewUnpublishedDataset.desc=View an unpublished dataset and its files permission.ViewUnpublishedDataverse.desc=View an unpublished dataverse permission.AddDataset.desc=Add a dataset to a dataverse +packageDownload.title=Package File Download +packageDownload.instructions=Use the Download URL in a Wget command or a download manager to download this package file. Download via web browser is not recommended. User Guide - Downloading a Dataverse Package via URL +packageDownload.urlHeader=Download URL + #mydata_fragment.xhtml Published=Published Unpublished=Unpublished @@ -2243,4 +2248,4 @@ rtabfileparser.ioexception.mismatch=Reading mismatch, line {0} of the Data file: rtabfileparser.ioexception.boolean=Unexpected value for the Boolean variable ({0}): rtabfileparser.ioexception.read=Couldn't read Boolean variable ({0})! rtabfileparser.ioexception.parser1=R Tab File Parser: Could not obtain varQnty from the dataset metadata. -rtabfileparser.ioexception.parser2=R Tab File Parser: varQnty=0 in the dataset metadata! \ No newline at end of file +rtabfileparser.ioexception.parser2=R Tab File Parser: varQnty=0 in the dataset metadata! 
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index e8c6b95b5f0..bb4b6efc506 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2,6 +2,7 @@ import com.amazonaws.services.lightsail.model.Bundle; import edu.harvard.iq.dataverse.provenance.ProvPopupFragmentBean; +import edu.harvard.iq.dataverse.PackagePopupFragmentBean; import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; @@ -198,7 +199,6 @@ public enum DisplayMode { @Inject ProvPopupFragmentBean provPopupFragmentBean; - private Dataset dataset = new Dataset(); private EditMode editMode; private boolean bulkFileDeleteInProgress = false; @@ -1469,7 +1469,8 @@ private String init(boolean initFull) { this.guestbookResponse = guestbookResponseService.initGuestbookResponseForFragment(workingVersion, null, session); this.getFileDownloadHelper().setGuestbookResponse(guestbookResponse); logger.fine("Checking if rsync support is enabled."); - if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods))) { + if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods)) + && dataset.getFiles().isEmpty()) { //only check for rsync if no files exist try { ScriptRequestResponse scriptRequestResponse = commandEngine.submit(new RequestRsyncScriptCommand(dvRequestService.getDataverseRequest(), dataset)); logger.fine("script: " + scriptRequestResponse.getScript()); diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 2f3cfc72f66..e4cd115bccb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1728,7 +1728,8 @@ public void setHasRsyncScript(Boolean hasRsyncScript) { private void setUpRsync() { logger.fine("setUpRsync called..."); - if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods))) { + if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods)) + && dataset.getFiles().isEmpty()) { //only check for rsync if no files exist try { ScriptRequestResponse scriptRequestResponse = commandEngine.submit(new RequestRsyncScriptCommand(dvRequestService.getDataverseRequest(), dataset)); logger.fine("script: " + scriptRequestResponse.getScript()); diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java index d60a302279c..e93385ae7a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java @@ -297,7 +297,22 @@ public void writeGuestbookAndLaunchExploreTool(GuestbookResponse guestbookRespon } fileDownloadService.explore(guestbookResponse, fmd, externalTool); requestContext.execute("PF('downloadPopup').hide()"); - } + } + + public void writeGuestbookAndLaunchPackagePopup(GuestbookResponse guestbookResponse) { + RequestContext requestContext = RequestContext.getCurrentInstance(); + boolean valid = validateGuestbookResponse(guestbookResponse); + + if (!valid) { + JH.addMessage(FacesMessage.SEVERITY_ERROR, JH.localize("dataset.message.validationError")); + } else { + requestContext.execute("PF('downloadPopup').hide()"); + requestContext.execute("PF('downloadPackagePopup').show()"); + requestContext.execute("handleResizeDialog('downloadPackagePopup')"); + + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + } + } public String startWorldMapDownloadLink(GuestbookResponse guestbookResponse, 
FileMetadata fmd){ @@ -336,10 +351,8 @@ public void clearRequestAccessFiles(){ public void addMultipleFilesForRequestAccess(DataFile dataFile) { this.filesForRequestAccess.add(dataFile); - } + } - - private String selectedFileId = null; public String getSelectedFileId() { diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index a56ea335788..7cd6ba32a52 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -145,14 +145,19 @@ public void writeGuestbookAndStartFileDownload(GuestbookResponse guestbookRespon logger.fine("issued file download redirect for datafile "+guestbookResponse.getDataFile().getId()); } + public void writeGuestbookResponseRecord(GuestbookResponse guestbookResponse, FileMetadata fileMetadata, String format) { + if(!fileMetadata.getDatasetVersion().isDraft()){ + guestbookResponse = guestbookResponseService.modifyDatafileAndFormat(guestbookResponse, fileMetadata, format); + writeGuestbookResponseRecord(guestbookResponse); + } + } + public void writeGuestbookResponseRecord(GuestbookResponse guestbookResponse) { - try { CreateGuestbookResponseCommand cmd = new CreateGuestbookResponseCommand(dvRequestService.getDataverseRequest(), guestbookResponse, guestbookResponse.getDataset()); commandEngine.submit(cmd); } catch (CommandException e) { //if an error occurs here then download won't happen no need for response recs... - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/PackagePopupFragmentBean.java b/src/main/java/edu/harvard/iq/dataverse/PackagePopupFragmentBean.java new file mode 100644 index 00000000000..fac2abeddb8 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/PackagePopupFragmentBean.java @@ -0,0 +1,32 @@ +/* + * To change this license header, choose License Headers in Project Properties. 
+ * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ + +package edu.harvard.iq.dataverse; + +import javax.faces.view.ViewScoped; +import javax.inject.Named; + +/** + * + * @author matthew + */ + +@ViewScoped +@Named +public class PackagePopupFragmentBean implements java.io.Serializable { + + FileMetadata fm; + + public void setFileMetadata(FileMetadata fileMetadata) { + fm = fileMetadata; + } + + public FileMetadata getFileMetadata() { + return fm; + } + +} + \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java index f5310119150..953fbeeb820 100644 --- a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java @@ -21,10 +21,10 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.FileUtil; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; +import edu.harvard.iq.dataverse.util.FileUtil;; +import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; +import java.io.InputStreamReader; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Date; @@ -34,8 +34,6 @@ import javax.ejb.EJB; import javax.ejb.Stateless; import javax.inject.Named; -import javax.json.JsonObject; -import javax.json.JsonObjectBuilder; /** * This class is for importing files added to s3 outside of dataverse. 
@@ -59,6 +57,7 @@ public class S3PackageImporter extends AbstractApiBean implements java.io.Serial @EJB EjbDataverseEngine commandEngine; + //Copies from another s3 bucket to our own public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException { try { s3 = AmazonS3ClientBuilder.standard().defaultClient(); @@ -67,11 +66,7 @@ public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException "Cannot instantiate a S3 client using; check your AWS credentials and region", e); } - - JsonObjectBuilder bld = jsonObjectBuilder(); - - String fileMode = FileRecordWriter.FILE_MODE_PACKAGE_FILE; - + String dcmBucketName = System.getProperty("dataverse.files.dcm-s3-bucket-name"); String dcmDatasetKey = s3ImportPath; String dvBucketName = System.getProperty("dataverse.files.s3-bucket-name"); @@ -133,80 +128,118 @@ public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException throw new IOException("Failed to delete object" + new Object[]{item}); } } - } - public DataFile createPackageDataFile(Dataset dataset, String folderName, long totalSize) { - DataFile packageFile = new DataFile(DataFileServiceBean.MIME_TYPE_PACKAGE_FILE); - packageFile.setChecksumType(DataFile.ChecksumType.SHA1); - - FileUtil.generateStorageIdentifier(packageFile); - - - String dvBucketName = System.getProperty("dataverse.files.s3-bucket-name"); - String dvDatasetKey = getS3DatasetKey(dataset); - S3Object s3object = null; - - s3object = s3.getObject(new GetObjectRequest(dvBucketName, dvDatasetKey+"/files.sha")); - - InputStream in = s3object.getObjectContent(); - String checksumVal = FileUtil.CalculateChecksum(in, packageFile.getChecksumType()); + public DataFile createPackageDataFile(Dataset dataset, String folderName, long totalSize) throws IOException { + DataFile packageFile = new DataFile(DataFileServiceBean.MIME_TYPE_PACKAGE_FILE); + packageFile.setChecksumType(DataFile.ChecksumType.SHA1); - packageFile.setChecksumValue(checksumVal); + //This is a 
brittle calculation, changes of the dcm post_upload script will blow this up + String rootPackageName = "package_" + folderName.replace("/", ""); - packageFile.setFilesize(totalSize); - packageFile.setModificationTime(new Timestamp(new Date().getTime())); - packageFile.setCreateDate(new Timestamp(new Date().getTime())); - packageFile.setPermissionModificationTime(new Timestamp(new Date().getTime())); - packageFile.setOwner(dataset); - dataset.getFiles().add(packageFile); + String dvBucketName = System.getProperty("dataverse.files.s3-bucket-name"); + String dvDatasetKey = getS3DatasetKey(dataset); - packageFile.setIngestDone(); + //getting the name of the .sha file via substring, ${packageName}.sha + logger.log(Level.INFO, "shaname {0}", new Object[]{rootPackageName + ".sha"}); - // set metadata and add to latest version - FileMetadata fmd = new FileMetadata(); - fmd.setLabel(folderName.substring(folderName.lastIndexOf('/') + 1)); - - fmd.setDataFile(packageFile); - packageFile.getFileMetadatas().add(fmd); - if (dataset.getLatestVersion().getFileMetadatas() == null) dataset.getLatestVersion().setFileMetadatas(new ArrayList<>()); + if(!s3.doesObjectExist(dvBucketName, dvDatasetKey + "/" + rootPackageName + ".zip")) { + throw new IOException ("S3 Package data file could not be found after copy from dcm. 
Name: " + dvDatasetKey + "/" + rootPackageName + ".zip"); + } - dataset.getLatestVersion().getFileMetadatas().add(fmd); - fmd.setDatasetVersion(dataset.getLatestVersion()); - - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(packageFile.getProtocol(), commandEngine.getContext()); - if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) { - String packageIdentifier = dataFileServiceBean.generateDataFileIdentifier(packageFile, idServiceBean); - packageFile.setIdentifier(packageIdentifier); - } - - String nonNullDefaultIfKeyNotFound = ""; - String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); + S3Object s3FilesSha = s3.getObject(new GetObjectRequest(dvBucketName, dvDatasetKey + "/" + rootPackageName + ".sha")); - if (packageFile.getProtocol() == null) { - packageFile.setProtocol(protocol); + InputStreamReader str = new InputStreamReader(s3FilesSha.getObjectContent()); + BufferedReader reader = new BufferedReader(str); + String checksumVal = ""; + try { + String line; + while((line = reader.readLine()) != null && checksumVal.isEmpty()) { + logger.log(Level.FINE, "line {0}", new Object[]{line}); + String[] splitLine = line.split(" "); + + //the sha file should only contain one entry, but in case it doesn't we will check for the one for our zip + if(splitLine[1].contains(rootPackageName + ".zip")) { + checksumVal = splitLine[0]; + logger.log(Level.FINE, "checksumVal found {0}", new Object[]{checksumVal}); + } } - if (packageFile.getAuthority() == null) { - packageFile.setAuthority(authority); + if(checksumVal.isEmpty()) { + logger.log(Level.SEVERE, "No checksum found for uploaded DCM S3 zip on dataset {0}", new Object[]{dataset.getIdentifier()}); + } + } catch (IOException ex){ + logger.log(Level.SEVERE, "Error 
parsing DCM s3 checksum file on dataset {0} . Error: {1} ", new Object[]{dataset.getIdentifier(), ex}); + } finally { + try { + str.close(); + reader.close(); + } catch (IOException ex) { + logger.log(Level.WARNING, "errors closing s3 DCM object reader stream: {0}", new Object[]{ex}); } - if (!packageFile.isIdentifierRegistered()) { - String doiRetString = ""; - idServiceBean = GlobalIdServiceBean.getBean(commandEngine.getContext()); - try { - doiRetString = idServiceBean.createIdentifier(packageFile); - } catch (Throwable e) { - - } + } + + logger.log(Level.FINE, "Checksum value for the package in Dataset {0} is: {1}", + new Object[]{dataset.getIdentifier(), checksumVal}); + + packageFile.setChecksumValue(checksumVal); + + packageFile.setFilesize(totalSize); + packageFile.setModificationTime(new Timestamp(new Date().getTime())); + packageFile.setCreateDate(new Timestamp(new Date().getTime())); + packageFile.setPermissionModificationTime(new Timestamp(new Date().getTime())); + packageFile.setOwner(dataset); + dataset.getFiles().add(packageFile); + + packageFile.setIngestDone(); + + // set metadata and add to latest version + // Set early so we can generate the storage id with the info + FileMetadata fmd = new FileMetadata(); + fmd.setLabel(rootPackageName + ".zip"); + + fmd.setDataFile(packageFile); + packageFile.getFileMetadatas().add(fmd); + if (dataset.getLatestVersion().getFileMetadatas() == null) dataset.getLatestVersion().setFileMetadatas(new ArrayList<>()); + + dataset.getLatestVersion().getFileMetadatas().add(fmd); + fmd.setDatasetVersion(dataset.getLatestVersion()); + + FileUtil.generateS3PackageStorageIdentifier(packageFile); + + GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(packageFile.getProtocol(), commandEngine.getContext()); + if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) { + String packageIdentifier = dataFileServiceBean.generateDataFileIdentifier(packageFile, idServiceBean); + 
packageFile.setIdentifier(packageIdentifier); + } + + String nonNullDefaultIfKeyNotFound = ""; + String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); + String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); + + if (packageFile.getProtocol() == null) { + packageFile.setProtocol(protocol); + } + if (packageFile.getAuthority() == null) { + packageFile.setAuthority(authority); + } + + if (!packageFile.isIdentifierRegistered()) { + String doiRetString = ""; + idServiceBean = GlobalIdServiceBean.getBean(commandEngine.getContext()); + try { + doiRetString = idServiceBean.createIdentifier(packageFile); + } catch (Throwable e) { - // Check return value to make sure registration succeeded - if (!idServiceBean.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) { - packageFile.setIdentifierRegistered(true); - packageFile.setGlobalIdCreateTime(new Date()); - } } + // Check return value to make sure registration succeeded + if (!idServiceBean.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) { + packageFile.setIdentifierRegistered(true); + packageFile.setGlobalIdCreateTime(new Date()); + } + } + return packageFile; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index fab2538832e..399e9a081df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -33,6 +33,7 @@ import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; +import java.util.logging.Level; import java.util.logging.Logger; import javax.ws.rs.NotFoundException; import javax.ws.rs.RedirectionException; @@ -75,6 +76,7 @@ public void writeTo(DownloadInstance di, 
Class clazz, Type type, Annotation[] storageIO.open(); } catch (IOException ioex) { //throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE); + logger.log(Level.INFO, "Datafile {0}: Failed to locate and/or open physical file. Error message: {1}", new Object[]{dataFile.getId(), ioex.getLocalizedMessage()}); throw new NotFoundException("Datafile "+dataFile.getId()+": Failed to locate and/or open physical file."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java index 92f8252f45c..1aa384d205e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java @@ -77,7 +77,7 @@ public static String getMessageFromException(DataCaptureModuleException ex) { } public static String getScriptName(DatasetVersion datasetVersion) { - return "upload-" + datasetVersion.getDataset().getIdentifier() + ".bash"; + return "upload-" + datasetVersion.getDataset().getIdentifier().replace("/", "_") + ".bash"; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 48fbfb5e37d..38f17e78b9b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -28,6 +28,7 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; +import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; @@ -91,7 +92,7 @@ public class 
FileUtil implements java.io.Serializable { private static final String[] TABULAR_DATA_FORMAT_SET = {"POR", "SAV", "DTA", "RDA"}; private static Map STATISTICAL_FILE_EXTENSION = new HashMap(); - + /* * The following are Stata, SAS and SPSS syntax/control cards: * These are recognized as text files (because they are!) so @@ -1132,6 +1133,12 @@ public static String getFilesTempDirectory() { return filesTempDirectory; } + public static void generateS3PackageStorageIdentifier(DataFile dataFile) { + String bucketName = System.getProperty("dataverse.files.s3-bucket-name"); + String storageId = S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); + dataFile.setStorageIdentifier(storageId); + } + public static void generateStorageIdentifier(DataFile dataFile) { dataFile.setStorageIdentifier(generateStorageIdentifier()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index ccf5bbef19e..fd059640780 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -893,7 +893,7 @@ public enum FileDownloadMethods { * go through Glassfish. 
*/ RSYNC("rsal/rsync"), - NATIVE("NATIVE"); + NATIVE("native/http"); private final String text; private FileDownloadMethods(final String text) { @@ -1005,12 +1005,17 @@ public boolean isRsyncOnly(){ } } - public boolean isRsyncDownload() - { + public boolean isRsyncDownload() { String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.RSYNC.toString()); } + public boolean isHTTPDownload() { + String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); + logger.warning("Download Methods:" + downloadMethods); + return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.NATIVE.toString()); + } + private Boolean getUploadMethodAvailable(String method){ String uploadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.UploadMethods); if (uploadMethods==null){ diff --git a/src/main/webapp/WEB-INF/web.xml b/src/main/webapp/WEB-INF/web.xml index 5ef5faf85f6..24d19cbd909 100644 --- a/src/main/webapp/WEB-INF/web.xml +++ b/src/main/webapp/WEB-INF/web.xml @@ -22,6 +22,13 @@ org.jboss.weld.context.conversation.lazy false + + + javax.faces.PROJECT_STAGE + + Production + primefaces.THEME bootstrap diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 9bd399e89e5..b5c9321ccff 100755 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -1165,7 +1165,6 @@ - @@ -1173,9 +1172,17 @@ - - + + + + + + + + + + diff --git a/src/main/webapp/editdatafiles.xhtml b/src/main/webapp/editdatafiles.xhtml index aab4541229e..db847210924 100755 --- a/src/main/webapp/editdatafiles.xhtml +++ b/src/main/webapp/editdatafiles.xhtml @@ -38,57 +38,16 @@ - - -
-
-
-
- - - - - -
- -
-
- -
-
-
- +
+ + + + +
diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index a08acdf74e8..f8f7f50a532 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -112,8 +112,34 @@ #{bundle['file.compute']} - - + + + + + #{bundle.download} + + + + + + + #{bundle.download} + + + #{bundle.download} -
+ @@ -172,7 +174,15 @@ update="guestbookUIFragment"> #{bundle['acceptTerms']} + + + + + + #{bundle['acceptTerms']} +
\ No newline at end of file diff --git a/src/main/webapp/file-info-fragment.xhtml b/src/main/webapp/file-info-fragment.xhtml new file mode 100644 index 00000000000..2add73eab3a --- /dev/null +++ b/src/main/webapp/file-info-fragment.xhtml @@ -0,0 +1,60 @@ + + +
+
+
+ + + + + +
+ +
+
+ +
+
+
+ +
+
+
\ No newline at end of file diff --git a/src/main/webapp/file.xhtml b/src/main/webapp/file.xhtml index fbfbe4adb96..ac4c3c1b780 100644 --- a/src/main/webapp/file.xhtml +++ b/src/main/webapp/file.xhtml @@ -173,7 +173,8 @@
+ and (!FilePage.fileMetadata.dataFile.filePackage or + FilePage.fileMetadata.dataFile.filePackage and systemConfig.HTTPDownload)}"> @@ -362,7 +363,8 @@ + rendered="#{settingsWrapper.rsyncDownload and FilePage.fileMetadata.dataFile.filePackage and systemConfig.rsyncDownload + and !FilePage.fileMetadata.getDataFile().getOwner().getStorageIdentifier().startsWith('s3://') }"> @@ -551,6 +553,16 @@
+ + + + + + + + + + diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index f77b253f557..0b5b11cfe4f 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -152,6 +152,7 @@ +
@@ -234,6 +235,7 @@
+ @@ -256,8 +258,9 @@ - -
  • + + +
  • @@ -375,7 +378,8 @@ -
    +
    @@ -388,8 +392,9 @@
    - -
    +
    @@ -398,5 +403,5 @@
    - + diff --git a/src/main/webapp/package-download-popup-fragment.xhtml b/src/main/webapp/package-download-popup-fragment.xhtml new file mode 100644 index 00000000000..e1e229d5113 --- /dev/null +++ b/src/main/webapp/package-download-popup-fragment.xhtml @@ -0,0 +1,35 @@ + + + +
    +
    +

    + + + + + +

    +
    +
    + + + +
    +
    + +

    #{systemConfig.getDataverseSiteUrl()}/api/access/datafile/#{packagePopupFragmentBean.fileMetadata.dataFile.id}

    +
    +
    + +
    +
    +
    diff --git a/src/test/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtilTest.java index 17f67c31b7f..a00daef63c2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtilTest.java @@ -28,9 +28,9 @@ public void testRsyncSupportEnabled() { assertEquals(false, DataCaptureModuleUtil.rsyncSupportEnabled(null)); assertEquals(true, DataCaptureModuleUtil.rsyncSupportEnabled("dcm/rsync+ssh")); // Comma sepratated lists of upload methods are supported. - assertEquals(false, DataCaptureModuleUtil.rsyncSupportEnabled("NATIVE:dcm/rsync+ssh")); - assertEquals(true, DataCaptureModuleUtil.rsyncSupportEnabled("NATIVE,dcm/rsync+ssh")); - assertEquals(false, DataCaptureModuleUtil.rsyncSupportEnabled("NATIVE")); + assertEquals(false, DataCaptureModuleUtil.rsyncSupportEnabled("native/http:dcm/rsync+ssh")); + assertEquals(true, DataCaptureModuleUtil.rsyncSupportEnabled("native/http,dcm/rsync+ssh")); + assertEquals(false, DataCaptureModuleUtil.rsyncSupportEnabled("native/http")); assertEquals(false, DataCaptureModuleUtil.rsyncSupportEnabled("junk")); }