From ace254bbf615d7befeadfc671aafd360251feabe Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 7 Jun 2024 13:53:27 -0400 Subject: [PATCH 1/5] remove rsync docs #8985 --- doc/release-notes/8985-deprecate-rsync.md | 1 + .../checksumValidationSuccess.json | 5 - doc/sphinx-guides/source/api/native-api.rst | 4 +- .../source/developers/big-data-support.rst | 178 +----------------- .../source/installation/config.rst | 10 +- .../source/user/dataset-management.rst | 34 +--- .../source/user/find-use-data.rst | 13 -- .../data/storageSites/add-storage-site.json | 6 - 8 files changed, 7 insertions(+), 244 deletions(-) create mode 100644 doc/release-notes/8985-deprecate-rsync.md delete mode 100644 doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json delete mode 100644 scripts/api/data/storageSites/add-storage-site.json diff --git a/doc/release-notes/8985-deprecate-rsync.md b/doc/release-notes/8985-deprecate-rsync.md new file mode 100644 index 00000000000..3ae590f6d2c --- /dev/null +++ b/doc/release-notes/8985-deprecate-rsync.md @@ -0,0 +1 @@ +Support for rsync has been deprecated. Information has been removed from the guides for rsync and related software such as Data Capture Module (DCM) and Repository Storage Abstraction Layer (RSAL). You can still find this information in [older versions](https://guides.dataverse.org/en/6.2/developers/big-data-support.html#data-capture-module-dcm) of the guides. diff --git a/doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json b/doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json deleted file mode 100644 index 6b609c4c65e..00000000000 --- a/doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "status": "validation passed", - "uploadFolder": "OS7O8Y", - "totalSize": 72 -} diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 8c54a937353..82885196d9c 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2315,7 +2315,7 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/locks?type=Ingest" -Currently implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload``, ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``. +Currently implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload`` (deprecated), ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``. The API will output the list of locks, for example:: @@ -2406,7 +2406,7 @@ Use the following API to list ALL the locks on all the datasets in your installa The listing can be filtered by specific lock type **and/or** user, using the following *optional* query parameters: * ``userIdentifier`` - To list the locks owned by a specific user -* ``type`` - To list the locks of the type specified. If the supplied value does not match a known lock type, the API will return an error and a list of valid lock types. As of writing this, the implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload``, ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``. +* ``type`` - To list the locks of the type specified. If the supplied value does not match a known lock type, the API will return an error and a list of valid lock types. As of writing this, the implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload`` (deprecated), ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``. For example: diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 5ea97029271..4aaed10512e 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -1,7 +1,7 @@ Big Data Support ================ -Big data support includes some highly experimental options. Eventually more of this content will move to the Installation Guide. +Big data support includes some experimental options. Eventually more of this content will move to the Installation Guide. .. contents:: |toctitle| :local: @@ -187,179 +187,3 @@ As described in that document, Globus transfers can be initiated by choosing the An overview of the control and data transfer interactions between components was presented at the 2022 Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. See also :ref:`Globus settings <:GlobusSettings>`. - -Data Capture Module (DCM) -------------------------- - -Please note: The DCM feature is deprecated. - -Data Capture Module (DCM) is an experimental component that allows users to upload large datasets via rsync over ssh. - -DCM was developed and tested using Glassfish but these docs have been updated with references to Payara. - -Install a DCM -~~~~~~~~~~~~~ - -Installation instructions can be found at https://github.com/sbgrid/data-capture-module/blob/master/doc/installation.md. Note that shared storage (posix or AWS S3) between your Dataverse installation and your DCM is required. You cannot use a DCM with Swift at this point in time. - -.. FIXME: Explain what ``dataverse.files.dcm-s3-bucket-name`` is for and what it has to do with ``dataverse.files.s3.bucket-name``. - -Once you have installed a DCM, you will need to configure two database settings on the Dataverse installation side. These settings are documented in the :doc:`/installation/config` section of the Installation Guide: - -- ``:DataCaptureModuleUrl`` should be set to the URL of a DCM you installed. -- ``:UploadMethods`` should include ``dcm/rsync+ssh``. - -This will allow your Dataverse installation to communicate with your DCM, so that your Dataverse installation can download rsync scripts for your users. - -Downloading rsync scripts via Your Dataverse Installation's API -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The rsync script can be downloaded from your Dataverse installation via API using an authorized API token. In the curl example below, substitute ``$PERSISTENT_ID`` with a DOI or Handle: - -``curl -H "X-Dataverse-key: $API_TOKEN" $DV_BASE_URL/api/datasets/:persistentId/dataCaptureModule/rsync?persistentId=$PERSISTENT_ID`` - -How a DCM reports checksum success or failure to your Dataverse Installation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Once the user uploads files to a DCM, that DCM will perform checksum validation and report to your Dataverse installation the results of that validation. The DCM must be configured to pass the API token of a superuser. The implementation details, which are subject to change, are below. - -The JSON that a DCM sends to your Dataverse installation on successful checksum validation looks something like the contents of :download:`checksumValidationSuccess.json <../_static/installation/files/root/big-data-support/checksumValidationSuccess.json>` below: - -.. literalinclude:: ../_static/installation/files/root/big-data-support/checksumValidationSuccess.json - :language: json - -- ``status`` - The valid strings to send are ``validation passed`` and ``validation failed``. -- ``uploadFolder`` - This is the directory on disk where your Dataverse installation should attempt to find the files that a DCM has moved into place. There should always be a ``files.sha`` file and a least one data file. ``files.sha`` is a manifest of all the data files and their checksums. The ``uploadFolder`` directory is inside the directory where data is stored for the dataset and may have the same name as the "identifier" of the persistent id (DOI or Handle). For example, you would send ``"uploadFolder": "DNXV2H"`` in the JSON file when the absolute path to this directory is ``/usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/DNXV2H/DNXV2H``. -- ``totalSize`` - Your Dataverse installation will use this value to represent the total size in bytes of all the files in the "package" that's created. If 360 data files and one ``files.sha`` manifest file are in the ``uploadFolder``, this value is the sum of the 360 data files. - - -Here's the syntax for sending the JSON. - -``curl -H "X-Dataverse-key: $API_TOKEN" -X POST -H 'Content-type: application/json' --upload-file checksumValidationSuccess.json $DV_BASE_URL/api/datasets/:persistentId/dataCaptureModule/checksumValidation?persistentId=$PERSISTENT_ID`` - - -Steps to set up a DCM mock for Development -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -See instructions at https://github.com/sbgrid/data-capture-module/blob/master/doc/mock.md - - -Add Dataverse Installation settings to use mock (same as using DCM, noted above): - -- ``curl http://localhost:8080/api/admin/settings/:DataCaptureModuleUrl -X PUT -d "http://localhost:5000"`` -- ``curl http://localhost:8080/api/admin/settings/:UploadMethods -X PUT -d "dcm/rsync+ssh"`` - -At this point you should be able to download a placeholder rsync script. Your Dataverse installation is then waiting for news from the DCM about if checksum validation has succeeded or not. First, you have to put files in place, which is usually the job of the DCM. You should substitute "X1METO" for the "identifier" of the dataset you create. You must also use the proper path for where you store files in your dev environment. - -- ``mkdir /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO`` -- ``mkdir /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO/X1METO`` -- ``cd /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO/X1METO`` -- ``echo "hello" > file1.txt`` -- ``shasum file1.txt > files.sha`` - - - -Now the files are in place and you need to send JSON to your Dataverse installation with a success or failure message as described above. Make a copy of ``doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json`` and put the identifier in place such as "X1METO" under "uploadFolder"). Then use curl as described above to send the JSON. - -Troubleshooting -~~~~~~~~~~~~~~~ - -The following low level command should only be used when troubleshooting the "import" code a DCM uses but is documented here for completeness. - -``curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$DV_BASE_URL/api/batch/jobs/import/datasets/files/$DATASET_DB_ID?uploadFolder=$UPLOAD_FOLDER&totalSize=$TOTAL_SIZE"`` - -Repository Storage Abstraction Layer (RSAL) -------------------------------------------- - -Please note: The RSAL feature is deprecated. - -Steps to set up a DCM via Docker for Development -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -See https://github.com/IQSS/dataverse/blob/develop/conf/docker-dcm/readme.md - -Using the RSAL Docker Containers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Create a dataset (either with the procedure mentioned in DCM Docker Containers, or another process) -- Publish the dataset (from the client container): ``cd /mnt; ./publish_major.bash ${database_id}`` -- Run the RSAL component of the workflow (from the host): ``docker exec -it rsalsrv /opt/rsal/scn/pub.py`` -- If desired, from the client container you can download the dataset following the instructions in the dataset access section of the dataset page. - -Configuring the RSAL Mock -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Info for configuring the RSAL Mock: https://github.com/sbgrid/rsal/tree/master/mocks - -Also, to configure your Dataverse installation to use the new workflow you must do the following (see also the :doc:`workflows` section): - -1. Configure the RSAL URL: - -``curl -X PUT -d 'http://:5050' http://localhost:8080/api/admin/settings/:RepositoryStorageAbstractionLayerUrl`` - -2. Update workflow json with correct URL information: - -Edit internal-httpSR-workflow.json and replace url and rollbackUrl to be the url of your RSAL mock. - -3. Create the workflow: - -``curl http://localhost:8080/api/admin/workflows -X POST --data-binary @internal-httpSR-workflow.json -H "Content-type: application/json"`` - -4. List available workflows: - -``curl http://localhost:8080/api/admin/workflows`` - -5. Set the workflow (id) as the default workflow for the appropriate trigger: - -``curl http://localhost:8080/api/admin/workflows/default/PrePublishDataset -X PUT -d 2`` - -6. Check that the trigger has the appropriate default workflow set: - -``curl http://localhost:8080/api/admin/workflows/default/PrePublishDataset`` - -7. Add RSAL to whitelist - -8. When finished testing, unset the workflow: - -``curl -X DELETE http://localhost:8080/api/admin/workflows/default/PrePublishDataset`` - -Configuring download via rsync -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In order to see the rsync URLs, you must run this command: - -``curl -X PUT -d 'rsal/rsync' http://localhost:8080/api/admin/settings/:DownloadMethods`` - -.. TODO: Document these in the Installation Guide once they're final. - -To specify replication sites that appear in rsync URLs: - -Download :download:`add-storage-site.json <../../../../scripts/api/data/storageSites/add-storage-site.json>` and adjust it to meet your needs. The file should look something like this: - -.. literalinclude:: ../../../../scripts/api/data/storageSites/add-storage-site.json - -Then add the storage site using curl: - -``curl -H "Content-type:application/json" -X POST http://localhost:8080/api/admin/storageSites --upload-file add-storage-site.json`` - -You make a storage site the primary site by passing "true". Pass "false" to make it not the primary site. (id "1" in the example): - -``curl -X PUT -d true http://localhost:8080/api/admin/storageSites/1/primaryStorage`` - -You can delete a storage site like this (id "1" in the example): - -``curl -X DELETE http://localhost:8080/api/admin/storageSites/1`` - -You can view a single storage site like this: (id "1" in the example): - -``curl http://localhost:8080/api/admin/storageSites/1`` - -You can view all storage site like this: - -``curl http://localhost:8080/api/admin/storageSites`` - -In the GUI, this is called "Local Access". It's where you can compute on files on your cluster. - -``curl http://localhost:8080/api/admin/settings/:LocalDataAccessPath -X PUT -d "/programs/datagrid"`` - - diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 8fb9460892b..5060707fcf9 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4241,23 +4241,15 @@ This setting controls which upload methods are available to users of your Datave - ``native/http``: Corresponds to "Upload with HTTP via your browser" and APIs that use HTTP (SWORD and native). - ``dvwebloader``: Corresponds to :ref:`folder-upload`. Note that ``dataverse.files..upload-redirect`` must be set to "true" on an S3 store for this method to show up in the UI. In addition, :ref:`:WebloaderUrl` must be set. CORS allowed on the S3 bucket. See :ref:`cors-s3-bucket`. -- ``dcm/rsync+ssh``: Corresponds to "Upload with rsync+ssh via Data Capture Module (DCM)". A lot of setup is required, as explained in the :doc:`/developers/big-data-support` section of the Developer Guide. Out of the box only ``native/http`` is enabled and will work without further configuration. To add multiple upload method, separate them using a comma like this: -``curl -X PUT -d 'native/http,dcm/rsync+ssh' http://localhost:8080/api/admin/settings/:UploadMethods`` +``curl -X PUT -d 'native/http,dvwebloader' http://localhost:8080/api/admin/settings/:UploadMethods`` You'll always want at least one upload method, so the easiest way to remove one of them is to simply ``PUT`` just the one you want, like this: ``curl -X PUT -d 'native/http' http://localhost:8080/api/admin/settings/:UploadMethods`` -:DownloadMethods -++++++++++++++++ - -This setting is experimental and related to Repository Storage Abstraction Layer (RSAL). - -``curl -X PUT -d 'rsal/rsync' http://localhost:8080/api/admin/settings/:DownloadMethods`` - :GuestbookResponsesPageDisplayLimit +++++++++++++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index d803aae6d19..a1e214589e3 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -84,7 +84,7 @@ HTTP Upload is a common browser-based file upload tool you may be familiar with Once you have uploaded files, you will be able to edit file metadata, restrict access to files [#f1]_ , and/or add tags. Click "Save Changes" to complete the upload. If you uploaded a file by mistake, you can delete it before saving by clicking the checkbox to select the file, and then clicking the "Delete" button above the Files Table. -File upload limit size varies based on Dataverse installation. The file upload size limit can be found in the text above the HTTP upload widget. If you need to upload a very large file or a very large *number* of files, consider using rsync + SSH upload if your Dataverse installation offers it. +File upload limit size varies based on Dataverse installation. The file upload size limit can be found in the text above the HTTP upload widget. If you need to upload a very large file or a very large *number* of files, consider using DVUploader (see :ref:`DVUploader`). .. [#f1] Some Dataverse installations do not allow this feature. @@ -100,37 +100,7 @@ Folder Upload Some Dataverse installations support the ability to upload files from a local folder and subfolders. To do this, click the "Upload from Folder" button, select the folder you wish to upload, select/unselect specific files, and click "Start Uploads". More detailed instructions are available in the `DVWebloader wiki `_. -.. _rsync_upload: - -rsync + SSH Upload ------------------- - -rsync is typically used for synchronizing files and directories between two different systems, using SSH to connect rather than HTTP. Some Dataverse installations allow uploads using rsync, to facilitate large file transfers in a reliable and secure manner. - -File Upload Script -~~~~~~~~~~~~~~~~~~ - -An rsync-enabled Dataverse installation has a file upload process that differs from the traditional browser-based upload process you may be used to. In order to transfer your data to the Dataverse installation's storage, you will need to complete the following steps: - -1. Create your dataset. In rsync-enabled Dataverse installations, you cannot upload files until the dataset creation process is complete. After you hit "Save Dataset" on the Dataset Creation page, you will be taken to the page for your dataset. - -2. On the dataset page, click the "+ Upload Files" button. This will open a box with instructions and a link to the file upload script. - -3. Make sure your files are ready for upload. You will need to have one directory that you can point the upload script to. All files in this directory and in any subdirectories will be uploaded. The directory structure will be preserved, and will be reproduced when your dataset is downloaded from the Dataverse installation. Note that your data will be uploaded in the form of a data package, and each dataset can only host one such package. Be sure that all files you want to include are present before you upload. - -4. Download the rsync file upload script by clicking the "Download Script" button in the Upload Files instruction box. There are no requirements for where you save the script; put it somewhere you can find it. Downloading the upload script will put a temporary lock on your dataset to prepare it for upload. While your dataset is locked, you will not be able to delete or publish your dataset, or edit its metadata. Once you upload your files and Dataverse installation processes them, your dataset will be automatically unlocked and these disabled functions will be enabled again. If you have downloaded the script and locked your dataset, but you have then changed your mind and decided *not* to upload files, please contact Support about unlocking your dataset. - -5. To begin the upload process, you will need to run the script you downloaded. For this, you will have to go outside your browser and open a terminal (AKA command line) window on your computer. Use the terminal to navigate to the directory where you saved the upload script, and run the command that the Upload Files instruction box provides. This will begin the upload script. Please note that this upload script will expire 7 days after you downloaded it. If it expires and you still need to use it, simply download the script from the Dataverse installation again. - -**Note:** Unlike other operating systems, Windows does not come with rsync supported by default. We have not optimized this feature for Windows users, but you may be able to get it working if you install the right Unix utilities. (If you have found a way to get this feature working for you on Windows, you can contribute it to our project. Please reference our `Contributing to the Dataverse Project `_ document in the root of the source tree.) - -6. Follow the instructions provided by the upload script running in your terminal. It will direct you to enter the full path of the directory where your dataset files are located, and then it will start the upload process. Once you've initiated the upload, if you need to cancel it then you can do so by canceling the script running in your terminal window. If your upload gets interrupted, you can resume it from the same point later. - -7. Once the upload script completes its job, the Dataverse installation will begin processing your data upload and running a checksum validation. This may take some time depending on the file size of your upload. During processing, you will see a blue bar at the bottom of the dataset page that reads "Upload in progress..." - -8. Once processing is complete, you will be notified. At this point you can publish your dataset and your data will be available for download on the dataset page. - -**Note:** A dataset can only hold one data package. If you need to replace the data package in your dataset, contact Support. +.. _DVUploader: Command-line DVUploader ----------------------- diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst index bea23cbcd0e..4bf45774b53 100755 --- a/doc/sphinx-guides/source/user/find-use-data.rst +++ b/doc/sphinx-guides/source/user/find-use-data.rst @@ -142,19 +142,6 @@ Downloading a Dataverse File Package via URL Dataverse File Packages are typically used to represent extremely large files or bundles containing a large number of files. Dataverse File Packages are often too large to be reliably downloaded using a web browser. When you click to download a Dataverse File Package, instead of automatically initiating the download in your web browser, the Dataverse installation displays a plaintext URL for the location of the file. To ensure a reliable, resumable download, we recommend using `GNU Wget `_ in a command line terminal or using a download manager software of your choice. If you try to simply paste the URL into your web browser then the download may overwhelm your browser, resulting in an interrupted, timed out, or otherwise failed download. -.. _rsync_download: - -Downloading a Dataverse File Package via rsync -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -rsync is typically used for synchronizing files and directories between two different systems. Some Dataverse installations allow downloads using rsync, to facilitate large file transfers in a reliable and secure manner. - -rsync-enabled Dataverse installations offer a new file download process that differs from traditional browser-based downloading. Instead of multiple files, each dataset uploaded via rsync contains a single "Dataverse File Package". When you download this package you will receive a folder that contains all files from the dataset, arranged in the exact folder structure in which they were originally uploaded. - -In a dataset containing a Dataverse File Package, the information to download and/or access is in outlined the **Data Access** listed under the Access File button. If the data is locally available to you (on a shared drive, for example) you will find the folder path to access the data locally. To download, use one of the rsync commands provided. There may be multiple commands, each corresponding to a different mirror that hosts the Dataverse File Package. Go outside your browser and open a terminal (AKA command line) window on your computer. Use the terminal to run the command that corresponds with the mirror of your choice. It’s usually best to choose the mirror that is geographically closest to you. Running this command will initiate the download process. - -After you've downloaded the Dataverse File Package, you may want to double-check that your download went perfectly. Under **Verify Data**, you'll find a command that you can run in your terminal that will initiate a checksum to ensure that the data you downloaded matches the data in the Dataverse installation precisely. This way, you can ensure the integrity of the data you're working with. - Explore Data ------------ diff --git a/scripts/api/data/storageSites/add-storage-site.json b/scripts/api/data/storageSites/add-storage-site.json deleted file mode 100644 index d13ec2f165d..00000000000 --- a/scripts/api/data/storageSites/add-storage-site.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "hostname": "dataverse.librascholar.edu", - "name": "LibraScholar, USA", - "primaryStorage": true, - "transferProtocols": "rsync,posix,globus" -} From 44b43f86eaae0861c48a2ff442b854768786cd08 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 7 Jun 2024 14:23:21 -0400 Subject: [PATCH 2/5] add deprecation notices to Java code for rsync #8985 --- doc/release-notes/8985-deprecate-rsync.md | 8 ++++++++ .../java/edu/harvard/iq/dataverse/FilePage.java | 1 + .../harvard/iq/dataverse/SettingsWrapper.java | 12 ++++++++---- .../edu/harvard/iq/dataverse/api/Datasets.java | 1 + .../DataCaptureModuleException.java | 2 ++ .../DataCaptureModuleUtil.java | 7 +++++++ .../command/impl/RequestRsyncScriptCommand.java | 1 + .../RepositoryStorageAbstractionLayerPage.java | 4 ++++ .../RepositoryStorageAbstractionLayerUtil.java | 6 ++++++ .../dataverse/settings/SettingsServiceBean.java | 7 +++++++ .../harvard/iq/dataverse/util/SystemConfig.java | 17 ++++++++++++----- 11 files changed, 57 insertions(+), 9 deletions(-) diff --git a/doc/release-notes/8985-deprecate-rsync.md b/doc/release-notes/8985-deprecate-rsync.md index 3ae590f6d2c..5d368e6fec9 100644 --- a/doc/release-notes/8985-deprecate-rsync.md +++ b/doc/release-notes/8985-deprecate-rsync.md @@ -1 +1,9 @@ Support for rsync has been deprecated. Information has been removed from the guides for rsync and related software such as Data Capture Module (DCM) and Repository Storage Abstraction Layer (RSAL). You can still find this information in [older versions](https://guides.dataverse.org/en/6.2/developers/big-data-support.html#data-capture-module-dcm) of the guides. + +The following related database settings have been deprecated as well: + +- :DataCaptureModuleUrl +- :DownloadMethods +- :LocalDataAccessPath +- :PublicInstall +- :RepositoryStorageAbstractionLayerUrl diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index afede00f3eb..a1a31f629b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -1546,6 +1546,7 @@ public String getIngestMessage() { } //Determines whether this File uses a public store and therefore doesn't support embargoed or restricted files + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(DataAccess.getStorageDriverFromIdentifier(file.getStorageIdentifier()))); } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 7854f5adfd8..7aad932b42c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -299,20 +299,23 @@ public Long getZipDownloadLimit(){ return zipDownloadLimit; } + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isPublicInstall(){ if (publicInstall == null) { publicInstall = systemConfig.isPublicInstall(); } return publicInstall; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncUpload() { if (rsyncUpload == null) { rsyncUpload = getUploadMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString()); } return rsyncUpload; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncDownload() { if (rsyncDownload == null) { rsyncDownload = systemConfig.isRsyncDownload(); @@ -379,7 +382,8 @@ public boolean isWebloaderUpload() { } return webloaderUpload; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncOnly() { if (rsyncOnly == null) { String downloadMethods = getValueForKey(SettingsServiceBean.Key.DownloadMethods); @@ -398,7 +402,7 @@ public boolean isRsyncOnly() { } return rsyncOnly; } - + public boolean isHTTPUpload(){ if (httpUpload == null) { httpUpload = getUploadMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index fc0afc562fc..9e9f89c8140 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2297,6 +2297,7 @@ public Response removeDatasetLogo(@Context ContainerRequestContext crc, @PathPar } } + @Deprecated(forRemoval = true, since = "2024-07-07") @GET @AuthRequired @Path("{identifier}/dataCaptureModule/rsync") diff --git a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java index 3329d92b7a9..474674bda73 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java +++ b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java @@ -1,7 +1,9 @@ package edu.harvard.iq.dataverse.datacapturemodule; +@Deprecated(forRemoval = true, since = "2024-07-07") public class DataCaptureModuleException extends Exception { + @Deprecated(forRemoval = true, since = "2024-07-07") public DataCaptureModuleException(String message, Throwable cause) { super(message, cause); } diff --git a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java index 460e4727afc..094d3976133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java @@ -12,10 +12,12 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; +@Deprecated(forRemoval = true, since = "2024-07-07") public class DataCaptureModuleUtil { private static final Logger logger = Logger.getLogger(DataCaptureModuleUtil.class.getCanonicalName()); + @Deprecated(forRemoval = true, since = "2024-07-07") public static boolean rsyncSupportEnabled(String uploadMethodsSettings) { logger.fine("uploadMethodsSettings: " + uploadMethodsSettings);; if (uploadMethodsSettings==null){ @@ -28,6 +30,7 @@ public static boolean rsyncSupportEnabled(String uploadMethodsSettings) { /** * generate JSON to send to DCM */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static JsonObject generateJsonForUploadRequest(AuthenticatedUser user, Dataset dataset) { JsonObjectBuilder jab = Json.createObjectBuilder(); // The general rule should be to always pass the user id and dataset identifier to the DCM. @@ -39,6 +42,7 @@ public static JsonObject generateJsonForUploadRequest(AuthenticatedUser user, Da /** * transfer script from DCM */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static ScriptRequestResponse getScriptFromRequest(HttpResponse uploadRequest) { int status = uploadRequest.getStatus(); JsonNode body = uploadRequest.getBody(); @@ -54,6 +58,7 @@ public static ScriptRequestResponse getScriptFromRequest(HttpResponse return scriptRequestResponse; } + @Deprecated(forRemoval = true, since = "2024-07-07") static UploadRequestResponse makeUploadRequest(HttpResponse uploadRequest) { int status = uploadRequest.getStatus(); String body = uploadRequest.getBody(); @@ -61,6 +66,7 @@ static UploadRequestResponse makeUploadRequest(HttpResponse uploadReques return new UploadRequestResponse(uploadRequest.getStatus(), body); } + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getMessageFromException(DataCaptureModuleException ex) { if (ex == null) { return "DataCaptureModuleException was null!"; @@ -76,6 +82,7 @@ public static String getMessageFromException(DataCaptureModuleException ex) { return message + " was caused by " + cause.getMessage(); } + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getScriptName(DatasetVersion datasetVersion) { return "upload-" + datasetVersion.getDataset().getIdentifier().replace("/", "_") + ".bash"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java index a29e7fdd59c..6b7baa7d01b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java @@ -27,6 +27,7 @@ * "actiontype" in the actionlogrecord rather than "InternalError" if you throw * a CommandExecutionException. */ +@Deprecated(forRemoval = true, since = "2024-07-07") @RequiredPermissions(Permission.EditDataset) public class RequestRsyncScriptCommand extends AbstractCommand { diff --git a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java index c252d2e3330..9edb536eda2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java @@ -11,6 +11,7 @@ import jakarta.inject.Named; import jakarta.json.JsonArray; +@Deprecated(forRemoval = true, since = "2024-07-07") @Stateless @Named public class RepositoryStorageAbstractionLayerPage { @@ -22,17 +23,20 @@ public class RepositoryStorageAbstractionLayerPage { @EJB StorageSiteServiceBean storageSiteServiceBean; + @Deprecated(forRemoval = true, since = "2024-07-07") public String getLocalDataAccessDirectory(DatasetVersion datasetVersion) { String localDataAccessParentDir = settingsService.getValueForKey(SettingsServiceBean.Key.LocalDataAccessPath); return RepositoryStorageAbstractionLayerUtil.getLocalDataAccessDirectory(localDataAccessParentDir, datasetVersion.getDataset()); } + @Deprecated(forRemoval = true, since = "2024-07-07") public List getRsyncSites(DatasetVersion datasetVersion) { List storageSites = storageSiteServiceBean.findAll(); JsonArray storageSitesAsJson = RepositoryStorageAbstractionLayerUtil.getStorageSitesAsJson(storageSites); return RepositoryStorageAbstractionLayerUtil.getRsyncSites(datasetVersion.getDataset(), storageSitesAsJson); } + @Deprecated(forRemoval = true, since = "2024-07-07") public String getVerifyDataCommand(DatasetVersion datasetVersion) { return RepositoryStorageAbstractionLayerUtil.getVerifyDataCommand(datasetVersion.getDataset()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java index 8501fba3ce0..0d547402676 100644 --- a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java @@ -13,10 +13,12 @@ import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +@Deprecated(forRemoval = true, since = "2024-07-07") public class RepositoryStorageAbstractionLayerUtil { private static final Logger logger = Logger.getLogger(RepositoryStorageAbstractionLayerUtil.class.getCanonicalName()); + @Deprecated(forRemoval = true, since = "2024-07-07") public static List getRsyncSites(Dataset dataset, JsonArray rsalSitesAsJson) { List rsalSites = new ArrayList<>(); boolean leafDirectoryOnly = false; @@ -30,6 +32,7 @@ public static List getRsyncSites(Dataset dataset, JsonArray rsalSites return rsalSites; } + @Deprecated(forRemoval = true, since = "2024-07-07") static String getLocalDataAccessDirectory(String localDataAccessParentDir, Dataset dataset) { if (localDataAccessParentDir == null) { localDataAccessParentDir = File.separator + "UNCONFIGURED ( " + SettingsServiceBean.Key.LocalDataAccessPath + " )"; @@ -38,6 +41,7 @@ static String getLocalDataAccessDirectory(String localDataAccessParentDir, Datas return localDataAccessParentDir + File.separator + getDirectoryContainingTheData(dataset, leafDirectoryOnly); } + @Deprecated(forRemoval = true, since = "2024-07-07") static String getVerifyDataCommand(Dataset dataset) { boolean leafDirectoryOnly = true; // TODO: if "files.sha" is defined somewhere, use it. @@ -51,6 +55,7 @@ static String getVerifyDataCommand(Dataset dataset) { * leafDirectoryOnly. See also * http://www.gnu.org/software/coreutils/manual/html_node/basename-invocation.html */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getDirectoryContainingTheData(Dataset dataset, boolean leafDirectoryOnly) { /** * FIXME: What if there is more than one package in the dataset? @@ -81,6 +86,7 @@ public static String getDirectoryContainingTheData(Dataset dataset, boolean leaf * RSAL or some other "big data" component live for a list of remotes sites * to which a particular dataset is replicated to. */ + @Deprecated(forRemoval = true, since = "2024-07-07") static JsonArray getStorageSitesAsJson(List storageSites) { JsonArrayBuilder arraybuilder = Json.createArrayBuilder(); if (storageSites == null || storageSites.isEmpty()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 35d70498c3f..bc194c0b1b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -55,7 +55,10 @@ public enum Key { CustomDatasetSummaryFields, /** * Defines a public installation -- all datafiles are unrestricted + * + * This was added for rsync, which is now deprecated. */ + @Deprecated(forRemoval = true, since = "2024-07-07") PublicInstall, /** * Sets the name of your cloud computing environment. @@ -75,9 +78,12 @@ public enum Key { /** * For example, https://datacapture.example.org */ + @Deprecated(forRemoval = true, since = "2024-07-07") DataCaptureModuleUrl, + @Deprecated(forRemoval = true, since = "2024-07-07") RepositoryStorageAbstractionLayerUrl, UploadMethods, + @Deprecated(forRemoval = true, since = "2024-07-07") DownloadMethods, /** * If the data replicated around the world using RSAL (Repository @@ -87,6 +93,7 @@ public enum Key { * TODO: Think about if it makes sense to make this a column in the * StorageSite database table. */ + @Deprecated(forRemoval = true, since = "2024-07-07") LocalDataAccessPath, /** * The algorithm used to generate PIDs, randomString (default) or diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 3f2f36ea36a..60429ffa0cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -752,6 +752,7 @@ public enum FileUploadMethods { * DCM stands for Data Capture Module. Right now it supports upload over * rsync+ssh but DCM may support additional methods in the future. */ + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("dcm/rsync+ssh"), /** * Traditional Dataverse file handling, which tends to involve users @@ -809,6 +810,7 @@ public enum FileDownloadMethods { * RSAL stands for Repository Storage Abstraction Layer. Downloads don't * go through Glassfish. */ + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("rsal/rsync"), NATIVE("native/http"), GLOBUS("globus") @@ -862,6 +864,7 @@ public String toString() { */ public enum TransferProtocols { + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("rsync"), /** * POSIX includes NFS. This is related to Key.LocalDataAccessPath in @@ -893,12 +896,14 @@ public String toString() { } } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isPublicInstall(){ boolean saneDefault = false; return settingsService.isTrueForKey(SettingsServiceBean.Key.PublicInstall, saneDefault); } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncUpload(){ return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), true); } @@ -915,7 +920,8 @@ public boolean isWebloaderUpload(){ public boolean isHTTPUpload(){ return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), true); } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncOnly(){ String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); if(downloadMethods == null){ @@ -931,11 +937,12 @@ public boolean isRsyncOnly(){ return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).size() == 1 && uploadMethods.toLowerCase().equals(SystemConfig.FileUploadMethods.RSYNC.toString()); } } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncDownload() { return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), false); } - + public boolean isHTTPDownload() { return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), false); } From d8f2abd5bd7f77393ad5e6aacd021ae704a0f76b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 7 Jun 2024 14:33:06 -0400 Subject: [PATCH 3/5] delete deprecated config options from guides #8985 --- .../source/installation/config.rst | 35 +------------------ 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5060707fcf9..eff2fcbd611 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -991,8 +991,6 @@ You also have the option to set a **custom container name separator.** It is ini ``./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.files.swift.folderPathSeparator=-"`` -By default, your Swift installation will be public-only, meaning users will be unable to put access restrictions on their data. If you are comfortable with this level of privacy, the final step in your setup is to set the :ref:`:PublicInstall` setting to `true`. - In order to **enable file access restrictions**, you must enable Swift to use temporary URLs for file access. To enable usage of temporary URLs, set a hash key both on your swift endpoint and in your swift.properties file. You can do so by running the create command: ``./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.files.swift.hashKey.endpoint1=your-hash-key"`` @@ -1023,11 +1021,7 @@ If a user is computing on multiple datasets, the compute tool option will redire ``:ComputeBaseUrl/multiparty?datasetPersistentId&anotherDatasetPersistentId&anotherDatasetPersistentId&...`` -If a user is computing on a single file, depending on the configuration of your installation, the compute tool option will either redirect to: - -``:ComputeBaseUrl?datasetPersistentId=yourObject`` - -if your installation's :ref:`:PublicInstall` setting is true, or: +If a user is computing on a single file, the compute tool option will redirect to: ``:ComputeBaseUrl?datasetPersistentId=yourObject&temp_url_sig=yourTempUrlSig&temp_url_expires=yourTempUrlExpiry`` @@ -4207,33 +4201,6 @@ Set the name of the cloud environment you've integrated with your Dataverse inst ``curl -X PUT -d 'Massachusetts Open Cloud (MOC)' http://localhost:8080/api/admin/settings/:CloudEnvironmentName`` -.. _:PublicInstall: - -:PublicInstall -++++++++++++++ - -Setting an installation to public will remove the ability to restrict data files or datasets. This functionality of the Dataverse Software will be disabled from your installation. - -This is useful for specific cases where an installation's files are stored in public access. Because files stored this way do not obey the Dataverse Software's file restrictions, users would still be able to access the files even when they're restricted. In these cases it's best to use :PublicInstall to disable the feature altogether. - -``curl -X PUT -d true http://localhost:8080/api/admin/settings/:PublicInstall`` - -:DataCaptureModuleUrl -+++++++++++++++++++++ - -The URL for your Data Capture Module (DCM) installation. This component is experimental and can be downloaded from https://github.com/sbgrid/data-capture-module . - -``curl -X PUT -d 'https://dcm.example.edu' http://localhost:8080/api/admin/settings/:DataCaptureModuleUrl`` - -:RepositoryStorageAbstractionLayerUrl -+++++++++++++++++++++++++++++++++++++ - -The URL for your Repository Storage Abstraction Layer (RSAL) installation. This component is experimental and can be downloaded from https://github.com/sbgrid/rsal . - -``curl -X PUT -d 'https://rsal.example.edu' http://localhost:8080/api/admin/settings/:RepositoryStorageAbstractionLayerUrl`` - -.. _:UploadMethods: - :UploadMethods ++++++++++++++ From 48bd3da9508ca03d707743df67b1fabc76e18b6c Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 7 Jun 2024 14:35:33 -0400 Subject: [PATCH 4/5] whoops, put :UploadMethods doc ref back #8985 --- doc/sphinx-guides/source/installation/config.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index eff2fcbd611..82bbf913ca2 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4201,6 +4201,8 @@ Set the name of the cloud environment you've integrated with your Dataverse inst ``curl -X PUT -d 'Massachusetts Open Cloud (MOC)' http://localhost:8080/api/admin/settings/:CloudEnvironmentName`` +.. _:UploadMethods: + :UploadMethods ++++++++++++++ From a00d2700d78f8c3ef107bd824dc65b720de581c3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 10 Jun 2024 16:22:08 -0400 Subject: [PATCH 5/5] restore :PublicInstall to non-deprecated #8985 --- doc/release-notes/8985-deprecate-rsync.md | 1 - doc/sphinx-guides/source/api/native-api.rst | 2 +- .../source/installation/config.rst | 19 ++++++++++++++++++- .../edu/harvard/iq/dataverse/FilePage.java | 1 - .../harvard/iq/dataverse/SettingsWrapper.java | 1 - .../settings/SettingsServiceBean.java | 5 +++-- .../iq/dataverse/util/SystemConfig.java | 3 +-- 7 files changed, 23 insertions(+), 9 deletions(-) diff --git a/doc/release-notes/8985-deprecate-rsync.md b/doc/release-notes/8985-deprecate-rsync.md index 5d368e6fec9..44563f292fd 100644 --- a/doc/release-notes/8985-deprecate-rsync.md +++ b/doc/release-notes/8985-deprecate-rsync.md @@ -5,5 +5,4 @@ The following related database settings have been deprecated as well: - :DataCaptureModuleUrl - :DownloadMethods - :LocalDataAccessPath -- :PublicInstall - :RepositoryStorageAbstractionLayerUrl diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 82885196d9c..04123321e54 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3192,7 +3192,7 @@ Note: you can use the combination of cURL's ``-J`` (``--remote-header-name``) an Restrict Files ~~~~~~~~~~~~~~ -Restrict or unrestrict an existing file where ``id`` is the database id of the file or ``pid`` is the persistent id (DOI or Handle) of the file to restrict. Note that some Dataverse installations do not allow the ability to restrict files. +Restrict or unrestrict an existing file where ``id`` is the database id of the file or ``pid`` is the persistent id (DOI or Handle) of the file to restrict. Note that some Dataverse installations do not allow the ability to restrict files (see :ref:`:PublicInstall`). A curl example using an ``id`` diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 82bbf913ca2..213ac827819 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -991,6 +991,8 @@ You also have the option to set a **custom container name separator.** It is ini ``./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.files.swift.folderPathSeparator=-"`` +By default, your Swift installation will be public-only, meaning users will be unable to put access restrictions on their data. If you are comfortable with this level of privacy, the final step in your setup is to set the :ref:`:PublicInstall` setting to `true`. + In order to **enable file access restrictions**, you must enable Swift to use temporary URLs for file access. To enable usage of temporary URLs, set a hash key both on your swift endpoint and in your swift.properties file. You can do so by running the create command: ``./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.files.swift.hashKey.endpoint1=your-hash-key"`` @@ -1021,7 +1023,11 @@ If a user is computing on multiple datasets, the compute tool option will redire ``:ComputeBaseUrl/multiparty?datasetPersistentId&anotherDatasetPersistentId&anotherDatasetPersistentId&...`` -If a user is computing on a single file, the compute tool option will redirect to: +If a user is computing on a single file, depending on the configuration of your installation, the compute tool option will either redirect to: + +``:ComputeBaseUrl?datasetPersistentId=yourObject`` + +if your installation's :ref:`:PublicInstall` setting is true, or: ``:ComputeBaseUrl?datasetPersistentId=yourObject&temp_url_sig=yourTempUrlSig&temp_url_expires=yourTempUrlExpiry`` @@ -4201,6 +4207,17 @@ Set the name of the cloud environment you've integrated with your Dataverse inst ``curl -X PUT -d 'Massachusetts Open Cloud (MOC)' http://localhost:8080/api/admin/settings/:CloudEnvironmentName`` +.. _:PublicInstall: + +:PublicInstall +++++++++++++++ + +Setting an installation to public will remove the ability to restrict data files or datasets. This functionality of the Dataverse Software will be disabled from your installation. + +This is useful for specific cases where an installation's files are stored in public access. Because files stored this way do not obey the Dataverse Software's file restrictions, users would still be able to access the files even when they're restricted. In these cases it's best to use :PublicInstall to disable the feature altogether. + +``curl -X PUT -d true http://localhost:8080/api/admin/settings/:PublicInstall`` + .. _:UploadMethods: :UploadMethods diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index a1a31f629b7..afede00f3eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -1546,7 +1546,6 @@ public String getIngestMessage() { } //Determines whether this File uses a public store and therefore doesn't support embargoed or restricted files - @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(DataAccess.getStorageDriverFromIdentifier(file.getStorageIdentifier()))); } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 7aad932b42c..48196591b19 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -299,7 +299,6 @@ public Long getZipDownloadLimit(){ return zipDownloadLimit; } - @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isPublicInstall(){ if (publicInstall == null) { publicInstall = systemConfig.isPublicInstall(); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index bc194c0b1b7..a1975b0b975 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -56,9 +56,10 @@ public enum Key { /** * Defines a public installation -- all datafiles are unrestricted * - * This was added for rsync, which is now deprecated. + * This was added along with CloudEnvironmentName and ComputeBaseUrl. + * See https://github.com/IQSS/dataverse/issues/3776 and + * https://github.com/IQSS/dataverse/pull/3967 */ - @Deprecated(forRemoval = true, since = "2024-07-07") PublicInstall, /** * Sets the name of your cloud computing environment. diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 60429ffa0cb..f9801419e47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -896,8 +896,7 @@ public String toString() { } } - - @Deprecated(forRemoval = true, since = "2024-07-07") + public boolean isPublicInstall(){ boolean saneDefault = false; return settingsService.isTrueForKey(SettingsServiceBean.Key.PublicInstall, saneDefault);