Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
7e7a502
download differentially private statistics #7400
pdurbin Mar 9, 2021
2e89bde
initial docs for DP metadata access
djbrooke Mar 5, 2021
d8e4b0a
update to match text from depositing data
djbrooke Mar 5, 2021
53b4201
Moved aux file download link to clean up float issue [ref #7400]
mheppler Mar 9, 2021
5c8085d
Moved aux file download link out of canDownload render logic [ref #7400]
mheppler Mar 10, 2021
767f2eb
Removed aux file download link debug code and typo [ref #7400]
mheppler Mar 10, 2021
3c0140b
formatTag and formatVersion are working now #7400
pdurbin Mar 10, 2021
0127d5c
Render logic changes to file access btn and dropdown options [ref #7400]
mheppler Mar 10, 2021
fc9381a
Added file access status to dropdown, tooltips to icons, other aux fi…
mheppler Mar 17, 2021
9b7a15d
Fixed render logic on Variable Metadata option under File Access btn …
mheppler Mar 18, 2021
a04b783
refactor tests #7400
pdurbin Mar 23, 2021
006a9ba
move English to bundle #7400
pdurbin Mar 25, 2021
cc00904
put aux files starting with tag "dp" under DP Stats #7400
pdurbin Mar 26, 2021
5a045be
new domain: opendp.org #7400
pdurbin Mar 26, 2021
22f1d2d
Add "dp" rule to guides #7400
pdurbin Mar 26, 2021
439fcb7
add release note #7400
pdurbin Mar 26, 2021
353134f
Merge branch 'develop' into 7400-opendp-download #7400
pdurbin Mar 26, 2021
01741b3
pass isPublic boolean in tests #7400
pdurbin Apr 1, 2021
d368a85
add type for aux files #7400
pdurbin Apr 5, 2021
289ef85
Merge branch 'develop' into 7400-opendp-download #7400
pdurbin Apr 5, 2021
5c67f95
add file extension to aux files on download #7400
pdurbin Apr 6, 2021
dcc9104
Merge branch 'develop' into 7400-opendp-download #7400
pdurbin Apr 6, 2021
e419194
update release note to reflect recent changes #7400
pdurbin Apr 6, 2021
92af221
add AuxiliaryFilesIT to test suite script #7400
pdurbin Apr 6, 2021
a1e852a
prevent anon download of aux files in draft #7400
pdurbin Apr 7, 2021
c610321
Merge branch 'develop' into 7400-opendp-download #7400
pdurbin Apr 8, 2021
3b2ea4f
add missing "metadata" from aux file download path #7400
pdurbin Apr 12, 2021
9e5f390
base "other" type/grouping on absence from bundle #7400
pdurbin Apr 13, 2021
9115380
add type=null aux files to "other" list #7400
pdurbin Apr 14, 2021
3441758
move SQL to named queries #7400
pdurbin Apr 14, 2021
f55b368
switch from "every version" check to "is file published" #7400
pdurbin Apr 15, 2021
9b852de
refactor getFileExtension into own method #7400
pdurbin Apr 15, 2021
6f8ddc9
make "File Access: " its own entry in bundle #7400
pdurbin Apr 15, 2021
7903a8f
remove contradictory render logic #7400
pdurbin Apr 15, 2021
98fdaef
prevent constant "missing bundle key" messages in server.log #7400
pdurbin Apr 15, 2021
8162886
remove TODO, allowAccessRequests is working #7400
pdurbin Apr 15, 2021
20b4382
remove reference to type=OTHER (legacy concept) in release note #7400
pdurbin Apr 15, 2021
5dc7234
reword "type" on aux file page #7400
pdurbin Apr 15, 2021
51cffe8
remove cruft (tmp file that got committed) #7400
pdurbin Apr 15, 2021
b132580
remove duplicate query and refactor #7400
pdurbin Apr 15, 2021
7ea9ffa
Merge branch 'develop' into 7400-opendp-download #7400
pdurbin Apr 15, 2021
46d1bd4
rename SQL script (5.4.1 is out) #7400
pdurbin Apr 15, 2021
20654cd
minor tweak to wording
scolapasta Apr 15, 2021
d277aa1
standardize on "Restricted with Access Granted" #7400
pdurbin Apr 16, 2021
dab2d8b
make it clear that API might change #7400
pdurbin Apr 16, 2021
ed0b037
Minor text tweaking
scolapasta Apr 16, 2021
e1d5541
switch SQL query from LIKE to = #7400
pdurbin Apr 16, 2021
e913a23
remove duplicate bundle entries #7400
pdurbin Apr 16, 2021
cf7b6d1
add note that aux file APIs can be blocked #7400
pdurbin Apr 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conf/docker-aio/run-test-suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ fi

# Please note the "dataverse.test.baseurl" is set to run for "all-in-one" Docker environment.
# TODO: Rather than hard-coding the list of "IT" classes here, add a profile to pom.xml.
source maven/maven.sh && mvn test -Dtest=DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT -Ddataverse.test.baseurl=$dvurl
source maven/maven.sh && mvn test -Dtest=DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT -Ddataverse.test.baseurl=$dvurl
12 changes: 12 additions & 0 deletions doc/release-notes/7400-opendp-download.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Auxiliary Files can now be downloaded from the web interface.

- Aux files uploaded as type=DP appear under "Differentially Private Statistics" under file level download. The rest appear under "Other Auxiliary Files".

In addition, related changes were made, including the following:

- New tooltip over the lock indicating if you have been granted access to a restricted file or not.
- When downloading individual files, you will see "Restricted with Access Granted" or just "Restricted" (followed by "Users may not request access to files.") as appropriate.
- When downloading individual files, instead of "Download" you should expect to see the file type, such as "JPEG Image", or "Original File Format" if the type is unknown.
- Downloaded aux files now have a file extension if it can be determined.

Please note that the auxiliary files feature is experimental and if you don't need it, its API endpoints can be blocked.
9 changes: 5 additions & 4 deletions doc/sphinx-guides/source/developers/aux-file-support.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Auxiliary File Support
======================

Auxiliary file support is experimental. Auxiliary files in the Dataverse Software are being added to support depositing and downloading differentially private metadata, as part of the OpenDP project (OpenDP.io). In future versions, this approach may become more broadly used and supported.
Auxiliary file support is experimental and as such, related APIs may be added, changed or removed without standard backward compatibility. Auxiliary files in the Dataverse Software are being added to support depositing and downloading differentially private metadata, as part of the OpenDP project (opendp.org). In future versions, this approach will likely become more broadly used and supported.

Adding an Auxiliary File to a Datafile
--------------------------------------
To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and the formatTag and formatVersion (if applicable) associated with the auxiliary file. There are two form parameters. "Origin" specifies the application/entity that created the auxiliary file, an "isPublic" controls access to downloading the file. If "isPublic" is true, any user can download the file, else, access authorization is based on the access rules as defined for the DataFile itself.
To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and the formatTag and formatVersion (if applicable) associated with the auxiliary file. There are multiple form parameters. "Origin" specifies the application/entity that created the auxiliary file, and "isPublic" controls access to downloading the file. If "isPublic" is true, any user can download the file if the dataset has been published; otherwise, access authorization is based on the access rules as defined for the DataFile itself. The "type" parameter is used to group similar auxiliary files in the UI. Currently, auxiliary files with type "DP" appear under "Differentially Private Statistics", while all other auxiliary files appear under "Other Auxiliary Files".

.. code-block:: bash

Expand All @@ -14,9 +14,10 @@ To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and
export FILE_ID='12345'
export FORMAT_TAG='dpJson'
export FORMAT_VERSION='v1'
export TYPE='DP'
export SERVER_URL=https://demo.dataverse.org

curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/access/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION"
curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' -F "type=$TYPE" "$SERVER_URL/api/access/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION"

You should expect a 200 ("OK") response and JSON with information about your newly uploaded auxiliary file.

Expand All @@ -33,4 +34,4 @@ formatTag and formatVersion (if applicable) associated with the auxiliary file:
export FORMAT_TAG='dpJson'
export FORMAT_VERSION='v1'

curl "$SERVER_URL/api/access/datafile/$FILE_ID/$FORMAT_TAG/$FORMAT_VERSION"
curl "$SERVER_URL/api/access/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION"
21 changes: 21 additions & 0 deletions doc/sphinx-guides/source/user/dataset-management.rst
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ Additional download options available for tabular data (found in the same drop-d
- Data File Citation (currently in either RIS, EndNote XML, or BibTeX format);
- All of the above, as a zipped bundle.

Differentially Private (DP) Metadata can also be accessed for restricted tabular files if the data depositor has created a DP Metadata Release. See :ref:`dp-release-create` for more information.

Astronomy (FITS)
----------------

Expand Down Expand Up @@ -210,6 +212,8 @@ Restricted Files

When you restrict a file it cannot be downloaded unless permission has been granted.

Differentially Private (DP) Metadata can be accessed for restricted tabular files if the data depositor has created a DP Metadata Release. See :ref:`dp-release-create` for more information.

See also :ref:`terms-of-access` and :ref:`permissions`.

Edit Files
Expand Down Expand Up @@ -302,6 +306,23 @@ If you restrict any files in your dataset, you will be prompted by a pop-up to e

See also :ref:`restricted-files`.

.. _dp-release-create:

Creating and Depositing Differentially Private Metadata (Experimental)
----------------------------------------------------------------------

Through an integration with tools from the OpenDP Project (opendp.org), the Dataverse Software offers an experimental workflow that allows a data depositor to create and deposit Differentially Private (DP) Metadata files, which can then be used for exploratory data analysis. This workflow allows researchers to view the DP metadata for a tabular file, determine whether or not the file contains useful information, and then make an informed decision about whether or not to request access to the original file.

If this integration has been enabled in your Dataverse installation, you can follow these steps to create a DP Metadata Release and make it available to researchers, while still keeping the files themselves restricted and accessible only after a successful access request.

- Deposit a tabular file and let the ingest process complete
- Restrict the File
- In the kebab next to the file on the dataset page, or from the "Edit Files" dropdown on the file page, click "OpenDP Tool"
- Go through the process to create a DP Metadata Release in the OpenDP tool, and at the end of the process deposit the DP Metadata Release back to the Dataverse installation
- Publish the Dataset

Once the dataset is published, users will be able to request access using the normal process, but will also have the option to download DP Statistics in order to get more information about the file.

Guestbook
---------

Expand Down
13 changes: 13 additions & 0 deletions doc/sphinx-guides/source/user/find-use-data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,19 @@ Explore Data

Some file types and datasets offer data exploration options if external tools have been installed. The tools are described in the :doc:`/admin/external-tools` section of the Admin Guide.

Exploratory Data Analysis Using Differentially Private Metadata (Experimental)
------------------------------------------------------------------------------

Through an integration with tools from the OpenDP Project (opendp.org), the Dataverse Software offers an experimental workflow that allows a data depositor to create and deposit Differentially Private (DP) Metadata files, which can then be used for exploratory data analysis. This workflow allows researchers to view the DP metadata for a tabular file, determine whether or not the file contains useful information, and then make an informed decision about whether or not to request access to the original file.

If the data depositor has made available DP metadata for one or more files in their dataset, these access options will appear on the access dropdown on both the Dataset Page and the File Page. These access options will be available even if a file is restricted. Three types of DP metadata will be available:

- .PDF
- .XML
- .JSON

For more information about how data depositors can enable access using the OpenDP tool, visit the :doc:`/user/dataset-management` section of the User Guide.

.. |image-file-tree-view| image:: ./img/file-tree-view.png
:class: img-responsive
.. |image-file-search-facets| image:: ./img/file-search-facets.png
Expand Down
44 changes: 42 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@

package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.util.BundleUtil;
import java.io.Serializable;
import java.util.MissingResourceException;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.JoinColumn;
import javax.persistence.ManyToOne;
import javax.persistence.NamedNativeQueries;
import javax.persistence.NamedNativeQuery;
import javax.persistence.NamedQueries;
import javax.persistence.NamedQuery;

/**
*
* @author ekraffmiller
* Represents a generic file that is associated with a dataFile.
* This is a data representation of a physical file in StorageIO
*/
@NamedQueries({
@NamedQuery(name = "AuxiliaryFile.lookupAuxiliaryFile",
query = "select object(o) from AuxiliaryFile as o where o.dataFile.id = :dataFileId and o.formatTag = :formatTag and o.formatVersion = :formatVersion"),
@NamedQuery(name = "AuxiliaryFile.findAuxiliaryFiles",
query = "select object(o) from AuxiliaryFile as o where o.dataFile.id = :dataFileId"),
@NamedQuery(name = "AuxiliaryFile.findAuxiliaryFilesByType",
query = "select object(o) from AuxiliaryFile as o where o.dataFile.id = :dataFileId and o.type = :type"),
@NamedQuery(name = "AuxiliaryFile.findAuxiliaryFilesWithoutType",
query = "select object(o) from AuxiliaryFile as o where o.dataFile.id = :dataFileId and o.type is null"),})
@NamedNativeQueries({
@NamedNativeQuery(name = "AuxiliaryFile.findAuxiliaryFileTypes",
query = "select distinct type from auxiliaryfile where datafile_id = ?1")
})
@Entity
public class AuxiliaryFile implements Serializable {

Expand Down Expand Up @@ -44,6 +63,12 @@ public class AuxiliaryFile implements Serializable {

private String checksum;

/**
* A way of grouping similar auxiliary files together. The type could be
* "DP" for "Differentially Private Statistics", for example.
*/
private String type;

public Long getId() {
return id;
}
Expand Down Expand Up @@ -115,6 +140,21 @@ public String getChecksum() {
public void setChecksum(String checksum) {
this.checksum = checksum;
}



/**
 * @return the value used to group this auxiliary file with similar ones
 * (for example "DP"), exactly as stored in the {@code type} field
 */
public String getType() {
return type;
}

/**
 * @param type the value used to group this auxiliary file with similar
 * ones (for example "DP"); stored as given
 */
public void setType(String type) {
this.type = type;
}

/**
 * Looks up a display name for this file's type in the "Bundle" property
 * file, using the key {@code "file.auxfiles.types." + type}.
 *
 * @return the string found in the bundle, or {@code null} when the lookup
 * throws a {@link MissingResourceException}
 */
public String getTypeFriendly() {
    final String bundleKey = "file.auxfiles.types." + type;
    String friendlyName;
    try {
        friendlyName = BundleUtil.getStringFromPropertyFile(bundleKey, "Bundle");
    } catch (MissingResourceException missing) {
        // The lookup failed for this key; report that as "no friendly name".
        friendlyName = null;
    }
    return friendlyName;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
import javax.persistence.TypedQuery;
import org.apache.tika.Tika;

/**
Expand All @@ -28,7 +31,7 @@ public class AuxiliaryFileServiceBean implements java.io.Serializable {
private static final Logger logger = Logger.getLogger(AuxiliaryFileServiceBean.class.getCanonicalName());

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;
protected EntityManager em;

@EJB
private SystemConfig systemConfig;
Expand All @@ -54,9 +57,11 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) {
* @param formatVersion - to distinguish between multiple versions of a file
* @param origin - name of the tool/system that created the file
* @param isPublic boolean - is this file available to any user?
* @param type how to group the files such as "DP" for "Differentially
* Private Statistics".
* @return success boolean - returns whether the save was successful
*/
public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic) {
public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type) {

StorageIO<DataFile> storageIO =null;
AuxiliaryFile auxFile = new AuxiliaryFile();
Expand All @@ -81,6 +86,7 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile
auxFile.setFormatVersion(formatVersion);
auxFile.setOrigin(origin);
auxFile.setIsPublic(isPublic);
auxFile.setType(type);
auxFile.setDataFile(dataFile);
auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension));
auxFile = save(auxFile);
Expand All @@ -101,7 +107,7 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile

public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) {

Query query = em.createQuery("select object(o) from AuxiliaryFile as o where o.dataFile.id = :dataFileId and o.formatTag = :formatTag and o.formatVersion = :formatVersion");
Query query = em.createNamedQuery("AuxiliaryFile.lookupAuxiliaryFile");

query.setParameter("dataFileId", dataFile.getId());
query.setParameter("formatTag", formatTag);
Expand All @@ -114,4 +120,73 @@ public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, St
}
}

/**
 * Runs the "AuxiliaryFile.findAuxiliaryFiles" named query for a data file.
 *
 * @param dataFile the data file whose id is bound to the query's
 * {@code dataFileId} parameter; must not be null
 * @return the auxiliary files returned by the query
 */
public List<AuxiliaryFile> findAuxiliaryFiles(DataFile dataFile) {
    // Parameterized (not raw) TypedQuery so the result list is type-checked.
    TypedQuery<AuxiliaryFile> query = em.createNamedQuery("AuxiliaryFile.findAuxiliaryFiles", AuxiliaryFile.class);
    query.setParameter("dataFileId", dataFile.getId());
    return query.getResultList();
}

/**
 * Filters the distinct auxiliary file types of a data file according to
 * whether a friendly name can be resolved for each type.
 *
 * @param dataFile the data file whose auxiliary file types are examined
 * @param inBundle If true, only return types that are in the bundle. If
 * false, only return types that are not in the bundle.
 * @return the selected types, in the order the type lookup returned them
 */
public List<String> findAuxiliaryFileTypes(DataFile dataFile, boolean inBundle) {
    List<String> selectedTypes = new ArrayList<>();
    for (String candidate : findAuxiliaryFileTypes(dataFile)) {
        // A non-null friendly name is what marks a type as "in the bundle".
        boolean hasFriendlyName = getFriendlyNameForType(candidate) != null;
        if (hasFriendlyName == inBundle) {
            selectedTypes.add(candidate);
        }
    }
    return selectedTypes;
}

/**
 * Runs the "AuxiliaryFile.findAuxiliaryFileTypes" named query, binding the
 * data file's id as positional parameter 1.
 *
 * @param dataFile the data file whose id is bound to the query; must not
 * be null
 * @return the query's result list
 */
public List<String> findAuxiliaryFileTypes(DataFile dataFile) {
    // NOTE(review): presumably the list contains a null entry when some
    // auxiliary files have no type - confirm against the database.
    return em.createNamedQuery("AuxiliaryFile.findAuxiliaryFileTypes")
            .setParameter(1, dataFile.getId())
            .getResultList();
}

/**
 * Runs the "AuxiliaryFile.findAuxiliaryFilesByType" named query for a data
 * file and a type.
 *
 * @param dataFile the data file whose id is bound to the query's
 * {@code dataFileId} parameter; must not be null
 * @param typeString the value bound to the query's {@code type} parameter
 * @return the auxiliary files returned by the query
 */
public List<AuxiliaryFile> findAuxiliaryFilesByType(DataFile dataFile, String typeString) {
    // Parameterized (not raw) TypedQuery so the result list is type-checked.
    TypedQuery<AuxiliaryFile> query = em.createNamedQuery("AuxiliaryFile.findAuxiliaryFilesByType", AuxiliaryFile.class);
    query.setParameter("dataFileId", dataFile.getId());
    query.setParameter("type", typeString);
    return query.getResultList();
}

/**
 * Collects the auxiliary files of a data file whose type has no friendly
 * name, followed by the auxiliary files that have no type at all.
 *
 * @param dataFile the data file whose auxiliary files are examined; must
 * not be null
 * @return files of each type reported as "not in the bundle", in the order
 * those types were returned, then the files without a type
 */
public List<AuxiliaryFile> findOtherAuxiliaryFiles(DataFile dataFile) {
    List<AuxiliaryFile> otherAuxFiles = new ArrayList<>();
    for (String typeString : findAuxiliaryFileTypes(dataFile, false)) {
        if (typeString == null) {
            // Files without a type are added exactly once below. Skipping
            // null here avoids listing them twice should the persistence
            // provider match a null ":type" parameter against null columns.
            continue;
        }
        // Reuse the by-type lookup instead of repeating its query inline.
        otherAuxFiles.addAll(findAuxiliaryFilesByType(dataFile, typeString));
    }
    otherAuxFiles.addAll(findAuxiliaryFilesWithoutType(dataFile));
    return otherAuxFiles;
}

/**
 * Runs the "AuxiliaryFile.findAuxiliaryFilesWithoutType" named query for a
 * data file.
 *
 * @param dataFile the data file whose id is bound to the query's
 * {@code dataFileId} parameter; must not be null
 * @return the auxiliary files returned by the query
 */
public List<AuxiliaryFile> findAuxiliaryFilesWithoutType(DataFile dataFile) {
    // The query is created with a result class, so hold it as a
    // TypedQuery<AuxiliaryFile> rather than an untyped Query.
    TypedQuery<AuxiliaryFile> query = em.createNamedQuery("AuxiliaryFile.findAuxiliaryFilesWithoutType", AuxiliaryFile.class);
    query.setParameter("dataFileId", dataFile.getId());
    return query.getResultList();
}

/**
 * Resolves the friendly name for a type by setting it on a throwaway
 * {@link AuxiliaryFile} and asking that instance for its friendly type.
 *
 * @param type the auxiliary file type to resolve
 * @return whatever {@link AuxiliaryFile#getTypeFriendly()} returns for an
 * instance carrying that type
 */
public String getFriendlyNameForType(String type) {
    // The instance is never persisted; it only carries the type for the lookup.
    AuxiliaryFile typeHolder = new AuxiliaryFile();
    typeHolder.setType(type);
    return typeHolder.getTypeFriendly();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,14 @@ private void redirectToBatchDownloadAPI(String multiFileString, Boolean download
redirectToBatchDownloadAPI(multiFileString, true, downloadOriginal);
}

/**
 * Redirects the current Faces request to the download path of an
 * auxiliary file:
 * {@code /api/access/datafile/{fileId}/metadata/{formatTag}/{formatVersion}}.
 * An {@link IOException} raised by the redirect is logged at info level
 * and not rethrown.
 *
 * @param fileId id placed in the path after {@code datafile/}
 * @param formatTag format tag placed in the path after {@code metadata/}
 * @param formatVersion format version placed last in the path
 */
public void redirectToAuxFileDownloadAPI(Long fileId, String formatTag, String formatVersion) {
    final String auxDownloadPath = new StringBuilder("/api/access/datafile/")
            .append(fileId)
            .append("/metadata/")
            .append(formatTag)
            .append("/")
            .append(formatVersion)
            .toString();
    try {
        FacesContext.getCurrentInstance().getExternalContext().redirect(auxDownloadPath);
    } catch (IOException redirectFailure) {
        logger.info("Failed to issue a redirect to aux file download url (" + auxDownloadPath + "): " + redirectFailure);
    }
}

/**
* Launch an "explore" tool which is a type of ExternalTool such as
Expand Down
Loading