Merged
3 changes: 3 additions & 0 deletions doc/release-notes/7275-aux-files.md
@@ -0,0 +1,3 @@
## Notes for Tool Developers and Integrators

Experimental endpoints have been added to allow auxiliary files to be added to datafiles. These auxiliary files can be deposited and accessed via API. Later releases will include options for accessing these files through the UI. For more information, see the Auxiliary File Support section of the [Developer Guide](https://guides.dataverse.org/en/5.3/developers/).
2 changes: 1 addition & 1 deletion doc/sphinx-guides/source/api/native-api.rst
@@ -3,7 +3,7 @@ Native API

Dataverse 4 exposes most of its GUI functionality via a REST-based API. This section describes that functionality. Most API endpoints require an API token that can be passed as the ``X-Dataverse-key`` HTTP header or in the URL as the ``key`` query parameter.

.. note:: |CORS| Some API endpoint allow CORS_ (cross-origin resource sharing), which makes them usable from scripts runing in web browsers. These endpoints are marked with a *CORS* badge.
.. note:: |CORS| Some API endpoints allow CORS_ (cross-origin resource sharing), which makes them usable from scripts running in web browsers. These endpoints are marked with a *CORS* badge.

.. note:: Bash environment variables shown below. The idea is that you can "export" these environment variables before copying and pasting the commands that use them. For example, you can set ``$SERVER_URL`` by running ``export SERVER_URL="https://demo.dataverse.org"`` in your Bash shell. To check if the environment variable was set properly, you can "echo" it (e.g. ``echo $SERVER_URL``). See also :ref:`curl-examples-and-environment-variables`.

36 changes: 36 additions & 0 deletions doc/sphinx-guides/source/developers/aux-file-support.rst
@@ -0,0 +1,36 @@
Auxiliary File Support
======================

Auxiliary file support is experimental. Auxiliary files in Dataverse are being added to support depositing and downloading differentially private metadata, as part of the OpenDP project (OpenDP.io). In future versions, this approach may become more broadly used and supported.

Adding an Auxiliary File to a Datafile
--------------------------------------

To add an auxiliary file, specify the primary key of the datafile (FILE_ID) and the formatTag and formatVersion (if applicable) associated with the auxiliary file. There are two form parameters: "origin" specifies the application or entity that created the auxiliary file, and "isPublic" controls access to downloading the file. If "isPublic" is true, any user can download the file; otherwise, access authorization is based on the access rules defined for the DataFile itself.

.. code-block:: bash

export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
export FILENAME='auxfile.txt'
export FILE_ID='12345'
export FORMAT_TAG='dpJson'
export FORMAT_VERSION='v1'
export SERVER_URL=https://demo.dataverse.org

    curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/access/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION"

You should expect a 200 ("OK") response and JSON with information about your newly uploaded auxiliary file.
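The same deposit can be scripted outside of curl. Below is a minimal sketch in Python that mirrors the curl call above; the `requests` dependency and the helper names (`aux_upload_url`, `deposit_aux_file`) are assumptions for illustration, and the example values are the ones from the environment variables:

```python
def aux_upload_url(server_url, file_id, format_tag, format_version):
    # Mirrors the endpoint used in the curl example above.
    return (f"{server_url}/api/access/datafile/{file_id}"
            f"/metadata/{format_tag}/{format_version}")

def deposit_aux_file(server_url, api_token, file_id, format_tag,
                     format_version, path, origin="myApp", is_public=True):
    import requests  # third-party library, assumed installed
    with open(path, "rb") as fh:
        return requests.post(
            aux_upload_url(server_url, file_id, format_tag, format_version),
            headers={"X-Dataverse-key": api_token},
            files={"file": fh},
            data={"origin": origin,
                  "isPublic": "true" if is_public else "false"},
        )

print(aux_upload_url("https://demo.dataverse.org", "12345", "dpJson", "v1"))
# -> https://demo.dataverse.org/api/access/datafile/12345/metadata/dpJson/v1
```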

Downloading an Auxiliary File that belongs to a Datafile
--------------------------------------------------------
To download an auxiliary file, use the primary key of the datafile, and the
formatTag and formatVersion (if applicable) associated with the auxiliary file:

.. code-block:: bash

export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
export SERVER_URL=https://demo.dataverse.org
export FILE_ID='12345'
export FORMAT_TAG='dpJson'
export FORMAT_VERSION='v1'

curl "$SERVER_URL/api/access/datafile/$FILE_ID/$FORMAT_TAG/$FORMAT_VERSION"
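A matching helper for the download endpoint can be sketched the same way (the helper name is illustrative; the path mirrors the curl example above, which has no metadata segment):

```python
def aux_download_url(server_url, file_id, format_tag, format_version):
    # Mirrors the curl example above: note there is no /metadata segment.
    return f"{server_url}/api/access/datafile/{file_id}/{format_tag}/{format_version}"

print(aux_download_url("https://demo.dataverse.org", "12345", "dpJson", "v1"))
# -> https://demo.dataverse.org/api/access/datafile/12345/dpJson/v1
```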
1 change: 1 addition & 0 deletions doc/sphinx-guides/source/developers/index.rst
Original file line number Diff line number Diff line change
@@ -32,4 +32,5 @@ Developer Guide
geospatial
selinux
big-data-support
aux-file-support
workflows
120 changes: 120 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java
@@ -0,0 +1,120 @@

package edu.harvard.iq.dataverse;

import java.io.Serializable;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.JoinColumn;
import javax.persistence.ManyToOne;

/**
 * Represents a generic file that is associated with a DataFile.
 * This is a data representation of a physical file in StorageIO.
 *
 * @author ekraffmiller
 */
@Entity
public class AuxiliaryFile implements Serializable {

@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
private Long id;

/**
 * The data file that this AuxiliaryFile belongs to;
 * a data file may have many auxiliary files.
 */
@ManyToOne
@JoinColumn(nullable=false)
private DataFile dataFile;

private String formatTag;

private String formatVersion;

private String origin;

private boolean isPublic;

private String contentType;

private Long fileSize;

private String checksum;

public Long getId() {
return id;
}

public void setId(Long id) {
this.id = id;
}

public DataFile getDataFile() {
return dataFile;
}

public void setDataFile(DataFile dataFile) {
this.dataFile = dataFile;
}

public String getFormatTag() {
return formatTag;
}

public void setFormatTag(String formatTag) {
this.formatTag = formatTag;
}

public String getFormatVersion() {
return formatVersion;
}

public void setFormatVersion(String formatVersion) {
this.formatVersion = formatVersion;
}

public String getOrigin() {
return origin;
}

public void setOrigin(String origin) {
this.origin = origin;
}

public boolean getIsPublic() {
return isPublic;
}

public void setIsPublic(boolean isPublic) {
this.isPublic = isPublic;
}

public String getContentType() {
return this.contentType;
}

public void setContentType(String contentType) {
this.contentType = contentType;
}

public Long getFileSize() {
return fileSize;
}

public void setFileSize(long fileSize) {
this.fileSize = fileSize;
}

public String getChecksum() {
return checksum;
}

public void setChecksum(String checksum) {
this.checksum = checksum;
}


}
117 changes: 117 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java
@@ -0,0 +1,117 @@

package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
import org.apache.tika.Tika;

/**
 * Methods related to the AuxiliaryFile entity.
 *
 * @author ekraffmiller
 */
@Stateless
@Named
public class AuxiliaryFileServiceBean implements java.io.Serializable {
private static final Logger logger = Logger.getLogger(AuxiliaryFileServiceBean.class.getCanonicalName());

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;

@EJB
private SystemConfig systemConfig;


public AuxiliaryFile find(Object pk) {
return em.find(AuxiliaryFile.class, pk);
}

public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) {
return em.merge(auxiliaryFile);
}

/**
* Save the physical file to storageIO, and save the AuxiliaryFile entity
* to the database. This should be an all-or-nothing transaction - if either
* process fails, then nothing will be saved.
* @param fileInputStream - auxiliary file data to be saved
* @param dataFile - the dataFile entity this will be added to
* @param formatTag - type of file being saved
* @param formatVersion - to distinguish between multiple versions of a file
* @param origin - name of the tool/system that created the file
* @param isPublic boolean - is this file available to any user?
* @return the saved AuxiliaryFile entity, or null if the save failed
*/
public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic) {

StorageIO<DataFile> storageIO =null;
AuxiliaryFile auxFile = new AuxiliaryFile();
String auxExtension = formatTag + "_" + formatVersion;
try {
// Save to storage first.
// If that is successful (does not throw exception),
// then save to db.
// If the db fails for any reason, then rollback
// by removing the auxfile from storage.
storageIO = dataFile.getStorageIO();
MessageDigest md = MessageDigest.getInstance(systemConfig.getFileFixityChecksumAlgorithm().toString());
DigestInputStream di = new DigestInputStream(fileInputStream, md);

// Read through the digest wrapper so the checksum reflects the bytes written;
// reading the raw fileInputStream would bypass the digest entirely.
storageIO.saveInputStreamAsAux(di, auxExtension);
auxFile.setChecksum(FileUtil.checksumDigestToString(di.getMessageDigest().digest()) );

Tika tika = new Tika();
Review discussion on the Tika-based content-type detection:

Member: So far Tika is only used for full-text indexing, and content-type detection is done differently for Datafiles. Could/should these both be done the same way? (not necessarily in this PR)

Contributor (author): For Datafiles, JHove is used, which works on a file as opposed to an InputStream. So to use JHove, I think I would have to save the aux file to a temp location, unless I'm missing something. Is this something we want to do?

Member: FWIW: Requiring a temp file for mime detection is a problem for Datafiles too, so it may make sense to use Tika instead of JHOVE there as well. (#6937 would make it possible to get ranges of bytes from S3 to help avoid having to get a whole file).

Contributor (author): OK, should we create a separate ticket for changing mime detection to Tika for Datafiles? (Is this something that is usually talked about at dv_tech hour?)

Member: The method that's used for DataFiles is determineContentType in FileUtil. It would probably be good to use the same code path, instead of using Tika. Or switch it all to Tika, I guess, but that seems daunting to me. Or I guess we could use Tika here for now and work on consistency later. And sure, this is a fine topic for tech hours. 😄

Contributor (author): Discussed this during tech hours; we agreed that it would be good to eventually switch from JHove to Tika in other parts of the code for consistency.

Contributor: I'm about to approve the PR. There's just one thing I regret not having thought about earlier, when we were having this discussion above: WHY are we trying to detect mime types at all, as opposed to just making the uploading client supply one as another parameter? We try to detect mime types on "normal" uploaded files - but that's because users upload arbitrary files. This API is for something that's structured, at least in the immediate use case: the preprocessed summary stats fragment will always be JSON, the differentially private DDI will always be XML, etc. We don't expect any variety there. I don't see this as a problem - having this detection code in place may come in handy for other cases, and even if we decide that these aux uploads should or can supply the mime type as a parameter, we could use this detection as an extra validation step. Still, I can't help but feel I should've said "let's not even worry about it, let's just add a parameter instead" early on.
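For readers following the discussion: the magic-number sniffing that detectors like Tika perform can be illustrated with a few bytes of lookahead. This is a heavily simplified, hypothetical sketch (Tika consults a large signature database; the function name and the tiny type list here are inventions for illustration only):

```python
def sniff_content_type(data: bytes) -> str:
    """Tiny magic-number sniffer; illustrative only, not Tika's algorithm."""
    head = data.lstrip()[:5]          # skip leading whitespace, peek a few bytes
    if head[:1] in (b"{", b"["):      # JSON documents start with { or [
        return "application/json"
    if head.startswith(b"<?xml"):     # XML prolog
        return "application/xml"
    if data.startswith(b"\x89PNG"):   # PNG magic number
        return "image/png"
    return "application/octet-stream" # unknown: fall back to generic binary

print(sniff_content_type(b'{"epsilon": 1.0}'))
# -> application/json
```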

auxFile.setContentType(tika.detect(storageIO.getAuxFileAsInputStream(auxExtension)));
auxFile.setFormatTag(formatTag);
auxFile.setFormatVersion(formatVersion);
auxFile.setOrigin(origin);
auxFile.setIsPublic(isPublic);
auxFile.setDataFile(dataFile);
auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension));
auxFile = save(auxFile);
} catch (IOException ioex) {
logger.info("IO Exception trying to save auxiliary file: " + ioex.getMessage());
return null;
} catch (Exception e) {
// If anything else fails (e.g. the database insert), roll back by
// removing the file from storage, then report failure to the caller.
logger.info("Exception saving auxiliary file: " + e.getMessage());
try {
storageIO.deleteAuxObject(auxExtension);
} catch (IOException ioex) {
logger.info("IO Exception trying to remove auxiliary file in exception handler: " + ioex.getMessage());
}
return null;
}
return auxFile;
}

public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) {

Query query = em.createQuery("select object(o) from AuxiliaryFile as o where o.dataFile.id = :dataFileId and o.formatTag = :formatTag and o.formatVersion = :formatVersion");

query.setParameter("dataFileId", dataFile.getId());
query.setParameter("formatTag", formatTag);
query.setParameter("formatVersion", formatVersion);
try {
AuxiliaryFile retVal = (AuxiliaryFile)query.getSingleResult();
return retVal;
} catch(Exception ex) {
return null;
}
}

}
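The DigestInputStream pattern used in processAuxiliaryFile (accumulating a checksum while the stream is copied to storage, without buffering the whole file) can be sketched in Python, with hashlib standing in for MessageDigest. The class and function names are illustrative, and MD5 is assumed purely as an example algorithm:

```python
import hashlib
import io

class DigestReader:
    """Wraps a stream; every byte read through it is fed into a hash,
    analogous to java.security.DigestInputStream."""
    def __init__(self, stream, algorithm="md5"):
        self._stream = stream
        self.digest = hashlib.new(algorithm)

    def read(self, n=-1):
        chunk = self._stream.read(n)
        self.digest.update(chunk)
        return chunk

def save_with_checksum(source, sink):
    # Copy source to sink in chunks; the digest sees exactly the bytes saved.
    reader = DigestReader(source)
    while True:
        chunk = reader.read(8192)
        if not chunk:
            break
        sink.write(chunk)
    return reader.digest.hexdigest()

data = b"auxiliary file bytes"
out = io.BytesIO()
checksum = save_with_checksum(io.BytesIO(data), out)
print(checksum == hashlib.md5(data).hexdigest())
# -> True
```

Note that the copy must go through the wrapper: reading the underlying stream directly would bypass the digest and yield a checksum of zero bytes.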
11 changes: 11 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DataFile.java
@@ -192,6 +192,9 @@ public String toString() {
@OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST})
private List<DataTable> dataTables;

@OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST})
private List<AuxiliaryFile> auxiliaryFiles;

@OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST})
private List<IngestReport> ingestReports;

@@ -281,6 +284,14 @@ public String getDuplicateFilename() {
public void setDuplicateFilename(String duplicateFilename) {
this.duplicateFilename = duplicateFilename;
}

public List<AuxiliaryFile> getAuxiliaryFiles() {
return auxiliaryFiles;
}

public void setAuxiliaryFiles(List<AuxiliaryFile> auxiliaryFiles) {
this.auxiliaryFiles = auxiliaryFiles;
}


