diff --git a/doc/release-notes/7275-aux-files.md b/doc/release-notes/7275-aux-files.md new file mode 100644 index 00000000000..24aa7f86f2d --- /dev/null +++ b/doc/release-notes/7275-aux-files.md @@ -0,0 +1,3 @@ +## Notes for Tool Developers and Integrators + +Experimental endpoints have been added to allow auxiliary files to be added to datafiles. These auxiliary files can be deposited and accessed via API. Later releases will include options for accessing these files through the UI. For more information, see the Auxiliary File Support section of the [Developer Guide](https://guides.dataverse.org/en/5.3/developers/). diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 6d0176c094d..3ffd06bcace 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3,7 +3,7 @@ Native API Dataverse 4 exposes most of its GUI functionality via a REST-based API. This section describes that functionality. Most API endpoints require an API token that can be passed as the ``X-Dataverse-key`` HTTP header or in the URL as the ``key`` query parameter. -.. note:: |CORS| Some API endpoint allow CORS_ (cross-origin resource sharing), which makes them usable from scripts runing in web browsers. These endpoints are marked with a *CORS* badge. +.. note:: |CORS| Some API endpoints allow CORS_ (cross-origin resource sharing), which makes them usable from scripts running in web browsers. These endpoints are marked with a *CORS* badge. .. note:: Bash environment variables shown below. The idea is that you can "export" these environment variables before copying and pasting the commands that use them. For example, you can set ``$SERVER_URL`` by running ``export SERVER_URL="https://demo.dataverse.org"`` in your Bash shell. To check if the environment variable was set properly, you can "echo" it (e.g. ``echo $SERVER_URL``). See also :ref:`curl-examples-and-environment-variables`. 
diff --git a/doc/sphinx-guides/source/developers/aux-file-support.rst b/doc/sphinx-guides/source/developers/aux-file-support.rst new file mode 100644 index 00000000000..be21b56c245 --- /dev/null +++ b/doc/sphinx-guides/source/developers/aux-file-support.rst @@ -0,0 +1,36 @@ +Auxiliary File Support +====================== + +Auxiliary file support is experimental. Auxiliary files in Dataverse are being added to support depositing and downloading differentially private metadata, as part of the OpenDP project (OpenDP.io). In future versions, this approach may become more broadly used and supported. + +Adding an Auxiliary File to a Datafile +-------------------------------------- +To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and the formatTag and formatVersion (if applicable) associated with the auxiliary file. There are two form parameters. "Origin" specifies the application/entity that created the auxiliary file, and "isPublic" controls access to downloading the file. If "isPublic" is true, any user can download the file, else, access authorization is based on the access rules as defined for the DataFile itself. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export FILENAME='auxfile.txt' + export FILE_ID='12345' + export FORMAT_TAG='dpJson' + export FORMAT_VERSION='v1' + export SERVER_URL=https://demo.dataverse.org + + curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/access/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION" + +You should expect a 200 ("OK") response and JSON with information about your newly uploaded auxiliary file. + +Downloading an Auxiliary File that belongs to a Datafile +-------------------------------------------------------- +To download an auxiliary file, use the primary key of the datafile, and the +formatTag and formatVersion (if applicable) associated with the auxiliary file: + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_ID='12345' + export FORMAT_TAG='dpJson' + export FORMAT_VERSION='v1' + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/access/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION" diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 96595220e07..9c524571a39 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -32,4 +32,5 @@ Developer Guide geospatial selinux big-data-support + aux-file-support workflows diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java new file mode 100644 index 00000000000..957a7cc93bf --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -0,0 +1,120 @@ + +package edu.harvard.iq.dataverse; + +import java.io.Serializable; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.JoinColumn; +import javax.persistence.ManyToOne; + +/** + * + * @author ekraffmiller + * Represents a generic file that is associated with a dataFile. 
+ * This is a data representation of a physical file in StorageIO + */ +@Entity +public class AuxiliaryFile implements Serializable { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + /** + * The data file that this AuxiliaryFile belongs to + * a data file may have many auxiliaryFiles + */ + @ManyToOne + @JoinColumn(nullable=false) + private DataFile dataFile; + + private String formatTag; + + private String formatVersion; + + private String origin; + + private boolean isPublic; + + private String contentType; + + private Long fileSize; + + private String checksum; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public DataFile getDataFile() { + return dataFile; + } + + public void setDataFile(DataFile dataFile) { + this.dataFile = dataFile; + } + + public String getFormatTag() { + return formatTag; + } + + public void setFormatTag(String formatTag) { + this.formatTag = formatTag; + } + + public String getFormatVersion() { + return formatVersion; + } + + public void setFormatVersion(String formatVersion) { + this.formatVersion = formatVersion; + } + + public String getOrigin() { + return origin; + } + + public void setOrigin(String origin) { + this.origin = origin; + } + + public boolean getIsPublic() { + return isPublic; + } + + public void setIsPublic(boolean isPublic) { + this.isPublic = isPublic; + } + + public String getContentType() { + return this.contentType; + } + + public void setContentType(String contentType) { + this.contentType = contentType; + } + + public Long getFileSize() { + return fileSize; + } + + public void setFileSize(long fileSize) { + this.fileSize = fileSize; + } + + public String getChecksum() { + return checksum; + } + + public void setChecksum(String checksum) { + this.checksum = checksum; + } + + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java 
new file mode 100644 index 00000000000..4f97c146e7b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -0,0 +1,117 @@ + +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; +import java.io.IOException; +import java.io.InputStream; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.util.logging.Logger; +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.inject.Named; +import javax.persistence.EntityManager; +import javax.persistence.PersistenceContext; +import javax.persistence.Query; +import org.apache.tika.Tika; + +/** + * + * @author ekraffmiller + * Methods related to the AuxiliaryFile Entity. + */ +@Stateless +@Named +public class AuxiliaryFileServiceBean implements java.io.Serializable { + private static final Logger logger = Logger.getLogger(AuxiliaryFileServiceBean.class.getCanonicalName()); + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + @EJB + private SystemConfig systemConfig; + + + public AuxiliaryFile find(Object pk) { + return em.find(AuxiliaryFile.class, pk); + } + + public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { + AuxiliaryFile savedFile = em.merge(auxiliaryFile); + return savedFile; + + } + + /** + * Save the physical file to storageIO, and save the AuxiliaryFile entity + * to the database. This should be an all or nothing transaction - if either + * process fails, than nothing will be saved + * @param fileInputStream - auxiliary file data to be saved + * @param dataFile - the dataFile entity this will be added to + * @param formatTag - type of file being saved + * @param formatVersion - to distinguish between multiple versions of a file + * @param origin - name of the tool/system that created the file + * @param isPublic boolean - is this file available to any user? 
+ * @return the saved AuxiliaryFile entity, or null if the save failed + */ + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic) { + + StorageIO storageIO =null; + AuxiliaryFile auxFile = new AuxiliaryFile(); + String auxExtension = formatTag + "_" + formatVersion; + try { + // Save to storage first. + // If that is successful (does not throw exception), + // then save to db. + // If the db fails for any reason, then rollback + // by removing the auxfile from storage. + storageIO = dataFile.getStorageIO(); + MessageDigest md = MessageDigest.getInstance(systemConfig.getFileFixityChecksumAlgorithm().toString()); + DigestInputStream di + = new DigestInputStream(fileInputStream, md); + + storageIO.saveInputStreamAsAux(di, auxExtension); + auxFile.setChecksum(FileUtil.checksumDigestToString(di.getMessageDigest().digest()) ); + + Tika tika = new Tika(); + auxFile.setContentType(tika.detect(storageIO.getAuxFileAsInputStream(auxExtension))); + auxFile.setFormatTag(formatTag); + auxFile.setFormatVersion(formatVersion); + auxFile.setOrigin(origin); + auxFile.setIsPublic(isPublic); + auxFile.setDataFile(dataFile); + auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); + auxFile = save(auxFile); + } catch (IOException ioex) { + logger.info("IO Exception trying to save auxiliary file: " + ioex.getMessage()); + return null; + } catch (Exception e) { + // If anything fails during database insert, remove file from storage + try { + storageIO.deleteAuxObject(auxExtension); + } catch(IOException ioex) { + logger.info("IO Exception trying to remove auxiliary file in exception handler: " + ioex.getMessage()); + } + return null; + } + return auxFile; + } + + public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) { + + Query query = em.createQuery("select object(o) from AuxiliaryFile as o where o.dataFile.id = 
:dataFileId and o.formatTag = :formatTag and o.formatVersion = :formatVersion"); + + query.setParameter("dataFileId", dataFile.getId()); + query.setParameter("formatTag", formatTag); + query.setParameter("formatVersion", formatVersion); + try { + AuxiliaryFile retVal = (AuxiliaryFile)query.getSingleResult(); + return retVal; + } catch(Exception ex) { + return null; + } + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 560048db9ca..2f0981c80af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -192,6 +192,9 @@ public String toString() { @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) private List dataTables; + @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) + private List auxiliaryFiles; + @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) private List ingestReports; @@ -281,6 +284,14 @@ public String getDuplicateFilename() { public void setDuplicateFilename(String duplicateFilename) { this.duplicateFilename = duplicateFilename; } + + public List getAuxiliaryFiles() { + return auxiliaryFiles; + } + + public void setAuxiliaryFiles(List auxiliaryFiles) { + this.auxiliaryFiles = auxiliaryFiles; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 8f913ea5f1b..0c7a4224648 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -6,6 +6,8 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.DataFile; import 
edu.harvard.iq.dataverse.FileMetadata; @@ -43,14 +45,12 @@ import edu.harvard.iq.dataverse.dataaccess.DataFileZipper; import edu.harvard.iq.dataverse.dataaccess.OptionalAccessService; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; -import edu.harvard.iq.dataverse.dataaccess.StoredOriginalFile; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; @@ -62,13 +62,12 @@ import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import edu.harvard.iq.dataverse.worldmapauth.WorldMapTokenServiceBean; import java.util.logging.Logger; @@ -88,16 +87,9 @@ import java.util.logging.Level; import javax.inject.Inject; import javax.json.Json; -import javax.json.JsonObjectBuilder; -import 
java.math.BigDecimal; import java.net.URI; -import java.util.HashSet; -import java.util.Set; -import java.util.function.Consumer; -import javax.faces.context.FacesContext; import javax.json.JsonArrayBuilder; import javax.persistence.TypedQuery; -import javax.servlet.http.HttpServletRequest; import javax.ws.rs.GET; import javax.ws.rs.Path; @@ -110,7 +102,6 @@ import javax.servlet.http.HttpServletResponse; -import javax.servlet.http.HttpSession; import javax.ws.rs.BadRequestException; import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; @@ -125,10 +116,13 @@ import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import javax.ws.rs.core.StreamingOutput; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import java.net.URISyntaxException; import javax.ws.rs.RedirectionException; +import javax.ws.rs.core.MediaType; +import static javax.ws.rs.core.Response.Status.FORBIDDEN; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; +import org.glassfish.jersey.media.multipart.FormDataContentDisposition; +import org.glassfish.jersey.media.multipart.FormDataParam; /* Custom API exceptions [NOT YET IMPLEMENTED] @@ -184,6 +178,8 @@ public class Access extends AbstractApiBean { UserNotificationServiceBean userNotificationService; @EJB FileDownloadServiceBean fileDownloadService; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; @Inject PermissionsWrapper permissionsWrapper; @Inject @@ -505,16 +501,19 @@ public String dataVariableMetadataDDI(@PathParam("varId") Long varId, @QueryPara } /* - * "Preprocessed data" metadata format: - * (this was previously provided as a "format conversion" option of the - * file download form of the access API call) + * GET method for retrieving various auxiliary files associated with + * a tabular datafile. 
*/ - @Path("datafile/{fileId}/metadata/preprocessed") - @GET - @Produces({"text/xml"}) - - public DownloadInstance tabularDatafileMetadataPreprocessed(@PathParam("fileId") String fileId, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { + @Path("datafile/{fileId}/metadata/{formatTag}/{formatVersion}") + @GET + public DownloadInstance tabularDatafileMetadataAux(@PathParam("fileId") String fileId, + @PathParam("formatTag") String formatTag, + @PathParam("formatVersion") String formatVersion, + @QueryParam("key") String apiToken, + @Context UriInfo uriInfo, + @Context HttpHeaders headers, + @Context HttpServletResponse response) throws ServiceUnavailableException { DataFile df = findDataFileOrDieWrapper(fileId); @@ -522,18 +521,48 @@ public DownloadInstance tabularDatafileMetadataPreprocessed(@PathParam("fileId") apiToken = headers.getHeaderString(API_KEY_HEADER); } - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); DownloadInfo dInfo = new DownloadInfo(df); + boolean publiclyAvailable = false; - if (df.isTabularData()) { + if (!df.isTabularData()) { + throw new BadRequestException("tabular data required"); + } + + DownloadInstance downloadInstance; + AuxiliaryFile auxFile = null; + + // formatTag=preprocessed is handled as a special case. + // This is (as of now) the only aux. tabular metadata format that Dataverse + // can generate (and cache) itself. (All the other formats served have + // to be deposited first, by the @POST version of this API). 
+ + if ("preprocessed".equals(formatTag)) { dInfo.addServiceAvailable(new OptionalAccessService("preprocessed", "application/json", "format=prep", "Preprocessed data in JSON")); + downloadInstance = new DownloadInstance(dInfo); + if (downloadInstance.checkIfServiceSupportedAndSetConverter("format", "prep")) { + logger.fine("Preprocessed data for tabular file "+fileId); + } } else { - throw new BadRequestException("tabular data required"); + // All other (deposited) formats: + auxFile = auxiliaryFileService.lookupAuxiliaryFile(df, formatTag, formatVersion); + + if (auxFile == null) { + throw new NotFoundException("Auxiliary metadata format "+formatTag+" is not available for datafile "+fileId); + } + + if (auxFile.getIsPublic()) { + publiclyAvailable = true; + } + downloadInstance = new DownloadInstance(dInfo); + downloadInstance.setAuxiliaryFile(auxFile); } - DownloadInstance downloadInstance = new DownloadInstance(dInfo); - if (downloadInstance.checkIfServiceSupportedAndSetConverter("format", "prep")) { - logger.fine("Preprocessed data for tabular file "+fileId); + + // Unless this format is explicitly authorized to be publicly available, + // the following will check access authorization (based on the access rules + // as defined for the DataFile itself), and will throw a ForbiddenException + // if access is denied: + if (!publiclyAvailable) { + checkAuthorization(df, apiToken); } return downloadInstance; @@ -1084,6 +1113,64 @@ private String getWebappImageResource(String imageName) { } */ + /** + * + * @param fileId + * @param formatTag + * @param formatVersion + * @param origin + * @param isPublic + * @param fileInputStream + * @param contentDispositionHeader + * @param formDataBodyPart + * @return + */ + @Path("datafile/{fileId}/metadata/{formatTag}/{formatVersion}") + @POST + @Consumes(MediaType.MULTIPART_FORM_DATA) + + public Response saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, + @PathParam("formatTag") String formatTag, + 
@PathParam("formatVersion") String formatVersion, + @FormDataParam("origin") String origin, + @FormDataParam("isPublic") boolean isPublic, + @FormDataParam("file") InputStream fileInputStream + + ) { + AuthenticatedUser authenticatedUser; + try { + authenticatedUser = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + return error(FORBIDDEN, "Authorized users only."); + } + + DataFile dataFile = dataFileService.find(fileId); + if (dataFile == null) { + return error(BAD_REQUEST, "File not found based on id " + fileId + "."); + } + + if (!permissionService.userOn(authenticatedUser, dataFile.getOwner()).has(Permission.EditDataset)) { + return error(FORBIDDEN, "User not authorized to edit the dataset."); + } + + if (!dataFile.isTabularData()) { + return error(BAD_REQUEST, "Not a tabular DataFile (db id=" + fileId + ")"); + } + + + AuxiliaryFile saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic); + + if (saved!=null) { + return ok(json(saved)); + } else { + return error(BAD_REQUEST, "Error saving Auxiliary file."); + } + } + + + + + /** * Allow (or disallow) access requests to Dataset * @@ -1835,5 +1922,5 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, throw new BadRequestException(); } return redirectUri; - } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java index 7e354bea24b..07215cb919e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java @@ -6,6 +6,7 @@ package edu.harvard.iq.dataverse.api; //import java.io.ByteArrayOutputStream; +import edu.harvard.iq.dataverse.AuxiliaryFile; import edu.harvard.iq.dataverse.DataverseRequestServiceBean; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.GuestbookResponse; @@ -47,6 +48,12 @@ 
public void setExtraArguments(List extraArguments) { private String conversionParam = null; private String conversionParamValue = null; + // This download instance is for an auxiliary file associated with + // the DataFile. Unlike "conversions" (above) this is used for files + // that Dataverse has no way of producing/deriving from the parent Datafile + // itself, that have to be deposited externally. + private AuxiliaryFile auxiliaryFile = null; + private EjbDataverseEngine command; private DataverseRequestServiceBean dataverseRequestService; @@ -210,4 +217,12 @@ public void setDataverseRequestService(DataverseRequestServiceBean dataverseRequ this.dataverseRequestService = dataverseRequestService; } + public AuxiliaryFile getAuxiliaryFile() { + return auxiliaryFile; + } + + public void setAuxiliaryFile(AuxiliaryFile auxiliaryFile) { + this.auxiliaryFile = auxiliaryFile; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index b10412a577d..1624e9932e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -227,6 +227,20 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // (similarly to what the Access API returns when a thumbnail is requested on a text file, etc.) throw new NotFoundException("datafile access error: requested optional service (image scaling, format conversion, etc.) 
could not be performed on this datafile."); } + } else if (di.getAuxiliaryFile() != null) { + // Make sure to close the InputStream for the main datafile: + try {storageIO.getInputStream().close();} catch (IOException ioex) {} + String auxTag = di.getAuxiliaryFile().getFormatTag(); + String auxVersion = di.getAuxiliaryFile().getFormatVersion(); + if (auxVersion != null) { + auxTag = auxTag + "_" + auxVersion; + } + long auxFileSize = di.getAuxiliaryFile().getFileSize(); + InputStreamIO auxStreamIO = new InputStreamIO(storageIO.getAuxFileAsInputStream(auxTag), auxFileSize); + auxStreamIO.setFileName(storageIO.getFileName() + "." + auxTag); + auxStreamIO.setMimeType(di.getAuxiliaryFile().getContentType()); + storageIO = auxStreamIO; + } else { if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && ((S3AccessIO) storageIO).downloadRedirectEnabled()) { // definitely close the (still open) S3 input stream, diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 4b7dce7d8c7..c30927281f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -686,7 +686,7 @@ public static String calculateChecksum(byte[] dataBytes, ChecksumType checksumTy } - private static String checksumDigestToString(byte[] digestBytes) { + public static String checksumDigestToString(byte[] digestBytes) { StringBuilder sb = new StringBuilder(""); for (int i = 0; i < digestBytes.length; i++) { sb.append(Integer.toString((digestBytes[i] & 0xff) + 0x100, 16).substring(1)); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index b52dabdb31d..c37efc3178f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -1,5 +1,6 @@ package 
edu.harvard.iq.dataverse.util.json; +import edu.harvard.iq.dataverse.AuxiliaryFile; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileTag; @@ -563,6 +564,16 @@ public static JsonObjectBuilder json(FileMetadata fmd) { .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd)); } + public static JsonObjectBuilder json(AuxiliaryFile auxFile) { + return jsonObjectBuilder() + .add("formatTag", auxFile.getFormatTag()) + .add("formatVersion", auxFile.getFormatVersion()) // "label" is the filename + .add("origin", auxFile.getOrigin()) + .add("isPublic", auxFile.getIsPublic()) + .add("fileSize", auxFile.getFileSize()) + .add("checksum", auxFile.getChecksum()) + .add("dataFile", JsonPrinter.json(auxFile.getDataFile())); + } public static JsonObjectBuilder json(DataFile df) { return JsonPrinter.json(df, null); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index ad0c93a80d1..4fb1271c8c9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -6,20 +6,24 @@ package edu.harvard.iq.dataverse.api; import com.jayway.restassured.RestAssured; +import static com.jayway.restassured.RestAssured.given; import com.jayway.restassured.path.json.JsonPath; import com.jayway.restassured.response.Response; import edu.harvard.iq.dataverse.DataFile; +import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; import edu.harvard.iq.dataverse.util.FileUtil; import java.io.IOException; import java.util.zip.ZipInputStream; -import static javax.ws.rs.core.Response.Status.OK; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import java.util.zip.ZipEntry; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.InputStream; +import java.nio.file.Path; import java.util.HashMap; +import 
static javax.ws.rs.core.Response.Status.OK; import org.hamcrest.collection.IsMapContaining; import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -27,12 +31,6 @@ import static org.junit.Assert.assertTrue; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.not; -import static junit.framework.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.not; /** * @@ -156,6 +154,7 @@ public static void setUp() throws InterruptedException { String tab4PathToFile = "scripts/search/data/tabular/" + tabFile4NameUnpublished; Response tab4AddResponse = UtilIT.uploadFileViaNative(datasetId.toString(), tab4PathToFile, apiToken); tabFile4IdUnpublished = JsonPath.from(tab4AddResponse.body().asString()).getInt("data.files[0].dataFile.id"); + assertTrue("Failed test if Ingest Lock exceeds max duration " + tabFile2Name, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); } @@ -172,6 +171,26 @@ public static void tearDown() { } + @Test + public void testSaveAuxiliaryFileWithVersion() throws IOException { + System.out.println("Add aux file with update"); + String mimeType = null; + String pathToFile = "scripts/search/data/tabular/1char"; + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .multiPart("file", new File(pathToFile), mimeType) + .post("/api/access/datafile/" + tabFile1Id + "/metadata/dpJSON/v1"); + response.prettyPrint(); + assertEquals(200, response.getStatusCode()); + System.out.println("Downloading Aux file that was just added"); + response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/access/datafile/" + tabFile1Id + "/metadata/dpJSON/v1"); + + String dataStr = response.prettyPrint(); + assertEquals(dataStr,"a\n"); + 
assertEquals(200, response.getStatusCode()); + } //This test does a lot of testing of non-original downloads as well @Test