From 5c2391beb65528675609d499882e41eb56709518 Mon Sep 17 00:00:00 2001 From: ellenk Date: Tue, 13 Oct 2020 14:13:25 -0400 Subject: [PATCH 01/15] new API method for saving a generic auxiliary file to a data file --- .../harvard/iq/dataverse/AuxiliaryFile.java | 90 +++++++++++++++++++ .../dataverse/AuxiliaryFileServiceBean.java | 83 +++++++++++++++++ .../edu/harvard/iq/dataverse/DataFile.java | 11 +++ .../edu/harvard/iq/dataverse/api/Access.java | 83 +++++++++++++---- .../harvard/iq/dataverse/api/AccessIT.java | 24 +++-- 5 files changed, 269 insertions(+), 22 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java new file mode 100644 index 00000000000..1c6c5708fe5 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -0,0 +1,90 @@ + +package edu.harvard.iq.dataverse; + +import java.io.Serializable; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.JoinColumn; +import javax.persistence.ManyToOne; + +/** + * + * @author ekraffmiller + * Represents a generic file that is associated with a dataFile. 
+ * This is a data representation of a physical file in StorageIO + */ +@Entity +public class AuxiliaryFile implements Serializable { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + /** + * The data file that this AuxiliaryFile belongs to + * a data file may have many auxiliaryFiles + */ + @ManyToOne + @JoinColumn(nullable=false) + private DataFile dataFile; + + private String formatTag; + + private String formatVersion; + + private String origin; + + private boolean isPublic; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public DataFile getDataFile() { + return dataFile; + } + + public void setDataFile(DataFile dataFile) { + this.dataFile = dataFile; + } + + public String getFormatTag() { + return formatTag; + } + + public void setFormatTag(String formatTag) { + this.formatTag = formatTag; + } + + public String getFormatVersion() { + return formatVersion; + } + + public void setFormatVersion(String formatVersion) { + this.formatVersion = formatVersion; + } + + public String getOrigin() { + return origin; + } + + public void setOrigin(String origin) { + this.origin = origin; + } + + public boolean getIsPublic() { + return isPublic; + } + + public void setIsPublic(boolean isPublic) { + this.isPublic = isPublic; + } + + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java new file mode 100644 index 00000000000..01b0ee6e865 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -0,0 +1,83 @@ + +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import java.io.IOException; +import java.io.InputStream; +import java.util.logging.Logger; +import javax.ejb.Stateless; +import javax.inject.Named; +import javax.persistence.EntityManager; +import javax.persistence.PersistenceContext; + +/** + * + * @author 
ekraffmiller + * Methods related to the AuxiliaryFile Entity. + */ +@Stateless +@Named +public class AuxiliaryFileServiceBean implements java.io.Serializable { + private static final Logger logger = Logger.getLogger(AuxiliaryFileServiceBean.class.getCanonicalName()); + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + public AuxiliaryFile find(Object pk) { + return em.find(AuxiliaryFile.class, pk); + } + + public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { + AuxiliaryFile savedFile = em.merge(auxiliaryFile); + return savedFile; + + } + + /** + * Save the physical file to storageIO, and save the AuxiliaryFile entity + * to the database. This should be an all or nothing transaction - if either + * process fails, than nothing will be saved + * @param fileInputStream - auxiliary file data to be saved + * @param dataFile - the dataFile entity this will be added to + * @param formatTag - type of file being saved + * @param formatVersion - to distinguish between multiple versions of a file + * @param origin - name of the tool/system that created the file + * @param isPublic boolean - is this file available to any user? + * @return success boolean - returns whether the save was successful + */ + public boolean processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic) { + + StorageIO storageIO =null; + + String auxExtension = formatTag + "_" + formatVersion; + try { + // Save to storage first. + // If that is successful (does not throw exception), + // then save to db. + // If the db fails for any reason, then rollback + // by removing the auxfile from storage. 
+ storageIO = dataFile.getStorageIO(); + storageIO.saveInputStreamAsAux(fileInputStream, auxExtension); + AuxiliaryFile auxFile = new AuxiliaryFile(); + auxFile.setFormatTag(formatTag); + auxFile.setFormatVersion(formatVersion); + auxFile.setOrigin(origin); + auxFile.setIsPublic(isPublic); + auxFile.setDataFile(dataFile); + save(auxFile); + } catch (IOException ioex) { + logger.info("IO Exception trying to save auxiliary file: " + ioex.getMessage()); + return false; + } catch (Exception e) { + // If anything fails during database insert, remove file from storage + try { + storageIO.deleteAuxObject(auxExtension); + } catch(IOException ioex) { + logger.info("IO Exception trying remove auxiliary file in exception handler: " + ioex.getMessage()); + return false; + } + } + return true; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 560048db9ca..2f0981c80af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -192,6 +192,9 @@ public String toString() { @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) private List dataTables; + @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) + private List auxiliaryFiles; + @OneToMany(mappedBy = "dataFile", cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) private List ingestReports; @@ -281,6 +284,14 @@ public String getDuplicateFilename() { public void setDuplicateFilename(String duplicateFilename) { this.duplicateFilename = duplicateFilename; } + + public List getAuxiliaryFiles() { + return auxiliaryFiles; + } + + public void setAuxiliaryFiles(List auxiliaryFiles) { + this.auxiliaryFiles = auxiliaryFiles; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 
8f913ea5f1b..f05dd02e0a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -6,6 +6,7 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.FileMetadata; @@ -43,14 +44,12 @@ import edu.harvard.iq.dataverse.dataaccess.DataFileZipper; import edu.harvard.iq.dataverse.dataaccess.OptionalAccessService; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; -import edu.harvard.iq.dataverse.dataaccess.StoredOriginalFile; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; @@ -62,13 +61,11 @@ import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import static 
edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import edu.harvard.iq.dataverse.worldmapauth.WorldMapTokenServiceBean; import java.util.logging.Logger; @@ -88,16 +85,9 @@ import java.util.logging.Level; import javax.inject.Inject; import javax.json.Json; -import javax.json.JsonObjectBuilder; -import java.math.BigDecimal; import java.net.URI; -import java.util.HashSet; -import java.util.Set; -import java.util.function.Consumer; -import javax.faces.context.FacesContext; import javax.json.JsonArrayBuilder; import javax.persistence.TypedQuery; -import javax.servlet.http.HttpServletRequest; import javax.ws.rs.GET; import javax.ws.rs.Path; @@ -110,7 +100,6 @@ import javax.servlet.http.HttpServletResponse; -import javax.servlet.http.HttpSession; import javax.ws.rs.BadRequestException; import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; @@ -125,10 +114,13 @@ import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import javax.ws.rs.core.StreamingOutput; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import java.net.URISyntaxException; import javax.ws.rs.RedirectionException; +import javax.ws.rs.core.MediaType; +import static javax.ws.rs.core.Response.Status.FORBIDDEN; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; +import org.glassfish.jersey.media.multipart.FormDataContentDisposition; +import org.glassfish.jersey.media.multipart.FormDataParam; /* Custom API exceptions [NOT YET IMPLEMENTED] @@ -184,6 +176,8 @@ public class Access extends AbstractApiBean { UserNotificationServiceBean userNotificationService; @EJB FileDownloadServiceBean fileDownloadService; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; @Inject PermissionsWrapper permissionsWrapper; @Inject @@ -1084,6 +1078,65 @@ private String getWebappImageResource(String imageName) { } */ + /** + * + * @param fileId + * @param 
formatTag + * @param formatVersion + * @param origin + * @param isPublic + * @param fileInputStream + * @param contentDispositionHeader + * @param formDataBodyPart + * @return + */ + @Path("datafile/{fileId}/metadata/{formatTag}/{formatVersion}") + @POST + @Consumes(MediaType.MULTIPART_FORM_DATA) + + public Response saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, + @PathParam("formatTag") String formatTag, + @PathParam("formatVersion") String formatVersion, + @FormDataParam("origin") String origin, + @FormDataParam("isPublic") boolean isPublic, + @FormDataParam("file") InputStream fileInputStream, + @FormDataParam("file") FormDataContentDisposition contentDispositionHeader, + @FormDataParam("file") final FormDataBodyPart formDataBodyPart + ) { + AuthenticatedUser authenticatedUser; + try { + authenticatedUser = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + return error(FORBIDDEN, "Authorized users only."); + } + + DataFile dataFile = dataFileService.find(fileId); + if (dataFile == null) { + return error(BAD_REQUEST, "File not found based on id " + fileId + "."); + } + + if (!permissionService.userOn(authenticatedUser, dataFile.getOwner()).has(Permission.EditDataset)) { + return error(FORBIDDEN, "User not authorized to edit the dataset."); + } + + if (!dataFile.isTabularData()) { + return error(BAD_REQUEST, "Not a tabular DataFile (db id=" + fileId + ")"); + } + + + boolean saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic); + + if (saved) { + return ok("Auxiliary file has been saved."); + } else { + return error(BAD_REQUEST, "Error saving Auxiliary file."); + } + } + + + + + /** * Allow (or disallow) access requests to Dataset * @@ -1835,5 +1888,5 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, throw new BadRequestException(); } return redirectUri; - } + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java 
b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index ad0c93a80d1..ba646aa3592 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -6,20 +6,23 @@ package edu.harvard.iq.dataverse.api; import com.jayway.restassured.RestAssured; +import static com.jayway.restassured.RestAssured.given; import com.jayway.restassured.path.json.JsonPath; import com.jayway.restassured.response.Response; import edu.harvard.iq.dataverse.DataFile; +import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; import edu.harvard.iq.dataverse.util.FileUtil; import java.io.IOException; import java.util.zip.ZipInputStream; -import static javax.ws.rs.core.Response.Status.OK; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import java.util.zip.ZipEntry; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.InputStream; import java.util.HashMap; +import static javax.ws.rs.core.Response.Status.OK; import org.hamcrest.collection.IsMapContaining; import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -27,12 +30,6 @@ import static org.junit.Assert.assertTrue; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.not; -import static junit.framework.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.not; /** * @@ -156,6 +153,7 @@ public static void setUp() throws InterruptedException { String tab4PathToFile = "scripts/search/data/tabular/" + tabFile4NameUnpublished; Response tab4AddResponse = UtilIT.uploadFileViaNative(datasetId.toString(), tab4PathToFile, apiToken); tabFile4IdUnpublished = JsonPath.from(tab4AddResponse.body().asString()).getInt("data.files[0].dataFile.id"); + 
assertTrue("Failed test if Ingest Lock exceeds max duration " + tabFile2Name, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); } @@ -172,6 +170,18 @@ public static void tearDown() { } + @Test + public void testSaveAuxiliaryFileWithVersion() { + System.out.println("Add aux file with update"); + String mimeType = null; + String pathToFile = "scripts/search/data/tabular/1char"; + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .multiPart("file", new File(pathToFile), mimeType) + .post("/api/access/datafile/" + tabFile1Id + "/metadata/dpJSON/v1"); + response.prettyPrint(); + assertEquals(200, response.getStatusCode()); + } //This test does a lot of testing of non-original downloads as well @Test From 91748b3e395c78b176eab1fd9f66085cfaae36bb Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 19 Oct 2020 18:44:09 -0400 Subject: [PATCH 02/15] modified framework for the download/GET part of the new aux. metadata API. will post more info tomorrow, about what may still need to be done there. 
#7275 --- .../harvard/iq/dataverse/AuxiliaryFile.java | 20 ++++++ .../dataverse/AuxiliaryFileServiceBean.java | 21 +++++++ .../edu/harvard/iq/dataverse/api/Access.java | 61 +++++++++++++++---- .../iq/dataverse/api/DownloadInstance.java | 15 +++++ .../dataverse/api/DownloadInstanceWriter.java | 12 ++++ 5 files changed, 116 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java index 1c6c5708fe5..55655e3974f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -37,6 +37,10 @@ public class AuxiliaryFile implements Serializable { private String origin; private boolean isPublic; + + private String contentType; + + private Long fileSize; public Long getId() { return id; @@ -86,5 +90,21 @@ public void setIsPublic(boolean isPublic) { this.isPublic = isPublic; } + public String getContentType() { + // TODO: hard-coded for testing: + return "application/json"; + //return contentType; + } + + public void setContentType(String contentType) { + this.contentType = contentType; + } + + public Long getFileSize() { + return fileSize; + } + public void setFileSize(long fileSize) { + this.fileSize = fileSize; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 01b0ee6e865..2c09ff3f6a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -9,6 +9,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; +import javax.persistence.Query; /** * @@ -64,6 +65,9 @@ public boolean processAuxiliaryFile(InputStream fileInputStream, DataFile dataFi auxFile.setOrigin(origin); auxFile.setIsPublic(isPublic); 
auxFile.setDataFile(dataFile); + // TODO: mime type! + //auxFile.setContentType(mimeType); + auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); save(auxFile); } catch (IOException ioex) { logger.info("IO Exception trying to save auxiliary file: " + ioex.getMessage()); @@ -79,5 +83,22 @@ public boolean processAuxiliaryFile(InputStream fileInputStream, DataFile dataFi } return true; } + + // Looks up an auxiliary file by its parent DataFile, the formatTag and version + // TODO: improve as needed. + public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) { + + Query query = em.createQuery("select object(o) from AuxiliaryFile as o where o.dataFile.id = :dataFileId and o.formatTag = :formatTag and o.formatVersion = :formatVersion"); + + query.setParameter("dataFileId", dataFile.getId()); + query.setParameter("formatTag", formatTag); + query.setParameter("formatVersion", formatVersion); + try { + AuxiliaryFile retVal = (AuxiliaryFile)query.getSingleResult(); + return retVal; + } catch(Exception ex) { + return null; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index f05dd02e0a4..3a5c38dec64 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -6,6 +6,7 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.AuxiliaryFile; import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.DataFile; @@ -499,16 +500,20 @@ public String dataVariableMetadataDDI(@PathParam("varId") Long varId, @QueryPara } /* - * "Preprocessed data" metadata format: - * (this was previously provided as a "format conversion" option of the - * file download form of the access API call) + * GET method for retrieving various auxiliary files associated with + * a tabular datafile. 
*/ - @Path("datafile/{fileId}/metadata/preprocessed") + @Path("datafile/{fileId}/metadata/{formatTag}/{formatVersion}") @GET - @Produces({"text/xml"}) - public DownloadInstance tabularDatafileMetadataPreprocessed(@PathParam("fileId") String fileId, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { + public DownloadInstance tabularDatafileMetadataPreprocessed(@PathParam("fileId") String fileId, + @PathParam("formatTag") String formatTag, + @PathParam("formatVersion") String formatVersion, + @QueryParam("key") String apiToken, + @Context UriInfo uriInfo, + @Context HttpHeaders headers, + @Context HttpServletResponse response) throws ServiceUnavailableException { DataFile df = findDataFileOrDieWrapper(fileId); @@ -516,18 +521,48 @@ public DownloadInstance tabularDatafileMetadataPreprocessed(@PathParam("fileId") apiToken = headers.getHeaderString(API_KEY_HEADER); } - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); DownloadInfo dInfo = new DownloadInfo(df); + boolean publiclyAvailable = false; - if (df.isTabularData()) { + if (!df.isTabularData()) { + throw new BadRequestException("tabular data required"); + } + + DownloadInstance downloadInstance; + AuxiliaryFile auxFile = null; + + // formatTag=preprocessed is handled as a special case. + // This is (as of now) the only aux. tabular metadata format that Dataverse + // can generate (and cache) itself. (All the other formats served have + // to be deposited first, by the @POST version of this API). 
+ + if ("preprocessed".equals(formatTag)) { dInfo.addServiceAvailable(new OptionalAccessService("preprocessed", "application/json", "format=prep", "Preprocessed data in JSON")); + downloadInstance = new DownloadInstance(dInfo); + if (downloadInstance.checkIfServiceSupportedAndSetConverter("format", "prep")) { + logger.fine("Preprocessed data for tabular file "+fileId); + } } else { - throw new BadRequestException("tabular data required"); + // All other (deposited) formats: + auxFile = auxiliaryFileService.lookupAuxiliaryFile(df, formatTag, formatVersion); + + if (auxFile == null) { + throw new NotFoundException("Auxiliary metadata format "+formatTag+" is not available for datafile "+fileId); + } + + if (auxFile.getIsPublic()) { + publiclyAvailable = true; + } + downloadInstance = new DownloadInstance(dInfo); + downloadInstance.setAuxiliaryFile(auxFile); } - DownloadInstance downloadInstance = new DownloadInstance(dInfo); - if (downloadInstance.checkIfServiceSupportedAndSetConverter("format", "prep")) { - logger.fine("Preprocessed data for tabular file "+fileId); + + // Unless this format is explicitly authorized to be publicly available, + // the following will check access authorization (based on the access rules + // as defined for the DataFile itself), and will throw a ForbiddenException + // if access is denied: + if (!publiclyAvailable) { + checkAuthorization(df, apiToken); } return downloadInstance; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java index 7e354bea24b..07215cb919e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java @@ -6,6 +6,7 @@ package edu.harvard.iq.dataverse.api; //import java.io.ByteArrayOutputStream; +import edu.harvard.iq.dataverse.AuxiliaryFile; import edu.harvard.iq.dataverse.DataverseRequestServiceBean; import 
edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.GuestbookResponse; @@ -47,6 +48,12 @@ public void setExtraArguments(List extraArguments) { private String conversionParam = null; private String conversionParamValue = null; + // This download instance is for an auxiliary file associated with + // the DataFile. Unlike "conversions" (above) this is used for files + // that Dataverse has no way of producing/deriving from the parent Datafile + // itself, that have to be deposited externally. + private AuxiliaryFile auxiliaryFile = null; + private EjbDataverseEngine command; private DataverseRequestServiceBean dataverseRequestService; @@ -210,4 +217,12 @@ public void setDataverseRequestService(DataverseRequestServiceBean dataverseRequ this.dataverseRequestService = dataverseRequestService; } + public AuxiliaryFile getAuxiliaryFile() { + return auxiliaryFile; + } + + public void setAuxiliaryFile(AuxiliaryFile auxiliaryFile) { + this.auxiliaryFile = auxiliaryFile; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index b10412a577d..1a8cca15595 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -227,6 +227,18 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // (similarly to what the Access API returns when a thumbnail is requested on a text file, etc.) throw new NotFoundException("datafile access error: requested optional service (image scaling, format conversion, etc.) 
could not be performed on this datafile."); } + } else if (di.getAuxiliaryFile() != null) { + String auxTag = di.getAuxiliaryFile().getFormatTag(); + String auxVersion = di.getAuxiliaryFile().getFormatVersion(); + if (auxVersion != null) { + auxTag = auxTag + "_" + auxVersion; + } + long auxFileSize = di.getAuxiliaryFile().getFileSize(); + InputStreamIO auxStreamIO = new InputStreamIO(storageIO.getAuxFileAsInputStream(auxTag), auxFileSize); + auxStreamIO.setFileName(storageIO.getFileName() + "." + auxTag); + auxStreamIO.setMimeType(di.getAuxiliaryFile().getContentType()); + storageIO = auxStreamIO; + } else { if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && ((S3AccessIO) storageIO).downloadRedirectEnabled()) { // definitely close the (still open) S3 input stream, From d6ce1d7e5066a6ad2c68f5f6aac6974fa799c1e9 Mon Sep 17 00:00:00 2001 From: ellenk Date: Thu, 22 Oct 2020 14:56:28 -0400 Subject: [PATCH 03/15] added checksum and contentType to AuxiliaryFile entity --- .../harvard/iq/dataverse/AuxiliaryFile.java | 16 +++++++++++++--- .../iq/dataverse/AuxiliaryFileServiceBean.java | 18 ++++++++++++++---- .../edu/harvard/iq/dataverse/api/AccessIT.java | 11 ++++++++++- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java index 55655e3974f..957a7cc93bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -41,6 +41,8 @@ public class AuxiliaryFile implements Serializable { private String contentType; private Long fileSize; + + private String checksum; public Long getId() { return id; @@ -91,9 +93,7 @@ public void setIsPublic(boolean isPublic) { } public String getContentType() { - // TODO: hard-coded for testing: - return "application/json"; - //return contentType; + return this.contentType; } public void setContentType(String contentType) { @@ 
-107,4 +107,14 @@ public Long getFileSize() { public void setFileSize(long fileSize) { this.fileSize = fileSize; } + + public String getChecksum() { + return checksum; + } + + public void setChecksum(String checksum) { + this.checksum = checksum; + } + + } diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 2c09ff3f6a1..9fe6181ff92 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -2,14 +2,18 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.io.InputStream; import java.util.logging.Logger; +import javax.ejb.EJB; import javax.ejb.Stateless; import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.Query; +import org.apache.tika.Tika; /** * @@ -23,6 +27,10 @@ public class AuxiliaryFileServiceBean implements java.io.Serializable { @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; + + @EJB + private SystemConfig systemConfig; + public AuxiliaryFile find(Object pk) { return em.find(AuxiliaryFile.class, pk); @@ -58,15 +66,17 @@ public boolean processAuxiliaryFile(InputStream fileInputStream, DataFile dataFi // If the db fails for any reason, then rollback // by removing the auxfile from storage. 
storageIO = dataFile.getStorageIO(); - storageIO.saveInputStreamAsAux(fileInputStream, auxExtension); AuxiliaryFile auxFile = new AuxiliaryFile(); + storageIO.saveInputStreamAsAux(fileInputStream, auxExtension); + auxFile.setChecksum(FileUtil.calculateChecksum(storageIO.getAuxFileAsInputStream(auxExtension), systemConfig.getFileFixityChecksumAlgorithm())); + + Tika tika = new Tika(); + auxFile.setContentType(tika.detect(storageIO.getAuxFileAsInputStream(auxExtension))); auxFile.setFormatTag(formatTag); auxFile.setFormatVersion(formatVersion); auxFile.setOrigin(origin); auxFile.setIsPublic(isPublic); - auxFile.setDataFile(dataFile); - // TODO: mime type! - //auxFile.setContentType(mimeType); + auxFile.setDataFile(dataFile); auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); save(auxFile); } catch (IOException ioex) { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index ba646aa3592..4fb1271c8c9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -21,6 +21,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.InputStream; +import java.nio.file.Path; import java.util.HashMap; import static javax.ws.rs.core.Response.Status.OK; import org.hamcrest.collection.IsMapContaining; @@ -171,7 +172,7 @@ public static void tearDown() { @Test - public void testSaveAuxiliaryFileWithVersion() { + public void testSaveAuxiliaryFileWithVersion() throws IOException { System.out.println("Add aux file with update"); String mimeType = null; String pathToFile = "scripts/search/data/tabular/1char"; @@ -181,6 +182,14 @@ public void testSaveAuxiliaryFileWithVersion() { .post("/api/access/datafile/" + tabFile1Id + "/metadata/dpJSON/v1"); response.prettyPrint(); assertEquals(200, response.getStatusCode()); + System.out.println("Downloading Aux file that was just added"); + response = given() + 
.header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/access/datafile/" + tabFile1Id + "/metadata/dpJSON/v1"); + + String dataStr = response.prettyPrint(); + assertEquals(dataStr,"a\n"); + assertEquals(200, response.getStatusCode()); } //This test does a lot of testing of non-original downloads as well From 1e473163032c742eab609346223455abcdadc13f Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 26 Oct 2020 10:38:42 -0400 Subject: [PATCH 04/15] Making sure there's no open inputstreams left. (#7275) --- .../edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index 1a8cca15595..1624e9932e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -228,6 +228,8 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] throw new NotFoundException("datafile access error: requested optional service (image scaling, format conversion, etc.) 
could not be performed on this datafile."); } } else if (di.getAuxiliaryFile() != null) { + // Make sure to close the InputStream for the main datafile: + try {storageIO.getInputStream().close();} catch (IOException ioex) {} String auxTag = di.getAuxiliaryFile().getFormatTag(); String auxVersion = di.getAuxiliaryFile().getFormatVersion(); if (auxVersion != null) { From 1af5a24dfb45f968e4f9e6e84d0f6f37121b17fa Mon Sep 17 00:00:00 2001 From: ellenk Date: Mon, 2 Nov 2020 16:18:30 -0500 Subject: [PATCH 05/15] Use DigestInputStream so that checksum can be calculated when stream is being written to storage --- .../harvard/iq/dataverse/AuxiliaryFileServiceBean.java | 10 ++++++++-- .../java/edu/harvard/iq/dataverse/util/FileUtil.java | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 9fe6181ff92..1ee3c5e7794 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -6,6 +6,8 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.io.InputStream; +import java.security.DigestInputStream; +import java.security.MessageDigest; import java.util.logging.Logger; import javax.ejb.EJB; import javax.ejb.Stateless; @@ -67,8 +69,12 @@ public boolean processAuxiliaryFile(InputStream fileInputStream, DataFile dataFi // by removing the auxfile from storage. 
storageIO = dataFile.getStorageIO(); AuxiliaryFile auxFile = new AuxiliaryFile(); - storageIO.saveInputStreamAsAux(fileInputStream, auxExtension); - auxFile.setChecksum(FileUtil.calculateChecksum(storageIO.getAuxFileAsInputStream(auxExtension), systemConfig.getFileFixityChecksumAlgorithm())); + MessageDigest md = MessageDigest.getInstance(systemConfig.getFileFixityChecksumAlgorithm().toString()); + DigestInputStream di + = new DigestInputStream(fileInputStream, md); + + storageIO.saveInputStreamAsAux(fileInputStream, auxExtension); + auxFile.setChecksum(FileUtil.checksumDigestToString(di.getMessageDigest().digest()) ); Tika tika = new Tika(); auxFile.setContentType(tika.detect(storageIO.getAuxFileAsInputStream(auxExtension))); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 7ed9970fe13..cb243b1b8b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -686,7 +686,7 @@ public static String calculateChecksum(byte[] dataBytes, ChecksumType checksumTy } - private static String checksumDigestToString(byte[] digestBytes) { + public static String checksumDigestToString(byte[] digestBytes) { StringBuilder sb = new StringBuilder(""); for (int i = 0; i < digestBytes.length; i++) { sb.append(Integer.toString((digestBytes[i] & 0xff) + 0x100, 16).substring(1)); From c1e03a0e302666585123865116a49eba7ee9d031 Mon Sep 17 00:00:00 2001 From: ellenk Date: Mon, 16 Nov 2020 18:50:56 -0500 Subject: [PATCH 06/15] Updated deposit API method to return JSon data for saved file --- .../dataverse/AuxiliaryFileServiceBean.java | 16 +++++------ .../edu/harvard/iq/dataverse/api/Access.java | 17 ++++++----- .../iq/dataverse/util/json/JsonPrinter.java | 28 ++++++++++++++++++- .../harvard/iq/dataverse/api/AccessIT.java | 2 +- 4 files changed, 43 insertions(+), 20 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 1ee3c5e7794..b160902319c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -56,10 +56,10 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { * @param isPublic boolean - is this file available to any user? * @return success boolean - returns whether the save was successful */ - public boolean processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic) { + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic) { StorageIO storageIO =null; - + AuxiliaryFile auxFile = new AuxiliaryFile(); String auxExtension = formatTag + "_" + formatVersion; try { // Save to storage first. @@ -68,7 +68,7 @@ public boolean processAuxiliaryFile(InputStream fileInputStream, DataFile dataFi // If the db fails for any reason, then rollback // by removing the auxfile from storage. 
storageIO = dataFile.getStorageIO(); - AuxiliaryFile auxFile = new AuxiliaryFile(); + auxFile = new AuxiliaryFile(); MessageDigest md = MessageDigest.getInstance(systemConfig.getFileFixityChecksumAlgorithm().toString()); DigestInputStream di = new DigestInputStream(fileInputStream, md); @@ -84,24 +84,22 @@ public boolean processAuxiliaryFile(InputStream fileInputStream, DataFile dataFi auxFile.setIsPublic(isPublic); auxFile.setDataFile(dataFile); auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); - save(auxFile); + auxFile = save(auxFile); } catch (IOException ioex) { logger.info("IO Exception trying to save auxiliary file: " + ioex.getMessage()); - return false; + return null; } catch (Exception e) { // If anything fails during database insert, remove file from storage try { storageIO.deleteAuxObject(auxExtension); } catch(IOException ioex) { logger.info("IO Exception trying remove auxiliary file in exception handler: " + ioex.getMessage()); - return false; + return null; } } - return true; + return auxFile; } - // Looks up an auxiliary file by its parent DataFile, the formatTag and version - // TODO: improve as needed. 
public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) { Query query = em.createQuery("select object(o) from AuxiliaryFile as o where o.dataFile.id = :dataFileId and o.formatTag = :formatTag and o.formatVersion = :formatVersion"); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 3a5c38dec64..752eab4cc3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -67,6 +67,7 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import edu.harvard.iq.dataverse.worldmapauth.WorldMapTokenServiceBean; import java.util.logging.Logger; @@ -505,9 +506,8 @@ public String dataVariableMetadataDDI(@PathParam("varId") Long varId, @QueryPara */ @Path("datafile/{fileId}/metadata/{formatTag}/{formatVersion}") - @GET - - public DownloadInstance tabularDatafileMetadataPreprocessed(@PathParam("fileId") String fileId, + @GET + public DownloadInstance tabularDatafileMetadataAux(@PathParam("fileId") String fileId, @PathParam("formatTag") String formatTag, @PathParam("formatVersion") String formatVersion, @QueryParam("key") String apiToken, @@ -1134,9 +1134,8 @@ public Response saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, @PathParam("formatVersion") String formatVersion, @FormDataParam("origin") String origin, @FormDataParam("isPublic") boolean isPublic, - @FormDataParam("file") InputStream fileInputStream, - @FormDataParam("file") FormDataContentDisposition contentDispositionHeader, - @FormDataParam("file") final FormDataBodyPart formDataBodyPart + @FormDataParam("file") InputStream fileInputStream + ) { AuthenticatedUser authenticatedUser; try { @@ -1159,10 +1158,10 @@ public Response 
saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, } - boolean saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic); + AuxiliaryFile saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic); - if (saved) { - return ok("Auxiliary file has been saved."); + if (saved!=null) { + return created(saved.getId().toString(),JsonPrinter.json(saved)); } else { return error(BAD_REQUEST, "Error saving Auxiliary file."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 57fcdd75ac1..935c2ab38a8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.util.json; +import edu.harvard.iq.dataverse.AuxiliaryFile; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileTag; @@ -561,7 +562,32 @@ public static JsonObjectBuilder json(FileMetadata fmd) { .add("categories", getFileCategories(fmd)) .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd)); } - +/* + + private String formatTag; + + private String formatVersion; + + private String origin; + + private boolean isPublic; + + private String contentType; + + private Long fileSize; + + private String checksum; + */ + public static JsonObjectBuilder json(AuxiliaryFile auxFile) { + return jsonObjectBuilder() + .add("formatTag", auxFile.getFormatTag()) + .add("formatVersion", auxFile.getFormatVersion()) // "label" is the filename + .add("origin", auxFile.getOrigin()) + .add("isPublic", auxFile.getIsPublic()) + .add("fileSize", auxFile.getFileSize()) + .add("checksum", auxFile.getChecksum()) + .add("dataFile", JsonPrinter.json(auxFile.getDataFile())); + } public 
static JsonObjectBuilder json(DataFile df) { return JsonPrinter.json(df, null); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index 4fb1271c8c9..8ad915336b6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -181,7 +181,7 @@ public void testSaveAuxiliaryFileWithVersion() throws IOException { .multiPart("file", new File(pathToFile), mimeType) .post("/api/access/datafile/" + tabFile1Id + "/metadata/dpJSON/v1"); response.prettyPrint(); - assertEquals(200, response.getStatusCode()); + assertEquals(201, response.getStatusCode()); System.out.println("Downloading Aux file that was just added"); response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) From 11b9db65a453229dee6d5242223433e25fff2259 Mon Sep 17 00:00:00 2001 From: ellenk Date: Mon, 16 Nov 2020 19:07:23 -0500 Subject: [PATCH 07/15] removed unneeded comments --- .../iq/dataverse/util/json/JsonPrinter.java | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index d705330b702..c37efc3178f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -563,22 +563,7 @@ public static JsonObjectBuilder json(FileMetadata fmd) { .add("categories", getFileCategories(fmd)) .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd)); } -/* - - private String formatTag; - - private String formatVersion; - - private String origin; - - private boolean isPublic; - - private String contentType; - - private Long fileSize; - - private String checksum; - */ + public static JsonObjectBuilder json(AuxiliaryFile auxFile) { return jsonObjectBuilder() .add("formatTag", auxFile.getFormatTag()) From 
867ad10a7d99b8db5a7ee92407eb7fd72d9a4759 Mon Sep 17 00:00:00 2001 From: ellenk Date: Mon, 16 Nov 2020 19:08:57 -0500 Subject: [PATCH 08/15] removed redundant initialization --- .../java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index b160902319c..4f97c146e7b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -68,7 +68,6 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile // If the db fails for any reason, then rollback // by removing the auxfile from storage. storageIO = dataFile.getStorageIO(); - auxFile = new AuxiliaryFile(); MessageDigest md = MessageDigest.getInstance(systemConfig.getFileFixityChecksumAlgorithm().toString()); DigestInputStream di = new DigestInputStream(fileInputStream, md); From 6e88b55e6ee62a84def7bb23f7a7526172317eae Mon Sep 17 00:00:00 2001 From: ellenk Date: Tue, 17 Nov 2020 13:49:03 -0500 Subject: [PATCH 09/15] added documentation for Auxiliary File APIs --- .../source/developers/aux-file-support.rst | 49 +++++++++++++++++++ doc/sphinx-guides/source/developers/index.rst | 1 + 2 files changed, 50 insertions(+) create mode 100644 doc/sphinx-guides/source/developers/aux-file-support.rst diff --git a/doc/sphinx-guides/source/developers/aux-file-support.rst b/doc/sphinx-guides/source/developers/aux-file-support.rst new file mode 100644 index 00000000000..d77b337ffb6 --- /dev/null +++ b/doc/sphinx-guides/source/developers/aux-file-support.rst @@ -0,0 +1,49 @@ +Auxiliary File Support +====================== + +Auxiliary file support is experimental. Eventually this content will move to the API Guide. 
Currently +it is being used to support depositing and downloading differentially private +metadata, as part of the OpenDP project. + +Adding an Auxiliary File to a Data File: +---------------------------------------------------------- +To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and the +formatTag and formatVersion (if applicable) associated with the Auxiliary File. +There are two form parameters. "Origin" specifies the application/entity that +created the auxiliary file, an "isPublic" controls access to downloading the file. +If "isPublic" is true, any user can download the file, else, access +authorization is based on the access rules as defined for the DataFile itself. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export FILENAME='auxfile.txt' + export FILE_ID='12345' + export FORMAT_TAG='dpJson' + export FORMAT_VERSION='v1' + export SERVER_URL=https://demo.dataverse.org + + curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION") + +.. code-block:: bash + +You should expect a 201 ("CREATED") response and JSON indicating the database id that has been assigned to your newly uploaded file. + + + +Downloading an Auxiliary File that belongs to a Data File: +---------------------------------------------------------- +To download a file, use the primary key of the datafile, and the +formatTag and formatVersion (if applicable) associated with the Auxiliary File: + + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_ID='12345' + export FORMAT_TAG='dpJson' + export FORMAT_VERSION='v1' + + + curl "$SERVER_URL/api/access/datafile/$FILE_ID/$FORMAT_TAG/$FORMAT_VERSION" \ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 96595220e07..9c524571a39 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -32,4 +32,5 @@ Developer Guide geospatial selinux big-data-support + aux-file-support workflows From a2c3f0d9e59b098026278f77053fcd6c61b89bf9 Mon Sep 17 00:00:00 2001 From: Danny Brooke Date: Tue, 17 Nov 2020 16:56:02 -0500 Subject: [PATCH 10/15] doc updates --- doc/sphinx-guides/source/api/native-api.rst | 2 +- .../source/developers/aux-file-support.rst | 35 ++++++------------- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 0c355b35254..575355865ea 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3,7 +3,7 @@ Native API Dataverse 4 exposes most of its GUI functionality via a REST-based API. This section describes that functionality. Most API endpoints require an API token that can be passed as the ``X-Dataverse-key`` HTTP header or in the URL as the ``key`` query parameter. -.. note:: |CORS| Some API endpoint allow CORS_ (cross-origin resource sharing), which makes them usable from scripts runing in web browsers. These endpoints are marked with a *CORS* badge. +.. note:: |CORS| Some API endpoint allow CORS_ (cross-origin resource sharing), which makes them usable from scripts running in web browsers. These endpoints are marked with a *CORS* badge. .. note:: Bash environment variables shown below. 
The idea is that you can "export" these environment variables before copying and pasting the commands that use them. For example, you can set ``$SERVER_URL`` by running ``export SERVER_URL="https://demo.dataverse.org"`` in your Bash shell. To check if the environment variable was set properly, you can "echo" it (e.g. ``echo $SERVER_URL``). See also :ref:`curl-examples-and-environment-variables`. diff --git a/doc/sphinx-guides/source/developers/aux-file-support.rst b/doc/sphinx-guides/source/developers/aux-file-support.rst index d77b337ffb6..d0ccfdd59d5 100644 --- a/doc/sphinx-guides/source/developers/aux-file-support.rst +++ b/doc/sphinx-guides/source/developers/aux-file-support.rst @@ -1,18 +1,11 @@ Auxiliary File Support ====================== -Auxiliary file support is experimental. Eventually this content will move to the API Guide. Currently -it is being used to support depositing and downloading differentially private -metadata, as part of the OpenDP project. - -Adding an Auxiliary File to a Data File: ----------------------------------------------------------- -To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and the -formatTag and formatVersion (if applicable) associated with the Auxiliary File. -There are two form parameters. "Origin" specifies the application/entity that -created the auxiliary file, an "isPublic" controls access to downloading the file. -If "isPublic" is true, any user can download the file, else, access -authorization is based on the access rules as defined for the DataFile itself. +Auxiliary file support is experimental. Auxiliary files in Dataverse are being added to support depositing and downloading differentially private metadata, as part of the OpenDP project (OpenDP.io). In future versions, this approach may become more broadly used and supported. 
+ +Adding an Auxiliary File to a Datafile: +--------------------------------------- +To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and the formatTag and formatVersion (if applicable) associated with the auxiliary file. There are two form parameters. "Origin" specifies the application/entity that created the auxiliary file, an "isPublic" controls access to downloading the file. If "isPublic" is true, any user can download the file, else, access authorization is based on the access rules as defined for the DataFile itself. .. code-block:: bash @@ -23,19 +16,14 @@ authorization is based on the access rules as defined for the DataFile itself. export FORMAT_VERSION='v1' export SERVER_URL=https://demo.dataverse.org - curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION") - -.. code-block:: bash - -You should expect a 201 ("CREATED") response and JSON indicating the database id that has been assigned to your newly uploaded file. - - + curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION" -Downloading an Auxiliary File that belongs to a Data File: ----------------------------------------------------------- -To download a file, use the primary key of the datafile, and the -formatTag and formatVersion (if applicable) associated with the Auxiliary File: +You should expect a 201 ("CREATED") response and JSON indicating the database id that has been assigned to your newly uploaded auxiliary file. +Downloading an Auxiliary File that belongs to a Datafile: +--------------------------------------------------------- +To download an auxiliary file, use the primary key of the datafile, and the +formatTag and formatVersion (if applicable) associated with the auxiliary file: .. 
code-block:: bash @@ -44,6 +32,5 @@ formatTag and formatVersion (if applicable) associated with the Auxiliary File: export FILE_ID='12345' export FORMAT_TAG='dpJson' export FORMAT_VERSION='v1' - curl "$SERVER_URL/api/access/datafile/$FILE_ID/$FORMAT_TAG/$FORMAT_VERSION" \ No newline at end of file From 36b69df6ebf8a32b72cd524337645df22720c0e5 Mon Sep 17 00:00:00 2001 From: Danny Brooke Date: Tue, 17 Nov 2020 16:58:54 -0500 Subject: [PATCH 11/15] removing : --- doc/sphinx-guides/source/developers/aux-file-support.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/developers/aux-file-support.rst b/doc/sphinx-guides/source/developers/aux-file-support.rst index d0ccfdd59d5..0a387023a48 100644 --- a/doc/sphinx-guides/source/developers/aux-file-support.rst +++ b/doc/sphinx-guides/source/developers/aux-file-support.rst @@ -3,8 +3,8 @@ Auxiliary File Support Auxiliary file support is experimental. Auxiliary files in Dataverse are being added to support depositing and downloading differentially private metadata, as part of the OpenDP project (OpenDP.io). In future versions, this approach may become more broadly used and supported. -Adding an Auxiliary File to a Datafile: ---------------------------------------- +Adding an Auxiliary File to a Datafile +-------------------------------------- To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and the formatTag and formatVersion (if applicable) associated with the auxiliary file. There are two form parameters. "Origin" specifies the application/entity that created the auxiliary file, an "isPublic" controls access to downloading the file. If "isPublic" is true, any user can download the file, else, access authorization is based on the access rules as defined for the DataFile itself. .. 
code-block:: bash @@ -20,8 +20,8 @@ To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and You should expect a 201 ("CREATED") response and JSON indicating the database id that has been assigned to your newly uploaded auxiliary file. -Downloading an Auxiliary File that belongs to a Datafile: ---------------------------------------------------------- +Downloading an Auxiliary File that belongs to a Datafile +-------------------------------------------------------- To download an auxiliary file, use the primary key of the datafile, and the formatTag and formatVersion (if applicable) associated with the auxiliary file: From 471dd25d46c253e158aefe382873599719141786 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 24 Nov 2020 13:48:18 -0500 Subject: [PATCH 12/15] switch from created to ok #7275 --- src/main/java/edu/harvard/iq/dataverse/api/Access.java | 2 +- src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 752eab4cc3d..0c7a4224648 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -1161,7 +1161,7 @@ public Response saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, AuxiliaryFile saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic); if (saved!=null) { - return created(saved.getId().toString(),JsonPrinter.json(saved)); + return ok(json(saved)); } else { return error(BAD_REQUEST, "Error saving Auxiliary file."); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index 8ad915336b6..4fb1271c8c9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -181,7 +181,7 @@ public void testSaveAuxiliaryFileWithVersion() throws IOException { .multiPart("file", new File(pathToFile), mimeType) .post("/api/access/datafile/" + tabFile1Id + "/metadata/dpJSON/v1"); response.prettyPrint(); - assertEquals(201, response.getStatusCode()); + assertEquals(200, response.getStatusCode()); System.out.println("Downloading Aux file that was just added"); response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) From 21977d0688e3b3465ce8f5a9db081134bcef95fa Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 24 Nov 2020 13:53:47 -0500 Subject: [PATCH 13/15] make docs match OK/200 response from add #7275 --- doc/sphinx-guides/source/developers/aux-file-support.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/aux-file-support.rst b/doc/sphinx-guides/source/developers/aux-file-support.rst index 0a387023a48..0bcd2766b8b 100644 --- a/doc/sphinx-guides/source/developers/aux-file-support.rst +++ b/doc/sphinx-guides/source/developers/aux-file-support.rst @@ -18,7 +18,7 @@ To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION" -You should expect a 201 ("CREATED") response and JSON indicating the database id that has been assigned to your newly uploaded auxiliary file. +You should expect a 200 ("OK") response and JSON with information about your newly uploaded auxiliary file. 
Downloading an Auxiliary File that belongs to a Datafile -------------------------------------------------------- @@ -33,4 +33,4 @@ formatTag and formatVersion (if applicable) associated with the auxiliary file: export FORMAT_TAG='dpJson' export FORMAT_VERSION='v1' - curl "$SERVER_URL/api/access/datafile/$FILE_ID/$FORMAT_TAG/$FORMAT_VERSION" \ No newline at end of file + curl "$SERVER_URL/api/access/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION" From 103b591015df8292174c934e1fde43beee081f16 Mon Sep 17 00:00:00 2001 From: Danny Brooke Date: Mon, 30 Nov 2020 11:46:16 -0500 Subject: [PATCH 14/15] adding release notes --- doc/release-notes/7275-aux-files.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7275-aux-files.md diff --git a/doc/release-notes/7275-aux-files.md b/doc/release-notes/7275-aux-files.md new file mode 100644 index 00000000000..24aa7f86f2d --- /dev/null +++ b/doc/release-notes/7275-aux-files.md @@ -0,0 +1,3 @@ +## Notes for Tool Developers and Integrators + +Experimental endpoints have been added to allow auxiliary files to be added to datafiles. These auxiliary files can be deposited and accessed via API. Later releases will include options for accessing these files through the UI. For more information, see the Auxiliary File Support section of the [Developer Guide](https://guides.dataverse.org/en/5.3/developers/).
From 82c5f6699ce3e57f3185107967fdab232244988c Mon Sep 17 00:00:00 2001 From: ekraffmiller Date: Mon, 30 Nov 2020 16:51:20 -0500 Subject: [PATCH 15/15] Update aux-file-support.rst fixed typo in deposit URL --- doc/sphinx-guides/source/developers/aux-file-support.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/aux-file-support.rst b/doc/sphinx-guides/source/developers/aux-file-support.rst index 0bcd2766b8b..be21b56c245 100644 --- a/doc/sphinx-guides/source/developers/aux-file-support.rst +++ b/doc/sphinx-guides/source/developers/aux-file-support.rst @@ -16,7 +16,7 @@ To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and export FORMAT_VERSION='v1' export SERVER_URL=https://demo.dataverse.org - curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION" + curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' "$SERVER_URL/api/access/datafile/$FILE_ID/metadata/$FORMAT_TAG/$FORMAT_VERSION" You should expect a 200 ("OK") response and JSON with information about your newly uploaded auxiliary file.