From c3db55beaaf35f73a36fb0d9de06dcd26d4f6de5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 13 Oct 2020 13:19:15 -0400 Subject: [PATCH 01/76] Initial implementation --- .../iq/dataverse/EditDatafilesPage.java | 16 +- .../dataverse/api/DownloadInstanceWriter.java | 4 +- .../iq/dataverse/dataaccess/DataAccess.java | 69 +-- .../dataaccess/HTTPOverlayAccessIO.java | 533 ++++++++++++++++++ .../iq/dataverse/dataaccess/S3AccessIO.java | 19 +- .../iq/dataverse/dataaccess/StorageIO.java | 10 +- .../iq/dataverse/util/UrlSignerUtil.java | 140 +++++ .../dataverse/dataaccess/S3AccessIOTest.java | 2 +- 8 files changed, 746 insertions(+), 47 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index b4feecfcdf4..eb3efcd117d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1995,7 +1995,7 @@ private void handleReplaceFileUpload(String fullStorageLocation, fileReplacePageHelper.resetReplaceFileHelper(); saveEnabled = false; - String storageIdentifier = DataAccess.getStorarageIdFromLocation(fullStorageLocation); + String storageIdentifier = DataAccess.getStorageIdFromLocation(fullStorageLocation); if (fileReplacePageHelper.handleNativeFileUpload(null, storageIdentifier, fileName, contentType, checkSum)){ saveEnabled = true; @@ -2131,8 +2131,20 @@ public void handleExternalUpload() { String checksumType = paramMap.get("checksumType"); String checksumValue = paramMap.get("checksumValue"); + //ToDo - move this to StorageIO subclasses + int lastColon = fullStorageIdentifier.lastIndexOf(':'); - String storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + 
dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); + String storageLocation=null; + //Should check storage type, not parse name + //This works except with s3 stores with ids starting with 'http' + if(fullStorageIdentifier.startsWith("http")) { + //HTTP external URL case + //ToDo - check for valid URL + storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + FileUtil.generateStorageIdentifier() + "//" +fullStorageIdentifier.substring(lastColon+1); + } else { + //S3 direct upload case + storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); + } if (uploadInProgress.isFalse()) { uploadInProgress.setValue(true); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index b10412a577d..1361bff2167 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -228,7 +228,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] throw new NotFoundException("datafile access error: requested optional service (image scaling, format conversion, etc.) could not be performed on this datafile."); } } else { - if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && ((S3AccessIO) storageIO).downloadRedirectEnabled()) { + if (!(dataFile.isTabularData()) && storageIO.downloadRedirectEnabled()) { // definitely close the (still open) S3 input stream, // since we are not going to use it. 
The S3 documentation // emphasizes that it is very important not to leave these @@ -238,7 +238,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // [attempt to] redirect: String redirect_url_str; try { - redirect_url_str = ((S3AccessIO)storageIO).generateTemporaryS3Url(); + redirect_url_str = storageIO.generateTemporaryDownloadUrl(); } catch (IOException ioex) { redirect_url_str = null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 0e2320401dd..4c6f1554250 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -54,40 +54,45 @@ public static StorageIO getStorageIO(T dvObject) throws } //passing DVObject instead of a datafile to accomodate for use of datafiles as well as datasets - public static StorageIO getStorageIO(T dvObject, DataAccessRequest req) throws IOException { + public static StorageIO getStorageIO(T dvObject, DataAccessRequest req) throws IOException { - if (dvObject == null - || dvObject.getStorageIdentifier() == null - || dvObject.getStorageIdentifier().isEmpty()) { - throw new IOException("getDataAccessObject: null or invalid datafile."); - } - String storageIdentifier = dvObject.getStorageIdentifier(); - int separatorIndex = storageIdentifier.indexOf("://"); - String storageDriverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; //default - if(separatorIndex>0) { - storageDriverId = storageIdentifier.substring(0,separatorIndex); - } - String storageType = getDriverType(storageDriverId); - switch(storageType) { - case "file": - return new FileAccessIO<>(dvObject, req, storageDriverId); - case "s3": - return new S3AccessIO<>(dvObject, req, storageDriverId); - case "swift": - return new SwiftAccessIO<>(dvObject, req, storageDriverId); - case "tmp": - throw new IOException("DataAccess IO attempted on a temporary file that 
hasn't been permanently saved yet."); - } + if (dvObject == null || dvObject.getStorageIdentifier() == null || dvObject.getStorageIdentifier().isEmpty()) { + throw new IOException("getDataAccessObject: null or invalid datafile."); + } + String storageIdentifier = dvObject.getStorageIdentifier(); + int separatorIndex = storageIdentifier.indexOf("://"); + String storageDriverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; // default + if (separatorIndex > 0) { + storageDriverId = storageIdentifier.substring(0, separatorIndex); + } + return getStorageIO(dvObject, req, storageDriverId); + } - // TODO: - // This code will need to be extended with a system of looking up - // available storage plugins by the storage tag embedded in the - // "storage identifier". - // -- L.A. 4.0.2 + protected static StorageIO getStorageIO(T dvObject, DataAccessRequest req, + String storageDriverId) throws IOException { + String storageType = getDriverType(storageDriverId); + switch (storageType) { + case "file": + return new FileAccessIO<>(dvObject, req, storageDriverId); + case "s3": + return new S3AccessIO<>(dvObject, req, storageDriverId); + case "swift": + return new SwiftAccessIO<>(dvObject, req, storageDriverId); + case "http": + return new HTTPOverlayAccessIO<>(dvObject, req, storageDriverId); + case "tmp": + throw new IOException( + "DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); + } + // TODO: + // This code will need to be extended with a system of looking up + // available storage plugins by the storage tag embedded in the + // "storage identifier". + // -- L.A. 
4.0.2 - logger.warning("Could not find storage driver for: " + storageIdentifier); - throw new IOException("getDataAccessObject: Unsupported storage method."); - } + logger.warning("Could not find storage driver for: " + storageDriverId); + throw new IOException("getDataAccessObject: Unsupported storage method."); + } // Experimental extension of the StorageIO system allowing direct access to // stored physical files that may not be associated with any DvObjects @@ -122,7 +127,7 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { return new String[]{storageDriverId, storageIdentifier}; } - public static String getStorarageIdFromLocation(String location) { + public static String getStorageIdFromLocation(String location) { if(location.contains("://")) { //It's a full location with a driverId, so strip and reapply the driver id //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java new file mode 100644 index 00000000000..0bf4eb515de --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -0,0 +1,533 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import com.amazonaws.AmazonClientException; +import com.amazonaws.HttpMethod; +import com.amazonaws.SdkClientException; +import com.amazonaws.auth.profile.ProfileCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; + +import java.io.File; +import 
java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.net.URLEncoder; +import java.nio.channels.Channel; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Random; +import java.util.logging.Logger; +import org.apache.commons.io.IOUtils; +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpDelete; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpHead; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; + +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import javax.net.ssl.SSLContext; +import javax.validation.constraints.NotNull; + +/** + * @author qqmyers + * @param what it stores + */ +/* + * HTTP Overlay Driver + * + * StorageIdentifier format: + * ://// + */ 
+public class HTTPOverlayAccessIO extends StorageIO { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.HttpOverlayAccessIO"); + + private StorageIO baseStore = null; + private String urlPath = null; + private String baseUrl = null; + + private static HttpClientContext localContext = HttpClientContext.create(); + private PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + private int timeout = 1200; + private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + private static boolean trustCerts = false; + private int httpConcurrency = 4; + + public HTTPOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + super(dvObject, req, driverId); + this.setIsLocalFile(false); + configureStores(req, driverId, null); + // TODO: validate the storage location supplied + urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//" + 2)); + logger.fine("Base URL: " + urlPath); + } + + public HTTPOverlayAccessIO(String storageLocation, String driverId) throws IOException { + super(null, null, driverId); + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + // TODO: validate the storage location supplied + urlPath = storageLocation.substring(storageLocation.lastIndexOf("//" + 2)); + logger.fine("Base URL: " + urlPath); + } + + @Override + public void open(DataAccessOption... 
options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + // Fix new DataFiles: DataFiles that have not yet been saved may use this method + // when they don't have their storageidentifier in the final form + // So we fix it up here. ToDo: refactor so that storageidentifier is generated + // by the appropriate StorageIO class and is final from the start. + logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + this.setSize(getSizeFromHttpHeader()); + } + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: HTTPOverlay Storage driver does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + 
"Data Access: HTTPOverlay Storage driver does not support dvObject type Dataverse yet"); + } else { + this.setSize(getSizeFromHttpHeader()); + } + } + + private long getSizeFromHttpHeader() { + long size = -1; + HttpHead head = new HttpHead(baseUrl + "/" + urlPath); + try { + CloseableHttpResponse response = httpclient.execute(head, localContext); + + try { + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + size = Long.parseLong(response.getHeaders("Content-Length")[0].getValue()); + logger.fine("Found file size: " + size); + break; + default: + logger.warning("Response from " + head.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + } + return size; + } + + @Override + public InputStream getInputStream() throws IOException { + if (super.getInputStream() == null) { + try { + HttpGet get = new HttpGet(baseUrl + "/" + urlPath); + CloseableHttpResponse response = httpclient.execute(get, localContext); + + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + setInputStream(response.getEntity().getContent()); + break; + default: + logger.warning("Response from " + get.getURI().toString() + " was " + code); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath); + + } + setChannel(Channels.newChannel(super.getInputStream())); + } + return super.getInputStream(); + } + + @Override + public Channel getChannel() throws IOException { + if (super.getChannel() == null) { + getInputStream(); + } + return channel; + } + + @Override + public ReadableByteChannel getReadChannel() throws IOException { + // Make sure StorageIO.channel variable exists + getChannel(); + return super.getReadChannel(); + } + + @Override + public void delete() throws 
IOException { + // Delete is best-effort - we tell the remote server and it may or may not + // implement this call + if (!isDirectAccess()) { + throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); + } + try { + HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); + CloseableHttpResponse response = httpclient.execute(del, localContext); + try { + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); + default: + logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath); + + } + + // Delete all the cached aux files as well: + deleteAllAuxObjects(); + + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. 
+ */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf("://") + 3); + fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//") + 2); + if (this.getDvObject() instanceof Dataset) { + fullStorageLocation = this.getDataset().getAuthorityForFileStorage() + "/" + + this.getDataset().getIdentifierForFileStorage() + "/" + fullStorageLocation; + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("HttpOverlayAccessIO: Dataverses are not a supported dvObject"); + } + return fullStorageLocation; + } + + @Override + public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); + } + + @Override + public boolean exists() { + return (getSizeFromHttpHeader() != -1); + } + + @Override + public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + 
"HttpOverlayAccessIO: there are no write Channels associated with S3 objects."); + } + + @Override + public OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: there are no output Streams associated with S3 objects."); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + @Override + public boolean downloadRedirectEnabled() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } + + public String generateTemporaryDownloadUrl() throws IOException { + String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secretkey"); + if (secretKey == null) { + return baseUrl + "/" + urlPath; + } else { + return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", secretKey); + } + } + + int getUrlExpirationMinutes() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + baseUrl = System.getProperty("dataverse.files." + this.driverId + ".baseUrl"); + + if (baseStore == null) { + String baseDriverId = System.getProperty("dataverse.files." + driverId + ".baseStore"); + String fullStorageLocation = null; + if (this.getDvObject() != null) { + fullStorageLocation = getStorageLocation(); + + // S3 expects :/// + switch (System.getProperty("dataverse.files." 
+ baseDriverId + ".type")) { + case "s3": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" + + fullStorageLocation; + break; + case "file": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: HTTPOverlay store with base store type: " + + System.getProperty("dataverse.files." + baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + + } else if (storageLocation != null) { + // ://// + String storageId = storageLocation.substring(storageLocation.indexOf("://" + 3)); + fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); + + switch (System.getProperty("dataverse.files." + baseDriverId + ".type")) { + case "s3": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" + + fullStorageLocation; + break; + case "file": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: HTTPOverlay store with base store type: " + + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. + connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); + } + + @Override + public void savePath(Path fileSystemPath) throws IOException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: savePath() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index c0defccfdef..533498cad97 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -817,16 +817,17 @@ private static String getMainFileKey(String baseKey, String storageIdentifier, S } return key; } - - public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); - if ("true".equalsIgnoreCase(optionValue)) { - return true; - } - return false; - } - public String generateTemporaryS3Url() throws IOException { + @Override + public boolean downloadRedirectEnabled() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } + + public String generateTemporaryDownloadUrl() throws IOException { //Questions: // Q. Should this work for private and public? // A. Yes! Since the URL has a limited, short life span. -- L.A. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 2f66eec5f4c..148858ce544 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -183,7 +183,7 @@ public boolean canWrite() { public abstract void deleteAllAuxObjects() throws IOException; private DataAccessRequest req; - private InputStream in; + private InputStream in = null; private OutputStream out; protected Channel channel; protected DvObject dvObject; @@ -542,4 +542,12 @@ public boolean isBelowIngestSizeLimit() { return true; } } + + public boolean downloadRedirectEnabled() { + return false; + } + + public String generateTemporaryDownloadUrl() throws IOException { + throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java new file mode 100644 index 00000000000..9a04a056fa0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -0,0 +1,140 @@ +package edu.harvard.iq.dataverse.util; + +import java.net.URL; +import java.nio.charset.Charset; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; +import org.joda.time.LocalDateTime; + +/** + * Simple class to sign/validate URLs. 
+ * + */ +public class UrlSignerUtil { + + private static final Logger logger = Logger.getLogger(UrlSignerUtil.class.getName()); + + /** + * + * @param baseUrl - the URL to sign - cannot contain query params + * "until","user", "method", or "token" + * @param timeout - how many minutes to make the URL valid for (note - time skew + * between the creator and receiver could affect the validation + * @param user - a string representing the user - should be understood by the + * creator/receiver + * @param method - one of the HTTP methods + * @param key - a secret key shared by the creator/receiver. In Dataverse + * this could be an APIKey (when sending URL to a tool that will + * use it to retrieve info from Dataverse) + * @return - the signed URL + */ + public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { + StringBuilder signedUrl = new StringBuilder(baseUrl); + + boolean firstParam = true; + if (baseUrl.contains("?")) { + firstParam = false; + } + if (timeout != null) { + LocalDateTime validTime = LocalDateTime.now(); + validTime = validTime.plusMinutes(timeout); + validTime.toString(); + signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); + } + if (user != null) { + signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); + } + if (method != null) { + signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); + } + signedUrl.append("&token=").append(DigestUtils.sha512Hex(signedUrl.toString() + key)); + logger.fine("Generated Signed URL: " + signedUrl.toString()); + if (logger.isLoggable(Level.FINE)) { + logger.fine( + "URL signature is " + (isValidUrl(signedUrl.toString(), method, user, key) ? 
"valid" : "invalid")); + } + return signedUrl.toString(); + } + + /** + * This method will only return true if the URL and parameters except the + * "token" are unchanged from the original/match the values sent to this method, + * and the "token" parameter matches what this method recalculates using the + * shared key THe method also assures that the "until" timestamp is after the + * current time. + * + * @param signedUrl - the signed URL as received from Dataverse + * @param method - an HTTP method. If provided, the method in the URL must + * match + * @param user - a string representing the user, if provided the value must + * match the one in the url + * @param key - the shared secret key to be used in validation + * @return - true if valid, false if not: e.g. the key is not the same as the + * one used to generate the "token" any part of the URL preceding the + * "token" has been altered the method doesn't match (e.g. the server + * has received a POST request and the URL only allows GET) the user + * string doesn't match (e.g. 
the server knows user A is logged in, but + * the URL is only for user B) the url has expired (was used after the + * until timestamp) + */ + public static boolean isValidUrl(String signedUrl, String method, String user, String key) { + boolean valid = true; + try { + URL url = new URL(signedUrl); + List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + String hash = null; + String dateString = null; + String allowedMethod = null; + String allowedUser = null; + for (NameValuePair nvp : params) { + if (nvp.getName().equals("token")) { + hash = nvp.getValue(); + } + if (nvp.getName().equals("until")) { + dateString = nvp.getValue(); + } + if (nvp.getName().equals("method")) { + allowedMethod = nvp.getValue(); + } + if (nvp.getName().equals("user")) { + allowedUser = nvp.getValue(); + } + } + + int index = signedUrl.indexOf("&token="); + // Assuming the token is last - doesn't have to be, but no reason for the URL + // params to be rearranged either, and this should only cause false negatives if + // it does happen + String urlToHash = signedUrl.substring(0, index); + String newHash = DigestUtils.sha512Hex(urlToHash + key); + if (!hash.contentEquals(newHash)) { + logger.fine("Hash doesn't match"); + valid = false; + } + if (LocalDateTime.parse(dateString).isAfter(LocalDateTime.now())) { + logger.fine("Url is expired"); + valid = false; + } + if (method != null && !method.equals(allowedMethod)) { + logger.fine("Method doesn't match"); + valid = false; + } + if (user != null && user.equals(allowedUser)) { + logger.fine("User doesn't match"); + valid = false; + } + } catch (Throwable t) { + // Want to catch anything like null pointers, etc. 
to force valid=false upon any + // error + logger.warning("Bad URL: " + signedUrl + " : " + t.getMessage()); + valid = false; + } + return valid; + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java index 1f118a0ea68..e2756d70663 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java @@ -30,7 +30,7 @@ public class S3AccessIOTest { @Mock private AmazonS3 s3client; - private S3AccessIO dataSetAccess; + private StorageIO dataSetAccess; private S3AccessIO dataFileAccess; private Dataset dataSet; private DataFile dataFile; From e8c15785b16651e728881bf0856e5347e198e5ec Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 13 Oct 2020 13:32:25 -0400 Subject: [PATCH 02/76] null check on dateString --- src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 9a04a056fa0..3c91387f169 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -116,7 +116,7 @@ public static boolean isValidUrl(String signedUrl, String method, String user, S logger.fine("Hash doesn't match"); valid = false; } - if (LocalDateTime.parse(dateString).isAfter(LocalDateTime.now())) { + if (dateString != null && LocalDateTime.parse(dateString).isAfter(LocalDateTime.now())) { logger.fine("Url is expired"); valid = false; } From 00d53ee4d43fd49a81faa1f31b9b0c6a1cad8b46 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 12:39:49 -0400 Subject: [PATCH 03/76] adjust incoming identifier for HttpOverlay drivers --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 6 ++++++ 1 file changed, 6 
insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 655cdafe04c..7fd3b1ab63d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1782,6 +1782,12 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, if (null == contentDispositionHeader) { if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + String driverType = DataAccess.getDriverType(newStorageIdentifier.substring(0, newStorageIdentifier.indexOf(":"))); + if(driverType.equals("http")) { + //Add a generated identifier for the aux files + int lastColon = newStorageIdentifier.lastIndexOf(':'); + newStorageIdentifier= newStorageIdentifier.substring(0,lastColon) + "/" + FileUtil.generateStorageIdentifier() + "//" +newStorageIdentifier.substring(lastColon+1); + } // ToDo - check that storageIdentifier is valid if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); From 94921bd2de1bfdff7bb73c6d7da55528fc9c418a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 12:40:12 -0400 Subject: [PATCH 04/76] support overlay case --- .../edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index bd0549622f0..f96f948f0a9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -32,7 +32,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; - +import java.util.logging.Logger; // Dataverse imports: import edu.harvard.iq.dataverse.DataFile; @@ -48,6 +48,9 @@ public class FileAccessIO extends 
StorageIO { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.FileAccessIO"); + public FileAccessIO() { //Constructor only for testing @@ -169,7 +172,8 @@ public void open (DataAccessOption... options) throws IOException { } else if (dvObject instanceof Dataverse) { dataverse = this.getDataverse(); } else { - throw new IOException("Data Access: Invalid DvObject type"); + logger.fine("Overlay case: FileAccessIO open for : " + physicalPath.toString()); + //throw new IOException("Data Access: Invalid DvObject type"); } // This "status" is a leftover from 3.6; we don't have a use for it // in 4.0 yet; and we may not need it at all. From cbdd35c0b186a535d6df358d9f57082c713c6ff3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 12:40:33 -0400 Subject: [PATCH 05/76] document need to update for overlay case --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 533498cad97..2b7b1b91ae2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -224,6 +224,9 @@ public void open(DataAccessOption... options) throws IOException { } else if (dvObject instanceof Dataverse) { throw new IOException("Data Access: Storage driver does not support dvObject type Dataverse yet"); } else { + + //ToDo - skip this for overlay case + // Direct access, e.g. 
for external upload - no associated DVobject yet, but we want to be able to get the size // With small files, it looks like we may call before S3 says it exists, so try some retries before failing if(key!=null) { From 11535bd4e7401d9f0100aa4d0ecfddbd3d2a9da2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 12:40:57 -0400 Subject: [PATCH 06/76] keep owner for getStorageIO call for HttpOverlay case --- .../harvard/iq/dataverse/ingest/IngestServiceBean.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index f5eeaa1c316..5a5ab8cc86e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -290,10 +290,6 @@ public List saveAndAddFilesToDataset(DatasetVersion version, List saveAndAddFilesToDataset(DatasetVersion version, List Date: Wed, 14 Oct 2020 12:41:32 -0400 Subject: [PATCH 07/76] typos --- .../harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index 0bf4eb515de..a058dfc070e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -95,7 +95,7 @@ public HTTPOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) t this.setIsLocalFile(false); configureStores(req, driverId, null); // TODO: validate the storage location supplied - urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//" + 2)); + urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") 
+ 2); logger.fine("Base URL: " + urlPath); } @@ -105,7 +105,7 @@ public HTTPOverlayAccessIO(String storageLocation, String driverId) throws IOExc configureStores(null, driverId, storageLocation); // TODO: validate the storage location supplied - urlPath = storageLocation.substring(storageLocation.lastIndexOf("//" + 2)); + urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); logger.fine("Base URL: " + urlPath); } @@ -345,6 +345,7 @@ public void deleteAllAuxObjects() throws IOException { @Override public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf("://") + 3); fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//") + 2); if (this.getDvObject() instanceof Dataset) { From 239d5a8de208b6bf4bb2c809264d2069526e33ff Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 14:03:44 -0400 Subject: [PATCH 08/76] debug logging --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 ++ .../harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java | 1 + 2 files changed, 3 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 7fd3b1ab63d..3fb2e7c2bc3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1785,8 +1785,10 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, String driverType = DataAccess.getDriverType(newStorageIdentifier.substring(0, newStorageIdentifier.indexOf(":"))); if(driverType.equals("http")) { //Add a generated identifier for the aux files + logger.fine("in: " + newStorageIdentifier); int lastColon = newStorageIdentifier.lastIndexOf(':'); newStorageIdentifier= 
newStorageIdentifier.substring(0,lastColon) + "/" + FileUtil.generateStorageIdentifier() + "//" +newStorageIdentifier.substring(lastColon+1); + logger.fine("out: " + newStorageIdentifier); } // ToDo - check that storageIdentifier is valid if (optionalFileParams.hasFileName()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index a058dfc070e..3ebc5f807ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -94,6 +94,7 @@ public HTTPOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) t super(dvObject, req, driverId); this.setIsLocalFile(false); configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); // TODO: validate the storage location supplied urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); logger.fine("Base URL: " + urlPath); From e86c2d0fca0693614c3e8d90adf89dfd5f1dd1da Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 15:27:51 -0400 Subject: [PATCH 09/76] more logging --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 3fb2e7c2bc3..0a8adc31591 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1782,7 +1782,9 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, if (null == contentDispositionHeader) { if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + logger.fine("found: " + newStorageIdentifier); String driverType = 
DataAccess.getDriverType(newStorageIdentifier.substring(0, newStorageIdentifier.indexOf(":"))); + logger.fine("drivertype: " + driverType); if(driverType.equals("http")) { //Add a generated identifier for the aux files logger.fine("in: " + newStorageIdentifier); From 0062c681c124c40a016e775b8da6acb9646a9c43 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 15:59:00 -0400 Subject: [PATCH 10/76] fix storageidentifier parsing/updating --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 0a8adc31591..bd52ff1bece 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1788,8 +1788,8 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, if(driverType.equals("http")) { //Add a generated identifier for the aux files logger.fine("in: " + newStorageIdentifier); - int lastColon = newStorageIdentifier.lastIndexOf(':'); - newStorageIdentifier= newStorageIdentifier.substring(0,lastColon) + "/" + FileUtil.generateStorageIdentifier() + "//" +newStorageIdentifier.substring(lastColon+1); + int lastColon = newStorageIdentifier.lastIndexOf("://"); + newStorageIdentifier= newStorageIdentifier.substring(0,lastColon +3) + FileUtil.generateStorageIdentifier() + "//" +newStorageIdentifier.substring(lastColon+3); logger.fine("out: " + newStorageIdentifier); } // ToDo - check that storageIdentifier is valid From d6a5f65379ed6db78f896cddf0f3c88f5162a509 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 17:46:28 -0400 Subject: [PATCH 11/76] more info about errors handled by ThrowableHandler --- .../edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java 
b/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java index d3c6fd2df50..0f6be9c4dfa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java @@ -215,6 +215,7 @@ public JsonResponseBuilder log(Logger logger, Level level, Optional e metadata.deleteCharAt(metadata.length()-1); if (ex.isPresent()) { + ex.get().printStackTrace(); metadata.append("|"); logger.log(level, metadata.toString(), ex); } else { From d821b626a804aedd3bba4f1bea99539649fb4a48 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 17:46:54 -0400 Subject: [PATCH 12/76] fine debug to show size --- .../java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 5a5ab8cc86e..6b79a3079f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -298,6 +298,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, List)dataAccess).removeTempTag(); From 1a8f0f12003322104b6130d12d2495afe5d6ae2c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 17:47:08 -0400 Subject: [PATCH 13/76] actually instantiate an HttpClient ! 
--- .../dataaccess/HTTPOverlayAccessIO.java | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index 3ebc5f807ab..b3f095b7bda 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -37,7 +37,10 @@ import java.util.List; import java.util.Random; import java.util.logging.Logger; + + import org.apache.commons.io.IOUtils; +import org.apache.http.Header; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; @@ -54,6 +57,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.protocol.HTTP; import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; @@ -148,6 +152,7 @@ public void open(DataAccessOption... 
options) throws IOException { if (dataFile.getFilesize() >= 0) { this.setSize(dataFile.getFilesize()); } else { + logger.fine("Setting size"); this.setSize(getSizeFromHttpHeader()); } if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") @@ -182,13 +187,18 @@ private long getSizeFromHttpHeader() { long size = -1; HttpHead head = new HttpHead(baseUrl + "/" + urlPath); try { - CloseableHttpResponse response = httpclient.execute(head, localContext); + CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); try { int code = response.getStatusLine().getStatusCode(); + logger.fine("Response for HEAD: " + code); switch (code) { case 200: - size = Long.parseLong(response.getHeaders("Content-Length")[0].getValue()); + Header[] headers =response.getHeaders(HTTP.CONTENT_LEN); + logger.fine("Num headers: " + headers.length); + String sizeString = response.getHeaders(HTTP.CONTENT_LEN )[0].getValue(); + logger.fine("Content-Length: " + sizeString); + size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN )[0].getValue()); logger.fine("Found file size: " + size); break; default: @@ -208,7 +218,7 @@ public InputStream getInputStream() throws IOException { if (super.getInputStream() == null) { try { HttpGet get = new HttpGet(baseUrl + "/" + urlPath); - CloseableHttpResponse response = httpclient.execute(get, localContext); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); int code = response.getStatusLine().getStatusCode(); switch (code) { @@ -217,11 +227,12 @@ public InputStream getInputStream() throws IOException { break; default: logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error retrieving: 
" + baseUrl + "/" + urlPath); + e.printStackTrace(); + throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); } setChannel(Channels.newChannel(super.getInputStream())); @@ -253,7 +264,7 @@ public void delete() throws IOException { } try { HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); - CloseableHttpResponse response = httpclient.execute(del, localContext); + CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { @@ -267,7 +278,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath); + throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); } @@ -369,6 +380,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { + logger.fine("Exists called"); return (getSizeFromHttpHeader() != -1); } From ad86e4cdc68e3c518aac2f603439198bf192d304 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Oct 2020 18:59:20 -0400 Subject: [PATCH 14/76] algorithm fixes and logging --- .../iq/dataverse/util/UrlSignerUtil.java | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 3c91387f169..233b94ce007 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -45,14 +45,18 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin validTime = validTime.plusMinutes(timeout); validTime.toString(); signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); + firstParam=false; } if (user != null) { signedUrl.append(firstParam ? "?" 
: "&").append("user=").append(user); + firstParam=false; } if (method != null) { signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); } - signedUrl.append("&token=").append(DigestUtils.sha512Hex(signedUrl.toString() + key)); + signedUrl.append("&token="); + logger.fine("String to sign: " + signedUrl.toString() + ""); + signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); logger.fine("Generated Signed URL: " + signedUrl.toString()); if (logger.isLoggable(Level.FINE)) { logger.fine( @@ -94,15 +98,19 @@ public static boolean isValidUrl(String signedUrl, String method, String user, S for (NameValuePair nvp : params) { if (nvp.getName().equals("token")) { hash = nvp.getValue(); + logger.fine("Hash: " + hash); } if (nvp.getName().equals("until")) { dateString = nvp.getValue(); + logger.fine("Until: " + dateString); } if (nvp.getName().equals("method")) { allowedMethod = nvp.getValue(); + logger.fine("Method: " + allowedMethod); } if (nvp.getName().equals("user")) { allowedUser = nvp.getValue(); + logger.fine("User: " + allowedUser); } } @@ -110,13 +118,15 @@ public static boolean isValidUrl(String signedUrl, String method, String user, S // Assuming the token is last - doesn't have to be, but no reason for the URL // params to be rearranged either, and this should only cause false negatives if // it does happen - String urlToHash = signedUrl.substring(0, index); + String urlToHash = signedUrl.substring(0, index + 7); + logger.fine("String to hash: " + urlToHash + ""); String newHash = DigestUtils.sha512Hex(urlToHash + key); - if (!hash.contentEquals(newHash)) { + logger.fine("Calculated Hash: " + newHash); + if (!hash.equals(newHash)) { logger.fine("Hash doesn't match"); valid = false; } - if (dateString != null && LocalDateTime.parse(dateString).isAfter(LocalDateTime.now())) { + if (dateString != null && LocalDateTime.parse(dateString).isBefore(LocalDateTime.now())) { logger.fine("Url is expired"); valid = false; } From 
4a9f2098640b305dee37d17da5d84e331b9ec620 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Oct 2020 09:34:51 -0400 Subject: [PATCH 15/76] log exception --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index ec18f23a5a0..01ee19bf2d0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -416,7 +416,7 @@ private static boolean isThumbnailCached(StorageIO storageIO, int size try { cached = storageIO.isAuxObjectCached(THUMBNAIL_SUFFIX + size); } catch (Exception ioex) { - logger.fine("caught Exception while checking for a cached thumbnail (file " + storageIO.getDataFile().getStorageIdentifier() + ")"); + logger.fine("caught Exception while checking for a cached thumbnail (file " + storageIO.getDataFile().getStorageIdentifier() + "): " + ioex.getMessage()); return false; } From b33958307e2726daa89e816bc5bccd7d341f52c4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Oct 2020 09:35:37 -0400 Subject: [PATCH 16/76] support auxPath for direct/overlay case --- .../edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index f96f948f0a9..4ac28713ec8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -69,6 +69,7 @@ public FileAccessIO(T dvObject, DataAccessRequest req, String driverId ) { public FileAccessIO(String storageLocation, String driverId) { super(storageLocation, driverId); this.setIsLocalFile(true); + 
logger.fine("Storage path: " + storageLocation); physicalPath = Paths.get(storageLocation); } @@ -297,7 +298,10 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { if (auxItemTag == null || "".equals(auxItemTag)) { throw new IOException("Null or invalid Auxiliary Object Tag."); } - + if(isDirectAccess()) { + //Overlay case + return Paths.get(physicalPath.toString() + "." + auxItemTag); + } String datasetDirectory = getDatasetDirectory(); if (dvObject.getStorageIdentifier() == null || "".equals(dvObject.getStorageIdentifier())) { @@ -549,7 +553,7 @@ public FileOutputStream openLocalFileAsOutputStream () { } private String getDatasetDirectory() throws IOException { - if (dvObject == null) { + if (isDirectAccess()) { throw new IOException("No DvObject defined in the Data Access Object"); } From 5131e5edf9e3e8eb5e83b142793861942054ed8a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Oct 2020 12:03:14 -0400 Subject: [PATCH 17/76] create dir when needed for aux --- .../edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 4ac28713ec8..91701418240 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -174,6 +174,10 @@ public void open (DataAccessOption... 
options) throws IOException { dataverse = this.getDataverse(); } else { logger.fine("Overlay case: FileAccessIO open for : " + physicalPath.toString()); + Path datasetPath= physicalPath.getParent(); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } //throw new IOException("Data Access: Invalid DvObject type"); } // This "status" is a leftover from 3.6; we don't have a use for it @@ -237,7 +241,7 @@ public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) th Path auxPath = getAuxObjectAsPath(auxItemTag); if (isWriteAccessRequested(options)) { - if (dvObject instanceof Dataset && !this.canWrite()) { + if (((dvObject instanceof Dataset) || isDirectAccess()) && !this.canWrite()) { // If this is a dataset-level auxilary file (a cached metadata export, // dataset logo, etc.) there's a chance that no "real" files // have been saved for this dataset yet, and thus the filesystem From afa37ef03ffb42995782177c58bf3cbaaf37f780 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Oct 2020 13:32:13 -0400 Subject: [PATCH 18/76] S3 flag to distinguish overlap and direct-upload cases --- .../dataaccess/HTTPOverlayAccessIO.java | 8 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 78 +++++++++++-------- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index b3f095b7bda..6d218d1800c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -441,11 +441,12 @@ private void configureStores(DataAccessRequest req, String driverId, String stor if (baseStore == null) { String baseDriverId = System.getProperty("dataverse.files." 
+ driverId + ".baseStore"); String fullStorageLocation = null; + String baseDriverType= System.getProperty("dataverse.files." + baseDriverId + ".type"); if (this.getDvObject() != null) { fullStorageLocation = getStorageLocation(); // S3 expects :/// - switch (System.getProperty("dataverse.files." + baseDriverId + ".type")) { + switch (baseDriverType) { case "s3": fullStorageLocation = baseDriverId + "://" + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" @@ -467,7 +468,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor String storageId = storageLocation.substring(storageLocation.indexOf("://" + 3)); fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); - switch (System.getProperty("dataverse.files." + baseDriverId + ".type")) { + switch (baseDriverType) { case "s3": fullStorageLocation = baseDriverId + "://" + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" @@ -485,6 +486,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } } baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + if(baseDriverType.contentEquals("s3")) { + ((S3AccessIO)baseStore).setMainDriver(false); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 2b7b1b91ae2..672d9b11aa7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -76,6 +76,8 @@ public class S3AccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); + + private boolean mainDriver = true; private static HashMap driverClientMap = new HashMap(); private static HashMap driverTMMap = new HashMap(); @@ -225,38 +227,41 @@ public void open(DataAccessOption... 
options) throws IOException { throw new IOException("Data Access: Storage driver does not support dvObject type Dataverse yet"); } else { - //ToDo - skip this for overlay case - - // Direct access, e.g. for external upload - no associated DVobject yet, but we want to be able to get the size - // With small files, it looks like we may call before S3 says it exists, so try some retries before failing - if(key!=null) { - ObjectMetadata objectMetadata = null; - int retries = 20; - while(retries > 0) { - try { - objectMetadata = s3.getObjectMetadata(bucketName, key); - if(retries != 20) { - logger.warning("Success for key: " + key + " after " + ((20-retries)*3) + " seconds"); - } - retries = 0; - } catch (SdkClientException sce) { - if(retries > 1) { - retries--; - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - logger.warning("Retrying after: " + sce.getMessage()); - } else { - throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); - } - } - } - this.setSize(objectMetadata.getContentLength()); - }else { - throw new IOException("Data Access: Invalid DvObject type"); - } + if (isMainDriver()) { + // Direct access, e.g. 
for external upload - no associated DVobject yet, but we + // want to be able to get the size + // With small files, it looks like we may call before S3 says it exists, so try + // some retries before failing + if (key != null) { + ObjectMetadata objectMetadata = null; + int retries = 20; + while (retries > 0) { + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + if (retries != 20) { + logger.warning( + "Success for key: " + key + " after " + ((20 - retries) * 3) + " seconds"); + } + retries = 0; + } catch (SdkClientException sce) { + if (retries > 1) { + retries--; + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + logger.warning("Retrying after: " + sce.getMessage()); + } else { + throw new IOException("Cannot get S3 object " + key + " (" + sce.getMessage() + ")"); + } + } + } + this.setSize(objectMetadata.getContentLength()); + } else { + throw new IOException("Data Access: Invalid DvObject type"); + } + } } } @@ -425,6 +430,7 @@ public void delete() throws IOException { @Override public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { if (isWriteAccessRequested(options)) { + //Need size to write to S3 throw new UnsupportedDataAccessOperationException("S3AccessIO: write mode openAuxChannel() not yet implemented in this storage driver."); } @@ -1171,4 +1177,12 @@ public static void completeMultipartUpload(String globalId, String storageIdenti s3Client.completeMultipartUpload(req); } + public boolean isMainDriver() { + return mainDriver; + } + + public void setMainDriver(boolean mainDriver) { + this.mainDriver = mainDriver; + } + } From 6aaabe23796f3ac11b60ef9cae5be5f590a4b76f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Oct 2020 14:30:08 -0400 Subject: [PATCH 19/76] fix s3 storagelocation --- .../harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index 6d218d1800c..eb97acb21ea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -449,7 +449,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor switch (baseDriverType) { case "s3": fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" + + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + ":" + fullStorageLocation; break; case "file": @@ -471,7 +471,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor switch (baseDriverType) { case "s3": fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" + + System.getProperty("dataverse.files." 
+ baseDriverId + ".bucketName") + ":" + fullStorageLocation; break; case "file": From bd37c2e93fa2e0c74bc94648b9aa40026a176a9a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Oct 2020 14:36:29 -0400 Subject: [PATCH 20/76] Revert "fix s3 storagelocation" This reverts commit 6aaabe23796f3ac11b60ef9cae5be5f590a4b76f. --- .../harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index eb97acb21ea..6d218d1800c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -449,7 +449,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor switch (baseDriverType) { case "s3": fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + ":" + + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" + fullStorageLocation; break; case "file": @@ -471,7 +471,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor switch (baseDriverType) { case "s3": fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + ":" + + System.getProperty("dataverse.files." 
+ baseDriverId + ".bucketName") + "/" + fullStorageLocation; break; case "file": From 14a119612f65e8de0d2026d1ea25c5cc5dfee652 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Oct 2020 14:48:44 -0400 Subject: [PATCH 21/76] fine logging --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 672d9b11aa7..adcc8ae95fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -109,6 +109,7 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { public S3AccessIO(String storageLocation, String driverId) { this(null, null, driverId); // TODO: validate the storage location supplied + logger.fine("Instantiating with location: " + storageLocation); bucketName = storageLocation.substring(0,storageLocation.indexOf('/')); minPartSize = getMinPartSize(driverId); key = storageLocation.substring(storageLocation.indexOf('/')+1); From e47eed7a75717f8538a0061baa022442d7798836 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Oct 2020 14:49:09 -0400 Subject: [PATCH 22/76] fix storagelocation issues --- .../iq/dataverse/dataaccess/HTTPOverlayAccessIO.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index 6d218d1800c..79f7d6b23a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -359,7 +359,7 @@ public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); 
fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf("://") + 3); - fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//") + 2); + fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); if (this.getDvObject() instanceof Dataset) { fullStorageLocation = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStorageLocation; @@ -449,7 +449,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor switch (baseDriverType) { case "s3": fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + fullStorageLocation; break; case "file": @@ -471,7 +471,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor switch (baseDriverType) { case "s3": fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucketName") + "/" + + System.getProperty("dataverse.files." 
+ baseDriverId + ".bucket-name") + "/" + fullStorageLocation; break; case "file": From 41dedcbcf299f7dde556302d4a068c9d93464bde Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 5 Aug 2021 07:01:08 -0400 Subject: [PATCH 23/76] format/cleanup --- .../api/util/JsonResponseBuilder.java | 2 +- .../iq/dataverse/dataaccess/FileAccessIO.java | 78 +- .../dataaccess/HTTPOverlayAccessIO.java | 959 +++++++++--------- .../iq/dataverse/dataaccess/S3AccessIO.java | 2 +- .../iq/dataverse/util/UrlSignerUtil.java | 250 ++--- 5 files changed, 635 insertions(+), 656 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java b/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java index cd72a5e3c3b..aef17d1ab34 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/util/JsonResponseBuilder.java @@ -222,7 +222,7 @@ public JsonResponseBuilder log(Logger logger, Level level, Optional e metadata.deleteCharAt(metadata.length()-1); if (ex.isPresent()) { - ex.get().printStackTrace(); + ex.get().printStackTrace(); metadata.append("|"); logger.log(level, metadata.toString(), ex); if(includeStackTrace) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 91701418240..5c2adee3da9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -48,15 +48,15 @@ public class FileAccessIO extends StorageIO { - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.FileAccessIO"); + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.FileAccessIO"); + + + public FileAccessIO() { + // Constructor only for testing + super(null, null, null); + } - public FileAccessIO() { - //Constructor only for 
testing - super(null, null, null); - } - public FileAccessIO(T dvObject, DataAccessRequest req, String driverId ) { super(dvObject, req, driverId); @@ -67,9 +67,9 @@ public FileAccessIO(T dvObject, DataAccessRequest req, String driverId ) { // "Direct" File Access IO, opened on a physical file not associated with // a specific DvObject public FileAccessIO(String storageLocation, String driverId) { - super(storageLocation, driverId); - this.setIsLocalFile(true); - logger.fine("Storage path: " + storageLocation); + super(storageLocation, driverId); + this.setIsLocalFile(true); + logger.fine("Storage path: " + storageLocation); physicalPath = Paths.get(storageLocation); } @@ -124,10 +124,10 @@ public void open (DataAccessOption... options) throws IOException { } } else if (isWriteAccess) { // Creates a new directory as needed for a dataset. - Path datasetPath=Paths.get(getDatasetDirectory()); - if (datasetPath != null && !Files.exists(datasetPath)) { - Files.createDirectories(datasetPath); - } + Path datasetPath=Paths.get(getDatasetDirectory()); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } FileOutputStream fout = openLocalFileAsOutputStream(); if (fout == null) { @@ -163,21 +163,21 @@ public void open (DataAccessOption... 
options) throws IOException { // this.setInputStream(fin); } else if (isWriteAccess) { //this checks whether a directory for a dataset exists - Path datasetPath=Paths.get(getDatasetDirectory()); - if (datasetPath != null && !Files.exists(datasetPath)) { - Files.createDirectories(datasetPath); - } + Path datasetPath=Paths.get(getDatasetDirectory()); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } dataset.setStorageIdentifier(this.driverId + "://"+dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage()); } } else if (dvObject instanceof Dataverse) { dataverse = this.getDataverse(); } else { - logger.fine("Overlay case: FileAccessIO open for : " + physicalPath.toString()); - Path datasetPath= physicalPath.getParent(); - if (datasetPath != null && !Files.exists(datasetPath)) { - Files.createDirectories(datasetPath); - } + logger.fine("Overlay case: FileAccessIO open for : " + physicalPath.toString()); + Path datasetPath= physicalPath.getParent(); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } //throw new IOException("Data Access: Invalid DvObject type"); } // This "status" is a leftover from 3.6; we don't have a use for it @@ -303,8 +303,8 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { throw new IOException("Null or invalid Auxiliary Object Tag."); } if(isDirectAccess()) { - //Overlay case - return Paths.get(physicalPath.toString() + "." + auxItemTag); + //Overlay case + return Paths.get(physicalPath.toString() + "." 
+ auxItemTag); } String datasetDirectory = getDatasetDirectory(); @@ -329,7 +329,7 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { } - @Override + @Override public void backupAsAux(String auxItemTag) throws IOException { Path auxPath = getAuxObjectAsPath(auxItemTag); @@ -584,14 +584,14 @@ private String getDatasetDirectory() throws IOException { } - private String getFilesRootDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory"); - - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - return filesRootDirectory; - } + private String getFilesRootDirectory() { + String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory"); + + if (filesRootDirectory == null || filesRootDirectory.equals("")) { + filesRootDirectory = "/tmp/files"; + } + return filesRootDirectory; + } private List listCachedFiles() throws IOException { List auxItems = new ArrayList<>(); @@ -654,10 +654,10 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException return in; } private String stripDriverId(String storageIdentifier) { - int separatorIndex = storageIdentifier.indexOf("://"); - if(separatorIndex>0) { - return storageIdentifier.substring(separatorIndex + 3); + int separatorIndex = storageIdentifier.indexOf("://"); + if(separatorIndex>0) { + return storageIdentifier.substring(separatorIndex + 3); } - return storageIdentifier; - } + return storageIdentifier; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index 79f7d6b23a7..1cd021de4cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -1,45 +1,27 @@ package edu.harvard.iq.dataverse.dataaccess; -import 
com.amazonaws.AmazonClientException; -import com.amazonaws.HttpMethod; -import com.amazonaws.SdkClientException; -import com.amazonaws.auth.profile.ProfileCredentialsProvider; -import com.amazonaws.client.builder.AwsClientBuilder; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.datavariable.DataVariable; -import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; -import java.io.File; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.URL; -import java.net.URLEncoder; import java.nio.channels.Channel; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.Path; -import java.nio.file.Paths; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; import java.util.List; -import java.util.Random; import java.util.logging.Logger; - -import org.apache.commons.io.IOUtils; import org.apache.http.Header; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; @@ -61,10 +43,7 @@ import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; -import javax.json.Json; -import javax.json.JsonObjectBuilder; import javax.net.ssl.SSLContext; -import javax.validation.constraints.NotNull; /** * @author qqmyers @@ -78,474 +57,474 @@ */ public class HTTPOverlayAccessIO extends StorageIO { - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.HttpOverlayAccessIO"); - - private StorageIO baseStore = null; - private String 
urlPath = null; - private String baseUrl = null; - - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; - - public HTTPOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { - super(dvObject, req, driverId); - this.setIsLocalFile(false); - configureStores(req, driverId, null); - logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - // TODO: validate the storage location supplied - urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - logger.fine("Base URL: " + urlPath); - } - - public HTTPOverlayAccessIO(String storageLocation, String driverId) throws IOException { - super(null, null, driverId); - this.setIsLocalFile(false); - configureStores(null, driverId, storageLocation); - - // TODO: validate the storage location supplied - urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - logger.fine("Base URL: " + urlPath); - } - - @Override - public void open(DataAccessOption... 
options) throws IOException { - - baseStore.open(options); - - DataAccessRequest req = this.getRequest(); - - if (isWriteAccessRequested(options)) { - isWriteAccess = true; - isReadAccess = false; - } else { - isWriteAccess = false; - isReadAccess = true; - } - - if (dvObject instanceof DataFile) { - String storageIdentifier = dvObject.getStorageIdentifier(); - - DataFile dataFile = this.getDataFile(); - - if (req != null && req.getParameter("noVarHeader") != null) { - baseStore.setNoVarHeader(true); - } - - if (storageIdentifier == null || "".equals(storageIdentifier)) { - throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); - } - - // Fix new DataFiles: DataFiles that have not yet been saved may use this method - // when they don't have their storageidentifier in the final form - // So we fix it up here. ToDo: refactor so that storageidentifier is generated - // by the appropriate StorageIO class and is final from the start. - logger.fine("StorageIdentifier is: " + storageIdentifier); - - if (isReadAccess) { - if (dataFile.getFilesize() >= 0) { - this.setSize(dataFile.getFilesize()); - } else { - logger.fine("Setting size"); - this.setSize(getSizeFromHttpHeader()); - } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { - - List datavariables = dataFile.getDataTable().getDataVariables(); - String varHeaderLine = generateVariableHeader(datavariables); - this.setVarHeader(varHeaderLine); - } - - } - - this.setMimeType(dataFile.getContentType()); - - try { - this.setFileName(dataFile.getFileMetadata().getLabel()); - } catch (Exception ex) { - this.setFileName("unknown"); - } - } else if (dvObject instanceof Dataset) { - throw new IOException( - "Data Access: HTTPOverlay Storage driver does not support dvObject type Dataverse yet"); - } else if (dvObject instanceof 
Dataverse) { - throw new IOException( - "Data Access: HTTPOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromHttpHeader()); - } - } - - private long getSizeFromHttpHeader() { - long size = -1; - HttpHead head = new HttpHead(baseUrl + "/" + urlPath); - try { - CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); - - try { - int code = response.getStatusLine().getStatusCode(); - logger.fine("Response for HEAD: " + code); - switch (code) { - case 200: - Header[] headers =response.getHeaders(HTTP.CONTENT_LEN); - logger.fine("Num headers: " + headers.length); - String sizeString = response.getHeaders(HTTP.CONTENT_LEN )[0].getValue(); - logger.fine("Content-Length: " + sizeString); - size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN )[0].getValue()); - logger.fine("Found file size: " + size); - break; - default: - logger.warning("Response from " + head.getURI().toString() + " was " + code); - } - } finally { - EntityUtils.consume(response.getEntity()); - } - } catch (Exception e) { - logger.warning(e.getMessage()); - } - return size; - } - - @Override - public InputStream getInputStream() throws IOException { - if (super.getInputStream() == null) { - try { - HttpGet get = new HttpGet(baseUrl + "/" + urlPath); - CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); - - int code = response.getStatusLine().getStatusCode(); - switch (code) { - case 200: - setInputStream(response.getEntity().getContent()); - break; - default: - logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); - } - } catch (Exception e) { - logger.warning(e.getMessage()); - e.printStackTrace(); - throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); - - } - setChannel(Channels.newChannel(super.getInputStream())); - } - return 
super.getInputStream(); - } - - @Override - public Channel getChannel() throws IOException { - if (super.getChannel() == null) { - getInputStream(); - } - return channel; - } - - @Override - public ReadableByteChannel getReadChannel() throws IOException { - // Make sure StorageIO.channel variable exists - getChannel(); - return super.getReadChannel(); - } - - @Override - public void delete() throws IOException { - // Delete is best-effort - we tell the remote server and it may or may not - // implement this call - if (!isDirectAccess()) { - throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); - } - try { - HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); - CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); - try { - int code = response.getStatusLine().getStatusCode(); - switch (code) { - case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); - default: - logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); - } - } finally { - EntityUtils.consume(response.getEntity()); - } - } catch (Exception e) { - logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); - - } - - // Delete all the cached aux files as well: - deleteAllAuxObjects(); - - } - - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. 
- */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf("://") + 3); - fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); - if (this.getDvObject() instanceof Dataset) { - fullStorageLocation = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStorageLocation; - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("HttpOverlayAccessIO: Dataverses are not a supported dvObject"); - } - return fullStorageLocation; - } - - @Override - public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); - } - - @Override - public boolean exists() { - logger.fine("Exists called"); - return (getSizeFromHttpHeader() != -1); - } - - @Override - public WritableByteChannel getWriteChannel() throws 
UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: there are no write Channels associated with S3 objects."); - } - - @Override - public OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: there are no output Streams associated with S3 objects."); - } - - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - - @Override - public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); - if ("true".equalsIgnoreCase(optionValue)) { - return true; - } - return false; - } - - public String generateTemporaryDownloadUrl() throws IOException { - String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secretkey"); - if (secretKey == null) { - return baseUrl + "/" + urlPath; - } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", secretKey); - } - } - - int getUrlExpirationMinutes() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - baseUrl = System.getProperty("dataverse.files." + this.driverId + ".baseUrl"); - - if (baseStore == null) { - String baseDriverId = System.getProperty("dataverse.files." + driverId + ".baseStore"); - String fullStorageLocation = null; - String baseDriverType= System.getProperty("dataverse.files." 
+ baseDriverId + ".type"); - if (this.getDvObject() != null) { - fullStorageLocation = getStorageLocation(); - - // S3 expects :/// - switch (baseDriverType) { - case "s3": - fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case "file": - fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" - + fullStorageLocation; - break; - default: - logger.warning("Not Implemented: HTTPOverlay store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); - } - - } else if (storageLocation != null) { - // ://// - String storageId = storageLocation.substring(storageLocation.indexOf("://" + 3)); - fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); - - switch (baseDriverType) { - case "s3": - fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case "file": - fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" - + fullStorageLocation; - break; - default: - logger.warning("Not Implemented: HTTPOverlay store with base store type: " - + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); - throw new IOException("Not implemented"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - if(baseDriverType.contentEquals("s3")) { - ((S3AccessIO)baseStore).setMainDriver(false); - } - } - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); - } - - @Override - public void savePath(Path fileSystemPath) throws IOException { - throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: savePath() not implemented in this storage driver."); - - } - - @Override - public void saveInputStream(InputStream inputStream) throws IOException { - throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: saveInputStream() not implemented in this storage driver."); - - } - - @Override - public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { - throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); - - } + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.HttpOverlayAccessIO"); + + private StorageIO baseStore = null; + private String urlPath = null; + private String baseUrl = null; + + private static HttpClientContext localContext = HttpClientContext.create(); + private PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + private int timeout = 1200; + private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + private static boolean trustCerts = false; + private int httpConcurrency = 4; + + public HTTPOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + super(dvObject, req, driverId); + this.setIsLocalFile(false); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + // TODO: validate the storage location supplied + urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + logger.fine("Base URL: " + urlPath); + } + + 
public HTTPOverlayAccessIO(String storageLocation, String driverId) throws IOException { + super(null, null, driverId); + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + // TODO: validate the storage location supplied + urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + logger.fine("Base URL: " + urlPath); + } + + @Override + public void open(DataAccessOption... options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + // Fix new DataFiles: DataFiles that have not yet been saved may use this method + // when they don't have their storageidentifier in the final form + // So we fix it up here. ToDo: refactor so that storageidentifier is generated + // by the appropriate StorageIO class and is final from the start. 
+ logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(getSizeFromHttpHeader()); + } + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: HTTPOverlay Storage driver does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: HTTPOverlay Storage driver does not support dvObject type Dataverse yet"); + } else { + this.setSize(getSizeFromHttpHeader()); + } + } + + private long getSizeFromHttpHeader() { + long size = -1; + HttpHead head = new HttpHead(baseUrl + "/" + urlPath); + try { + CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); + + try { + int code = response.getStatusLine().getStatusCode(); + logger.fine("Response for HEAD: " + code); + switch (code) { + case 200: + Header[] headers = response.getHeaders(HTTP.CONTENT_LEN); + logger.fine("Num headers: " + headers.length); + String sizeString = response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); + logger.fine("Content-Length: " + sizeString); + size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); + logger.fine("Found file size: " + size); + break; + default: + logger.warning("Response from " + head.getURI().toString() + " was " + code); + } + 
} finally { + EntityUtils.consume(response.getEntity()); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + } + return size; + } + + @Override + public InputStream getInputStream() throws IOException { + if (super.getInputStream() == null) { + try { + HttpGet get = new HttpGet(baseUrl + "/" + urlPath); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + setInputStream(response.getEntity().getContent()); + break; + default: + logger.warning("Response from " + get.getURI().toString() + " was " + code); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + e.printStackTrace(); + throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); + + } + setChannel(Channels.newChannel(super.getInputStream())); + } + return super.getInputStream(); + } + + @Override + public Channel getChannel() throws IOException { + if (super.getChannel() == null) { + getInputStream(); + } + return channel; + } + + @Override + public ReadableByteChannel getReadChannel() throws IOException { + // Make sure StorageIO.channel variable exists + getChannel(); + return super.getReadChannel(); + } + + @Override + public void delete() throws IOException { + // Delete is best-effort - we tell the remote server and it may or may not + // implement this call + if (!isDirectAccess()) { + throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); + } + try { + HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); + CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); + try { + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); + default: + logger.fine("Response from DELETE 
on " + del.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); + + } + + // Delete all the cached aux files as well: + deleteAllAuxObjects(); + + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. 
+ */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf("://") + 3); + fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); + if (this.getDvObject() instanceof Dataset) { + fullStorageLocation = this.getDataset().getAuthorityForFileStorage() + "/" + + this.getDataset().getIdentifierForFileStorage() + "/" + fullStorageLocation; + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("HttpOverlayAccessIO: Dataverses are not a supported dvObject"); + } + return fullStorageLocation; + } + + @Override + public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); + } + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (getSizeFromHttpHeader() != -1); + } + + @Override + public WritableByteChannel getWriteChannel() throws 
UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: there are no write Channels associated with S3 objects."); + } + + @Override + public OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: there are no output Streams associated with S3 objects."); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + @Override + public boolean downloadRedirectEnabled() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } + + public String generateTemporaryDownloadUrl() throws IOException { + String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secretkey"); + if (secretKey == null) { + return baseUrl + "/" + urlPath; + } else { + return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", secretKey); + } + } + + int getUrlExpirationMinutes() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + baseUrl = System.getProperty("dataverse.files." + this.driverId + ".baseUrl"); + + if (baseStore == null) { + String baseDriverId = System.getProperty("dataverse.files." + driverId + ".baseStore"); + String fullStorageLocation = null; + String baseDriverType = System.getProperty("dataverse.files." 
+ baseDriverId + ".type"); + if (this.getDvObject() != null) { + fullStorageLocation = getStorageLocation(); + + // S3 expects :/// + switch (baseDriverType) { + case "s3": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case "file": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: HTTPOverlay store with base store type: " + + System.getProperty("dataverse.files." + baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + + } else if (storageLocation != null) { + // ://// + String storageId = storageLocation.substring(storageLocation.indexOf("://" + 3)); + fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); + + switch (baseDriverType) { + case "s3": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case "file": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: HTTPOverlay store with base store type: " + + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + if (baseDriverType.contentEquals("s3")) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. + connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); + } + + @Override + public void savePath(Path fileSystemPath) throws IOException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: savePath() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + throw new UnsupportedDataAccessOperationException( + "HttpOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 2819dabbe9b..9c2220a1002 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -116,7 +116,7 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { public S3AccessIO(String storageLocation, String driverId) { this(null, null, driverId); // TODO: validate the storage location supplied - logger.fine("Instantiating with location: " + storageLocation); + logger.fine("Instantiating with location: " + storageLocation); bucketName = storageLocation.substring(0,storageLocation.indexOf('/')); minPartSize = getMinPartSize(driverId); key = storageLocation.substring(storageLocation.indexOf('/')+1); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 233b94ce007..8f53799cb98 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -17,134 +17,134 @@ */ public class UrlSignerUtil { - private static 
final Logger logger = Logger.getLogger(UrlSignerUtil.class.getName()); + private static final Logger logger = Logger.getLogger(UrlSignerUtil.class.getName()); - /** - * - * @param baseUrl - the URL to sign - cannot contain query params - * "until","user", "method", or "token" - * @param timeout - how many minutes to make the URL valid for (note - time skew - * between the creator and receiver could affect the validation - * @param user - a string representing the user - should be understood by the - * creator/receiver - * @param method - one of the HTTP methods - * @param key - a secret key shared by the creator/receiver. In Dataverse - * this could be an APIKey (when sending URL to a tool that will - * use it to retrieve info from Dataverse) - * @return - the signed URL - */ - public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { - StringBuilder signedUrl = new StringBuilder(baseUrl); + /** + * + * @param baseUrl - the URL to sign - cannot contain query params + * "until","user", "method", or "token" + * @param timeout - how many minutes to make the URL valid for (note - time skew + * between the creator and receiver could affect the validation + * @param user - a string representing the user - should be understood by the + * creator/receiver + * @param method - one of the HTTP methods + * @param key - a secret key shared by the creator/receiver. 
In Dataverse + * this could be an APIKey (when sending URL to a tool that will + * use it to retrieve info from Dataverse) + * @return - the signed URL + */ + public static String signUrl(String baseUrl, Integer timeout, String user, String method, String key) { + StringBuilder signedUrl = new StringBuilder(baseUrl); - boolean firstParam = true; - if (baseUrl.contains("?")) { - firstParam = false; - } - if (timeout != null) { - LocalDateTime validTime = LocalDateTime.now(); - validTime = validTime.plusMinutes(timeout); - validTime.toString(); - signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); - firstParam=false; - } - if (user != null) { - signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); - firstParam=false; - } - if (method != null) { - signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); - } - signedUrl.append("&token="); - logger.fine("String to sign: " + signedUrl.toString() + ""); - signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); - logger.fine("Generated Signed URL: " + signedUrl.toString()); - if (logger.isLoggable(Level.FINE)) { - logger.fine( - "URL signature is " + (isValidUrl(signedUrl.toString(), method, user, key) ? "valid" : "invalid")); - } - return signedUrl.toString(); - } + boolean firstParam = true; + if (baseUrl.contains("?")) { + firstParam = false; + } + if (timeout != null) { + LocalDateTime validTime = LocalDateTime.now(); + validTime = validTime.plusMinutes(timeout); + validTime.toString(); + signedUrl.append(firstParam ? "?" : "&").append("until=").append(validTime); + firstParam = false; + } + if (user != null) { + signedUrl.append(firstParam ? "?" : "&").append("user=").append(user); + firstParam = false; + } + if (method != null) { + signedUrl.append(firstParam ? "?" 
: "&").append("method=").append(method); + } + signedUrl.append("&token="); + logger.fine("String to sign: " + signedUrl.toString() + ""); + signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); + logger.fine("Generated Signed URL: " + signedUrl.toString()); + if (logger.isLoggable(Level.FINE)) { + logger.fine( + "URL signature is " + (isValidUrl(signedUrl.toString(), method, user, key) ? "valid" : "invalid")); + } + return signedUrl.toString(); + } - /** - * This method will only return true if the URL and parameters except the - * "token" are unchanged from the original/match the values sent to this method, - * and the "token" parameter matches what this method recalculates using the - * shared key THe method also assures that the "until" timestamp is after the - * current time. - * - * @param signedUrl - the signed URL as received from Dataverse - * @param method - an HTTP method. If provided, the method in the URL must - * match - * @param user - a string representing the user, if provided the value must - * match the one in the url - * @param key - the shared secret key to be used in validation - * @return - true if valid, false if not: e.g. the key is not the same as the - * one used to generate the "token" any part of the URL preceding the - * "token" has been altered the method doesn't match (e.g. the server - * has received a POST request and the URL only allows GET) the user - * string doesn't match (e.g. 
the server knows user A is logged in, but - * the URL is only for user B) the url has expired (was used after the - * until timestamp) - */ - public static boolean isValidUrl(String signedUrl, String method, String user, String key) { - boolean valid = true; - try { - URL url = new URL(signedUrl); - List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); - String hash = null; - String dateString = null; - String allowedMethod = null; - String allowedUser = null; - for (NameValuePair nvp : params) { - if (nvp.getName().equals("token")) { - hash = nvp.getValue(); - logger.fine("Hash: " + hash); - } - if (nvp.getName().equals("until")) { - dateString = nvp.getValue(); - logger.fine("Until: " + dateString); - } - if (nvp.getName().equals("method")) { - allowedMethod = nvp.getValue(); - logger.fine("Method: " + allowedMethod); - } - if (nvp.getName().equals("user")) { - allowedUser = nvp.getValue(); - logger.fine("User: " + allowedUser); - } - } + /** + * This method will only return true if the URL and parameters except the + * "token" are unchanged from the original/match the values sent to this method, + * and the "token" parameter matches what this method recalculates using the + * shared key THe method also assures that the "until" timestamp is after the + * current time. + * + * @param signedUrl - the signed URL as received from Dataverse + * @param method - an HTTP method. If provided, the method in the URL must + * match + * @param user - a string representing the user, if provided the value must + * match the one in the url + * @param key - the shared secret key to be used in validation + * @return - true if valid, false if not: e.g. the key is not the same as the + * one used to generate the "token" any part of the URL preceding the + * "token" has been altered the method doesn't match (e.g. the server + * has received a POST request and the URL only allows GET) the user + * string doesn't match (e.g. 
the server knows user A is logged in, but + * the URL is only for user B) the url has expired (was used after the + * until timestamp) + */ + public static boolean isValidUrl(String signedUrl, String method, String user, String key) { + boolean valid = true; + try { + URL url = new URL(signedUrl); + List params = URLEncodedUtils.parse(url.getQuery(), Charset.forName("UTF-8")); + String hash = null; + String dateString = null; + String allowedMethod = null; + String allowedUser = null; + for (NameValuePair nvp : params) { + if (nvp.getName().equals("token")) { + hash = nvp.getValue(); + logger.fine("Hash: " + hash); + } + if (nvp.getName().equals("until")) { + dateString = nvp.getValue(); + logger.fine("Until: " + dateString); + } + if (nvp.getName().equals("method")) { + allowedMethod = nvp.getValue(); + logger.fine("Method: " + allowedMethod); + } + if (nvp.getName().equals("user")) { + allowedUser = nvp.getValue(); + logger.fine("User: " + allowedUser); + } + } - int index = signedUrl.indexOf("&token="); - // Assuming the token is last - doesn't have to be, but no reason for the URL - // params to be rearranged either, and this should only cause false negatives if - // it does happen - String urlToHash = signedUrl.substring(0, index + 7); - logger.fine("String to hash: " + urlToHash + ""); - String newHash = DigestUtils.sha512Hex(urlToHash + key); - logger.fine("Calculated Hash: " + newHash); - if (!hash.equals(newHash)) { - logger.fine("Hash doesn't match"); - valid = false; - } - if (dateString != null && LocalDateTime.parse(dateString).isBefore(LocalDateTime.now())) { - logger.fine("Url is expired"); - valid = false; - } - if (method != null && !method.equals(allowedMethod)) { - logger.fine("Method doesn't match"); - valid = false; - } - if (user != null && user.equals(allowedUser)) { - logger.fine("User doesn't match"); - valid = false; - } - } catch (Throwable t) { - // Want to catch anything like null pointers, etc. 
to force valid=false upon any - // error - logger.warning("Bad URL: " + signedUrl + " : " + t.getMessage()); - valid = false; - } - return valid; - } + int index = signedUrl.indexOf("&token="); + // Assuming the token is last - doesn't have to be, but no reason for the URL + // params to be rearranged either, and this should only cause false negatives if + // it does happen + String urlToHash = signedUrl.substring(0, index + 7); + logger.fine("String to hash: " + urlToHash + ""); + String newHash = DigestUtils.sha512Hex(urlToHash + key); + logger.fine("Calculated Hash: " + newHash); + if (!hash.equals(newHash)) { + logger.fine("Hash doesn't match"); + valid = false; + } + if (dateString != null && LocalDateTime.parse(dateString).isBefore(LocalDateTime.now())) { + logger.fine("Url is expired"); + valid = false; + } + if (method != null && !method.equals(allowedMethod)) { + logger.fine("Method doesn't match"); + valid = false; + } + if (user != null && !user.equals(allowedUser)) { + logger.fine("User doesn't match"); + valid = false; + } + } catch (Throwable t) { + // Want to catch anything like null pointers, etc. to force valid=false upon any + // error + logger.warning("Bad URL: " + signedUrl + " : " + t.getMessage()); + valid = false; + } + return valid; + } } From e7ddf8697f1ddc90f58c24e13f95d575c69f1397 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Sep 2021 16:29:37 -0400 Subject: [PATCH 24/76] fix for get dataset logo with overlay store to get the base store when starting from a dataset, we don't need to parse the storageidentifier (i.e. there's no base store identifier to parse out like there is with a datafile.) 
--- .../dataaccess/HTTPOverlayAccessIO.java | 88 ++++++++++--------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java index 1cd021de4cb..8a1568d436b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/HTTPOverlayAccessIO.java @@ -421,50 +421,54 @@ private void configureStores(DataAccessRequest req, String driverId, String stor String baseDriverId = System.getProperty("dataverse.files." + driverId + ".baseStore"); String fullStorageLocation = null; String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type"); - if (this.getDvObject() != null) { - fullStorageLocation = getStorageLocation(); - - // S3 expects :/// - switch (baseDriverType) { - case "s3": - fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case "file": - fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" - + fullStorageLocation; - break; - default: - logger.warning("Not Implemented: HTTPOverlay store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); - } - - } else if (storageLocation != null) { - // ://// - String storageId = storageLocation.substring(storageLocation.indexOf("://" + 3)); - fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); - - switch (baseDriverType) { - case "s3": - fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case "file": - fullStorageLocation = baseDriverId + "://" - + System.getProperty("dataverse.files." 
+ baseDriverId + ".directory") + "/" - + fullStorageLocation; - break; - default: - logger.warning("Not Implemented: HTTPOverlay store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); + if(dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStorageLocation(); + + // S3 expects :/// + switch (baseDriverType) { + case "s3": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case "file": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: HTTPOverlay store with base store type: " + + System.getProperty("dataverse.files." + baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + + } else if (storageLocation != null) { + // ://// + String storageId = storageLocation.substring(storageLocation.indexOf("://" + 3)); + fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); + + switch (baseDriverType) { + case "s3": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case "file": + fullStorageLocation = baseDriverId + "://" + + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: HTTPOverlay store with base store type: " + + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); + throw new IOException("Not implemented"); + } } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); if (baseDriverType.contentEquals("s3")) { ((S3AccessIO) baseStore).setMainDriver(false); } From 6b9cdef9f9791b833a3d5086337c0713e9119ea7 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Sep 2021 16:46:13 -0400 Subject: [PATCH 25/76] update to check store type Note: I don't think this code gets used as there is as yet no UI to specify a remotely stored file. (but now it mirrors the code in Datasets.addFileToDataset --- .../edu/harvard/iq/dataverse/EditDatafilesPage.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 0458179a340..c024bfc1668 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -2017,12 +2017,12 @@ public void handleExternalUpload() { int lastColon = fullStorageIdentifier.lastIndexOf(':'); String storageLocation=null; - //Should check storage type, not parse name - //This works except with s3 stores with ids starting with 'http' - if(fullStorageIdentifier.startsWith("http")) { + String driverType = DataAccess.getDriverType(fullStorageIdentifier.substring(0, fullStorageIdentifier.indexOf(":"))); + logger.fine("drivertype: " + driverType); + if(driverType.equals("http")) { //HTTP external URL case - //ToDo - check for valid URL - storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + FileUtil.generateStorageIdentifier() + "//" +fullStorageIdentifier.substring(lastColon+1); + //ToDo - check for valid URL + storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + 
dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + FileUtil.generateStorageIdentifier() + "//" +fullStorageIdentifier.substring(lastColon+1); } else { //S3 direct upload case storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); From 60d7d0dff8f6f0063f94f44bf79080867e3ec0ae Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Sep 2021 17:04:17 -0400 Subject: [PATCH 26/76] refactor to support support addFiles api from #7901 --- .../harvard/iq/dataverse/api/Datasets.java | 54 ++++++++----------- .../iq/dataverse/dataaccess/DataAccess.java | 28 ++++++++++ .../datasetutility/AddReplaceFileHelper.java | 7 +-- 3 files changed, 52 insertions(+), 37 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d58eb739422..ddb3cf489d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2046,34 +2046,24 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, String newFilename = null; String newFileContentType = null; String newStorageIdentifier = null; - if (null == contentDispositionHeader) { - if (optionalFileParams.hasStorageIdentifier()) { - newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - logger.fine("found: " + newStorageIdentifier); - String driverType = DataAccess.getDriverType(newStorageIdentifier.substring(0, newStorageIdentifier.indexOf(":"))); - logger.fine("drivertype: " + driverType); - if(driverType.equals("http")) { - //Add a generated identifier for the aux files - logger.fine("in: " + newStorageIdentifier); - int lastColon = newStorageIdentifier.lastIndexOf("://"); - newStorageIdentifier= newStorageIdentifier.substring(0,lastColon +3) + 
FileUtil.generateStorageIdentifier() + "//" +newStorageIdentifier.substring(lastColon+3); - logger.fine("out: " + newStorageIdentifier); - } - // ToDo - check that storageIdentifier is valid - if (optionalFileParams.hasFileName()) { - newFilename = optionalFileParams.getFileName(); - if (optionalFileParams.hasMimetype()) { - newFileContentType = optionalFileParams.getMimeType(); - } - } - } else { - return error(BAD_REQUEST, - "You must upload a file or provide a storageidentifier, filename, and mimetype."); - } - } else { - newFilename = contentDispositionHeader.getFileName(); - newFileContentType = formDataBodyPart.getMediaType().toString(); - } + if (null == contentDispositionHeader) { + if (optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + } else { + return error(BAD_REQUEST, + "You must upload a file or provide a storageidentifier, filename, and mimetype."); + } + } else { + newFilename = contentDispositionHeader.getFileName(); + newFileContentType = formDataBodyPart.getMediaType().toString(); + } //------------------- @@ -2523,7 +2513,7 @@ public Response setFileStore(@PathParam("identifier") String dvIdtf, } if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); - } + } Dataset dataset; @@ -2541,7 +2531,7 @@ public Response setFileStore(@PathParam("identifier") String dvIdtf, return ok("Storage driver set to: " + store.getKey() + "/" + store.getValue()); } } - return error(Response.Status.BAD_REQUEST, + return error(Response.Status.BAD_REQUEST, "No Storage Driver found for : " + storageDriverLabel); } @@ -2559,7 +2549,7 @@ public Response resetFileStore(@PathParam("identifier") 
String dvIdtf, } if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); - } + } Dataset dataset; @@ -2571,7 +2561,7 @@ public Response resetFileStore(@PathParam("identifier") String dvIdtf, dataset.setStorageDriverId(null); datasetService.merge(dataset); - return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 395996babf2..d36b03a421d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -22,6 +22,8 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.util.FileUtil; + import java.io.IOException; import java.util.HashMap; import java.util.Properties; @@ -255,4 +257,30 @@ public static String getStorageDriverLabelFor(String storageDriverId) { } return label; } + + /** + * This method checks to see if an overlay store is being used and, if so, + * defines a base storage identifier for use with auxiliary files, and adds it + * into the returned value + * + * @param newStorageIdentifier + * @return - the newStorageIdentifier (for file, S3, swift stores) - the + * newStorageIdentifier with a new base store identifier inserted (for + * an overlay store) + */ + public static String expandStorageIdentifierIfNeeded(String newStorageIdentifier) { + logger.fine("found: " + newStorageIdentifier); + String driverType = DataAccess + .getDriverType(newStorageIdentifier.substring(0, newStorageIdentifier.indexOf(":"))); + logger.fine("drivertype: " + driverType); + if (driverType.equals("http")) { + // Add a generated identifier for the aux files + logger.fine("in: " + newStorageIdentifier); + int lastColon = 
newStorageIdentifier.lastIndexOf("://"); + newStorageIdentifier = newStorageIdentifier.substring(0, lastColon + 3) + + FileUtil.generateStorageIdentifier() + "//" + newStorageIdentifier.substring(lastColon + 3); + logger.fine("out: " + newStorageIdentifier); + } + return newStorageIdentifier; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 1fcc355ae6b..cc5cbe8a7cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -19,12 +19,10 @@ import edu.harvard.iq.dataverse.api.Files; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.impl.AbstractCreateDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; @@ -41,7 +39,6 @@ import java.util.Iterator; import java.util.List; import java.util.Objects; -import java.util.ResourceBundle; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -57,7 +54,6 @@ import javax.ws.rs.core.Response; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.io.IOUtils; import 
org.ocpsoft.common.util.Strings; @@ -2035,6 +2031,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { String newStorageIdentifier = null; if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { From da133ec7bca6aef265474f45685f0a61a360134f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Sep 2021 17:14:18 -0400 Subject: [PATCH 27/76] refactor UI code --- .../iq/dataverse/EditDatafilesPage.java | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index c024bfc1668..65b5784b3c9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -2013,20 +2013,12 @@ public void handleExternalUpload() { if (!checksumTypeString.isBlank()) { checksumType = ChecksumType.fromString(checksumTypeString); } - //ToDo - move this to StorageIO subclasses - + + //Should only be one colon with curent design int lastColon = fullStorageIdentifier.lastIndexOf(':'); - String storageLocation=null; - String driverType = DataAccess.getDriverType(fullStorageIdentifier.substring(0, fullStorageIdentifier.indexOf(":"))); - logger.fine("drivertype: " + driverType); - if(driverType.equals("http")) { - //HTTP external URL case - //ToDo - check for valid URL - storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + FileUtil.generateStorageIdentifier() + "//" +fullStorageIdentifier.substring(lastColon+1); - } else { - //S3 direct upload case - 
storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); - } + String storageLocation = fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); + storageLocation = DataAccess.expandStorageIdentifierIfNeeded(storageLocation); + if (uploadInProgress.isFalse()) { uploadInProgress.setValue(true); } From 7b68d57295853c0dca32cfc0f7fa51bb4be0f6e1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Apr 2022 09:27:33 -0400 Subject: [PATCH 28/76] Refactor to RemoteOverlay, use constants for store types/sep --- .../iq/dataverse/DvObjectContainer.java | 1 - .../iq/dataverse/EditDatafilesPage.java | 4 +- .../iq/dataverse/FileDownloadServiceBean.java | 4 +- .../harvard/iq/dataverse/api/Datasets.java | 2 +- .../iq/dataverse/dataaccess/DataAccess.java | 47 +++++++++++-------- .../iq/dataverse/dataaccess/FileAccessIO.java | 12 ++--- .../dataaccess/RemoteOverlayAccessIO.java | 28 +++++------ .../iq/dataverse/dataaccess/S3AccessIO.java | 18 +++---- .../iq/dataverse/dataaccess/StorageIO.java | 2 +- .../dataverse/dataaccess/SwiftAccessIO.java | 10 ++-- .../iq/dataverse/dataset/DatasetUtil.java | 4 +- .../impl/AbstractCreateDatasetCommand.java | 2 +- .../harvard/iq/dataverse/util/FileUtil.java | 8 ++-- 13 files changed, 74 insertions(+), 68 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index 746efded48b..cf8f4d36d5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -15,7 +15,6 @@ public abstract class DvObjectContainer extends DvObject { - //Default to "file" is for tests only public static final String 
UNDEFINED_METADATA_LANGUAGE_CODE = "undefined"; //Used in dataverse.xhtml as a non-null selection option value (indicating inheriting the default) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 31634dd654f..b8dabd0e699 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1920,7 +1920,7 @@ private void handleReplaceFileUpload(String fullStorageLocation, fileReplacePageHelper.resetReplaceFileHelper(); saveEnabled = false; - String storageIdentifier = DataAccess.getStorarageIdFromLocation(fullStorageLocation); + String storageIdentifier = DataAccess.getStorageIdFromLocation(fullStorageLocation); if (fileReplacePageHelper.handleNativeFileUpload(null, storageIdentifier, fileName, contentType, checkSumValue, checkSumType)) { saveEnabled = true; @@ -3026,7 +3026,7 @@ public void saveAdvancedOptions() { } public boolean rsyncUploadSupported() { - // ToDo - rsync was written before multiple store support and currently is hardcoded to use the "s3" store. + // ToDo - rsync was written before multiple store support and currently is hardcoded to use the DataAccess.S3 store. 
// When those restrictions are lifted/rsync can be configured per store, the test in the // Dataset Util method should be updated if (settingsWrapper.isRsyncUpload() && !DatasetUtil.isAppropriateStorageDriver(dataset)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 6d3929a55e2..cb3dd7b9f7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -561,12 +561,12 @@ public void addFileToCustomZipJob(String key, DataFile dataFile, Timestamp times public String getDirectStorageLocatrion(String storageLocation) { String storageDriverId; - int separatorIndex = storageLocation.indexOf("://"); + int separatorIndex = storageLocation.indexOf(DataAccess.SEPARATOR); if ( separatorIndex > 0 ) { storageDriverId = storageLocation.substring(0,separatorIndex); String storageType = DataAccess.getDriverType(storageDriverId); - if ("file".equals(storageType) || "s3".equals(storageType)) { + if (DataAccess.FILE.equals(storageType) || DataAccess.S3.equals(storageType)) { return storageType.concat(storageLocation.substring(separatorIndex)); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 686d3e863e3..e03ea7492f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1904,7 +1904,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String String message = wr.getMessage(); return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to put the files into Dataverse. 
Message was '" + message + "'."); } - } else if(storageDriverType.equals("s3")) { + } else if(storageDriverType.equals(DataAccess.S3)) { logger.log(Level.INFO, "S3 storage driver used for DCM (dataset id={0})", dataset.getId()); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index d36b03a421d..14ead925445 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -44,9 +44,16 @@ public DataAccess() { }; + public static final String FILE = "file"; + public static final String S3 = "s3"; + static final String SWIFT = "swift"; + static final String REMOTE = "remote"; + static final String TMP = "tmp"; + public static final String SEPARATOR = "://"; //Default to "file" is for tests only - public static final String DEFAULT_STORAGE_DRIVER_IDENTIFIER = System.getProperty("dataverse.files.storage-driver-id", "file"); + public static final String DEFAULT_STORAGE_DRIVER_IDENTIFIER = System.getProperty("dataverse.files.storage-driver-id", FILE); public static final String UNDEFINED_STORAGE_DRIVER_IDENTIFIER = "undefined"; //Used in dataverse.xhtml as a non-null selection option value (indicating a null driver/inheriting the default) + // The getStorageIO() methods initialize StorageIO objects for // datafiles that are already saved using one of the supported Dataverse @@ -62,7 +69,7 @@ public static StorageIO getStorageIO(T dvObject, DataAcc throw new IOException("getDataAccessObject: null or invalid datafile."); } String storageIdentifier = dvObject.getStorageIdentifier(); - int separatorIndex = storageIdentifier.indexOf("://"); + int separatorIndex = storageIdentifier.indexOf(SEPARATOR); String storageDriverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; // default if (separatorIndex > 0) { storageDriverId = storageIdentifier.substring(0, separatorIndex); @@ -74,15 +81,15 @@ 
protected static StorageIO getStorageIO(T dvObject, Data String storageDriverId) throws IOException { String storageType = getDriverType(storageDriverId); switch (storageType) { - case "file": + case FILE: return new FileAccessIO<>(dvObject, req, storageDriverId); - case "s3": + case S3: return new S3AccessIO<>(dvObject, req, storageDriverId); - case "swift": + case SWIFT: return new SwiftAccessIO<>(dvObject, req, storageDriverId); - case "http": - return new HTTPOverlayAccessIO<>(dvObject, req, storageDriverId); - case "tmp": + case REMOTE: + return new RemoteOverlayAccessIO<>(dvObject, req, storageDriverId); + case TMP: throw new IOException( "DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); } @@ -105,11 +112,11 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) String storageLocation=response[1]; String storageType = getDriverType(storageDriverId); switch(storageType) { - case "file": + case FILE: return new FileAccessIO<>(storageLocation, storageDriverId); - case "s3": + case S3: return new S3AccessIO<>(storageLocation, storageDriverId); - case "swift": + case SWIFT: return new SwiftAccessIO<>(storageLocation, storageDriverId); default: logger.warning("Could not find storage driver for: " + fullStorageLocation); @@ -120,7 +127,7 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) public static String[] getDriverIdAndStorageLocation(String storageLocation) { //default if no prefix String storageIdentifier=storageLocation; - int separatorIndex = storageLocation.indexOf("://"); + int separatorIndex = storageLocation.indexOf(SEPARATOR); String storageDriverId = ""; //default if(separatorIndex>0) { storageDriverId = storageLocation.substring(0,separatorIndex); @@ -130,10 +137,10 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { } public static String getStorageIdFromLocation(String location) { - if(location.contains("://")) { + 
if(location.contains(SEPARATOR)) { //It's a full location with a driverId, so strip and reapply the driver id //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it - return location.substring(0,location.indexOf("://") +3) + location.substring(location.lastIndexOf('/')+1); + return location.substring(0,location.indexOf(SEPARATOR) +3) + location.substring(location.lastIndexOf('/')+1); } return location.substring(location.lastIndexOf('/')+1); } @@ -174,7 +181,7 @@ public static StorageIO createNewStorageIO(T dvObject, S * This if will catch any cases where that's attempted. */ // Tests send objects with no storageIdentifier set - if((dvObject.getStorageIdentifier()!=null) && dvObject.getStorageIdentifier().contains("://")) { + if((dvObject.getStorageIdentifier()!=null) && dvObject.getStorageIdentifier().contains(SEPARATOR)) { throw new IOException("Attempt to create new StorageIO for already stored object: " + dvObject.getStorageIdentifier()); } @@ -187,13 +194,13 @@ public static StorageIO createNewStorageIO(T dvObject, S } String storageType = getDriverType(storageDriverId); switch(storageType) { - case "file": + case FILE: storageIO = new FileAccessIO<>(dvObject, null, storageDriverId); break; - case "swift": + case SWIFT: storageIO = new SwiftAccessIO<>(dvObject, null, storageDriverId); break; - case "s3": + case S3: storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); break; default: @@ -273,10 +280,10 @@ public static String expandStorageIdentifierIfNeeded(String newStorageIdentifier String driverType = DataAccess .getDriverType(newStorageIdentifier.substring(0, newStorageIdentifier.indexOf(":"))); logger.fine("drivertype: " + driverType); - if (driverType.equals("http")) { + if (driverType.equals(REMOTE)) { // Add a generated identifier for the aux files logger.fine("in: " + newStorageIdentifier); - int lastColon = newStorageIdentifier.lastIndexOf("://"); + int lastColon = 
newStorageIdentifier.lastIndexOf(SEPARATOR); newStorageIdentifier = newStorageIdentifier.substring(0, lastColon + 3) + FileUtil.generateStorageIdentifier() + "//" + newStorageIdentifier.substring(lastColon + 3); logger.fine("out: " + newStorageIdentifier); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 5c2adee3da9..14ffcd46fce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -136,8 +136,8 @@ public void open (DataAccessOption... options) throws IOException { this.setOutputStream(fout); setChannel(fout.getChannel()); - if (!storageIdentifier.startsWith(this.driverId + "://")) { - dvObject.setStorageIdentifier(this.driverId + "://" + storageIdentifier); + if (!storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { + dvObject.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + storageIdentifier); } } @@ -167,7 +167,7 @@ public void open (DataAccessOption... 
options) throws IOException { if (datasetPath != null && !Files.exists(datasetPath)) { Files.createDirectories(datasetPath); } - dataset.setStorageIdentifier(this.driverId + "://"+dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage()); + dataset.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage()); } } else if (dvObject instanceof Dataverse) { @@ -437,7 +437,7 @@ public String getStorageLocation() { try { Path testPath = getFileSystemPath(); if (testPath != null) { - return this.driverId + "://" + testPath.toString(); + return this.driverId + DataAccess.SEPARATOR + testPath.toString(); } } catch (IOException ioex) { // just return null, below: @@ -654,9 +654,9 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException return in; } private String stripDriverId(String storageIdentifier) { - int separatorIndex = storageIdentifier.indexOf("://"); + int separatorIndex = storageIdentifier.indexOf(DataAccess.SEPARATOR); if(separatorIndex>0) { - return storageIdentifier.substring(separatorIndex + 3); + return storageIdentifier.substring(separatorIndex + DataAccess.SEPARATOR.length()); } return storageIdentifier; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 8a1568d436b..894a8ad52a5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -55,7 +55,7 @@ * StorageIdentifier format: * ://// */ -public class HTTPOverlayAccessIO extends StorageIO { +public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.HttpOverlayAccessIO"); @@ -73,7 +73,7 @@ public class HTTPOverlayAccessIO extends StorageIO { 
private static boolean trustCerts = false; private int httpConcurrency = 4; - public HTTPOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); configureStores(req, driverId, null); @@ -83,7 +83,7 @@ public HTTPOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) t logger.fine("Base URL: " + urlPath); } - public HTTPOverlayAccessIO(String storageLocation, String driverId) throws IOException { + public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { super(null, null, driverId); this.setIsLocalFile(false); configureStores(null, driverId, storageLocation); @@ -337,7 +337,7 @@ public void deleteAllAuxObjects() throws IOException { public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf("://") + 3); + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); if (this.getDvObject() instanceof Dataset) { fullStorageLocation = this.getDataset().getAuthorityForFileStorage() + "/" @@ -429,13 +429,13 @@ private void configureStores(DataAccessRequest req, String driverId, String stor // S3 expects :/// switch (baseDriverType) { - case "s3": - fullStorageLocation = baseDriverId + "://" + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + System.getProperty("dataverse.files." 
+ baseDriverId + ".bucket-name") + "/" + fullStorageLocation; break; - case "file": - fullStorageLocation = baseDriverId + "://" + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + fullStorageLocation; break; @@ -447,17 +447,17 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } else if (storageLocation != null) { // ://// - String storageId = storageLocation.substring(storageLocation.indexOf("://" + 3)); + String storageId = storageLocation.substring(storageLocation.indexOf(DataAccess.SEPARATOR + DataAccess.SEPARATOR.length())); fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); switch (baseDriverType) { - case "s3": - fullStorageLocation = baseDriverId + "://" + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + fullStorageLocation; break; - case "file": - fullStorageLocation = baseDriverId + "://" + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + fullStorageLocation; break; @@ -469,7 +469,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); } - if (baseDriverType.contentEquals("s3")) { + if (baseDriverType.contentEquals(DataAccess.S3)) { ((S3AccessIO) baseStore).setMainDriver(false); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 335a8c5592b..817136f8735 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -177,22 +177,22 @@ public void open(DataAccessOption... 
options) throws IOException { //Fix new DataFiles: DataFiles that have not yet been saved may use this method when they don't have their storageidentifier in the final ://: form // So we fix it up here. ToDo: refactor so that storageidentifier is generated by the appropriate StorageIO class and is final from the start. String newStorageIdentifier = null; - if (storageIdentifier.startsWith(this.driverId + "://")) { - if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { + if (storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { + if(!storageIdentifier.substring((this.driverId + DataAccess.SEPARATOR).length()).contains(":")) { //Driver id but no bucket if(bucketName!=null) { - newStorageIdentifier=this.driverId + "://" + bucketName + ":" + storageIdentifier.substring((this.driverId + "://").length()); + newStorageIdentifier=this.driverId + DataAccess.SEPARATOR + bucketName + ":" + storageIdentifier.substring((this.driverId + DataAccess.SEPARATOR).length()); } else { throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's bucketname) } else { - if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { + if(!storageIdentifier.substring((this.driverId + DataAccess.SEPARATOR).length()).contains(":")) { //No driver id or bucket - newStorageIdentifier= this.driverId + "://" + bucketName + ":" + storageIdentifier; + newStorageIdentifier= this.driverId + DataAccess.SEPARATOR + bucketName + ":" + storageIdentifier; } else { //Just the bucketname - newStorageIdentifier= this.driverId + "://" + storageIdentifier; + newStorageIdentifier= this.driverId + DataAccess.SEPARATOR + storageIdentifier; } } if(newStorageIdentifier != null) { @@ -238,7 +238,7 @@ public void open(DataAccessOption... 
options) throws IOException { } else if (dvObject instanceof Dataset) { Dataset dataset = this.getDataset(); key = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - dataset.setStorageIdentifier(this.driverId + "://" + key); + dataset.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + key); } else if (dvObject instanceof Dataverse) { throw new IOException("Data Access: Storage driver does not support dvObject type Dataverse yet"); } else { @@ -732,7 +732,7 @@ public String getStorageLocation() throws IOException { throw new IOException("Failed to obtain the S3 key for the file"); } - return this.driverId + "://" + bucketName + "/" + locationKey; + return this.driverId + DataAccess.SEPARATOR + bucketName + "/" + locationKey; } @Override @@ -831,7 +831,7 @@ private static String getMainFileKey(String baseKey, String storageIdentifier, S throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); } - if (storageIdentifier.indexOf(driverId + "://")>=0) { + if (storageIdentifier.indexOf(driverId + DataAccess.SEPARATOR)>=0) { //String driverId = storageIdentifier.substring(0, storageIdentifier.indexOf("://")+3); //As currently implemented (v4.20), the bucket is part of the identifier and we could extract it and compare it with getBucketName() as a check - //Only one bucket per driver is supported (though things might work if the profile creds work with multiple buckets, then again it's not clear when logic is reading from the driver property or from the DataFile). 
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index b128d79f7f9..e499e851258 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -66,7 +66,7 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { this.req = new DataAccessRequest(); } if (this.driverId == null) { - this.driverId = "file"; + this.driverId = DataAccess.FILE; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 3bc29cb9836..2e5cebf47d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -508,7 +508,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt if (dvObject instanceof DataFile) { Dataset owner = this.getDataFile().getOwner(); - if (storageIdentifier.startsWith(this.driverId + "://")) { + if (storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { // This is a call on an already existing swift object. String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3); @@ -552,14 +552,14 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt //setSwiftContainerName(swiftFolderPath); //swiftFileName = dataFile.getDisplayName(); //Storage Identifier is now updated after the object is uploaded on Swift. 
- dvObject.setStorageIdentifier(this.driverId + "://" + swiftDefaultEndpoint + ":" + swiftFolderPath + ":" + swiftFileName); + dvObject.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + swiftDefaultEndpoint + ":" + swiftFolderPath + ":" + swiftFileName); } else { throw new IOException("SwiftAccessIO: unknown access mode."); } } else if (dvObject instanceof Dataset) { Dataset dataset = this.getDataset(); - if (storageIdentifier.startsWith(this.driverId + "://")) { + if (storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { // This is a call on an already existing swift object. //TODO: determine how storage identifier will give us info @@ -601,7 +601,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt swiftPseudoFolderPathSeparator + dataset.getIdentifierForFileStorage(); swiftFileName = auxItemTag; - dvObject.setStorageIdentifier(this.driverId + "://" + swiftEndPoint + ":" + swiftFolderPath); + dvObject.setStorageIdentifier(this.driverId + DataAccess.SEPARATOR + swiftEndPoint + ":" + swiftFolderPath); } else { throw new IOException("SwiftAccessIO: unknown access mode."); } @@ -628,7 +628,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt other swiftContainerName Object Store pseudo-folder can be created, which is not provide by the joss Java swift library as of yet. */ - if (storageIdentifier.startsWith(this.driverId + "://")) { + if (storageIdentifier.startsWith(this.driverId + DataAccess.SEPARATOR)) { // An existing swift object; the container must already exist as well. 
this.swiftContainer = account.getContainer(swiftContainerName); } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index ccf947b8868..ee670b187b2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -457,12 +457,12 @@ public static List getDatasetSummaryFields(DatasetVersion datasetV } public static boolean isAppropriateStorageDriver(Dataset dataset){ - // ToDo - rsync was written before multiple store support and currently is hardcoded to use the "s3" store. + // ToDo - rsync was written before multiple store support and currently is hardcoded to use the DataAccess.S3 store. // When those restrictions are lifted/rsync can be configured per store, this test should check that setting // instead of testing for the 's3" store, //This method is used by both the dataset and edit files page so one change here //will fix both - return dataset.getEffectiveStorageDriverId().equals("s3"); + return dataset.getEffectiveStorageDriverId().equals(DataAccess.S3); } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index ec544d9490a..1465cbd74e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -102,7 +102,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } if (theDataset.getStorageIdentifier() == null) { String driverId = theDataset.getEffectiveStorageDriverId(); - theDataset.setStorageIdentifier(driverId + "://" + theDataset.getAuthorityForFileStorage() + "/" + theDataset.getIdentifierForFileStorage()); + theDataset.setStorageIdentifier(driverId + 
DataAccess.SEPARATOR + theDataset.getAuthorityForFileStorage() + "/" + theDataset.getIdentifierForFileStorage()); } if (theDataset.getIdentifier()==null) { theDataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(theDataset, idServiceBean)); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 8d3d63da99d..c3b2b59a0b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1396,7 +1396,7 @@ public static void generateS3PackageStorageIdentifier(DataFile dataFile) { String driverId = dataFile.getOwner().getEffectiveStorageDriverId(); String bucketName = System.getProperty("dataverse.files." + driverId + ".bucket-name"); - String storageId = driverId + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); + String storageId = driverId + DataAccess.SEPARATOR + bucketName + ":" + dataFile.getFileMetadata().getLabel(); dataFile.setStorageIdentifier(storageId); } @@ -1842,7 +1842,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio } public static String getStorageIdentifierFromLocation(String location) { - int driverEnd = location.indexOf("://") + 3; + int driverEnd = location.indexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length(); int bucketEnd = driverEnd + location.substring(driverEnd).indexOf("/"); return location.substring(0,bucketEnd) + ":" + location.substring(location.lastIndexOf("/") + 1); } @@ -1878,7 +1878,7 @@ public static void deleteTempFile(DataFile dataFile, Dataset dataset, IngestServ } } String si = dataFile.getStorageIdentifier(); - if (si.contains("://")) { + if (si.contains(DataAccess.SEPARATOR)) { //Direct upload files will already have a store id in their storageidentifier //but they need to be associated with a dataset for the overall storagelocation to be calculated //so we temporarily set the owner @@ -1897,7 +1897,7 @@ 
public static void deleteTempFile(DataFile dataFile, Dataset dataset, IngestServ } catch (IOException ioEx) { // safe to ignore - it's just a temp file. logger.warning(ioEx.getMessage()); - if(dataFile.getStorageIdentifier().contains("://")) { + if(dataFile.getStorageIdentifier().contains(DataAccess.SEPARATOR)) { logger.warning("Failed to delete temporary file " + dataFile.getStorageIdentifier()); } else { logger.warning("Failed to delete temporary file " + FileUtil.getFilesTempDirectory() + "/" From bebc2758c8245b1f2432e112671ed2c7adb51b5a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Apr 2022 09:49:25 -0400 Subject: [PATCH 29/76] refactor strings to RemoteOverlay --- .../dataaccess/RemoteOverlayAccessIO.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 894a8ad52a5..32c1a979928 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -50,14 +50,14 @@ * @param what it stores */ /* - * HTTP Overlay Driver + * Remote Overlay Driver * * StorageIdentifier format: * ://// */ public class RemoteOverlayAccessIO extends StorageIO { - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.HttpOverlayAccessIO"); + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); private StorageIO baseStore = null; private String urlPath = null; @@ -153,10 +153,10 @@ public void open(DataAccessOption... 
options) throws IOException { } } else if (dvObject instanceof Dataset) { throw new IOException( - "Data Access: HTTPOverlay Storage driver does not support dvObject type Dataverse yet"); + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); } else if (dvObject instanceof Dataverse) { throw new IOException( - "Data Access: HTTPOverlay Storage driver does not support dvObject type Dataverse yet"); + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); } else { this.setSize(getSizeFromHttpHeader()); } @@ -346,7 +346,7 @@ public String getStorageLocation() throws IOException { fullStorageLocation = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStorageLocation; } else if (dvObject instanceof Dataverse) { - throw new IOException("HttpOverlayAccessIO: Dataverses are not a supported dvObject"); + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); } return fullStorageLocation; } @@ -354,7 +354,7 @@ public String getStorageLocation() throws IOException { @Override public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); + "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); } @Override @@ -366,13 +366,13 @@ public boolean exists() { @Override public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: there are no write Channels associated with S3 objects."); + "RemoteOverlayAccessIO: there are no write Channels associated with S3 objects."); } @Override public OutputStream getOutputStream() throws 
UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: there are no output Streams associated with S3 objects."); + "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); } @Override @@ -440,13 +440,13 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + fullStorageLocation; break; default: - logger.warning("Not Implemented: HTTPOverlay store with base store type: " + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); throw new IOException("Not implemented"); } } else if (storageLocation != null) { - // ://// + // ://// String storageId = storageLocation.substring(storageLocation.indexOf(DataAccess.SEPARATOR + DataAccess.SEPARATOR.length())); fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); @@ -462,7 +462,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + fullStorageLocation; break; default: - logger.warning("Not Implemented: HTTPOverlay store with base store type: " + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); throw new IOException("Not implemented"); } @@ -513,21 +513,21 @@ private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementExcept @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: savePath() not implemented in this storage driver."); + "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream) throws IOException { throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { throw new UnsupportedDataAccessOperationException( - "HttpOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); } From edc915254c915cc7ec6e8f8e8ce75fd3b9658053 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Apr 2022 12:19:36 -0400 Subject: [PATCH 30/76] add basic support for remote tag/label in file table --- .../dataverse/dataaccess/RemoteOverlayAccessIO.java | 8 ++++++++ .../harvard/iq/dataverse/dataaccess/StorageIO.java | 11 +++++++++++ src/main/java/propertyFiles/Bundle.properties | 2 ++ src/main/webapp/filesFragment.xhtml | 6 +++++- src/main/webapp/resources/css/structure.css | 11 +++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 32c1a979928..89c7b7ed7c9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -11,6 +11,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; @@ -473,6 +475,12 @@ private void configureStores(DataAccessRequest req, String driverId, String stor ((S3AccessIO) baseStore).setMainDriver(false); } } + remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remoteStoreName"); + try { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remoteStoreUrl")); + } catch(MalformedURLException mfue) { + logger.warning("Unable to read remoteStoreUrl for driver: " + this.driverId); + } } public CloseableHttpClient getSharedHttpClient() { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index e499e851258..95eabe51e96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; @@ -222,6 +223,8 @@ public boolean canWrite() { private String swiftFileName; private String remoteUrl; + protected String remoteStoreName = null; + protected URL remoteStoreUrl = null; // For HTTP-based downloads: /*private GetMethod method = null; @@ -330,6 +333,14 @@ public String getSwiftContainerName(){ return swiftContainerName; } + public String getRemoteStoreName() { + return remoteStoreName; + } + + public URL getRemoteStoreUrl() { + return remoteStoreUrl; + } + /*public GetMethod getHTTPMethod() { return 
method; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 588b26a1822..2fcef237fd5 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1932,6 +1932,8 @@ file.results.btn.sort.option.type=Type file.compute.fileAccessDenied=This file is restricted and you may not compute on it because you have not been granted access. file.configure.Button=Configure +file.remotelyStored=This file is stored remotely - click for more info + file.auxfiles.download.header=Download Auxiliary Files # These types correspond to the AuxiliaryFile.Type enum. file.auxfiles.types.DP=Differentially Private Statistics diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index 49dc7fcbe68..7e05df63aae 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -484,7 +484,11 @@ #{bundle['file.accessRequested']}  - +
+ #{fileMetadata.dataFile.storageIO.remoteStoreName} + #{fileMetadata.dataFile.storageIO.remoteStoreName} +
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index a2c0f79e4fb..5a081bea063 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -771,6 +771,17 @@ div[id$="filesTable"] thead[id$="filesTable_head"] th.ui-selection-column .ui-ch /* Non standard for webkit */ word-break: break-word; } +/*Remote Store Branding*/ +.remote-info { + width: fit-content; + margin-left: auto; + margin-right: 10px; + display: block; + padding:5px; +} +.remote-info > a { + color:white; +} /* REQUEST ACCESS DOWNLOAD OPTION LINK */ div[id$="requestPanel"].iq-dropdown-list-item {display:list-item !important;} From 648ee1cd53bf9e4ca9118a328cb0d108aba0bf0e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Apr 2022 12:19:57 -0400 Subject: [PATCH 31/76] start doc changes --- doc/sphinx-guides/source/installation/config.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f890f5312ff..614871c6769 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,13 +238,15 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. 
Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. -File Storage: Using a Local Filesystem and/or Swift and/or object stores ------------------------------------------------------------------------- +File Storage: Using a Local Filesystem and/or Swift and/or object stores and/or trusted remote services +------------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara5/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. +Dataverse may also be configured to reference some files (e.g. large and/or sensitive data) stored in a trusted remote web-accessible system. + The following sections describe how to set up various types of stores and how to configure for multiple stores. Multi-store Basics @@ -622,6 +624,10 @@ Migrating from Local Storage to S3 Is currently documented on the :doc:`/developers/deployment` page. +Trusted Remote Storage +++++++++++++++++++++++ + + .. 
_Branding Your Installation: From 570e97a04e8e9eec4b81f199a9d46eacad795113 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Apr 2022 14:19:41 -0400 Subject: [PATCH 32/76] documentation, tweak to new branding property names --- .../source/installation/config.rst | 25 +++++++++++++++++++ .../dataaccess/RemoteOverlayAccessIO.java | 4 +-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 614871c6769..359d38ec595 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -627,6 +627,31 @@ Is currently documented on the :doc:`/developers/deployment` page. Trusted Remote Storage ++++++++++++++++++++++ +In addition to having the type "remote" and requiring a label, Trusted Remote Stores are defined in terms of a baseURL - all files managed by this store must be at a path starting with this URL, and a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). +These and other available options are described in the table below. + +Remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity +and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in Dataverse. + +Trusted remote stores +.. table:: + :align: left + + =========================================== ================== ========================================================================== ============= + JVM Option Value Description Default value + =========================================== ================== ========================================================================== ============= + dataverse.files..type ``remote`` **Required** to mark this storage as remote. 
(none) + dataverse.files..label **Required** label to be shown in the UI for this storage (none) + dataverse.files..baseUrl **Required** All files must have URLs of the form /* (none) + dataverse.files..baseStore **Required** The id of a base store (of type file, s3, or swift) (none) + dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` + dataverse.files..secretkey A key used to sign download requests sent to the remote store. Optional. (none) + dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 + dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) + dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) + + =========================================== ================== ========================================================================== ============= + .. _Branding Your Installation: diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 89c7b7ed7c9..2f6a2f80259 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -475,9 +475,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor ((S3AccessIO) baseStore).setMainDriver(false); } } - remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remoteStoreName"); + remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remoteStoreUrl")); + remoteStoreUrl = new URL(System.getProperty("dataverse.files." 
+ this.driverId + ".remote-store-url")); } catch(MalformedURLException mfue) { logger.warning("Unable to read remoteStoreUrl for driver: " + this.driverId); } From 62b548824e3eaa436e18a64b4019502f37b0c1f0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Apr 2022 14:23:34 -0400 Subject: [PATCH 33/76] typo --- doc/sphinx-guides/source/installation/config.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 359d38ec595..3e547fab513 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -633,7 +633,6 @@ These and other available options are described in the table below. Remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in Dataverse. -Trusted remote stores .. 
table:: :align: left From e62a163b7dafda7a04461ca68772f7ca63eb6ef6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Apr 2022 14:27:41 -0400 Subject: [PATCH 34/76] fix tabs in preexisting code --- .../java/edu/harvard/iq/dataverse/EditDatafilesPage.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index b8dabd0e699..f697bd1f4ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -2064,10 +2064,10 @@ public void handleExternalUpload() { String storageLocation = fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); storageLocation = DataAccess.expandStorageIdentifierIfNeeded(storageLocation); - if (uploadInProgress.isFalse()) { - uploadInProgress.setValue(true); - } - logger.fine("handleExternalUpload"); + if (uploadInProgress.isFalse()) { + uploadInProgress.setValue(true); + } + logger.fine("handleExternalUpload"); StorageIO sio; String localWarningMessage = null; From 3d3aab644ca68e1db7d42f05f2fcd3b6593f9d03 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 10 May 2022 13:09:48 -0400 Subject: [PATCH 35/76] typos --- src/main/webapp/filesFragment.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index 7e05df63aae..13b36e1e23e 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -485,8 +485,8 @@ #{bundle['file.accessRequested']} 
- #{fileMetadata.dataFile.storageIO.remoteStoreName} + title="#{bundle['file.remotelyStored']}"> + #{fileMetadata.dataFile.storageIO.remoteStoreName} #{fileMetadata.dataFile.storageIO.remoteStoreName}
From 9133de7292cb8d7e7ffb23e015cc9e36956a0dec Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 8 Jun 2022 10:49:40 -0400 Subject: [PATCH 36/76] cut/paste logic error re: remote tag --- src/main/webapp/filesFragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index 13b36e1e23e..be3307ff081 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -487,7 +487,7 @@
#{fileMetadata.dataFile.storageIO.remoteStoreName} - #{fileMetadata.dataFile.storageIO.remoteStoreName} + #{fileMetadata.dataFile.storageIO.remoteStoreName}
From e8c3ed3296983bcefc8877f41faf8cdf1aee6ede Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Jul 2022 15:27:07 -0400 Subject: [PATCH 37/76] force lowercase for hash values - that's what is generated internally --- .../iq/dataverse/datasetutility/OptionalFileParams.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 35687151090..25240349bfb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -396,7 +396,7 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // ------------------------------- if ((jsonObj.has(LEGACY_CHECKSUM_ATTR_NAME)) && (!jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).isJsonNull())){ - this.checkSumValue = jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).getAsString(); + this.checkSumValue = jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).getAsString().toLowerCase(); this.checkSumType= ChecksumType.MD5; } // ------------------------------- @@ -404,7 +404,7 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // ------------------------------- else if ((jsonObj.has(CHECKSUM_OBJECT_NAME)) && (!jsonObj.get(CHECKSUM_OBJECT_NAME).isJsonNull())){ - this.checkSumValue = ((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_VALUE).getAsString(); + this.checkSumValue = ((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_VALUE).getAsString().toLowerCase(); this.checkSumType = ChecksumType.fromString(((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_TYPE).getAsString()); } From 1bad2f38c1bcc91eccd2961abbe55edbddbecb5e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Jul 2022 15:40:07 -0400 Subject: [PATCH 38/76] log mismatched checksum values --- 
src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 1e4ea61ebc0..10393de057b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1851,6 +1851,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio if (!fixed) { String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); logger.log(Level.INFO, info); + logger.fine("Expected: " + dataFile.getChecksumValue() +", calculated: " + recalculatedChecksum); throw new IOException(info); } } From 37e258164ee5d595f1179e6b603dab8927812579 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Jul 2022 16:32:16 -0400 Subject: [PATCH 39/76] refactor for download redirect in remoteoverlaystore --- .../dataverse/api/DownloadInstanceWriter.java | 11 +++++------ .../dataaccess/RemoteOverlayAccessIO.java | 19 ++++++++++++++----- .../iq/dataverse/dataaccess/StorageIO.java | 18 +++++++++++------- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index bba2a5dcdc0..3ac7f301ecd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -93,7 +93,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): - if (storageIO instanceof S3AccessIO && ((S3AccessIO) storageIO).downloadRedirectEnabled()) { + if (storageIO.downloadRedirectEnabled()) { // 
Even if the above is true, there are a few cases where a // redirect is not applicable. @@ -188,16 +188,15 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // [attempt to] redirect: String redirect_url_str; try { - redirect_url_str = ((S3AccessIO) storageIO).generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); + redirect_url_str = storageIO.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); } catch (IOException ioex) { + logger.warning("Unable to generate downloadURL for " + dataFile.getId() + ": " + auxiliaryTag); + //Setting null will let us try to get the file/aux file w/o redirecting redirect_url_str = null; } - if (redirect_url_str == null) { - throw new ServiceUnavailableException(); - } - logger.fine("Data Access API: direct S3 url: " + redirect_url_str); + URI redirect_uri; try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 2f6a2f80259..ec730e770d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -391,12 +391,21 @@ public boolean downloadRedirectEnabled() { return false; } - public String generateTemporaryDownloadUrl() throws IOException { - String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secretkey"); - if (secretKey == null) { - return baseUrl + "/" + urlPath; + @Override + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { + + // ToDo - support remote auxiliary Files + if (auxiliaryTag == null) { + String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secretkey"); + if (secretKey == null) { + return baseUrl + "/" + urlPath; + } else { + return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", + secretKey); + } } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", secretKey); + return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 95eabe51e96..63c5af038e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -576,11 +576,15 @@ public boolean isBelowIngestSizeLimit() { } } - public boolean downloadRedirectEnabled() { - return false; - } - - public String generateTemporaryDownloadUrl() throws IOException { - throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); - } + public boolean downloadRedirectEnabled() { + return false; + } + + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { + throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); + } + + public static String getDriverPrefix(String driverId) { + return driverId+ DataAccess.SEPARATOR; + } } From a401048abeb2e70005a48045c4dd4fd87921e126 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 21 Jun 2022 11:24:05 -0400 Subject: [PATCH 40/76] refactor to allow URL token substitution outside tools framework --- .../dataverse/externaltools/ExternalTool.java | 57 ----- .../externaltools/ExternalToolHandler.java | 115 +-------- .../ExternalToolServiceBean.java | 3 +- .../iq/dataverse/util/URLTokenUtil.java | 231 ++++++++++++++++++ .../iq/dataverse/util/UrlTokenUtilTest.java | 50 ++++ 5 files 
changed, 289 insertions(+), 167 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index c996e332bdb..7f94b1bbbbf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -276,63 +276,6 @@ public JsonObjectBuilder toJson() { return jab; } - public enum ReservedWord { - - // TODO: Research if a format like "{reservedWord}" is easily parse-able or if another format would be - // better. The choice of curly braces is somewhat arbitrary, but has been observed in documenation for - // various REST APIs. For example, "Variable substitutions will be made when a variable is named in {brackets}." - // from https://swagger.io/specification/#fixed-fields-29 but that's for URLs. - FILE_ID("fileId"), - FILE_PID("filePid"), - SITE_URL("siteUrl"), - API_TOKEN("apiToken"), - // datasetId is the database id - DATASET_ID("datasetId"), - // datasetPid is the DOI or Handle - DATASET_PID("datasetPid"), - DATASET_VERSION("datasetVersion"), - FILE_METADATA_ID("fileMetadataId"), - LOCALE_CODE("localeCode"); - - private final String text; - private final String START = "{"; - private final String END = "}"; - - private ReservedWord(final String text) { - this.text = START + text + END; - } - - /** - * This is a centralized method that enforces that only reserved words - * are allowed to be used by external tools. External tool authors - * cannot pass their own query parameters through Dataverse such as - * "mode=mode1". 
- * - * @throws IllegalArgumentException - */ - public static ReservedWord fromString(String text) throws IllegalArgumentException { - if (text != null) { - for (ReservedWord reservedWord : ReservedWord.values()) { - if (text.equals(reservedWord.text)) { - return reservedWord; - } - } - } - // TODO: Consider switching to a more informative message that enumerates the valid reserved words. - boolean moreInformativeMessage = false; - if (moreInformativeMessage) { - throw new IllegalArgumentException("Unknown reserved word: " + text + ". A reserved word must be one of these values: " + Arrays.asList(ReservedWord.values()) + "."); - } else { - throw new IllegalArgumentException("Unknown reserved word: " + text); - } - } - - @Override - public String toString() { - return text; - } - } - public String getDescriptionLang() { String description = ""; if (this.toolName != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index a4a51666cc5..96fd2618459 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -4,14 +4,13 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.externaltools.ExternalTool.ReservedWord; -import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.io.StringReader; import java.util.ArrayList; import java.util.List; -import java.util.logging.Logger; + import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; @@ -22,18 +21,9 @@ * instantiated. 
Applies logic based on an {@link ExternalTool} specification, * such as constructing a URL to access that file. */ -public class ExternalToolHandler { - - private static final Logger logger = Logger.getLogger(ExternalToolHandler.class.getCanonicalName()); +public class ExternalToolHandler extends URLTokenUtil { private final ExternalTool externalTool; - private final DataFile dataFile; - private final Dataset dataset; - private final FileMetadata fileMetadata; - - private ApiToken apiToken; - private String localeCode; - /** * File level tool * @@ -43,22 +33,8 @@ public class ExternalToolHandler { * used anonymously. */ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) { + super(dataFile, apiToken, fileMetadata, localeCode); this.externalTool = externalTool; - if (dataFile == null) { - String error = "A DataFile is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - if (fileMetadata == null) { - String error = "A FileMetadata is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - this.dataFile = dataFile; - this.apiToken = apiToken; - this.fileMetadata = fileMetadata; - dataset = fileMetadata.getDatasetVersion().getDataset(); - this.localeCode = localeCode; } /** @@ -70,33 +46,8 @@ public ExternalToolHandler(ExternalTool externalTool, DataFile dataFile, ApiToke * used anonymously. 
*/ public ExternalToolHandler(ExternalTool externalTool, Dataset dataset, ApiToken apiToken, String localeCode) { + super(dataset, apiToken, localeCode); this.externalTool = externalTool; - if (dataset == null) { - String error = "A Dataset is required."; - logger.warning("Error in ExternalToolHandler constructor: " + error); - throw new IllegalArgumentException(error); - } - this.dataset = dataset; - this.apiToken = apiToken; - this.dataFile = null; - this.fileMetadata = null; - this.localeCode = localeCode; - } - - public DataFile getDataFile() { - return dataFile; - } - - public FileMetadata getFileMetadata() { - return fileMetadata; - } - - public ApiToken getApiToken() { - return apiToken; - } - - public String getLocaleCode() { - return localeCode; } // TODO: rename to handleRequest() to someday handle sending headers as well as query parameters. @@ -130,60 +81,6 @@ public String getQueryParametersForUrl(boolean preview) { } } - private String getQueryParam(String key, String value) { - ReservedWord reservedWord = ReservedWord.fromString(value); - switch (reservedWord) { - case FILE_ID: - // getDataFile is never null for file tools because of the constructor - return key + "=" + getDataFile().getId(); - case FILE_PID: - GlobalId filePid = getDataFile().getGlobalId(); - if (filePid != null) { - return key + "=" + getDataFile().getGlobalId(); - } - break; - case SITE_URL: - return key + "=" + SystemConfig.getDataverseSiteUrlStatic(); - case API_TOKEN: - String apiTokenString = null; - ApiToken theApiToken = getApiToken(); - if (theApiToken != null) { - apiTokenString = theApiToken.getTokenString(); - return key + "=" + apiTokenString; - } - break; - case DATASET_ID: - return key + "=" + dataset.getId(); - case DATASET_PID: - return key + "=" + dataset.getGlobalId().asString(); - case DATASET_VERSION: - String versionString = null; - if(fileMetadata!=null) { //true for file case - versionString = fileMetadata.getDatasetVersion().getFriendlyVersionNumber(); - } 
else { //Dataset case - return the latest visible version (unless/until the dataset case allows specifying a version) - if (getApiToken() != null) { - versionString = dataset.getLatestVersion().getFriendlyVersionNumber(); - } else { - versionString = dataset.getLatestVersionForCopy().getFriendlyVersionNumber(); - } - } - if (("DRAFT").equals(versionString)) { - versionString = ":draft"; // send the token needed in api calls that can be substituted for a numeric - // version. - } - return key + "=" + versionString; - case FILE_METADATA_ID: - if(fileMetadata!=null) { //true for file case - return key + "=" + fileMetadata.getId(); - } - case LOCALE_CODE: - return key + "=" + getLocaleCode(); - default: - break; - } - return null; - } - public String getToolUrlWithQueryParams() { return externalTool.getToolUrl() + getQueryParametersForUrl(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index 95fd900e4d2..d49d66c26f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -3,8 +3,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.externaltools.ExternalTool.ReservedWord; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Type; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.URLTokenUtil.ReservedWord; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Scope; import java.io.StringReader; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java new file mode 100644 index 00000000000..78280cd0f0f --- /dev/null +++ 
b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -0,0 +1,231 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.Arrays; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; + +public class URLTokenUtil { + + protected static final Logger logger = Logger.getLogger(URLTokenUtil.class.getCanonicalName()); + protected final DataFile dataFile; + protected final Dataset dataset; + protected final FileMetadata fileMetadata; + protected ApiToken apiToken; + protected String localeCode; + + /** + * File level + * + * @param dataFile Required. + * @param apiToken The apiToken can be null + * @param fileMetadata Required. + * @param localeCode optional. + * + */ + public URLTokenUtil(DataFile dataFile, ApiToken apiToken, FileMetadata fileMetadata, String localeCode) + throws IllegalArgumentException { + if (dataFile == null) { + String error = "A DataFile is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + if (fileMetadata == null) { + String error = "A FileMetadata is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + this.dataFile = dataFile; + this.dataset = fileMetadata.getDatasetVersion().getDataset(); + this.fileMetadata = fileMetadata; + this.apiToken = apiToken; + this.localeCode = localeCode; + } + + /** + * Dataset level + * + * @param dataset Required. 
+ * @param apiToken The apiToken can be null + */ + public URLTokenUtil(Dataset dataset, ApiToken apiToken, String localeCode) { + if (dataset == null) { + String error = "A Dataset is required."; + logger.warning("Error in URLTokenUtil constructor: " + error); + throw new IllegalArgumentException(error); + } + this.dataset = dataset; + this.dataFile = null; + this.fileMetadata = null; + this.apiToken = apiToken; + this.localeCode = localeCode; + } + + public DataFile getDataFile() { + return dataFile; + } + + public FileMetadata getFileMetadata() { + return fileMetadata; + } + + public ApiToken getApiToken() { + return apiToken; + } + + public String getLocaleCode() { + return localeCode; + } + + public String getQueryParam(String key, String value) { + String tokenValue = null; + tokenValue = getTokenValue(value); + if (tokenValue != null) { + return key + '=' + tokenValue; + } else { + return null; + } + } + + /** + * Tries to replace all occurrences of {} with the value for the + * corresponding ReservedWord + * + * @param url - the input string in which to replace tokens, normally a url + * @throws IllegalArgumentException if there is no matching ReservedWord or if + * the configuation of this instance doesn't + * have values for this ReservedWord (e.g. + * asking for FILE_PID when using the dataset + * constructor, etc.) 
+ */ + public String replaceTokensWithValues(String url) { + String newUrl = url; + Pattern pattern = Pattern.compile("(\\{.*?\\})"); + Matcher matcher = pattern.matcher(url); + while(matcher.find()) { + String token = matcher.group(1); + ReservedWord reservedWord = ReservedWord.fromString(token); + String tValue = getTokenValue(token); + logger.info("Replacing " + reservedWord.toString() + " with " + tValue + " in " + newUrl); + newUrl = newUrl.replace(reservedWord.toString(), tValue); + } + return newUrl; + } + + private String getTokenValue(String value) { + ReservedWord reservedWord = ReservedWord.fromString(value); + switch (reservedWord) { + case FILE_ID: + // getDataFile is never null for file tools because of the constructor + return getDataFile().getId().toString(); + case FILE_PID: + GlobalId filePid = getDataFile().getGlobalId(); + if (filePid != null) { + return getDataFile().getGlobalId().asString(); + } + break; + case SITE_URL: + return SystemConfig.getDataverseSiteUrlStatic(); + case API_TOKEN: + String apiTokenString = null; + ApiToken theApiToken = getApiToken(); + if (theApiToken != null) { + apiTokenString = theApiToken.getTokenString(); + } + return apiTokenString; + case DATASET_ID: + return dataset.getId().toString(); + case DATASET_PID: + return dataset.getGlobalId().asString(); + case DATASET_VERSION: + String versionString = null; + if (fileMetadata != null) { // true for file case + versionString = fileMetadata.getDatasetVersion().getFriendlyVersionNumber(); + } else { // Dataset case - return the latest visible version (unless/until the dataset + // case allows specifying a version) + if (getApiToken() != null) { + versionString = dataset.getLatestVersion().getFriendlyVersionNumber(); + } else { + versionString = dataset.getLatestVersionForCopy().getFriendlyVersionNumber(); + } + } + if (("DRAFT").equals(versionString)) { + versionString = ":draft"; // send the token needed in api calls that can be substituted for a numeric + // version. 
+ } + return versionString; + case FILE_METADATA_ID: + if (fileMetadata != null) { // true for file case + return fileMetadata.getId().toString(); + } + case LOCALE_CODE: + return getLocaleCode(); + default: + break; + } + throw new IllegalArgumentException("Cannot replace reserved word: " + value); + } + + public enum ReservedWord { + + // TODO: Research if a format like "{reservedWord}" is easily parse-able or if + // another format would be + // better. The choice of curly braces is somewhat arbitrary, but has been + // observed in documentation for + // various REST APIs. For example, "Variable substitutions will be made when a + // variable is named in {brackets}." + // from https://swagger.io/specification/#fixed-fields-29 but that's for URLs. + FILE_ID("fileId"), FILE_PID("filePid"), SITE_URL("siteUrl"), API_TOKEN("apiToken"), + // datasetId is the database id + DATASET_ID("datasetId"), + // datasetPid is the DOI or Handle + DATASET_PID("datasetPid"), DATASET_VERSION("datasetVersion"), FILE_METADATA_ID("fileMetadataId"), + LOCALE_CODE("localeCode"); + + private final String text; + private final String START = "{"; + private final String END = "}"; + + private ReservedWord(final String text) { + this.text = START + text + END; + } + + /** + * This is a centralized method that enforces that only reserved words are + * allowed to be used by external tools. External tool authors cannot pass their + * own query parameters through Dataverse such as "mode=mode1". + * + * @throws IllegalArgumentException + */ + public static ReservedWord fromString(String text) throws IllegalArgumentException { + if (text != null) { + for (ReservedWord reservedWord : ReservedWord.values()) { + if (text.equals(reservedWord.text)) { + return reservedWord; + } + } + } + // TODO: Consider switching to a more informative message that enumerates the + // valid reserved words. 
+ boolean moreInformativeMessage = false; + if (moreInformativeMessage) { + throw new IllegalArgumentException( + "Unknown reserved word: " + text + ". A reserved word must be one of these values: " + + Arrays.asList(ReservedWord.values()) + "."); + } else { + throw new IllegalArgumentException("Unknown reserved word: " + text); + } + } + + @Override + public String toString() { + return text; + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java new file mode 100644 index 00000000000..ffc6b813045 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java @@ -0,0 +1,50 @@ +package edu.harvard.iq.dataverse.util; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +public class UrlTokenUtilTest { + + @Test + public void testGetToolUrlWithOptionalQueryParameters() { + + DataFile dataFile = new DataFile(); + dataFile.setId(42l); + FileMetadata fmd = new FileMetadata(); + DatasetVersion dv = new DatasetVersion(); + Dataset ds = new Dataset(); + ds.setId(50L); + ds.setGlobalId(new GlobalId("doi:10.5072/FK2ABCDEF")); + dv.setDataset(ds); + fmd.setDatasetVersion(dv); + List fmdl = new ArrayList(); + fmdl.add(fmd); + dataFile.setFileMetadatas(fmdl); + ApiToken apiToken = new ApiToken(); + apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); + URLTokenUtil urlTokenUtil = new 
URLTokenUtil(dataFile, apiToken, fmd, "en"); + assertEquals("en", urlTokenUtil.replaceTokensWithValues("{localeCode}")); + assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); + assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); + + assertEquals("https://librascholar.org/api/files/42/metadata?key=" + apiToken.getTokenString(), urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); + + URLTokenUtil urlTokenUtil2 = new URLTokenUtil(ds, apiToken, "en"); + assertEquals("https://librascholar.org/api/datasets/50?key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); + assertEquals("https://librascholar.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); + } +} From e23fb30547149dd40e94bfa86bc7a46ea10a56bc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 26 Jul 2022 14:49:47 -0400 Subject: [PATCH 41/76] support passthrough for uploading files --- .../iq/dataverse/dataaccess/DataAccess.java | 3 +++ .../dataaccess/RemoteOverlayAccessIO.java | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 14ead925445..63730c4511d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -203,6 +203,9 @@ public static StorageIO createNewStorageIO(T dvObject, S case S3: storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); break; + case REMOTE: + storageIO = createNewStorageIO(dvObject, storageTag, 
RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; + break; default: logger.warning("Could not find storage driver for: " + storageTag); throw new IOException("createDataAccessObject: Unsupported storage method " + storageDriverId); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index ec730e770d2..7c70c6b867f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -339,8 +339,14 @@ public void deleteAllAuxObjects() throws IOException { public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStorageLocation.indexOf("//"); + if(suffixIndex >=0) { + fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); + } if (this.getDvObject() instanceof Dataset) { fullStorageLocation = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStorageLocation; @@ -429,7 +435,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor baseUrl = System.getProperty("dataverse.files." + this.driverId + ".baseUrl"); if (baseStore == null) { - String baseDriverId = System.getProperty("dataverse.files." 
+ driverId + ".baseStore"); + String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type"); if(dvObject instanceof Dataset) { @@ -548,4 +554,8 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } + public static String getBaseStoreIdFor(String driverId) { + return System.getProperty("dataverse.files." + driverId + ".baseStore"); + } + } From c3db1ba15989fc7bfa717013c8af0913e0552b74 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 3 Aug 2022 10:23:59 -0400 Subject: [PATCH 42/76] doc typo --- doc/sphinx-guides/source/installation/config.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index b44927d44d8..a43411a3934 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -685,7 +685,7 @@ and/or managing access to a secure enclave. For specific remote stores, consult dataverse.files..baseUrl **Required** All files must have URLs of the form /* (none) dataverse.files..baseStore **Required** The id of a base store (of type file, s3, or swift) (none) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` - dataverse.files..secreteKey A key used to sign download requests sent to the remote store. Optional. (none) + dataverse.files..secretKey A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional (none) dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. 
(none) @@ -2598,7 +2598,7 @@ Number of errors to display to the user when creating DataFiles from a file uplo .. _:BagItHandlerEnabled: :BagItHandlerEnabled -+++++++++++++++++++++ +++++++++++++++++++++ Part of the database settings to configure the BagIt file handler. Enables the BagIt file handler. By default, the handler is disabled. From 846d86616e8d583cd2404f9fab3103ac2bba4f1b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 16:28:22 -0400 Subject: [PATCH 43/76] Apply suggestions from code review Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a43411a3934..223ff48c92f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,14 +238,14 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. 
In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. -File Storage: Using a Local Filesystem and/or Swift and/or object stores and/or trusted remote services +File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Services ------------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara5/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. -Dataverse may also be configured to reference some files (e.g. large and/or sensitive data) stored in a trusted remote web-accessible system. +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a trusted remote web-accessible system. The following sections describe how to set up various types of stores and how to configure for multiple stores. @@ -672,7 +672,7 @@ In addition to having the type "remote" and requiring a label, Trusted Remote St These and other available options are described in the table below. Remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity -and/or managing access to a secure enclave. 
For specific remote stores, consult their documentation when configuring the remote store in Dataverse. +and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. .. table:: :align: left From 8c6b31a169584f490e8dde169398a4223583ff9d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 16:51:44 -0400 Subject: [PATCH 44/76] switch to hyphens per review --- doc/sphinx-guides/source/installation/config.rst | 8 ++++---- .../iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 223ff48c92f..05019ea5230 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -681,13 +681,13 @@ and/or managing access to a secure enclave. For specific remote stores, consult JVM Option Value Description Default value =========================================== ================== ========================================================================== ============= dataverse.files..type ``remote`` **Required** to mark this storage as remote. (none) - dataverse.files..label **Required** label to be shown in the UI for this storage (none) - dataverse.files..baseUrl **Required** All files must have URLs of the form /* (none) - dataverse.files..baseStore **Required** The id of a base store (of type file, s3, or swift) (none) + dataverse.files..label **Required** label to be shown in the UI for this storage. (none) + dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (none) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). 
``false`` dataverse.files..secretKey A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 - dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional (none) + dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) =========================================== ================== ========================================================================== ============= diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 7c70c6b867f..633237cc5d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -432,7 +432,7 @@ int getUrlExpirationMinutes() { } private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - baseUrl = System.getProperty("dataverse.files." + this.driverId + ".baseUrl"); + baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); @@ -555,7 +555,7 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } public static String getBaseStoreIdFor(String driverId) { - return System.getProperty("dataverse.files." + driverId + ".baseStore"); + return System.getProperty("dataverse.files." 
+ driverId + ".base-store"); } } From 984254a02d16fa3291ec1ad8ce3a717d54037997 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 16:57:01 -0400 Subject: [PATCH 45/76] reduce variations on trusted remote store --- doc/sphinx-guides/source/installation/config.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 05019ea5230..59267b77465 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,14 +238,14 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. 
-File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Services -------------------------------------------------------------------------------------------------------- +File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores +----------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara5/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. -A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a trusted remote web-accessible system. +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web-accessible trusted remote store. The following sections describe how to set up various types of stores and how to configure for multiple stores. @@ -671,7 +671,7 @@ Trusted Remote Storage In addition to having the type "remote" and requiring a label, Trusted Remote Stores are defined in terms of a baseURL - all files managed by this store must be at a path starting with this URL, and a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). These and other available options are described in the table below. 
-Remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity +Trusted remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. .. table:: From c3bbfecdd4a889f8f75eab323c3377a8963575e2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 18:20:37 -0400 Subject: [PATCH 46/76] add signer tests, flip param order so sign/validate match, fix val bug --- .../iq/dataverse/util/UrlSignerUtil.java | 11 ++-- .../iq/dataverse/util/UrlSignerUtilTest.java | 50 +++++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 8f53799cb98..b11334520e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -53,14 +53,15 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin } if (method != null) { signedUrl.append(firstParam ? "?" : "&").append("method=").append(method); + firstParam=false; } - signedUrl.append("&token="); + signedUrl.append(firstParam ? "?" : "&").append("token="); logger.fine("String to sign: " + signedUrl.toString() + ""); signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); logger.fine("Generated Signed URL: " + signedUrl.toString()); if (logger.isLoggable(Level.FINE)) { logger.fine( - "URL signature is " + (isValidUrl(signedUrl.toString(), method, user, key) ? "valid" : "invalid")); + "URL signature is " + (isValidUrl(signedUrl.toString(), user, method, key) ? 
"valid" : "invalid")); } return signedUrl.toString(); } @@ -86,7 +87,7 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin * the URL is only for user B) the url has expired (was used after the * until timestamp) */ - public static boolean isValidUrl(String signedUrl, String method, String user, String key) { + public static boolean isValidUrl(String signedUrl, String user, String method, String key) { boolean valid = true; try { URL url = new URL(signedUrl); @@ -114,7 +115,7 @@ public static boolean isValidUrl(String signedUrl, String method, String user, S } } - int index = signedUrl.indexOf("&token="); + int index = signedUrl.indexOf(((dateString==null && allowedMethod==null && allowedUser==null) ? "?":"&") + "token="); // Assuming the token is last - doesn't have to be, but no reason for the URL // params to be rearranged either, and this should only cause false negatives if // it does happen @@ -134,7 +135,7 @@ public static boolean isValidUrl(String signedUrl, String method, String user, S logger.fine("Method doesn't match"); valid = false; } - if (user != null && user.equals(allowedUser)) { + if (user != null && !user.equals(allowedUser)) { logger.fine("User doesn't match"); valid = false; } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java new file mode 100644 index 00000000000..2b9d507758f --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java @@ -0,0 +1,50 @@ +package edu.harvard.iq.dataverse.util; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.junit.Test; + +public class UrlSignerUtilTest { + + @Test + public void testSignAndValidate() { + + final String url = "http://localhost:8080/api/test1"; + final String get = "GET"; + final String post = "POST"; + + final String 
user1 = "Alice"; + final String user2 = "Bob"; + final int tooQuickTimeout = -1; + final int longTimeout = 1000; + final String key = "abracadabara open sesame"; + final String badkey = "abracadabara open says me"; + + Logger.getLogger(UrlSignerUtil.class.getName()).setLevel(Level.FINE); + + String signedUrl1 = UrlSignerUtil.signUrl(url, longTimeout, user1, get, key); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, user1, get, key)); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, user1, null, key)); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, null, get, key)); + + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, null, get, badkey)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, user2, get, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, user1, post, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), user1, get, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), user2, get, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), null, get, key)); + + String signedUrl2 = UrlSignerUtil.signUrl(url, null, null, null, key); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl2, null, null, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl2, null, post, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl2, user1, null, key)); + + String signedUrl3 = UrlSignerUtil.signUrl(url, tooQuickTimeout, user1, get, key); + + assertFalse(UrlSignerUtil.isValidUrl(signedUrl3, user1, get, key)); + } +} From 56f7676509b13667100a73092f156013dd5ce204 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 13:13:42 -0400 Subject: [PATCH 47/76] update secret-key, cleanup --- doc/sphinx-guides/source/installation/config.rst | 2 +- .../iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 
59267b77465..1bbc601da4b 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -685,7 +685,7 @@ and/or managing access to a secure enclave. For specific remote stores, consult dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (none) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` - dataverse.files..secretKey A key used to sign download requests sent to the remote store. Optional. (none) + dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 633237cc5d2..13bb718dc6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -188,7 +188,7 @@ private long getSizeFromHttpHeader() { } finally { EntityUtils.consume(response.getEntity()); } - } catch (Exception e) { + } catch (IOException e) { logger.warning(e.getMessage()); } return size; @@ -403,7 +403,7 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary // ToDo - support remote auxiliary Files if (auxiliaryTag == null) { - String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secretkey"); + String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key"); if (secretKey == null) { return baseUrl + "/" + urlPath; } else { @@ -494,7 +494,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor try { remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); } catch(MalformedURLException mfue) { - logger.warning("Unable to read remoteStoreUrl for driver: " + this.driverId); + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); } } From 1e4a72435b7e18165551c51e44a5f92eaf6bf234 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 15:49:46 -0400 Subject: [PATCH 48/76] Add tests/add support for local file base store tests --- .../iq/dataverse/dataaccess/FileAccessIO.java | 8 +- .../dataaccess/RemoteOverlayAccessIO.java | 4 +- .../dataaccess/RemoteOverlayAccessIOTest.java | 104 ++++++++++++++++++ 3 files changed, 108 insertions(+), 8 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 14ffcd46fce..1c60981c82a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -584,12 +584,8 @@ private String getDatasetDirectory() throws IOException { } - private String getFilesRootDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory"); - - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + protected String getFilesRootDirectory() { + String filesRootDirectory = System.getProperty("dataverse.files." 
+ this.driverId + ".directory", "/tmp/files"); return filesRootDirectory; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 13bb718dc6d..ebe9ec99c90 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -453,7 +453,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + fullStorageLocation; break; default: @@ -475,7 +475,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + System.getProperty("dataverse.files." 
+ baseDriverId + ".directory", "/tmp/files") + "/" + fullStorageLocation; break; default: diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java new file mode 100644 index 00000000000..c85a7e6adae --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -0,0 +1,104 @@ +/* + * Copyright 2018 Forschungszentrum Jülich GmbH + * SPDX-License-Identifier: Apache 2.0 + */ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import static org.junit.jupiter.api.Assertions.*; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import java.io.IOException; +import java.nio.file.Paths; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.STRICT_STUBS) +public class RemoteOverlayAccessIOTest { + + @Mock + + private Dataset dataset; + private DataFile datafile; + private String logoPath = "resources/images/dataverse_project_logo.svg"; + private String pid = "10.5072/F2/ABCDEF"; + + @BeforeEach + public void setUp() { + System.setProperty("dataverse.files.test.type", "remote"); + System.setProperty("dataverse.files.test.label", "testOverlay"); + System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org"); + System.setProperty("dataverse.files.test.base-store", "file"); + System.setProperty("dataverse.files.test.download-redirect", "true"); + 
System.setProperty("dataverse.files.test.remote-store-name", "DemoDataCorp"); + System.setProperty("dataverse.files.test.secret-key", "12345"); // Real keys should be much longer, more random + System.setProperty("dataverse.files.file.type", "file"); + System.setProperty("dataverse.files.file.label", "default"); + datafile = MocksFactory.makeDataFile(); + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(GlobalId.parse("doi:" + pid).get()); + datafile.setOwner(dataset); + datafile.setStorageIdentifier("test://" + logoPath); + + } + + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.test.type"); + System.clearProperty("dataverse.files.test.label"); + System.clearProperty("dataverse.files.test.base-url"); + System.clearProperty("dataverse.files.test.base-store"); + System.clearProperty("dataverse.files.test.download-redirect"); + System.clearProperty("dataverse.files.test.label"); + System.clearProperty("dataverse.files.test.remote-store-name"); + System.clearProperty("dataverse.files.test.secret-key"); + System.clearProperty("dataverse.files.file.type"); + System.clearProperty("dataverse.files.file.label"); + } + + @Test + void testRemoteOverlayFile() throws IOException { + // We can read the storageIdentifier and get the driver + assertTrue(datafile.getStorageIdentifier() + .startsWith(DataAccess.getStorgageDriverFromIdentifier(datafile.getStorageIdentifier()))); + // We can get the driver type from it's ID + assertTrue(DataAccess.getDriverType("test").equals(System.getProperty("dataverse.files.test.type"))); + // When we get a StorageIO for the file, it is the right type + StorageIO storageIO = DataAccess.getStorageIO(datafile); + assertTrue(storageIO instanceof RemoteOverlayAccessIO); + // When we use it, we can get properties like the remote store name + RemoteOverlayAccessIO remoteIO = (RemoteOverlayAccessIO) storageIO; + 
assertTrue(remoteIO.getRemoteStoreName().equals(System.getProperty("dataverse.files.test.remote-store-name"))); + // And can get a temporary download URL for the main file + String signedURL = remoteIO.generateTemporaryDownloadUrl(null, null, null); + // And the URL starts with the right stuff + assertTrue(signedURL.startsWith(System.getProperty("dataverse.files.test.base-url") + "/" + logoPath)); + // And the signature is valid + assertTrue( + UrlSignerUtil.isValidUrl(signedURL, null, null, System.getProperty("dataverse.files.test.secret-key"))); + // And we get an unsigned URL with the right stuff with no key + System.clearProperty("dataverse.files.test.secret-key"); + String unsignedURL = remoteIO.generateTemporaryDownloadUrl(null, null, null); + assertTrue(unsignedURL.equals(System.getProperty("dataverse.files.test.base-url") + "/" + logoPath)); + // Once we've opened, we can get the file size (only works if the HEAD call to + // the file URL works + remoteIO.open(DataAccessOption.READ_ACCESS); + assertTrue(remoteIO.getSize() > 0); + // If we ask for the path for an aux file, it is correct + assertTrue(Paths + .get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), pid, logoPath + ".auxobject") + .equals(remoteIO.getAuxObjectAsPath("auxobject"))); + + } + +} From 5705e67c32ac9bd27aff4495952769151119275f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 5 Aug 2022 16:08:07 -0400 Subject: [PATCH 49/76] add an API test for local dev/testing #7324 --- .../iq/dataverse/api/RemoteStoreIT.java | 76 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 10 +++ 2 files changed, 86 insertions(+) create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java diff --git a/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java b/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java new file mode 100644 index 00000000000..45c6462dab0 --- /dev/null +++ 
b/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java @@ -0,0 +1,76 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.response.Response; +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.OK; +import org.junit.BeforeClass; +import org.junit.Test; + +public class RemoteStoreIT { + + @BeforeClass + public static void setUp() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testRemoteStore() { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); + + Response createUserNoPrivs = UtilIT.createRandomUser(); + createUserNoPrivs.then().assertThat().statusCode(OK.getStatusCode()); + String apiTokenNoPrivs = UtilIT.getApiTokenFromResponse(createUserNoPrivs); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + /** + * Note that you must configure various JVM options for this to work: + * + * -Ddataverse.files.trsa.type=remote + * -Ddataverse.files.trsa.label=trsa + * 
-Ddataverse.files.trsa.base-url=https://qdr.syr.edu + * -Ddataverse.files.trsa.base-store=file + * -Ddataverse.files.trsa.secret-key=12345 + * -Ddataverse.files.trsa.url-expiration-minutes=120 + * + * (and probably download-redirect) + */ + JsonObjectBuilder remoteFileJson = Json.createObjectBuilder() + .add("description", "A remote image.") + .add("storageIdentifier", "trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png") + .add("checksumType", "MD5") + .add("md5Hash", "509ef88afa907eaf2c17c1c8d8fde77e") + .add("label", "testlogo.png") + .add("fileName", "testlogo.png") + .add("mimeType", "image/png"); + + Response addRemoteFile = UtilIT.addRemoteFile(datasetId.toString(), remoteFileJson.build().toString(), apiToken); + System.setProperty(apiToken, username); + addRemoteFile.prettyPrint(); + addRemoteFile.then().assertThat() + .statusCode(OK.getStatusCode()); + + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index c791ce72f41..5b8048a391f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -680,6 +680,16 @@ static Response uploadFileViaNative(String datasetId, String pathToFile, String return requestSpecification.post("/api/datasets/" + datasetId + "/add"); } + static Response addRemoteFile(String datasetId, String jsonAsString, String apiToken) { + RequestSpecification requestSpecification = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .multiPart("datasetId", datasetId); + if (jsonAsString != null) { + requestSpecification.multiPart("jsonData", jsonAsString); + } + return requestSpecification.post("/api/datasets/" + datasetId + "/add"); + } + static Response uploadAuxFile(Long fileId, String pathToFile, String formatTag, String formatVersion, String mimeType, boolean isPublic, String type, String apiToken) { String nullOrigin = null; + return uploadAuxFile(fileId, pathToFile, 
formatTag, formatVersion, mimeType, isPublic, type, nullOrigin, apiToken); From 0902975d5096aa45329faf5e16701f5413db666c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 16:10:34 -0400 Subject: [PATCH 50/76] sign even for internal access --- .../harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index ebe9ec99c90..0e18c46243f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -198,7 +198,7 @@ private long getSizeFromHttpHeader() { public InputStream getInputStream() throws IOException { if (super.getInputStream() == null) { try { - HttpGet get = new HttpGet(baseUrl + "/" + urlPath); + HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); int code = response.getStatusLine().getStatusCode(); From 7e9d066bbcbc98b4eacbb5dc9cec0bfb173b17ad Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 17:34:56 -0400 Subject: [PATCH 51/76] add some validation and test --- .../dataaccess/RemoteOverlayAccessIO.java | 19 +++++++++++++++++-- .../dataaccess/RemoteOverlayAccessIOTest.java | 14 +++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 0e18c46243f..05773888533 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -12,6 +12,8 @@ import java.io.InputStream; import java.io.OutputStream; import 
java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.Channels; @@ -80,8 +82,9 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) this.setIsLocalFile(false); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - // TODO: validate the storage location supplied urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(urlPath); + logger.fine("Base URL: " + urlPath); } @@ -90,10 +93,22 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE this.setIsLocalFile(false); configureStores(null, driverId, storageLocation); - // TODO: validate the storage location supplied urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(urlPath); logger.fine("Base URL: " + urlPath); } + + private void validatePath(String path) throws IOException { + try { + URI absoluteURI = new URI(baseUrl + "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); + } + } catch(URISyntaxException use) { + throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); + } + } + @Override public void open(DataAccessOption... 
options) throws IOException { diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index c85a7e6adae..fc44984b263 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -30,14 +30,15 @@ public class RemoteOverlayAccessIOTest { private Dataset dataset; private DataFile datafile; - private String logoPath = "resources/images/dataverse_project_logo.svg"; + private DataFile badDatafile; + private String logoPath = "images/dataverse_project_logo.svg"; private String pid = "10.5072/F2/ABCDEF"; @BeforeEach public void setUp() { System.setProperty("dataverse.files.test.type", "remote"); System.setProperty("dataverse.files.test.label", "testOverlay"); - System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org"); + System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org/resources"); System.setProperty("dataverse.files.test.base-store", "file"); System.setProperty("dataverse.files.test.download-redirect", "true"); System.setProperty("dataverse.files.test.remote-store-name", "DemoDataCorp"); @@ -50,6 +51,9 @@ public void setUp() { datafile.setOwner(dataset); datafile.setStorageIdentifier("test://" + logoPath); + badDatafile = MocksFactory.makeDataFile(); + badDatafile.setOwner(dataset); + badDatafile.setStorageIdentifier("test://../.." 
+ logoPath); } @AfterEach @@ -67,7 +71,7 @@ public void tearDown() { } @Test - void testRemoteOverlayFile() throws IOException { + void testRemoteOverlayFiles() throws IOException { // We can read the storageIdentifier and get the driver assertTrue(datafile.getStorageIdentifier() .startsWith(DataAccess.getStorgageDriverFromIdentifier(datafile.getStorageIdentifier()))); @@ -98,6 +102,10 @@ void testRemoteOverlayFile() throws IOException { assertTrue(Paths .get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), pid, logoPath + ".auxobject") .equals(remoteIO.getAuxObjectAsPath("auxobject"))); + IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(badDatafile), + "Expected getStorageIO() to throw, but it didn't"); + // 'test' is the driverId in the IOException messages + assertTrue(thrown.getMessage().contains("test")); } From db4192ee916694b2c68487e53753f61885ed4af9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 18:27:46 -0400 Subject: [PATCH 52/76] typo in method name --- .../iq/dataverse/dataaccess/DataAccess.java | 42 ++++++++++++++++--- .../dataaccess/RemoteOverlayAccessIOTest.java | 2 +- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 63730c4511d..bc0794a1932 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -62,18 +62,27 @@ public static StorageIO getStorageIO(T dvObject) throws return getStorageIO(dvObject, null); } + + + public static String getStorageDriverFromIdentifier(String storageIdentifier) { + + int separatorIndex = storageIdentifier.indexOf(SEPARATOR); + String driverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; // default + if (separatorIndex > 0) { + driverId = storageIdentifier.substring(0, separatorIndex); + } + return driverId; + } + 
//passing DVObject instead of a datafile to accomodate for use of datafiles as well as datasets public static StorageIO getStorageIO(T dvObject, DataAccessRequest req) throws IOException { if (dvObject == null || dvObject.getStorageIdentifier() == null || dvObject.getStorageIdentifier().isEmpty()) { throw new IOException("getDataAccessObject: null or invalid datafile."); } - String storageIdentifier = dvObject.getStorageIdentifier(); - int separatorIndex = storageIdentifier.indexOf(SEPARATOR); - String storageDriverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; // default - if (separatorIndex > 0) { - storageDriverId = storageIdentifier.substring(0, separatorIndex); - } + + String storageDriverId = getStorageDriverFromIdentifier(dvObject.getStorageIdentifier()); + return getStorageIO(dvObject, req, storageDriverId); } @@ -151,6 +160,27 @@ public static String getDriverType(String driverId) { } return System.getProperty("dataverse.files." + driverId + ".type", "Undefined"); } + + //This + public static String getDriverPrefix(String driverId) throws IOException { + if(driverId.isEmpty() || driverId.equals("tmp")) { + return "tmp" + SEPARATOR; + } + String storageType = System.getProperty("dataverse.files." + driverId + ".type", "Undefined"); + switch(storageType) { + case FILE: + return FileAccessIO.getDriverPrefix(driverId); + case S3: + return S3AccessIO.getDriverPrefix(driverId); + case SWIFT: + return SwiftAccessIO.getDriverPrefix(driverId); + default: + logger.warning("Could not find storage driver for id: " + driverId); + throw new IOException("getDriverPrefix: Unsupported storage method."); + } + + + } // createDataAccessObject() methods create a *new*, empty DataAccess objects, // for saving new, not yet saved datafiles. 
diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index fc44984b263..122f84c5c19 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -74,7 +74,7 @@ public void tearDown() { void testRemoteOverlayFiles() throws IOException { // We can read the storageIdentifier and get the driver assertTrue(datafile.getStorageIdentifier() - .startsWith(DataAccess.getStorgageDriverFromIdentifier(datafile.getStorageIdentifier()))); + .startsWith(DataAccess.getStorageDriverFromIdentifier(datafile.getStorageIdentifier()))); // We can get the driver type from it's ID assertTrue(DataAccess.getDriverType("test").equals(System.getProperty("dataverse.files.test.type"))); // When we get a StorageIO for the file, it is the right type From c4eee7c903a465c304b850212f9a0bf733a546f0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 8 Aug 2022 12:40:06 -0400 Subject: [PATCH 53/76] add curl example #7324 --- doc/sphinx-guides/source/api/native-api.rst | 30 +++++++++++++++++++ .../source/installation/config.rst | 4 +++ 2 files changed, 34 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 75eb7b5424e..8686734accc 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1411,7 +1411,37 @@ In practice, you only need one the ``dataset_id`` or the ``persistentId``. The e print '-' * 40 print r.json() print r.status_code + +.. _add-remote-file-api: +Add a Remote File to a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your Dataverse installation has been configured to support :ref:`trusted-remote-storage` +you can add files from remote URLs to datasets. 
These remote files appear in your Dataverse +installation as if they were ordinary files but are stored remotely. + +The location of the remote file is specified in the ``storageIdentifier`` field in JSON you supply. +The base URL of the file is contained in the "store" (e.g. "trsa" in the example below) and is followed by the +path to the file (e.g. "themes/custom..."). + +In the JSON example below, all fields are required except for ``description``. Other optional fields are shown under :ref:`add-file-api`. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB + export JSON_DATA='{"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_ID" -F "jsonData=$JSON_DATA" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 1ec0a1e8558..73df842d84b 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -665,6 +665,8 @@ Migrating from Local Storage to S3 Is currently documented on the :doc:`/developers/deployment` page. +.. _trusted-remote-storage: + Trusted Remote Storage ++++++++++++++++++++++ @@ -674,6 +676,8 @@ These and other available options are described in the table below. Trusted remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. +Once you have configured a trusted remote store, you can point your users to the :ref:`add-remote-file-api` section of the API Guide. + .. 
table:: :align: left From 0ce597ac3e54204f1c7a7b713f47dded7c8deb27 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 8 Aug 2022 13:15:25 -0400 Subject: [PATCH 54/76] Error handling or default on required params --- doc/sphinx-guides/source/installation/config.rst | 12 ++++++------ .../dataaccess/RemoteOverlayAccessIO.java | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 73df842d84b..29e34d4ae94 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -681,20 +681,20 @@ Once you have configured a trusted remote store, you can point your users to the .. table:: :align: left - =========================================== ================== ========================================================================== ============= + =========================================== ================== ========================================================================== =================== JVM Option Value Description Default value - =========================================== ================== ========================================================================== ============= + =========================================== ================== ========================================================================== =================== dataverse.files..type ``remote`` **Required** to mark this storage as remote. (none) dataverse.files..label **Required** label to be shown in the UI for this storage. (none) dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) - dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (none) + dataverse.files..base-store **Optional** The id of a base store (of type file, s3, or swift). 
(the default store) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` - dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) + dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) - =========================================== ================== ========================================================================== ============= + =========================================== ================== ========================================================================== =================== @@ -977,7 +977,7 @@ Some external tools are also ready to be translated, especially if they are usin Tools for Translators -+++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++++++++++++++++++++++ The list below depicts a set of tools that can be used to ease the amount of work necessary for translating the Dataverse software by facilitating this collaborative effort and enabling the reuse of previous work: diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 05773888533..a680ce7a06c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -448,11 +448,24 @@ int getUrlExpirationMinutes() { private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { baseUrl = 
System.getProperty("dataverse.files." + this.driverId + ".base-url"); + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); + } else { + try { + new URI(baseUrl); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type"); + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + if(dvObject instanceof Dataset) { baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { From 800eca22cddaa31263ffcdfbe5df572f91b58ddc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 8 Aug 2022 15:14:15 -0400 Subject: [PATCH 55/76] sanity check to make sure driver being specified in addFile exists --- .../iq/dataverse/datasetutility/OptionalFileParams.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 25240349bfb..cd234dfc335 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -16,6 +16,7 @@ import edu.harvard.iq.dataverse.DataFileTag; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.api.Util; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.util.BundleUtil; import java.lang.reflect.Type; @@ -371,8 +372,12 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // get storage 
identifier as string // ------------------------------- if ((jsonObj.has(STORAGE_IDENTIFIER_ATTR_NAME)) && (!jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).isJsonNull())){ - - this.storageIdentifier = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + //Basic sanity check that driver specified is defined. + String storageId = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + String type = DataAccess.getDriverType(DataAccess.getStorageDriverFromIdentifier(storageId)); + if(!type.equals("tmp")&& !type.equals("Undefined")) { + this.storageIdentifier = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + } } // ------------------------------- From f730afa9038af8a3a316c3dcf49d72e55727e0e6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 8 Aug 2022 15:45:17 -0400 Subject: [PATCH 56/76] only get value from json once --- .../harvard/iq/dataverse/datasetutility/OptionalFileParams.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index cd234dfc335..ad141998b15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -376,7 +376,7 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ String storageId = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); String type = DataAccess.getDriverType(DataAccess.getStorageDriverFromIdentifier(storageId)); if(!type.equals("tmp")&& !type.equals("Undefined")) { - this.storageIdentifier = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + this.storageIdentifier = storageId; } } From 1583788afc2afc98462a9c36405bd8a9135636e7 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 9 Aug 2022 10:17:07 -0400 Subject: [PATCH 57/76] update RemoteStoreIT test to show JVM options used #7324 --- 
.../java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java b/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java index 45c6462dab0..b1cd65176c5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java @@ -51,10 +51,11 @@ public void testRemoteStore() { * -Ddataverse.files.trsa.label=trsa * -Ddataverse.files.trsa.base-url=https://qdr.syr.edu * -Ddataverse.files.trsa.base-store=file - * -Ddataverse.files.trsa.secretkey=12345 - * -Ddataverse.files.trsa.url-expiration-minutes=120 * - * (and probably download-redirect) + * In practice, most installation will also enable download-redirect + * (below) to prevent the files from being streamed through Dataverse! + * + * -Ddataverse.files.trsa.download-redirect=true */ JsonObjectBuilder remoteFileJson = Json.createObjectBuilder() .add("description", "A remote image.") From 25b4059f8cecd641d56e8fa95ef06db47aa6a25c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 Aug 2022 14:07:45 -0400 Subject: [PATCH 58/76] add separate downloadRedirectEnabled for aux objects method --- .../iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 4 ++++ .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 4 ++++ .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index a680ce7a06c..2e53c82d184 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -411,6 +411,10 @@ public boolean downloadRedirectEnabled() { } return false; } + + public boolean downloadRedirectEnabled(String 
auxObjectTag) { + return baseStore.downloadRedirectEnabled(auxObjectTag); + } @Override public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 817136f8735..e550cae1373 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -851,6 +851,10 @@ public boolean downloadRedirectEnabled() { } return false; } + + public boolean downloadRedirectEnabled(String auxObjectTag) { + return downloadRedirectEnabled(); + } /** * Generates a temporary URL for a direct S3 download; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 63c5af038e1..19f1e26ef4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -580,6 +580,10 @@ public boolean downloadRedirectEnabled() { return false; } + public boolean downloadRedirectEnabled(String auxObjectTag) { + return false; + } + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } From e6fb48540d07e16420cba727ff49f712a569a768 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 Aug 2022 14:28:22 -0400 Subject: [PATCH 59/76] add logic to check base store download redirect for aux objects --- .../harvard/iq/dataverse/api/DownloadInstanceWriter.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java 
index 3ac7f301ecd..6bde8b5b07a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -120,7 +120,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] auxiliaryTag = ImageThumbConverter.THUMBNAIL_SUFFIX + (requestedSize > 0 ? requestedSize : ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE); - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { auxiliaryType = ImageThumbConverter.THUMBNAIL_MIME_TYPE; String fileName = storageIO.getFileName(); if (fileName != null) { @@ -139,7 +139,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] auxiliaryTag = auxiliaryTag + "_" + auxVersion; } - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { String fileExtension = getFileExtension(di.getAuxiliaryFile()); auxiliaryFileName = storageIO.getFileName() + "." + auxiliaryTag + fileExtension; auxiliaryType = di.getAuxiliaryFile().getContentType(); @@ -162,7 +162,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // it has been cached already. 
auxiliaryTag = di.getConversionParamValue(); - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { auxiliaryType = di.getServiceFormatType(di.getConversionParam(), auxiliaryTag); auxiliaryFileName = FileUtil.replaceExtension(storageIO.getFileName(), auxiliaryTag); } else { @@ -201,7 +201,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] try { redirect_uri = new URI(redirect_url_str); - } catch (URISyntaxException ex) { + } catch (URISyntaxException|NullPointerException ex) { logger.info("Data Access API: failed to create S3 redirect url (" + redirect_url_str + ")"); redirect_uri = null; } From 0fd56cf2be9ff2e5a1331605fb038f2488cc461c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 Aug 2022 14:34:42 -0400 Subject: [PATCH 60/76] minor error meg and comment changes --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Files.java | 2 +- .../iq/dataverse/datasetutility/OptionalFileParams.java | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 70f1043e6a3..9bc8f6b9147 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2383,7 +2383,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } } else { return error(BAD_REQUEST, - "You must upload a file or provide a storageidentifier, filename, and mimetype."); + "You must upload a file or provide a valid storageidentifier, filename, and mimetype."); } } else { newFilename = contentDispositionHeader.getFileName(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 78847119ce4..d8313254ce0 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -232,7 +232,7 @@ public Response replaceFileInDataset( } } else { return error(BAD_REQUEST, - "You must upload a file or provide a storageidentifier, filename, and mimetype."); + "You must upload a file or provide a valid storageidentifier, filename, and mimetype."); } } else { newFilename = contentDispositionHeader.getFileName(); diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index ad141998b15..080132409f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -372,7 +372,9 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // get storage identifier as string // ------------------------------- if ((jsonObj.has(STORAGE_IDENTIFIER_ATTR_NAME)) && (!jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).isJsonNull())){ - //Basic sanity check that driver specified is defined. + // Basic sanity check that driver specified is defined. Note that being able to + // specify a driver that does not support direct uploads is currently used with + // out-of-band uploads, e.g. for bulk migration. 
String storageId = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); String type = DataAccess.getDriverType(DataAccess.getStorageDriverFromIdentifier(storageId)); if(!type.equals("tmp")&& !type.equals("Undefined")) { From 361018f0b1b6a6e978c635153a31242d60064aef Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 10 Aug 2022 12:15:01 -0400 Subject: [PATCH 61/76] remove cruft from tests #7324 --- .../java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java b/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java index b1cd65176c5..ae5bc8b7316 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java @@ -25,10 +25,6 @@ public void testRemoteStore() { UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); - Response createUserNoPrivs = UtilIT.createRandomUser(); - createUserNoPrivs.then().assertThat().statusCode(OK.getStatusCode()); - String apiTokenNoPrivs = UtilIT.getApiTokenFromResponse(createUserNoPrivs); - Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); createDataverseResponse.prettyPrint(); createDataverseResponse.then().assertThat() @@ -67,11 +63,10 @@ public void testRemoteStore() { .add("mimeType", "image/png"); Response addRemoteFile = UtilIT.addRemoteFile(datasetId.toString(), remoteFileJson.build().toString(), apiToken); - System.setProperty(apiToken, username); addRemoteFile.prettyPrint(); addRemoteFile.then().assertThat() .statusCode(OK.getStatusCode()); - + System.out.println("done!"); } } From cee4f9d32b326b3868ff31fb8b01e9f7c8bdb6b0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 10 Aug 2022 12:20:00 -0400 Subject: [PATCH 62/76] Added a note about limitations of what's in the PR. 
--- doc/sphinx-guides/source/installation/config.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5f7216a9214..1f700a0f81f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -676,6 +676,8 @@ These and other available options are described in the table below. Trusted remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. +Note that in the current implementation, activities where Dataverse needs access to data bytes, e.g. to create thumbnails or validate hash values at publication will fail if a remote store does not allow Dataverse access. Implementers of such trusted remote stores should consider using Dataverse's settings to disable ingest, validation of files at publication, etc. as needed. + +Once you have configured a trusted remote store, you can point your users to the :ref:`add-remote-file-api` section of the API Guide. .. table:: From 909b9c72a2c5f49f761c7916198de8f962bb7f78 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 16 Aug 2022 12:17:15 -0400 Subject: [PATCH 63/76] use single file API call /add --- doc/sphinx-guides/source/api/native-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 8686734accc..d7183f43a0b 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1434,13 +1434,13 @@ In the JSON example below, all fields are required except for ``description``. 
O export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB export JSON_DATA='{"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' - curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_ID" -F "jsonData=$JSON_DATA" + curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID" -F "jsonData=$JSON_DATA" The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' + curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 5f633e4709e8e04377cb93357cbd1665a4a5560a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 16 Aug 2022 12:59:30 -0400 Subject: [PATCH 64/76] copy non-globus parts from #8891 per review request --- .../source/developers/big-data-support.rst | 62 +++++++++++++++++-- 1 file changed, 58 insertions(+), 
4 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 641307372ee..71822f53b1b 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -1,19 +1,19 @@ Big Data Support ================ -Big data support is highly experimental. Eventually this content will move to the Installation Guide. +Big data support includes some highly experimental options. Eventually more of this content will move to the Installation Guide. .. contents:: |toctitle| :local: -Various components need to be installed and/or configured for big data support. +Various components will need to be installed and/or configured for big data support via the methods described below. S3 Direct Upload and Download ----------------------------- A lightweight option for supporting file sizes beyond a few gigabytes - a size that can cause performance issues when uploaded through a Dataverse installation itself - is to configure an S3 store to provide direct upload and download via 'pre-signed URLs'. When these options are configured, file uploads and downloads are made directly to and from a configured S3 store using secure (https) connections that enforce a Dataverse installation's access controls. (The upload and download URLs are signed with a unique key that only allows access for a short time period and a Dataverse installation will only generate such a URL if the user has permission to upload/download the specific file in question.) -This option can handle files >40GB and could be appropriate for files up to a TB. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to a Dataverse installation. 
+This option can handle files >300GB and could be appropriate for files up to a TB or larger. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to a Dataverse installation. To configure these options, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: @@ -32,7 +32,7 @@ For AWS, the minimum allowed part size is 5*1024*1024 bytes and the maximum is 5 It is also possible to set file upload size limits per store. See the :MaxFileUploadSizeInBytes setting described in the :doc:`/installation/config` guide. -At present, one potential drawback for direct-upload is that files are only partially 'ingested', tabular and FITS files are processed, but zip files are not unzipped, and the file contents are not inspected to evaluate their mimetype. This could be appropriate for large files, or it may be useful to completely turn off ingest processing for performance reasons (ingest processing requires a copy of the file to be retrieved by the Dataverse installation from the S3 store). A store using direct upload can be configured to disable all ingest processing for files above a given size limit: +At present, one potential drawback for direct-upload is that files are only partially 'ingested' - tabular and FITS files are processed, but zip files are not unzipped, and the file contents are not inspected to evaluate their mimetype. This could be appropriate for large files, or it may be useful to completely turn off ingest processing for performance reasons (ingest processing requires a copy of the file to be retrieved by the Dataverse installation from the S3 store). 
A store using direct upload can be configured to disable all ingest processing for files above a given size limit: ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` @@ -61,6 +61,60 @@ Alternatively, you can enable CORS using the AWS S3 web interface, using json-en Since the direct upload mechanism creates the final file rather than an intermediate temporary file, user actions, such as neither saving nor canceling an upload session before closing the browser page, can leave an abandoned file in the store. The direct upload mechanism attempts to use S3 Tags to aid in identifying/removing such files. Upon upload, files are given a "dv-state":"temp" tag which is removed when the dataset changes are saved and the new file(s) are added in the Dataverse installation. Note that not all S3 implementations support Tags: Minio does not. With such stores, direct upload works, but Tags are not used. +Trusted Remote Storage with the ``remote`` Store Type +----------------------------------------------------- + +For very large, and/or very sensitive data, it may not make sense to transfer or copy files to Dataverse at all. The experimental ``remote`` store type in the Dataverse software now supports this use case. + +With this storage option Dataverse stores a URL reference for the file rather than transferring the file bytes to a store managed directly by Dataverse. Basic configuration for a remote store is described at :ref:`file-storage` in the Configuration Guide. + +Once the store is configured, it can be assigned to a collection or individual datasets as with other stores. In a dataset using this store, users can reference remote files, currently only via API, which will then appear the same basic way as other datafiles. If the store has been configured with a remote-store-name or remote-store-url, the dataset file table will include this information for remote files.
(Users can also upload smaller files via the UI or API which will be stored in the configured base store.) + +The remote store leverages the same upload syntax as the :doc:`/developers/s3-direct-upload-api` (which itself uses the standard file upload API call): + +Rather than sending the file bytes, metadata for the remote file is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For remote references, the jsonData object must also include values for: + +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'trs://images/dataverse_project_logo.svg', 'fileName':'dataverse_logo.svg', 'mimeType':'image/svg+xml', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The variant allowing multiple files to be added once that is discussed in the :doc:`/developers/s3-direct-upload-api` document can also be used. + +Considerations: + +* Remote stores are configured with a base-url which limits what files can be referenced, i.e. 
the absolute URL for the file is /. +* Admins are trusting the organization managing the site/service at base-url to maintain the referenced files for as long as the Dataverse instance needs them. Formal agreements are recommended for production. +* For large files, direct-download should always be used with a remote store. (Otherwise the Dataverse will be involved in the download.) +* For simple websites, a remote store should be marked public which will turn off restriction and embargo functionality in Dataverse (since Dataverse cannot restrict access to the file on the remote website) +* Remote stores can be configured with a secret-key. This key will be used to sign URLs when Dataverse retrieves the file content or redirects a user for download. If the remote service is able to validate the signature and reject invalid requests, the remote store mechanism can be used to manage restricted and embargoed files, access requests in Dataverse, etc. Dataverse contains Java code that validates these signatures which could be used, for example, to create a validation proxy in front of a web server to allow Dataverse to manage access. The secret-key is a shared secret between Dataverse and the remote service and is not shared with/is not accessible by users or those with access to user's machines. +* Sophisticated remote services may wish to register file URLs that do not directly reference the file contents (bytes) but instead direct the user to a website where further information about the remote service's download process can be found. +* Due to the current design, ingest cannot be done on remote files and administrators should disable ingest when using a remote store. This can be done by setting the ingest size limit for the store to 0 and/or using the recently added option to not perform tabular ingest on upload. +* Dataverse will normally try to access the file contents itself, i.e. for ingest (in future versions), full-text indexing, thumbnail creation, etc.
This processing may not be desirable for large/sensitive data, and, for the case where the URL does not reference the file itself, would not be possible. At present, administrators should configure the relevant size limits to avoid such actions. +* The current implementation of remote stores is experimental in the sense that future work to enhance it is planned. This work may result in changes to how the store works and lead to additional work when upgrading for sites that start using this mechanism now. + +To configure the options mentioned above, an administrator must set the following JVM options for the Dataverse installation using the same process as for other configuration options: + +``./asadmin create-jvm-options "-Ddataverse.files..download-redirect=true"`` +``./asadmin create-jvm-options "-Ddataverse.files..secret-key=somelongrandomalphanumerickeythelongerthebetter123456"`` +``./asadmin create-jvm-options "-Ddataverse.files..public=true"`` +``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` + Data Capture Module (DCM) ------------------------- From cb1755d5a1d083a4697a04d9aae22773874e9848 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 16 Aug 2022 13:31:46 -0400 Subject: [PATCH 65/76] add missing label --- doc/sphinx-guides/source/installation/config.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 1f700a0f81f..c96a819db26 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,6 +238,8 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`).
Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. +.. _file-storage: + File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores ----------------------------------------------------------------------------------------------------- From 3d9418e8553f830ee89d1aca32ccf298b67f84ba Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 16 Aug 2022 16:14:14 -0400 Subject: [PATCH 66/76] Handle null file size per QA discussion --- src/main/java/edu/harvard/iq/dataverse/DataFile.java | 6 +++++- src/main/java/propertyFiles/Bundle.properties | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index b21ab5fb7ba..b0e39d95e45 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -605,7 +605,11 @@ public void setFilesize(long filesize) { * @return */ public String getFriendlySize() { - return FileSizeChecker.bytesToHumanReadable(filesize); + if (filesize != null) { + return 
FileSizeChecker.bytesToHumanReadable(filesize); + } else { + return BundleUtil.getStringFromBundle("file.sizeNotAvilable"); + } } public boolean isRestricted() { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index fedc14def7d..8dbaf327c0a 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1695,6 +1695,7 @@ file.download.header=Download file.download.subset.header=Download Data Subset file.preview=Preview: file.fileName=File Name +file.sizeNotAvailable=Size not available file.type.tabularData=Tabular Data file.originalChecksumType=Original File {0} file.checksum.exists.tip=A file with this checksum already exists in the dataset. From 643b9242ff8f658c88192858f6c41ac480b0272c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 10:11:27 -0400 Subject: [PATCH 67/76] add checking w.r.t. dataset storage driver/base driver --- .../edu/harvard/iq/dataverse/api/Datasets.java | 5 +++++ .../iq/dataverse/dataaccess/DataAccess.java | 17 +++++++++++++++++ .../datasetutility/AddReplaceFileHelper.java | 17 ++++++++--------- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4597007114b..33df0d39ba7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2374,6 +2374,11 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + + if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + return error(BAD_REQUEST, + "Dataset store configuration does not allow provided storageIdentifier."); + } if 
(optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index bc0794a1932..660a25d487d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -323,4 +323,21 @@ public static String expandStorageIdentifierIfNeeded(String newStorageIdentifier } return newStorageIdentifier; } + + public static boolean uploadToDatasetAllowed(Dataset d, String storageIdentifier) { + boolean allowed=true; + String driverId = DataAccess.getStorageDriverFromIdentifier(storageIdentifier); + String effectiveDriverId = d.getEffectiveStorageDriverId(); + if(!effectiveDriverId.equals(driverId)) { + if(getDriverType(driverId).equals(REMOTE)) { + String baseDriverId = RemoteOverlayAccessIO.getBaseStoreIdFor(driverId); + if(!effectiveDriverId.equals(baseDriverId)) { + allowed = false; + } + } else { + allowed=false; + } + } + return allowed; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 2f133c14ca5..e97b66d325b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2046,6 +2046,9 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + addErrorSevere("Dataset store configuration does not allow provided storageIdentifier."); + } 
if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { @@ -2054,14 +2057,10 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } msgt("ADD! = " + newFilename); - - runAddFileByDataset(dataset, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - + if (!hasError()) { + runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, + null, optionalFileParams, true); + } if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("storageIdentifier", newStorageIdentifier) @@ -2085,8 +2084,8 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); jarr.add(fileoutput); } - } successNumberofFiles = successNumberofFiles + 1; + } } else { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("errorMessage", "You must provide a storageidentifier, filename, and mimetype.") From 7301c621acc10d9723c2665311f70e2acc84cdb4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 10:58:11 -0400 Subject: [PATCH 68/76] add remote store in direct access to support sending file delete call and avoid failure warning in log. 
--- .../java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 660a25d487d..285bef02272 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -127,6 +127,8 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) return new S3AccessIO<>(storageLocation, storageDriverId); case SWIFT: return new SwiftAccessIO<>(storageLocation, storageDriverId); + case REMOTE: + return new RemoteOverlayAccessIO<>(storageLocation, storageDriverId); default: logger.warning("Could not find storage driver for: " + fullStorageLocation); throw new IOException("getDirectStorageIO: Unsupported storage method."); From 0da52fcc48afe46f2aa7eead5c4e945651df1973 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 12:47:49 -0400 Subject: [PATCH 69/76] typo --- src/main/java/edu/harvard/iq/dataverse/DataFile.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index b0e39d95e45..cb43dff0e20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -608,7 +608,7 @@ public String getFriendlySize() { if (filesize != null) { return FileSizeChecker.bytesToHumanReadable(filesize); } else { - return BundleUtil.getStringFromBundle("file.sizeNotAvilable"); + return BundleUtil.getStringFromBundle("file.sizeNotAvailable"); } } From 45aa976723118910fee79052a68bf02241729b02 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 14:05:08 -0400 Subject: [PATCH 70/76] fix for delete --- .../iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) 
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 2e53c82d184..ebe0cd2c1b4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -363,14 +363,13 @@ public String getStorageLocation() throws IOException { fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); } if (this.getDvObject() instanceof Dataset) { - fullStorageLocation = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStorageLocation; + throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStorageLocation; + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; } else if (dvObject instanceof Dataverse) { throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); } + logger.fine("fullStorageLocation: " + fullStorageLocation); return fullStorageLocation; } From 94ffcbf09d03ed44e26ee03c7e539810b3a32d1a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 14:05:21 -0400 Subject: [PATCH 71/76] update to docs per QA --- doc/sphinx-guides/source/developers/big-data-support.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 71822f53b1b..858168b0513 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -68,9 +68,11 @@ For very 
large, and/or very sensitive data, it may not make sense to transfer or With this storage option Dataverse stores a URL reference for the file rather than transferring the file bytes to a store managed directly by Dataverse. Basic configuration for a remote store is described at :ref:`file-storage` in the Configuration Guide. -Once the store is configured, it can be assigned to a collection or individual datasets as with other stores. In a dataset using this store, users can reference remote files, currently only via API, which will then appear the same basic way as other datafiles. If the store has been configured with a remote-store-name or remote-store-url, the dataset file table will include this information for remote files. (Users can also upload smaller files via the UI or API which will be stored in the configured base store.) +Once the store is configured, it can be assigned to a collection or individual datasets as with other stores. In a dataset using this store, users can reference remote files which will then appear the same basic way as other datafiles. -The remote store leverages the same upload syntax as the :doc:`/developers/s3-direct-upload-api` (which itself uses the standard file upload API call): +Currently, remote files can only be added via the API. Users can also upload smaller files via the UI or API which will be stored in the configured base store. + +If the store has been configured with a remote-store-name or remote-store-url, the dataset file table will include this information for remote files. These provide a visual indicator that the files are not managed directly by Dataverse and are stored/managed by a remote trusted store. Rather than sending the file bytes, metadata for the remote file is added using the "jsonData" parameter. jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. 
For remote references, the jsonData object must also include values for: @@ -85,6 +87,8 @@ jsonData normally includes information such as a file description, tags, provena The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +(The remote store leverages the same JSON upload syntax as the last step in direct upload to S3 described in the :doc:`/developers/s3-direct-upload-api` (which itself is just a variant of the standard file add API call syntax where the file location (storageIdentifier) is sent instead of sending the file bytes)). + .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx From 708637dc5b9931207b1f2069fd37dcbd759cca33 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 14:30:54 -0400 Subject: [PATCH 72/76] keep remote and base identifiers in getStorageLocation, fix base config --- .../dataverse/dataaccess/RemoteOverlayAccessIO.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index ebe0cd2c1b4..bc5b6125d71 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -358,10 +358,6 @@ public String getStorageLocation() throws IOException { if(driverIndex >=0) { fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } - int suffixIndex = fullStorageLocation.indexOf("//"); - if(suffixIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); - } if (this.getDvObject() instanceof Dataset) { throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); } else if 
(this.getDvObject() instanceof DataFile) { @@ -495,8 +491,13 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } else if (storageLocation != null) { // ://// - String storageId = storageLocation.substring(storageLocation.indexOf(DataAccess.SEPARATOR + DataAccess.SEPARATOR.length())); - fullStorageLocation = storageId.substring(0, storageId.indexOf("//")); + //remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if(index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + //THe base store needs the baseStoreIdentifier and not the relative URL + fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); switch (baseDriverType) { case DataAccess.S3: From 37bba52452ffa61dd2f335f1e40ca68402ecc3b6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 14:32:41 -0400 Subject: [PATCH 73/76] add direct link to s3 call --- doc/sphinx-guides/source/developers/big-data-support.rst | 2 +- doc/sphinx-guides/source/developers/s3-direct-upload-api.rst | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 858168b0513..3ca363a93ce 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -87,7 +87,7 @@ jsonData normally includes information such as a file description, tags, provena The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 -(The remote store leverages the same JSON upload syntax as the last step in direct upload to S3 described in the :doc:`/developers/s3-direct-upload-api` (which itself is just a variant of the standard file add API call syntax where the file location 
(storageIdentifier) is sent instead of sending the file bytes)). +(The remote store leverages the same JSON upload syntax as the last step in direct upload to S3 described in the :ref:`Adding the Uploaded file to the Dataset ` section of the :doc:`/developers/s3-direct-upload-api`.) .. code-block:: bash diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index d1a71c313ca..3dc73ce6a0c 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -88,6 +88,8 @@ If the client is unable to complete the multipart upload, it should call the abo curl -X DELETE "$SERVER_URL/api/datasets/mpload?..." +.. _direct-add-to-dataset-api: + Adding the Uploaded file to the Dataset --------------------------------------- @@ -117,7 +119,7 @@ Note that this API call can be used independently of the others, e.g. supporting With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. To add multiple Uploaded Files to the Dataset -------------------------------------------------- +--------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. 
For direct uploads, the jsonData object must also include values for: From 38856efef7c87520927b1b71624aa21a127de94f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 15:38:24 -0400 Subject: [PATCH 74/76] fix base store config/related test that missed --- .../dataaccess/RemoteOverlayAccessIO.java | 27 ++++++++++++++++++- .../dataaccess/RemoteOverlayAccessIOTest.java | 10 ++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index bc5b6125d71..b80478baa92 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -469,7 +469,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { if (this.getDvObject() != null) { - fullStorageLocation = getStorageLocation(); + fullStorageLocation = getStoragePath(); // S3 expects :/// switch (baseDriverType) { @@ -530,6 +530,31 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } } + //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. 
+ private String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if(suffixIndex >=0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (this.getDvObject() instanceof Dataset) { + fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (this.getDvObject() instanceof DataFile) { + fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + }else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + public CloseableHttpClient getSharedHttpClient() { if (httpclient == null) { try { diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index 122f84c5c19..2dd3f372ce1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -31,6 +31,7 @@ public class RemoteOverlayAccessIOTest { private Dataset dataset; private DataFile datafile; private DataFile badDatafile; + private String baseStoreId="182ad2bda2f-c3508e719076"; private String logoPath = "images/dataverse_project_logo.svg"; private String pid = "10.5072/F2/ABCDEF"; @@ -49,11 +50,11 @@ public 
void setUp() { dataset = MocksFactory.makeDataset(); dataset.setGlobalId(GlobalId.parse("doi:" + pid).get()); datafile.setOwner(dataset); - datafile.setStorageIdentifier("test://" + logoPath); + datafile.setStorageIdentifier("test://" + baseStoreId + "//" + logoPath); badDatafile = MocksFactory.makeDataFile(); badDatafile.setOwner(dataset); - badDatafile.setStorageIdentifier("test://../.." + logoPath); + badDatafile.setStorageIdentifier("test://" + baseStoreId + "//../.." + logoPath); } @AfterEach @@ -99,8 +100,11 @@ void testRemoteOverlayFiles() throws IOException { remoteIO.open(DataAccessOption.READ_ACCESS); assertTrue(remoteIO.getSize() > 0); // If we ask for the path for an aux file, it is correct + System.out.println(Paths + .get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), pid, baseStoreId + ".auxobject").toString()); + System.out.println(remoteIO.getAuxObjectAsPath("auxobject").toString()); assertTrue(Paths - .get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), pid, logoPath + ".auxobject") + .get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), pid, baseStoreId + ".auxobject") .equals(remoteIO.getAuxObjectAsPath("auxobject"))); IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(badDatafile), "Expected getStorageIO() to throw, but it didn't"); From e72def0e476471b8a131673900ffde132676ea29 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 18 Aug 2022 08:49:13 -0400 Subject: [PATCH 75/76] Add test for bad remote URLs --- .../iq/dataverse/dataaccess/DataAccess.java | 23 +++++++++++++++++++ .../dataaccess/RemoteOverlayAccessIO.java | 17 +++++++++++++- .../iq/dataverse/dataaccess/StorageIO.java | 16 +++++++++++++ .../datasetutility/OptionalFileParams.java | 9 ++++---- .../dataaccess/RemoteOverlayAccessIOTest.java | 8 +++++++ 5 files changed, 68 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 285bef02272..bccaf58edfc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -342,4 +342,27 @@ public static boolean uploadToDatasetAllowed(Dataset d, String storageIdentifier } return allowed; } + + + //Method to verify that a submitted storageIdentifier (i.e. in direct/remote uploads) is consistent with the store's configuration. + public static boolean isValidDirectStorageIdentifier(String storageId) { + String driverId = DataAccess.getStorageDriverFromIdentifier(storageId); + String storageType = DataAccess.getDriverType(driverId); + if (storageType.equals("tmp") || storageType.equals("Undefined")) { + return false; + } + switch (storageType) { + case FILE: + return FileAccessIO.isValidIdentifier(driverId, storageId); + case SWIFT: + return SwiftAccessIO.isValidIdentifier(driverId, storageId); + case S3: + return S3AccessIO.isValidIdentifier(driverId, storageId); + case REMOTE: + return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId); + default: + logger.warning("Request to validate for storage driver: " + driverId); + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index b80478baa92..bc421949ed7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -611,8 +611,23 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } + protected static boolean isValidIdentifier(String driverId, String storageId) { + String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUrl = System.getProperty("dataverse.files." 
+ driverId + ".base-url"); + try { + URI absoluteURI = new URI(baseUrl + "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); + return false; + } + } catch(URISyntaxException use) { + logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); + return false; + } + return true; + } + public static String getBaseStoreIdFor(String driverId) { return System.getProperty("dataverse.files." + driverId + ".base-store"); } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 19f1e26ef4f..46b4b13a889 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -37,6 +37,8 @@ import java.nio.file.Path; import java.util.Iterator; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; //import org.apache.commons.httpclient.Header; @@ -591,4 +593,18 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary public static String getDriverPrefix(String driverId) { return driverId+ DataAccess.SEPARATOR; } + + //Check that storageIdentifier is consistent with store's config + //False will prevent direct uploads + protected static boolean isValidIdentifier(String driverId, String storageId) { + return true; + } + + //Utility to verify the standard UUID pattern for stored files. 
+ protected static boolean usesStandardNamePattern(String identifier) { + + Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$"); + Matcher m = r.matcher(identifier); + return m.find(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 080132409f5..959dbc4e262 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -372,14 +372,15 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // get storage identifier as string // ------------------------------- if ((jsonObj.has(STORAGE_IDENTIFIER_ATTR_NAME)) && (!jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).isJsonNull())){ - // Basic sanity check that driver specified is defined. Note that being able to + // Basic sanity check that driver specified is defined and the overall + // identifier is consistent with that store's config. Note that being able to // specify a driver that does not support direct uploads is currently used with // out-of-band uploads, e.g. for bulk migration. 
String storageId = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); - String type = DataAccess.getDriverType(DataAccess.getStorageDriverFromIdentifier(storageId)); - if(!type.equals("tmp")&& !type.equals("Undefined")) { - this.storageIdentifier = storageId; + if (DataAccess.isValidDirectStorageIdentifier(storageId)) { + this.storageIdentifier = storageId; } + } // ------------------------------- diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index 2dd3f372ce1..f66b3306dda 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -113,4 +113,12 @@ void testRemoteOverlayFiles() throws IOException { } + @Test + void testRemoteOverlayIdentifierFormats() throws IOException { + + assertTrue(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier())); + assertFalse(DataAccess.isValidDirectStorageIdentifier(badDatafile.getStorageIdentifier())); + assertFalse(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier().replace("test", "bad"))); + } + } From 70a8b3b29e0fd7e94ae54308c77efcac1d18a73d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 18 Aug 2022 08:49:46 -0400 Subject: [PATCH 76/76] note re 404 URLs --- doc/sphinx-guides/source/developers/big-data-support.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 3ca363a93ce..50bb2224610 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -103,6 +103,7 @@ The variant allowing multiple files to be added once that is discussed in the :d Considerations: * Remote stores are configured with a base-url which limits 
what files can be referenced, i.e. the absolute URL for the file is /. +* The current store will not prevent you from providing a relative URL that results in a 404 when resolved (i.e., if you make a typo). You should check to make sure the file exists at the location you specify - by trying to download it in Dataverse, by checking to see that Dataverse was able to get the file size (which it does by doing a HEAD call to that location), or just manually trying the URL in your browser. * Admins are trusting the organization managing the site/service at base-url to maintain the referenced files for as long as the Dataverse instance needs them. Formal agreements are recommended for production * For large files, direct-download should always be used with a remote store. (Otherwise the Dataverse will be involved in the download.) * For simple websites, a remote store should be marked public which will turn off restriction and embargo functionality in Dataverse (since Dataverse cannot restrict access to the file on the remote website) @@ -110,7 +111,7 @@ Considerations: * Sophisticated remote services may wish to register file URLs that do not directly reference the file contents (bytes) but instead direct the user to a website where further information about the remote service's download process can be found. * Due to the current design, ingest cannot be done on remote files and administrators should disable ingest when using a remote store. This can be done by setting the ingest size limit for the store to 0 and/or using the recently added option to not perform tabular ingest on upload. * Dataverse will normally try to access the file contents itself, i.e. for ingest (in future versions), full-text indexing, thumbnail creation, etc. This processing may not be desirable for large/sensitive data, and, for the case where the URL does not reference the file itself, would not be possible. 
At present, administrators should configure the relevant size limits to avoid such actions. -* The current implementation of remote stores is experimental in the sense that future work to enahnce it is planned. This work may result in changes to how the store works and lead to additional work when upgrading for sites that start using this mechanism now. +* The current implementation of remote stores is experimental in the sense that future work to enhance it is planned. This work may result in changes to how the store works and lead to additional work when upgrading for sites that start using this mechanism now. To configure the options mentioned above, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: