From cc0833035208d6c03de18a00b936d5fd35dc8d8b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 4 Dec 2019 18:02:24 -0500 Subject: [PATCH 01/31] multistore implementation (built on direct upload) Conflicts: pom.xml src/main/java/edu/harvard/iq/dataverse/api/Datasets.java src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java --- pom.xml | 4 + .../iq/dataverse/DataFileServiceBean.java | 3 +- .../harvard/iq/dataverse/api/Datasets.java | 10 +- .../iq/dataverse/dataaccess/DataAccess.java | 119 +++-- .../iq/dataverse/dataaccess/FileAccessIO.java | 73 +-- .../iq/dataverse/dataaccess/S3AccessIO.java | 113 +++-- .../iq/dataverse/dataaccess/StorageIO.java | 13 +- .../dataverse/dataaccess/SwiftAccessIO.java | 81 +-- .../impl/AbstractCreateDatasetCommand.java | 11 +- .../dataverse/ingest/IngestServiceBean.java | 462 ++++++++++-------- .../harvard/iq/dataverse/util/FileUtil.java | 8 +- .../dataaccess/FileAccessIOTest.java | 8 +- .../dataverse/dataaccess/S3AccessIOTest.java | 4 +- .../dataverse/dataaccess/StorageIOTest.java | 5 +- .../dataaccess/SwiftAccessIOTest.java | 5 +- 15 files changed, 527 insertions(+), 392 deletions(-) diff --git a/pom.xml b/pom.xml index 481f0418c3c..06f3d26a650 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,11 @@ --> edu.harvard.iq dataverse +<<<<<<< IQSS/6485 4.18.1 +======= + 4.18.1-tdl-dev-ms +>>>>>>> 08f6849 multistore implementation (built on direct upload) war dataverse diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index a35bfb0df15..54a88c27d91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1565,7 +1565,8 @@ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws I throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject " + "(id: " + dataFileId + ", 
location: " + storageLocation); } - StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); + logger.info("deleting: " + storageLocation); + StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); directStorageAccess.delete(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 7fa85473d1a..f7f31077864 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -78,6 +78,8 @@ import edu.harvard.iq.dataverse.S3PackageImporter; import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDvObjectPIDMetadataCommand; @@ -92,6 +94,7 @@ import edu.harvard.iq.dataverse.util.ArchiverUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.search.IndexServiceBean; @@ -1297,11 +1300,12 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String if ("validation passed".equals(statusMessageFromDcm)) { logger.log(Level.INFO, "Checksum Validation passed for DCM."); - String storageDriver = (System.getProperty("dataverse.files.storage-driver-id") != null) ? 
System.getProperty("dataverse.files.storage-driver-id") : "file"; + String storageDriver = DataAccess.getStorageDriverId(dataset.getDataverseContext()); String uploadFolder = jsonFromDcm.getString("uploadFolder"); int totalSize = jsonFromDcm.getInt("totalSize"); + String storageDriverType = System.getProperty("dataverse.file." + storageDriver + ".type"); - if (storageDriver.equals("file")) { + if (storageDriverType.equals("file")) { logger.log(Level.INFO, "File storage driver used for (dataset id={0})", dataset.getId()); ImportMode importMode = ImportMode.MERGE; @@ -1317,7 +1321,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String String message = wr.getMessage(); return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to put the files into Dataverse. Message was '" + message + "'."); } - } else if(storageDriver.equals("s3")) { + } else if(storageDriverType.equals("s3")) { logger.log(Level.INFO, "S3 storage driver used for DCM (dataset id={0})", dataset.getId()); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index acb9336b5c6..de8be99152c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -20,8 +20,11 @@ package edu.harvard.iq.dataverse.dataaccess; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; import java.io.IOException; +import java.util.HashMap; +import java.util.Properties; /** * * @author Leonid Andreev @@ -51,16 +54,22 @@ public static StorageIO getStorageIO(T dvObject, DataAcc || dvObject.getStorageIdentifier().isEmpty()) { throw new IOException("getDataAccessObject: null or invalid datafile."); } - - if (dvObject.getStorageIdentifier().startsWith("file://") - || 
(!dvObject.getStorageIdentifier().matches("^[a-z][a-z0-9]*://.*"))) { - return new FileAccessIO<>(dvObject, req); - } else if (dvObject.getStorageIdentifier().startsWith("swift://")){ - return new SwiftAccessIO<>(dvObject, req); - } else if (dvObject.getStorageIdentifier().startsWith("s3://")){ - return new S3AccessIO<>(dvObject, req); - } else if (dvObject.getStorageIdentifier().startsWith("tmp://")) { - throw new IOException("DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); + String storageIdentifier = dvObject.getStorageIdentifier(); + int separatorIndex = storageIdentifier.indexOf("://"); + String storageDriverId = "file"; //default + if(separatorIndex>0) { + storageDriverId = storageIdentifier.substring(0,separatorIndex); + } + String storageType = getDriverType(storageDriverId); + switch(storageType) { + case "file": + return new FileAccessIO<>(dvObject, req, storageDriverId); + case "s3": + return new S3AccessIO<>(dvObject, req, storageDriverId); + case "swift": + return new SwiftAccessIO<>(dvObject, req, storageDriverId); + case "tmp": + throw new IOException("DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); } // TODO: @@ -76,26 +85,47 @@ public static StorageIO getStorageIO(T dvObject, DataAcc // Experimental extension of the StorageIO system allowing direct access to // stored physical files that may not be associated with any DvObjects - public static StorageIO getDirectStorageIO(String storageLocation) throws IOException { - if (storageLocation.startsWith("file://")) { - return new FileAccessIO(storageLocation.substring(7)); - } else if (storageLocation.startsWith("swift://")){ - return new SwiftAccessIO<>(storageLocation.substring(8)); - } else if (storageLocation.startsWith("s3://")){ - return new S3AccessIO<>(storageLocation.substring(5)); + public static StorageIO getDirectStorageIO(String storageLocation) throws IOException { + String[] response = 
getDriverIdAndStorageId(storageLocation); + String storageDriverId = response[0]; + String storageIdentifier=response[1]; + String storageType = getDriverType(storageDriverId); + switch(storageType) { + case "file": + return new FileAccessIO<>(storageIdentifier, storageDriverId); + case "s3": + return new S3AccessIO<>(storageIdentifier, storageDriverId); + case "swift": + return new SwiftAccessIO<>(storageIdentifier, storageDriverId); + default: + throw new IOException("getDirectStorageIO: Unsupported storage method."); } - - throw new IOException("getDirectStorageIO: Unsupported storage method."); + } + + public static String[] getDriverIdAndStorageId(String storageLocation) { + //default if no prefix + String storageIdentifier=storageLocation; + int separatorIndex = storageLocation.indexOf("://"); + String storageDriverId = "file"; //default + if(separatorIndex>0) { + storageDriverId = storageLocation.substring(0,separatorIndex); + storageIdentifier = storageLocation.substring(separatorIndex + 3); + } + return new String[]{storageDriverId, storageIdentifier}; + } + + public static String getDriverType(String driverId) { + return System.getProperty("dataverse.files." + driverId + ".type", "file"); } // createDataAccessObject() methods create a *new*, empty DataAccess objects, // for saving new, not yet saved datafiles. 
public static StorageIO createNewStorageIO(T dvObject, String storageTag) throws IOException { - return createNewStorageIO(dvObject, storageTag, DEFAULT_STORAGE_DRIVER_IDENTIFIER); + return createNewStorageIO(dvObject, storageTag, getStorageDriverId(dvObject.getDataverseContext())); } - public static StorageIO createNewStorageIO(T dvObject, String storageTag, String driverIdentifier) throws IOException { + public static StorageIO createNewStorageIO(T dvObject, String storageTag, String storageDriverId) throws IOException { if (dvObject == null || storageTag == null || storageTag.isEmpty()) { @@ -106,23 +136,48 @@ public static StorageIO createNewStorageIO(T dvObject, S dvObject.setStorageIdentifier(storageTag); - if (driverIdentifier == null) { - driverIdentifier = "file"; + if (storageDriverId == null) { + storageDriverId = "file"; } - - if (driverIdentifier.equals("file")) { - storageIO = new FileAccessIO<>(dvObject, null); - } else if (driverIdentifier.equals("swift")) { - storageIO = new SwiftAccessIO<>(dvObject, null); - } else if (driverIdentifier.equals("s3")) { - storageIO = new S3AccessIO<>(dvObject, null); - } else { - throw new IOException("createDataAccessObject: Unsupported storage method " + driverIdentifier); + String storageType = getDriverType(storageDriverId); + switch(storageType) { + case "file": + storageIO = new FileAccessIO<>(dvObject, null, storageDriverId); + break; + case "swift": + storageIO = new SwiftAccessIO<>(dvObject, null, storageDriverId); + break; + case "s3": + storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); + break; + default: + throw new IOException("createDataAccessObject: Unsupported storage method " + storageDriverId); } storageIO.open(DataAccessOption.WRITE_ACCESS); return storageIO; } + static HashMap drivers = null; + + public static String getStorageDriverId(Dataverse dataverse) { + if (drivers==null) { + drivers = new HashMap(); + Properties p = System.getProperties(); + for(String property: 
p.stringPropertyNames()) { + if(property.startsWith("dataverse.files.") && property.endsWith(".affiliation")) { + String driverId = property.substring(16); + driverId=driverId.substring(0,driverId.indexOf('.')); + drivers.put(p.get(property).toString(), driverId); + } + + } + } + if(drivers.containsKey(dataverse.getAffiliation())) { + return drivers.get(dataverse.getAffiliation()); + } + return DEFAULT_STORAGE_DRIVER_IDENTIFIER; + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 3afab58c1ce..6284e7fd51e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -49,25 +49,23 @@ public class FileAccessIO extends StorageIO { - public FileAccessIO () { - this((T)null); - } - - public FileAccessIO(T dvObject) { - this (dvObject, null); - - } + public FileAccessIO() { + //Partially functional StorageIO object - constructor only for testing + super(); + } + + public FileAccessIO(T dvObject, DataAccessRequest req, String driverId ) { - public FileAccessIO(T dvObject, DataAccessRequest req) { - - super(dvObject, req); + super(dvObject, req, driverId); this.setIsLocalFile(true); } // "Direct" File Access IO, opened on a physical file not associated with // a specific DvObject - public FileAccessIO(String storageLocation) { + public FileAccessIO(String storageLocation, String driverId) { + super(storageLocation, driverId); + this.setIsLocalFile(true); physicalPath = Paths.get(storageLocation); } @@ -122,9 +120,10 @@ public void open (DataAccessOption... options) throws IOException { } } else if (isWriteAccess) { // Creates a new directory as needed for a dataset. 
- if (dataFile.getOwner().getFileSystemDirectory() != null && !Files.exists(dataFile.getOwner().getFileSystemDirectory())) { - Files.createDirectories(dataFile.getOwner().getFileSystemDirectory()); - } + Path datasetPath=Paths.get(getDatasetDirectory()); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } FileOutputStream fout = openLocalFileAsOutputStream(); if (fout == null) { @@ -158,10 +157,11 @@ public void open (DataAccessOption... options) throws IOException { // this.setInputStream(fin); } else if (isWriteAccess) { //this checks whether a directory for a dataset exists - if (dataset.getFileSystemDirectory() != null && !Files.exists(dataset.getFileSystemDirectory())) { - Files.createDirectories(dataset.getFileSystemDirectory()); - } - dataset.setStorageIdentifier("file://"+dataset.getAuthority()+"/"+dataset.getIdentifier()); + Path datasetPath=Paths.get(getDatasetDirectory()); + if (datasetPath != null && !Files.exists(datasetPath)) { + Files.createDirectories(datasetPath); + } + dataset.setStorageIdentifier(this.driverId + "://"+dataset.getAuthority()+"/"+dataset.getIdentifier()); } } else if (dvObject instanceof Dataverse) { @@ -290,7 +290,7 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { } Path auxPath = null; if (dvObject instanceof DataFile) { - auxPath = Paths.get(datasetDirectory, dvObject.getStorageIdentifier() + "." + auxItemTag); + auxPath = Paths.get(datasetDirectory, stripDriverId(dvObject.getStorageIdentifier()) + "." 
+ auxItemTag); } else if (dvObject instanceof Dataset) { auxPath = Paths.get(datasetDirectory, auxItemTag); } else if (dvObject instanceof Dataverse) { @@ -305,7 +305,8 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { return auxPath; } - @Override + + @Override public void backupAsAux(String auxItemTag) throws IOException { Path auxPath = getAuxObjectAsPath(auxItemTag); @@ -369,7 +370,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) thr } List cachedFileNames = new ArrayList<>(); - String baseName = this.getDataFile().getStorageIdentifier() + "."; + String baseName = stripDriverId(this.getDataFile().getStorageIdentifier()) + "."; for (Path auxPath : cachedFiles) { cachedFileNames.add(auxPath.getFileName().toString().substring(baseName.length())); } @@ -427,7 +428,7 @@ public Path getFileSystemPath() throws IOException { throw new IOException("Data Access: No local storage identifier defined for this datafile."); } - physicalPath = Paths.get(datasetDirectory, dvObject.getStorageIdentifier()); + physicalPath = Paths.get(datasetDirectory, stripDriverId(dvObject.getStorageIdentifier())); return physicalPath; } @@ -533,9 +534,9 @@ private String getDatasetDirectory() throws IOException { Path datasetDirectoryPath=null; if (dvObject instanceof Dataset) { - datasetDirectoryPath = this.getDataset().getFileSystemDirectory(); + datasetDirectoryPath = Paths.get(this.getDataset().getAuthorityForFileStorage(), this.getDataset().getIdentifierForFileStorage()); } else if (dvObject instanceof DataFile) { - datasetDirectoryPath = this.getDataFile().getOwner().getFileSystemDirectory(); + datasetDirectoryPath = Paths.get(this.getDataFile().getOwner().getAuthorityForFileStorage(), this.getDataFile().getOwner().getIdentifierForFileStorage()); } else if (dvObject instanceof Dataverse) { throw new IOException("FileAccessIO: Dataverses are not a supported dvObject"); } @@ -543,7 +544,7 @@ private String getDatasetDirectory() throws 
IOException { if (datasetDirectoryPath == null) { throw new IOException("Could not determine the filesystem directory of the parent dataset."); } - String datasetDirectory = datasetDirectoryPath.toString(); + String datasetDirectory = Paths.get(getFilesRootDirectory(), datasetDirectoryPath.toString()).toString(); if (dvObject.getStorageIdentifier() == null || dvObject.getStorageIdentifier().isEmpty()) { throw new IOException("Data Access: No local storage identifier defined for this datafile."); @@ -552,6 +553,16 @@ private String getDatasetDirectory() throws IOException { return datasetDirectory; } + + private String getFilesRootDirectory() { + String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory"); + + if (filesRootDirectory == null || filesRootDirectory.equals("")) { + filesRootDirectory = "/tmp/files"; + } + return filesRootDirectory; + } + private List listCachedFiles() throws IOException { List auxItems = new ArrayList<>(); @@ -571,7 +582,7 @@ private List listCachedFiles() throws IOException { throw new IOException("Null or invalid DataFile in FileAccessIO object."); } - baseName = this.getDataFile().getStorageIdentifier(); + baseName = stripDriverId(this.getDataFile().getStorageIdentifier()); datasetDirectoryPath = this.getDataFile().getOwner().getFileSystemDirectory(); } @@ -612,5 +623,11 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException } return in; } - + private String stripDriverId(String storageIdentifier) { + int separatorIndex = storageIdentifier.indexOf("://"); + if(separatorIndex>0) { + return storageIdentifier.substring(separatorIndex + 3); + } + return storageIdentifier; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index d2429f6ebe7..5efd612901d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -3,6 +3,7 @@ import com.amazonaws.AmazonClientException; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.profile.ProfileCredentialsProvider; import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; @@ -64,16 +65,9 @@ public class S3AccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); - public S3AccessIO() { - this((T)null); - } - - public S3AccessIO(T dvObject) { - this(dvObject, null); - } - - public S3AccessIO(T dvObject, DataAccessRequest req) { - super(dvObject, req); + public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { + super(dvObject, req, driverId); + readSettings(); this.setIsLocalFile(false); try { @@ -91,6 +85,7 @@ public S3AccessIO(T dvObject, DataAccessRequest req) { // Boolean is inverted, otherwise setting dataverse.files.s3-chunked-encoding=false would result in leaving Chunked Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); + s3CB.setCredentials(new ProfileCredentialsProvider(s3profile)); // let's build the client :-) this.s3 = s3CB.build(); @@ -105,50 +100,32 @@ public S3AccessIO(T dvObject, DataAccessRequest req) { } } - public S3AccessIO(String storageLocation) { - this((T)null); - + + public S3AccessIO(String storageLocation, String driverId) { + this(null, null, driverId); // TODO: validate the storage location supplied bucketName = storageLocation.substring(0,storageLocation.indexOf('/')); key = storageLocation.substring(storageLocation.indexOf('/')+1); } - public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client) { - super(dvObject, req); + public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client, String driverId) { + super(dvObject, req, driverId); + 
readSettings(); this.setIsLocalFile(false); this.s3 = s3client; } - - public static String S3_IDENTIFIER_PREFIX = "s3"; private AmazonS3 s3 = null; private TransferManager tm = null; - /** - * Pass in a URL pointing to your S3 compatible storage. - * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html - */ - private String s3CEUrl = System.getProperty("dataverse.files.s3-custom-endpoint-url", ""); - /** - * Pass in a region to use for SigV4 signing of requests. - * Defaults to "dataverse" as it is not relevant for custom S3 implementations. - */ - private String s3CERegion = System.getProperty("dataverse.files.s3-custom-endpoint-region", "dataverse"); - /** - * Pass in a boolean value if path style access should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - private boolean s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files.s3-path-style-access", "false")); - /** - * Pass in a boolean value if payload signing should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - private boolean s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files.s3-payload-signing","false")); - /** - * Pass in a boolean value if chunked encoding should not be used within the S3 client. - * Anything but case-insensitive "false" will lead to value of true, which is default value, too. 
- */ - private boolean s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files.s3-chunked-encoding","true")); - private String bucketName = System.getProperty("dataverse.files.s3-bucket-name"); + //See readSettings() for the source of these values + private String s3CEUrl = null; + private String s3CERegion = null; + private boolean s3pathStyleAccess = false; + private boolean s3payloadSigning = false; + private boolean s3chunkedEncoding = true; + private String s3profile = "default"; + private String bucketName = null; + private String key; @Override @@ -212,11 +189,11 @@ public void open(DataAccessOption... options) throws IOException { } else if (isWriteAccess) { key = dataFile.getOwner().getAuthorityForFileStorage() + "/" + this.getDataFile().getOwner().getIdentifierForFileStorage(); - if (storageIdentifier.startsWith(S3_IDENTIFIER_PREFIX + "://")) { + if (storageIdentifier.startsWith(this.driverId + "://")) { key += "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); } else { key += "/" + storageIdentifier; - dvObject.setStorageIdentifier(S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + storageIdentifier); + dvObject.setStorageIdentifier(this.driverId + "://" + bucketName + ":" + storageIdentifier); } } @@ -231,7 +208,7 @@ public void open(DataAccessOption... 
options) throws IOException { } else if (dvObject instanceof Dataset) { Dataset dataset = this.getDataset(); key = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - dataset.setStorageIdentifier(S3_IDENTIFIER_PREFIX + "://" + key); + dataset.setStorageIdentifier(this.driverId + "://" + key); } else if (dvObject instanceof Dataverse) { throw new IOException("Data Access: Storage driver does not support dvObject type Dataverse yet"); } else { @@ -688,7 +665,7 @@ public String getStorageLocation() throws IOException { throw new IOException("Failed to obtain the S3 key for the file"); } - return S3_IDENTIFIER_PREFIX + "://" + bucketName + "/" + locationKey; + return this.driverId + "://" + bucketName + "/" + locationKey; } @Override @@ -772,8 +749,8 @@ String getMainFileKey() throws IOException { throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); } - if (storageIdentifier.startsWith(S3_IDENTIFIER_PREFIX + "://")) { - bucketName = storageIdentifier.substring((S3_IDENTIFIER_PREFIX + "://").length(), storageIdentifier.lastIndexOf(":")); + if (storageIdentifier.startsWith(this.driverId + "://")) { + bucketName = storageIdentifier.substring((this.driverId + "://").length(), storageIdentifier.lastIndexOf(":")); key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); } else { throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object."); @@ -858,4 +835,42 @@ int getUrlExpirationMinutes() { } return 60; } + + private void readSettings() { + /** + * Pass in a URL pointing to your S3 compatible storage. + * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + */ + s3CEUrl = System.getProperty("dataverse.files." 
+ this.driverId + ".custom-endpoint-url", ""); + /** + * Pass in a region to use for SigV4 signing of requests. + * Defaults to "dataverse" as it is not relevant for custom S3 implementations. + */ + s3CERegion = System.getProperty("dataverse.files." + this.driverId + ".custom-endpoint-region", "dataverse"); + /** + * Pass in a boolean value if path style access should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".path-style-access", "false")); + /** + * Pass in a boolean value if payload signing should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".payload-signing","false")); + /** + * Pass in a boolean value if chunked encoding should not be used within the S3 client. + * Anything but case-insensitive "false" will lead to value of true, which is default value, too. + */ + s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".chunked-encoding","true")); + /** + * Pass in a string value if this storage driver should use a non-default AWS S3 profile. + * The default is "default" which should work when only one profile exists. + */ + s3profile = System.getProperty("dataverse.files." + this.driverId + ".profile","default"); + + bucketName = System.getProperty("dataverse.files." 
+ this.driverId + ".bucket-name"); + + + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 1043d3f44aa..9e0cf7e11b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -53,17 +53,21 @@ public abstract class StorageIO { public StorageIO() { } - - public StorageIO(T dvObject) { - this(dvObject, null); + + public StorageIO(String storageLocation, String driverId) { + this.driverId=driverId; } - public StorageIO(T dvObject, DataAccessRequest req) { + public StorageIO(T dvObject, DataAccessRequest req, String driverId) { this.dvObject = dvObject; this.req = req; + this.driverId=driverId; if (this.req == null) { this.req = new DataAccessRequest(); } + if (this.driverId == null) { + this.driverId = "file"; + } } @@ -183,6 +187,7 @@ public boolean canWrite() { private OutputStream out; protected Channel channel; protected DvObject dvObject; + protected String driverId; /*private int status;*/ private long size; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 73ee28e17b9..3bc29cb9836 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -48,43 +48,48 @@ public class SwiftAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO"); - public SwiftAccessIO() { - this((T)null); - } - - public SwiftAccessIO(T dvObject) { - this(dvObject, null); - } - - public SwiftAccessIO(T dvObject, DataAccessRequest req) { - super(dvObject, req); - + public SwiftAccessIO() { + //Partially functional StorageIO object - constructor only for testing + super(); + } + + public SwiftAccessIO(T dvObject, 
DataAccessRequest req, String driverId) { + super(dvObject, req, driverId); + readSettings(); this.setIsLocalFile(false); } - public SwiftAccessIO(String swiftLocation) { - this((T)null); + public SwiftAccessIO(String swiftLocation, String driverId) { + super(swiftLocation, driverId); + readSettings(); this.swiftLocation = swiftLocation; + this.setIsLocalFile(false); } - private Account account = null; + private void readSettings() { + isPublicContainer = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".isPublicContainer", "true")); + swiftFolderPathSeparator = System.getProperty("dataverse.files." + this.driverId + ".folderPathSeparator", "_"); + swiftDefaultEndpoint = System.getProperty("dataverse.files." + this.driverId + ".defaultEndpoint"); + tempUrlExpires = Integer.parseInt(System.getProperty("dataverse.files." + this.driverId + ".temporaryUrlExpiryTime", "60")); + + } + + private Account account = null; private StoredObject swiftFileObject = null; private Container swiftContainer = null; - private boolean isPublicContainer = Boolean.parseBoolean(System.getProperty("dataverse.files.swift.isPublicContainer", "true")); - private String swiftFolderPathSeparator = System.getProperty("dataverse.files.swift.folderPathSeparator", "_"); - private String swiftDefaultEndpoint = System.getProperty("dataverse.files.swift.defaultEndpoint"); + private boolean isPublicContainer = true; + private String swiftFolderPathSeparator = "_"; + private String swiftDefaultEndpoint = null; //for hash private static final String HMAC_SHA1_ALGORITHM = "HmacSHA1"; //TODO: should this be dynamically generated based on size of file? 
//Also, this is in seconds - private static int TEMP_URL_EXPIRES = Integer.parseInt(System.getProperty("dataverse.files.swift.temporaryUrlExpiryTime", "60")); + private int tempUrlExpires = 60; private static int LIST_PAGE_LIMIT = 100; - public static String SWIFT_IDENTIFIER_PREFIX = "swift"; - @Override public void open(DataAccessOption... options) throws IOException { DataAccessRequest req = this.getRequest(); @@ -503,7 +508,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt if (dvObject instanceof DataFile) { Dataset owner = this.getDataFile().getOwner(); - if (storageIdentifier.startsWith(SWIFT_IDENTIFIER_PREFIX + "://")) { + if (storageIdentifier.startsWith(this.driverId + "://")) { // This is a call on an already existing swift object. String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3); @@ -547,17 +552,17 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt //setSwiftContainerName(swiftFolderPath); //swiftFileName = dataFile.getDisplayName(); //Storage Identifier is now updated after the object is uploaded on Swift. - dvObject.setStorageIdentifier(SWIFT_IDENTIFIER_PREFIX + "://" + swiftDefaultEndpoint + ":" + swiftFolderPath + ":" + swiftFileName); + dvObject.setStorageIdentifier(this.driverId + "://" + swiftDefaultEndpoint + ":" + swiftFolderPath + ":" + swiftFileName); } else { throw new IOException("SwiftAccessIO: unknown access mode."); } } else if (dvObject instanceof Dataset) { Dataset dataset = this.getDataset(); - if (storageIdentifier.startsWith(SWIFT_IDENTIFIER_PREFIX + "://")) { + if (storageIdentifier.startsWith(this.driverId + "://")) { // This is a call on an already existing swift object. 
- //TODO: determine how storage identifer will give us info + //TODO: determine how storage identifier will give us info String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3); //number of tokens should be two because there is not main file if (swiftStorageTokens.length != 2) { @@ -596,7 +601,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt swiftPseudoFolderPathSeparator + dataset.getIdentifierForFileStorage(); swiftFileName = auxItemTag; - dvObject.setStorageIdentifier(SWIFT_IDENTIFIER_PREFIX + "://" + swiftEndPoint + ":" + swiftFolderPath); + dvObject.setStorageIdentifier(this.driverId + "://" + swiftEndPoint + ":" + swiftFolderPath); } else { throw new IOException("SwiftAccessIO: unknown access mode."); } @@ -623,7 +628,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt other swiftContainerName Object Store pseudo-folder can be created, which is not provide by the joss Java swift library as of yet. */ - if (storageIdentifier.startsWith(SWIFT_IDENTIFIER_PREFIX + "://")) { + if (storageIdentifier.startsWith(this.driverId + "://")) { // An existing swift object; the container must already exist as well. 
this.swiftContainer = account.getContainer(swiftContainerName); } else { @@ -659,9 +664,9 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt setRemoteUrl(getSwiftFileURI(fileObject)); if (!this.isWriteAccess && !this.getDataFile().isIngestInProgress()) { //otherwise this gets called a bunch on upload - setTemporarySwiftUrl(generateTemporarySwiftUrl(swiftEndPoint, swiftContainerName, swiftFileName, TEMP_URL_EXPIRES)); - setTempUrlSignature(generateTempUrlSignature(swiftEndPoint, swiftContainerName, swiftFileName, TEMP_URL_EXPIRES)); - setTempUrlExpiry(generateTempUrlExpiry(TEMP_URL_EXPIRES, System.currentTimeMillis())); + setTemporarySwiftUrl(generateTemporarySwiftUrl(swiftEndPoint, swiftContainerName, swiftFileName, tempUrlExpires)); + setTempUrlSignature(generateTempUrlSignature(swiftEndPoint, swiftContainerName, swiftFileName, tempUrlExpires)); + setTempUrlExpiry(generateTempUrlExpiry(tempUrlExpires, System.currentTimeMillis())); } setSwiftFileName(swiftFileName); @@ -732,12 +737,12 @@ private StoredObject openSwiftAuxFile(boolean writeAccess, String auxItemTag) th } Account authenticateWithSwift(String swiftEndPoint) throws IOException { - String swiftEndPointAuthUrl = System.getProperty("dataverse.files.swift.authUrl." + swiftEndPoint); - String swiftEndPointUsername = System.getProperty("dataverse.files.swift.username." + swiftEndPoint); - String swiftEndPointSecretKey = System.getProperty("dataverse.files.swift.password." + swiftEndPoint); - String swiftEndPointTenantName = System.getProperty("dataverse.files.swift.tenant." + swiftEndPoint); - String swiftEndPointAuthMethod = System.getProperty("dataverse.files.swift.authType." + swiftEndPoint); - String swiftEndPointTenantId = System.getProperty("dataverse.files.swift.tenant." + swiftEndPoint); + String swiftEndPointAuthUrl = System.getProperty("dataverse.files." + this.driverId + ".authUrl." 
+ swiftEndPoint); + String swiftEndPointUsername = System.getProperty("dataverse.files." + this.driverId + ".username." + swiftEndPoint); + String swiftEndPointSecretKey = System.getProperty("dataverse.files." + this.driverId + ".password." + swiftEndPoint); + String swiftEndPointTenantName = System.getProperty("dataverse.files." + this.driverId + ".tenant." + swiftEndPoint); + String swiftEndPointAuthMethod = System.getProperty("dataverse.files." + this.driverId + ".authType." + swiftEndPoint); + String swiftEndPointTenantId = System.getProperty("dataverse.files." + this.driverId + ".tenant." + swiftEndPoint); if (swiftEndPointAuthUrl == null || swiftEndPointUsername == null || swiftEndPointSecretKey == null || "".equals(swiftEndPointAuthUrl) || "".equals(swiftEndPointUsername) || "".equals(swiftEndPointSecretKey)) { @@ -806,9 +811,9 @@ private String getSwiftFileURI(StoredObject fileObject) throws IOException { private String hmac = null; public String generateTempUrlSignature(String swiftEndPoint, String containerName, String objectName, int duration) throws IOException { if (hmac == null || isExpiryExpired(generateTempUrlExpiry(duration, System.currentTimeMillis()), duration, System.currentTimeMillis())) { - String secretKey = System.getProperty("dataverse.files.swift.hashKey." + swiftEndPoint); + String secretKey = System.getProperty("dataverse.files." + this.driverId + ".hashKey." + swiftEndPoint); if (secretKey == null) { - throw new IOException("Please input a hash key under dataverse.files.swift.hashKey." + swiftEndPoint); + throw new IOException("Please input a hash key under dataverse.files." + this.driverId + ".hashKey." 
+ swiftEndPoint); } String path = "/v1/" + containerName + "/" + objectName; Long expires = generateTempUrlExpiry(duration, System.currentTimeMillis()); @@ -833,7 +838,7 @@ public long generateTempUrlExpiry(int duration, long currentTime) { private String temporaryUrl = null; private String generateTemporarySwiftUrl(String swiftEndPoint, String containerName, String objectName, int duration) throws IOException { - String baseUrl = System.getProperty("dataverse.files.swift.endpoint." + swiftEndPoint); + String baseUrl = System.getProperty("dataverse.files." + this.driverId + ".endpoint." + swiftEndPoint); String path = "/v1/" + containerName + "/" + objectName; if (temporaryUrl == null || isExpiryExpired(generateTempUrlExpiry(duration, System.currentTimeMillis()), duration, System.currentTimeMillis())) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index eb21f70f2cb..864bb700e0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -96,15 +96,8 @@ public Dataset execute(CommandContext ctxt) throws CommandException { theDataset.setAuthority(ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound)); } if (theDataset.getStorageIdentifier() == null) { - try { - DataAccess.createNewStorageIO(theDataset, "placeholder"); - } catch (IOException ioex) { - // if setting the storage identifier through createNewStorageIO fails, dataset creation - // does not have to fail. we just set the storage id to a default -SF - String storageDriver = (System.getProperty("dataverse.files.storage-driver-id") != null) ? 
System.getProperty("dataverse.files.storage-driver-id") : "file"; - theDataset.setStorageIdentifier(storageDriver + "://" + theDataset.getGlobalId().asString()); - logger.log(Level.INFO, "Failed to create StorageIO. StorageIdentifier set to default. Not fatal.({0})", ioex.getMessage()); - } + String driverId = DataAccess.getStorageDriverId(theDataset.getDataverseContext()); + theDataset.setStorageIdentifier(driverId + "://" + theDataset.getGlobalId().asString()); } if (theDataset.getIdentifier()==null) { theDataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(theDataset, idServiceBean)); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index eb1b2b658d5..710e8d462df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -36,10 +36,12 @@ import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetLock; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.DataAccessOption; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator; @@ -151,221 +153,251 @@ public class IngestServiceBean { // DataFileCategory objects, if any were already assigned to the files). // It must be called before we attempt to permanently save the files in // the database by calling the Save command on the dataset and/or version. 
- public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles) { - List ret = new ArrayList<>(); - - if (newFiles != null && newFiles.size() > 0) { - //ret = new ArrayList<>(); - // final check for duplicate file names; - // we tried to make the file names unique on upload, but then - // the user may have edited them on the "add files" page, and - // renamed FOOBAR-1.txt back to FOOBAR.txt... - - IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles); - - Dataset dataset = version.getDataset(); - - for (DataFile dataFile : newFiles) { - String tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier(); - - // Try to save the file in its permanent location: - String storageId = dataFile.getStorageIdentifier().replaceFirst("^tmp://", ""); - - Path tempLocationPath = Paths.get(FileUtil.getFilesTempDirectory() + "/" + storageId); - WritableByteChannel writeChannel = null; - FileChannel readChannel = null; - - boolean unattached = false; - boolean savedSuccess = false; - StorageIO dataAccess = null; - - try { - logger.fine("Attempting to create a new storageIO object for " + storageId); - if (dataFile.getOwner() == null) { - unattached = true; - dataFile.setOwner(dataset); - } - dataAccess = DataAccess.createNewStorageIO(dataFile, storageId); - - logger.fine("Successfully created a new storageIO object."); - /* - This commented-out code demonstrates how to copy bytes - from a local InputStream (or a readChannel) into the - writable byte channel of a Dataverse DataAccessIO object: - */ - - /* - storageIO.open(DataAccessOption.WRITE_ACCESS); - - writeChannel = storageIO.getWriteChannel(); - readChannel = new FileInputStream(tempLocationPath.toFile()).getChannel(); - - long bytesPerIteration = 16 * 1024; // 16K bytes - long start = 0; - while ( start < readChannel.size() ) { - readChannel.transferTo(start, bytesPerIteration, writeChannel); - start += bytesPerIteration; - } - */ - - /* - But it's easier to use this 
convenience method from the - DataAccessIO: - - (if the underlying storage method for this file is - local filesystem, the DataAccessIO will simply copy - the file using Files.copy, like this: - - Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), StandardCopyOption.REPLACE_EXISTING); - */ - dataAccess.savePath(tempLocationPath); - - // Set filesize in bytes - // - dataFile.setFilesize(dataAccess.getSize()); - savedSuccess = true; - logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); - - } catch (IOException ioex) { - logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); - } finally { - if (readChannel != null) { - try { - readChannel.close(); - } catch (IOException e) { - } - } - if (writeChannel != null) { - try { - writeChannel.close(); - } catch (IOException e) { - } - } - } - - // Since we may have already spent some CPU cycles scaling down image thumbnails, - // we may as well save them, by moving these generated images to the permanent - // dataset directory. We should also remember to delete any such files in the - // temp directory: - List generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), storageId); - if (generatedTempFiles != null) { - for (Path generated : generatedTempFiles) { - if (savedSuccess) { // no need to try to save this aux file permanently, if we've failed to save the main file! - logger.fine("(Will also try to permanently save generated thumbnail file " + generated.toString() + ")"); - try { - //Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), generated.getFileName().toString())); - int i = generated.toString().lastIndexOf("thumb"); - if (i > 1) { - String extensionTag = generated.toString().substring(i); - dataAccess.savePathAsAux(generated, extensionTag); - logger.fine("Saved generated thumbnail as aux object. 
\"preview available\" status: " + dataFile.isPreviewImageAvailable()); - } else { - logger.warning("Generated thumbnail file name does not match the expected pattern: " + generated.toString()); - } - - } catch (IOException ioex) { - logger.warning("Failed to save generated file " + generated.toString()); - } - } - - // ... but we definitely want to delete it: - try { - Files.delete(generated); - } catch (IOException ioex) { - logger.warning("Failed to delete generated file " + generated.toString()); - } - } - } - - // ... and let's delete the main temp file: - try { - logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); - Files.delete(tempLocationPath); - } catch (IOException ex) { - // (non-fatal - it's just a temp file.) - logger.warning("Failed to delete temp file " + tempLocationPath.toString()); - } - - if (unattached) { - dataFile.setOwner(null); - } - // Any necessary post-processing: - //performPostProcessingTasks(dataFile); - - if (savedSuccess) { - // These are all brand new files, so they should all have - // one filemetadata total. -- L.A. - FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); - String fileName = fileMetadata.getLabel(); - - boolean metadataExtracted = false; - if (FileUtil.canIngestAsTabular(dataFile)) { - /* - * Note that we don't try to ingest the file right away - - * instead we mark it as "scheduled for ingest", then at - * the end of the save process it will be queued for async. - * ingest in the background. In the meantime, the file - * will be ingested as a regular, non-tabular file, and - * appear as such to the user, until the ingest job is - * finished with the Ingest Service. - */ - dataFile.SetIngestScheduled(); - } else if (fileMetadataExtractable(dataFile)) { - - try { - // FITS is the only type supported for metadata - // extraction, as of now. -- L.A. 
4.0 - dataFile.setContentType("application/fits"); - metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); - } catch (IOException mex) { - logger.severe("Caught exception trying to extract indexable metadata from file " + fileName + ", " + mex.getMessage()); - } - if (metadataExtracted) { - logger.fine("Successfully extracted indexable metadata from file " + fileName); - } else { - logger.fine("Failed to extract indexable metadata from file " + fileName); - } - } - // temp dbug line - //System.out.println("ADDING FILE: " + fileName + "; for dataset: " + dataset.getGlobalId()); - // Make sure the file is attached to the dataset and to the version, if this - // hasn't been done yet: - if (dataFile.getOwner() == null) { - dataFile.setOwner(dataset); - - version.getFileMetadatas().add(dataFile.getFileMetadata()); - dataFile.getFileMetadata().setDatasetVersion(version); - dataset.getFiles().add(dataFile); - - if (dataFile.getFileMetadata().getCategories() != null) { - ListIterator dfcIt = dataFile.getFileMetadata().getCategories().listIterator(); - - while (dfcIt.hasNext()) { - DataFileCategory dataFileCategory = dfcIt.next(); - - if (dataFileCategory.getDataset() == null) { - DataFileCategory newCategory = dataset.getCategoryByName(dataFileCategory.getName()); - if (newCategory != null) { - newCategory.addFileMetadata(dataFile.getFileMetadata()); - //dataFileCategory = newCategory; - dfcIt.set(newCategory); - } else { - dfcIt.remove(); - } - } - } - } - } - - ret.add(dataFile); - } - } - - logger.fine("Done! 
Finished saving new files in permanent storage and adding them to the dataset."); - } - - return ret; - } + public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles) { + List ret = new ArrayList<>(); + + if (newFiles != null && newFiles.size() > 0) { + // ret = new ArrayList<>(); + // final check for duplicate file names; + // we tried to make the file names unique on upload, but then + // the user may have edited them on the "add files" page, and + // renamed FOOBAR-1.txt back to FOOBAR.txt... + + IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles); + + Dataset dataset = version.getDataset(); + + for (DataFile dataFile : newFiles) { + boolean unattached = false; + boolean savedSuccess = false; + if (dataFile.getOwner() == null) { + unattached = true; + dataFile.setOwner(dataset); + } + + String[] storageInfo = DataAccess.getDriverIdAndStorageId(dataFile.getStorageIdentifier()); + String driverType = DataAccess.getDriverType(storageInfo[0]); + String storageId = storageInfo[1]; + if (driverType.equals("tmp")|| driverType.contentEquals("file")) { //"file" is the default if no prefix + String tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + storageId; + + // Try to save the file in its permanent location: + Path tempLocationPath = Paths.get(FileUtil.getFilesTempDirectory() + "/" + storageId); + WritableByteChannel writeChannel = null; + FileChannel readChannel = null; + + StorageIO dataAccess = null; + + try { + logger.fine("Attempting to create a new storageIO object for " + storageId); + dataAccess = DataAccess.createNewStorageIO(dataFile, storageId); + + logger.fine("Successfully created a new storageIO object."); + /* + * This commented-out code demonstrates how to copy bytes from a local + * InputStream (or a readChannel) into the writable byte channel of a Dataverse + * DataAccessIO object: + */ + + /* + * storageIO.open(DataAccessOption.WRITE_ACCESS); + * + * writeChannel = storageIO.getWriteChannel(); readChannel = new + 
* FileInputStream(tempLocationPath.toFile()).getChannel(); + * + * long bytesPerIteration = 16 * 1024; // 16K bytes long start = 0; while ( + * start < readChannel.size() ) { readChannel.transferTo(start, + * bytesPerIteration, writeChannel); start += bytesPerIteration; } + */ + + /* + * But it's easier to use this convenience method from the DataAccessIO: + * + * (if the underlying storage method for this file is local filesystem, the + * DataAccessIO will simply copy the file using Files.copy, like this: + * + * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), + * StandardCopyOption.REPLACE_EXISTING); + */ + dataAccess.savePath(tempLocationPath); + + // Set filesize in bytes + // + dataFile.setFilesize(dataAccess.getSize()); + savedSuccess = true; + logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); + + } catch (IOException ioex) { + logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + + ioex.getMessage() + ")"); + } finally { + if (readChannel != null) { + try { + readChannel.close(); + } catch (IOException e) { + } + } + if (writeChannel != null) { + try { + writeChannel.close(); + } catch (IOException e) { + } + } + } + + // Since we may have already spent some CPU cycles scaling down image + // thumbnails, + // we may as well save them, by moving these generated images to the permanent + // dataset directory. We should also remember to delete any such files in the + // temp directory: + List generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), + storageId); + if (generatedTempFiles != null) { + for (Path generated : generatedTempFiles) { + if (savedSuccess) { // no need to try to save this aux file permanently, if we've failed to + // save the main file! 
+ logger.fine("(Will also try to permanently save generated thumbnail file " + + generated.toString() + ")"); + try { + // Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), + // generated.getFileName().toString())); + int i = generated.toString().lastIndexOf("thumb"); + if (i > 1) { + String extensionTag = generated.toString().substring(i); + dataAccess.savePathAsAux(generated, extensionTag); + logger.fine( + "Saved generated thumbnail as aux object. \"preview available\" status: " + + dataFile.isPreviewImageAvailable()); + } else { + logger.warning( + "Generated thumbnail file name does not match the expected pattern: " + + generated.toString()); + } + + } catch (IOException ioex) { + logger.warning("Failed to save generated file " + generated.toString()); + } + } + + // ... but we definitely want to delete it: + try { + Files.delete(generated); + } catch (IOException ioex) { + logger.warning("Failed to delete generated file " + generated.toString()); + } + } + } + + // ... and let's delete the main temp file: + try { + logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); + Files.delete(tempLocationPath); + } catch (IOException ex) { + // (non-fatal - it's just a temp file.) + logger.warning("Failed to delete temp file " + tempLocationPath.toString()); + } + + if (unattached) { + dataFile.setOwner(null); + } + // Any necessary post-processing: + // performPostProcessingTasks(dataFile); + + if (savedSuccess) { + // These are all brand new files, so they should all have + // one filemetadata total. -- L.A. + FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); + String fileName = fileMetadata.getLabel(); + + boolean metadataExtracted = false; + if (FileUtil.canIngestAsTabular(dataFile)) { + /* + * Note that we don't try to ingest the file right away - instead we mark it as + * "scheduled for ingest", then at the end of the save process it will be queued + * for async. ingest in the background. 
In the meantime, the file will be + * ingested as a regular, non-tabular file, and appear as such to the user, + * until the ingest job is finished with the Ingest Service. + */ + dataFile.SetIngestScheduled(); + } else if (fileMetadataExtractable(dataFile)) { + + try { + // FITS is the only type supported for metadata + // extraction, as of now. -- L.A. 4.0 + dataFile.setContentType("application/fits"); + metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); + } catch (IOException mex) { + logger.severe("Caught exception trying to extract indexable metadata from file " + + fileName + ", " + mex.getMessage()); + } + if (metadataExtracted) { + logger.fine("Successfully extracted indexable metadata from file " + fileName); + } else { + logger.fine("Failed to extract indexable metadata from file " + fileName); + } + } + } + } else { + try { + StorageIO dataAccess = DataAccess.getStorageIO(dataFile); + //Populate metadata + dataAccess.open(DataAccessOption.READ_ACCESS); + //set file size + dataFile.setFilesize(dataAccess.getSize()); + } catch (IOException ioex) { + logger.warning("Failed to get file size, storage id " + dataFile.getStorageIdentifier() + " (" + + ioex.getMessage() + ")"); + } + savedSuccess = true; + logger.info("unattached: " + unattached); + dataFile.setOwner(null); + + } + + logger.fine("Done! 
Finished saving new files in permanent storage and adding them to the dataset."); + + if (savedSuccess) { + // temp dbug line + // System.out.println("ADDING FILE: " + fileName + "; for dataset: " + + // dataset.getGlobalId()); + // Make sure the file is attached to the dataset and to the version, if this + // hasn't been done yet: + if (dataFile.getOwner() == null) { + dataFile.setOwner(dataset); + + version.getFileMetadatas().add(dataFile.getFileMetadata()); + dataFile.getFileMetadata().setDatasetVersion(version); + dataset.getFiles().add(dataFile); + + if (dataFile.getFileMetadata().getCategories() != null) { + ListIterator dfcIt = dataFile.getFileMetadata().getCategories() + .listIterator(); + + while (dfcIt.hasNext()) { + DataFileCategory dataFileCategory = dfcIt.next(); + + if (dataFileCategory.getDataset() == null) { + DataFileCategory newCategory = dataset + .getCategoryByName(dataFileCategory.getName()); + if (newCategory != null) { + newCategory.addFileMetadata(dataFile.getFileMetadata()); + // dataFileCategory = newCategory; + dfcIt.set(newCategory); + } else { + dfcIt.remove(); + } + } + } + } + } + } + + ret.add(dataFile); + } + } + + return ret; + } public List listGeneratedTempFiles(Path tempDirectory, String baseName) { List generatedFiles = new ArrayList<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 45ce6949127..71786001ad9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -27,8 +27,8 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; -import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX; import 
edu.harvard.iq.dataverse.dataset.DatasetThumbnail; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; @@ -1281,8 +1281,10 @@ public static String getFilesTempDirectory() { } public static void generateS3PackageStorageIdentifier(DataFile dataFile) { - String bucketName = System.getProperty("dataverse.files.s3-bucket-name"); - String storageId = S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); + String driverId = DataAccess.getStorageDriverId(dataFile.getDataverseContext()); + + String bucketName = System.getProperty("dataverse.files." + driverId + ".bucket-name"); + String storageId = driverId + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); dataFile.setStorageIdentifier(storageId); } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java index 96c74515c76..e80c82d059b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java @@ -56,10 +56,10 @@ public void setUpClass() throws IOException { dataFile = MocksFactory.makeDataFile(); dataFile.setOwner(dataset); dataFile.setStorageIdentifier("DataFile"); - - datasetAccess = new FileAccessIO<>(dataset); - dataFileAccess = new FileAccessIO<>(dataFile); - dataverseAccess = new FileAccessIO<>(dataverse); + String dummyDriverId = "dummmy"; + datasetAccess = new FileAccessIO<>(dataset,null, dummyDriverId); + dataFileAccess = new FileAccessIO<>(dataFile, null, dummyDriverId); + dataverseAccess = new FileAccessIO<>(dataverse, null, dummyDriverId); File file = new File("/tmp/files/tmp/dataset/Dataset"); file.getParentFile().mkdirs(); diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java 
b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java index 4ce821a5fee..86d129af664 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java @@ -43,8 +43,8 @@ public void setup() throws IOException { dataFile.setOwner(dataSet); dataFileId = UtilIT.getRandomIdentifier(); dataFile.setStorageIdentifier("s3://bucket:"+dataFileId); - dataSetAccess = new S3AccessIO<>(dataSet, null, s3client); - dataFileAccess = new S3AccessIO<>(dataFile, null, s3client); + dataSetAccess = new S3AccessIO<>(dataSet, null, s3client, "s3"); + dataFileAccess = new S3AccessIO<>(dataFile, null, s3client, "s3"); } /* diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java index 2700bf3448c..4d2e1072950 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java @@ -77,8 +77,9 @@ public void testGetDvObject() { } catch (ClassCastException ex) { assertEquals(ex.getMessage(), "edu.harvard.iq.dataverse.Dataset cannot be cast to edu.harvard.iq.dataverse.Dataverse"); } - assertEquals(new DataFile(), new FileAccessIO<>(new DataFile()).getDataFile()); - assertEquals(new Dataverse(), new FileAccessIO<>(new Dataverse()).getDataverse()); + String dummyDriverId="dummy"; + assertEquals(new DataFile(), new FileAccessIO<>(new DataFile(), null, dummyDriverId).getDataFile()); + assertEquals(new Dataverse(), new FileAccessIO<>(new Dataverse(), null, dummyDriverId).getDataverse()); } @Test diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIOTest.java index 554242c9311..c1aa6b5fca3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIOTest.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIOTest.java @@ -39,8 +39,9 @@ public void setUpClass() throws IOException { datafile = MocksFactory.makeDataFile(); dataset = MocksFactory.makeDataset(); datafile.setOwner(dataset); - datasetAccess = new SwiftAccessIO<>(dataset); - datafileAccess = new SwiftAccessIO<>(datafile); + String dummyDriverId="dummy"; + datasetAccess = new SwiftAccessIO<>(dataset, null, dummyDriverId); + datafileAccess = new SwiftAccessIO<>(datafile, null, dummyDriverId); swiftAccess = new SwiftAccessIO(); } From cdbd6f2a0571146b156f34b1defd047a11fef59d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jan 2020 09:53:16 -0500 Subject: [PATCH 02/31] merge issue --- pom.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pom.xml b/pom.xml index 06f3d26a650..481f0418c3c 100644 --- a/pom.xml +++ b/pom.xml @@ -7,11 +7,7 @@ --> edu.harvard.iq dataverse -<<<<<<< IQSS/6485 4.18.1 -======= - 4.18.1-tdl-dev-ms ->>>>>>> 08f6849 multistore implementation (built on direct upload) war dataverse From 9cc034a351761d5994aee931ff102369ff439102 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jan 2020 10:28:27 -0500 Subject: [PATCH 03/31] dcm - use new option name - assume store with id 's3' Note: DCM doesn't use any of the other S3 options in creating the Amazon client, so it is somewhat hardcoded that way as well. A rewrite might identify a specific s3 store to use, build the amazon client from its settings and still use the general dcm-s3-bucket-name entry. If DCM should be supported across multiple stores, a dataverse.files.<id>.dcm-s3-bucket-name option could be created so that everything is tied to a specific store.
--- src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java index f6f46e375ff..054ed61f320 100644 --- a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java @@ -69,7 +69,7 @@ public void copyFromS3(Dataset dataset, String s3ImportPath) throws IOException String dcmBucketName = System.getProperty("dataverse.files.dcm-s3-bucket-name"); String dcmDatasetKey = s3ImportPath; - String dvBucketName = System.getProperty("dataverse.files.s3-bucket-name"); + String dvBucketName = System.getProperty("dataverse.files.s3.bucket-name"); String dvDatasetKey = getS3DatasetKey(dataset); @@ -137,7 +137,7 @@ public DataFile createPackageDataFile(Dataset dataset, String folderName, long t //This is a brittle calculation, changes of the dcm post_upload script will blow this up String rootPackageName = "package_" + folderName.replace("/", ""); - String dvBucketName = System.getProperty("dataverse.files.s3-bucket-name"); + String dvBucketName = System.getProperty("dataverse.files.s3.bucket-name"); String dvDatasetKey = getS3DatasetKey(dataset); //getting the name of the .sha file via substring, ${packageName}.sha From ae6ecfa0bfe254bc7e16cc84112d5d76a9073091 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 3 Jan 2020 14:24:29 -0500 Subject: [PATCH 04/31] control store via dataverse.storagedriver param manageable by superuser only Conflicts: src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java --- .../edu/harvard/iq/dataverse/Dataverse.java | 10 +++ .../harvard/iq/dataverse/DataversePage.java | 11 +++ .../harvard/iq/dataverse/api/Datasets.java | 2 +- 
.../iq/dataverse/dataaccess/DataAccess.java | 82 ++++++++++++++----- .../impl/AbstractCreateDatasetCommand.java | 2 +- .../harvard/iq/dataverse/util/FileUtil.java | 10 ++- src/main/java/propertyFiles/Bundle.properties | 2 + .../migration/V4.19.0.1__tbd_multistore.sql | 2 + src/main/webapp/dataverse.xhtml | 13 +++ 9 files changed, 109 insertions(+), 25 deletions(-) create mode 100644 src/main/resources/db/migration/V4.19.0.1__tbd_multistore.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index 024281b0b96..02dd1fa09a9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -147,6 +147,8 @@ public String getIndexableCategoryName() { } private String affiliation; + + private String storageDriver; // Note: We can't have "Remove" here, as there are role assignments that refer // to this role. So, adding it would mean violating a forign key contstraint. 
@@ -756,4 +758,12 @@ public boolean isAncestorOf( DvObject other ) { } return false; } + + public String getStorageDriverId() { + return storageDriver; + } + + public void setStorageDriverId(String storageDriver) { + this.storageDriver = storageDriver; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index 6dc792aefed..60922f4f849 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -4,6 +4,7 @@ import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataverse.DataverseUtil; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -39,6 +40,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import javax.faces.component.UIComponent; @@ -1197,4 +1200,12 @@ public List completeHostDataverseMenuList(String query) { return null; } } + + public Set> getStorageDriverOptions() { + return DataAccess.getStorageDriverLabels(); + } + + public String getCurrentStorageDriverLabel() { + return DataAccess.getStorageDriverLabelFor(dataverse.getStorageDriverId()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index f7f31077864..65f3a214fea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1300,7 +1300,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String if ("validation 
passed".equals(statusMessageFromDcm)) { logger.log(Level.INFO, "Checksum Validation passed for DCM."); - String storageDriver = DataAccess.getStorageDriverId(dataset.getDataverseContext()); + String storageDriver = dataset.getDataverseContext().getStorageDriverId(); String uploadFolder = jsonFromDcm.getString("uploadFolder"); int totalSize = jsonFromDcm.getInt("totalSize"); String storageDriverType = System.getProperty("dataverse.file." + storageDriver + ".type"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index de8be99152c..d57363975d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -20,18 +20,25 @@ package edu.harvard.iq.dataverse.dataaccess; -import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.util.StringUtil; + import java.io.IOException; import java.util.HashMap; import java.util.Properties; +import java.util.logging.Logger; +import java.util.Map.Entry; +import java.util.Set; /** * * @author Leonid Andreev */ + public class DataAccess { + private static final Logger logger = Logger.getLogger(DataAccess.class.getCanonicalName()); + public DataAccess() { }; @@ -85,24 +92,24 @@ public static StorageIO getStorageIO(T dvObject, DataAcc // Experimental extension of the StorageIO system allowing direct access to // stored physical files that may not be associated with any DvObjects - public static StorageIO getDirectStorageIO(String storageLocation) throws IOException { - String[] response = getDriverIdAndStorageId(storageLocation); + public static StorageIO getDirectStorageIO(String fullStorageLocation) throws IOException { + String[] response = getDriverIdAndStorageLocation(fullStorageLocation); String storageDriverId = response[0]; - String storageIdentifier=response[1]; + String 
storageLocation=response[1]; String storageType = getDriverType(storageDriverId); switch(storageType) { case "file": - return new FileAccessIO<>(storageIdentifier, storageDriverId); + return new FileAccessIO<>(storageLocation, storageDriverId); case "s3": - return new S3AccessIO<>(storageIdentifier, storageDriverId); + return new S3AccessIO<>(storageLocation, storageDriverId); case "swift": - return new SwiftAccessIO<>(storageIdentifier, storageDriverId); + return new SwiftAccessIO<>(storageLocation, storageDriverId); default: throw new IOException("getDirectStorageIO: Unsupported storage method."); } } - public static String[] getDriverIdAndStorageId(String storageLocation) { + public static String[] getDriverIdAndStorageLocation(String storageLocation) { //default if no prefix String storageIdentifier=storageLocation; int separatorIndex = storageLocation.indexOf("://"); @@ -114,6 +121,10 @@ public static String[] getDriverIdAndStorageId(String storageLocation) { return new String[]{storageDriverId, storageIdentifier}; } + public static String getStorarageIdFromLocation(String location) { + return location.substring(location.lastIndexOf('/')+1); + } + public static String getDriverType(String driverId) { return System.getProperty("dataverse.files." + driverId + ".type", "file"); } @@ -122,7 +133,7 @@ public static String getDriverType(String driverId) { // for saving new, not yet saved datafiles. 
public static StorageIO createNewStorageIO(T dvObject, String storageTag) throws IOException { - return createNewStorageIO(dvObject, storageTag, getStorageDriverId(dvObject.getDataverseContext())); + return createNewStorageIO(dvObject, storageTag, dvObject.getDataverseContext().getStorageDriverId()); } public static StorageIO createNewStorageIO(T dvObject, String storageTag, String storageDriverId) throws IOException { @@ -160,24 +171,51 @@ public static StorageIO createNewStorageIO(T dvObject, S static HashMap drivers = null; - public static String getStorageDriverId(Dataverse dataverse) { + public static String getStorageDriverId(String driverLabel) { if (drivers==null) { - drivers = new HashMap(); - Properties p = System.getProperties(); - for(String property: p.stringPropertyNames()) { - if(property.startsWith("dataverse.files.") && property.endsWith(".affiliation")) { - String driverId = property.substring(16); - driverId=driverId.substring(0,driverId.indexOf('.')); - drivers.put(p.get(property).toString(), driverId); - } - - } + populateDrivers(); } - if(drivers.containsKey(dataverse.getAffiliation())) { - return drivers.get(dataverse.getAffiliation()); + if(StringUtil.nonEmpty(driverLabel) && drivers.containsKey(driverLabel)) { + return drivers.get(driverLabel); } return DEFAULT_STORAGE_DRIVER_IDENTIFIER; + } + + public static Set> getStorageDriverLabels() { + if (drivers==null) { + populateDrivers(); + } + return drivers.entrySet(); + } + + private static void populateDrivers() { + drivers = new HashMap(); + Properties p = System.getProperties(); + for(String property: p.stringPropertyNames()) { + if(property.startsWith("dataverse.files.") && property.endsWith(".label")) { + String driverId = property.substring(16); // "dataverse.files.".length + driverId=driverId.substring(0,driverId.indexOf('.')); + logger.info("Found Storage Driver: " + driverId + " for " + p.get(property).toString()); + drivers.put(p.get(property).toString(), driverId); + } + } } + 
public static String getStorageDriverLabelFor(String storageDriverId) { + String label = "<>"; + if (drivers==null) { + populateDrivers(); + } + if(drivers.containsValue(storageDriverId)) { + for(String key: drivers.keySet()) { + if(drivers.get(key).equals(storageDriverId)) { + label = key; + break; + } + + } + } + return label; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index 864bb700e0e..971c005a902 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -96,7 +96,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { theDataset.setAuthority(ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound)); } if (theDataset.getStorageIdentifier() == null) { - String driverId = DataAccess.getStorageDriverId(theDataset.getDataverseContext()); + String driverId = theDataset.getDataverseContext().getStorageDriverId(); theDataset.setStorageIdentifier(driverId + "://" + theDataset.getGlobalId().asString()); } if (theDataset.getIdentifier()==null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 71786001ad9..2cd8ab69bbf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -24,11 +24,13 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.DataFileServiceBean; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import 
edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; +import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; @@ -80,6 +82,7 @@ import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import org.apache.commons.io.FilenameUtils; +import com.amazonaws.AmazonServiceException; /** * a 4.0 implementation of the DVN FileUtil; @@ -1281,7 +1284,7 @@ public static String getFilesTempDirectory() { } public static void generateS3PackageStorageIdentifier(DataFile dataFile) { - String driverId = DataAccess.getStorageDriverId(dataFile.getDataverseContext()); + String driverId = dataFile.getDataverseContext().getStorageDriverId(); String bucketName = System.getProperty("dataverse.files." + driverId + ".bucket-name"); String storageId = driverId + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); @@ -1614,4 +1617,9 @@ public static boolean isPackageFile(DataFile dataFile) { return DataFileServiceBean.MIME_TYPE_PACKAGE_FILE.equalsIgnoreCase(dataFile.getContentType()); } + public static String getStorageIdentifierFromLocation(String location) { + int driverEnd = location.indexOf("://") + 3; + int bucketEnd = driverEnd + location.substring(driverEnd).indexOf("/"); + return location.substring(0,bucketEnd) + ":" + location.substring(location.lastIndexOf("/") + 1); + } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index fc88691d9cc..b9a123fd517 100755 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -35,6 +35,7 @@ name=Name institution=Institution position=Position affiliation=Affiliation +storage=Storage Driver createDataverse=Create 
Dataverse remove=Remove done=Done @@ -691,6 +692,7 @@ dataverse.host.tip=Changing the host dataverse will clear any fields you may hav dataverse.host.autocomplete.nomatches=No matches dataverse.identifier.title=Short name used for the URL of this dataverse. dataverse.affiliation.title=The organization with which this dataverse is affiliated. +dataverse.storage.title=A storage service to be used for datasets in this dataverse. dataverse.category=Category dataverse.category.title=The type that most closely reflects this dataverse. dataverse.type.selectTab.top=Select one... diff --git a/src/main/resources/db/migration/V4.19.0.1__tbd_multistore.sql b/src/main/resources/db/migration/V4.19.0.1__tbd_multistore.sql new file mode 100644 index 00000000000..273de02fef7 --- /dev/null +++ b/src/main/resources/db/migration/V4.19.0.1__tbd_multistore.sql @@ -0,0 +1,2 @@ +ALTER TABLE dataverse +ADD COLUMN IF NOT EXISTS storagedriver TEXT; diff --git a/src/main/webapp/dataverse.xhtml b/src/main/webapp/dataverse.xhtml index a9f013fc0a3..855b53ab10e 100644 --- a/src/main/webapp/dataverse.xhtml +++ b/src/main/webapp/dataverse.xhtml @@ -139,6 +139,19 @@ +
+ + #{bundle.storage} + + +
+ + + + +
+
From 79fbd8ce4781ae996c17f0b8e24e1d385bd6a37b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jan 2020 11:25:05 -0500 Subject: [PATCH 05/31] redirect and url lifetime store specific --- .../iq/dataverse/api/DownloadInstanceWriter.java | 11 +---------- .../harvard/iq/dataverse/dataaccess/S3AccessIO.java | 12 ++++++++++-- .../edu/harvard/iq/dataverse/util/SystemConfig.java | 2 +- .../iq/dataverse/dataaccess/S3AccessIOTest.java | 6 +++--- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index 8c480bdcc36..a9b4746299d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -228,7 +228,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] throw new NotFoundException("datafile access error: requested optional service (image scaling, format conversion, etc.) could not be performed on this datafile."); } } else { - if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && isRedirectToS3()) { + if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && ((S3AccessIO) storageIO).downloadRedirectEnabled()) { // definitely close the (still open) S3 input stream, // since we are not going to use it. 
The S3 documentation // emphasizes that it is very important not to leave these @@ -445,13 +445,4 @@ private long getFileSize(DownloadInstance di, String extraHeader) { } return -1; } - - private boolean isRedirectToS3() { - String optionValue = System.getProperty("dataverse.files.s3-download-redirect"); - if ("true".equalsIgnoreCase(optionValue)) { - return true; - } - return false; - } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 5efd612901d..8194ab80c58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -82,7 +82,7 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false s3CB.setPayloadSigningEnabled(s3payloadSigning); // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true - // Boolean is inverted, otherwise setting dataverse.files.s3-chunked-encoding=false would result in leaving Chunked Encoding enabled + // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); s3CB.setCredentials(new ProfileCredentialsProvider(s3profile)); @@ -760,6 +760,14 @@ String getMainFileKey() throws IOException { return key; } + public boolean downloadRedirectEnabled() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } + public String generateTemporaryS3Url() throws IOException { //Questions: // Q. Should this work for private and public? 
@@ -821,7 +829,7 @@ public String generateTemporaryS3Url() throws IOException { } int getUrlExpirationMinutes() { - String optionValue = System.getProperty("dataverse.files.s3-url-expiration-minutes"); + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); if (optionValue != null) { Integer num; try { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index b8836060069..8d0cb276a93 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -862,7 +862,7 @@ public String toString() { /** * See FileUploadMethods. * - * TODO: Consider if dataverse.files.s3-download-redirect belongs here since + * TODO: Consider if dataverse.files..download-redirect belongs here since * it's a way to bypass Glassfish when downloading. */ public enum FileDownloadMethods { diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java index 86d129af664..f92b44862e2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java @@ -99,7 +99,7 @@ void keyNullstorageIdNull_getMainFileKey() throws IOException { @Test void default_getUrlExpirationMinutes() { // given - System.clearProperty("dataverse.files.s3-url-expiration-minutes"); + System.clearProperty("dataverse.files.s3.url-expiration-minutes"); // when & then assertEquals(60, dataFileAccess.getUrlExpirationMinutes()); } @@ -107,7 +107,7 @@ void default_getUrlExpirationMinutes() { @Test void validSetting_getUrlExpirationMinutes() { // given - System.setProperty("dataverse.files.s3-url-expiration-minutes", "120"); + System.setProperty("dataverse.files.s3.url-expiration-minutes", "120"); // when & then assertEquals(120, 
dataFileAccess.getUrlExpirationMinutes()); } @@ -115,7 +115,7 @@ void validSetting_getUrlExpirationMinutes() { @Test void invalidSetting_getUrlExpirationMinutes() { // given - System.setProperty("dataverse.files.s3-url-expiration-minutes", "NaN"); + System.setProperty("dataverse.files.s3.url-expiration-minutes", "NaN"); // when & then assertEquals(60, dataFileAccess.getUrlExpirationMinutes()); } From 6c506ab5c9a602364563c6afaead1fc875c35135 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jan 2020 11:47:26 -0500 Subject: [PATCH 06/31] limit rsync setup/panel to datasets using 's3' store --- .../edu/harvard/iq/dataverse/EditDatafilesPage.java | 10 +++++++++- src/main/webapp/editFilesFragment.xhtml | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 3c3da259f75..55318bac371 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -571,7 +571,7 @@ public String init() { } saveEnabled = true; - if (mode == FileEditMode.UPLOAD && workingVersion.getFileMetadatas().isEmpty() && settingsWrapper.isRsyncUpload()) { + if (mode == FileEditMode.UPLOAD && workingVersion.getFileMetadatas().isEmpty() && rsyncUploadSupported()) { setUpRsync(); } @@ -2757,6 +2757,14 @@ public void saveAdvancedOptions() { fileMetadataSelectedForIngestOptionsPopup = null; } + public boolean rsyncUploadSupported() { + // ToDo - rsync was written before multiple store support and currently is hardcoded to use the "s3" store. + // When those restrictions are lifted/rsync can be configured per store, this test should check that setting + // instead of testing for the 's3" store. 
+ return settingsWrapper.isRsyncUpload() && dataset.getDataverseContext().getStorageDriverId().equals("s3"); + } + + private void populateFileMetadatas() { fileMetadatas = new ArrayList<>(); if (selectedFileIdsList == null || selectedFileIdsList.isEmpty()) { diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index b451a43c9fe..811e6c4d55f 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -153,7 +153,7 @@
-
+