From 8431d7f9e95ad38f854e81a62b0ada45777897de Mon Sep 17 00:00:00 2001 From: Peter Kiraly Date: Wed, 18 Sep 2019 17:14:41 +0200 Subject: [PATCH 001/322] Fix at 'Show the dataset whose id is passed' section #6083 --- doc/sphinx-guides/source/api/native-api.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 0cb2dcead64..9b39cc0ca20 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -413,10 +413,15 @@ Get JSON Representation of a Dataset curl http://localhost:8080/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB - |CORS| Show the dataset whose id is passed:: - GET http://$SERVER/api/datasets/$id?key=$apiKey + curl http://$SERVER/api/datasets/$id?key=$apiKey + +fully expanded:: + + curl http://localhost:8080/api/datasets/xxxx?key=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + +The (numeric) id isn't shown in the user interface (which uses the DOI for identification). You can get it from the JSON representation of the dataset. 
List Versions of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~ From d78f20d7795846626c67ff27c81c7b4a57677727 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Mar 2022 08:50:18 -0500 Subject: [PATCH 002/322] same utility function from 3B work --- .../java/edu/harvard/iq/dataverse/util/json/JsonUtil.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index ae6935945e8..f4a3c635f8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -3,6 +3,8 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; + +import java.io.StringReader; import java.io.StringWriter; import java.util.HashMap; import java.util.Map; @@ -56,4 +58,9 @@ public static String prettyPrint(javax.json.JsonObject jsonObject) { return stringWriter.toString(); } + public static javax.json.JsonObject getJsonObject(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readObject(); + } + } } From e7636b64bb9dd41dd08d2bcc687c2dce01797875 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Mar 2022 08:50:48 -0500 Subject: [PATCH 003/322] refactor common code to abstract base re: datacite xml --- .../command/impl/AbstractSubmitToArchiveCommand.java | 10 ++++++++++ .../command/impl/DuraCloudSubmitToArchiveCommand.java | 5 +---- .../impl/GoogleCloudSubmitToArchiveCommand.java | 5 +---- .../command/impl/LocalSubmitToArchiveCommand.java | 5 +---- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 77ea680598f..a235dd57d91 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; +import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.authorization.Permission; @@ -13,6 +15,7 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import java.nio.charset.Charset; import java.util.Date; import java.util.HashMap; import java.util.Map; @@ -72,5 +75,12 @@ public String describe() { return super.describe() + "DatasetVersion: [" + version.getId() + " (v" + version.getFriendlyVersionNumber()+")]"; } + + String getDataCiteXml(DatasetVersion dv) { + DataCitation dc = new DataCitation(dv); + Map metadata = dc.getDataCiteMetadata(); + return DOIDataCiteRegisterService.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), metadata, + dv.getDataset()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 468e99f24c1..e595940d2ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -76,10 +76,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t store = storeManager.getPrimaryContentStore(); // Create space to copy archival files to store.createSpace(spaceName); - DataCitation dc = new DataCitation(dv); - Map metadata = dc.getDataCiteMetadata(); - String dataciteXml = 
DOIDataCiteRegisterService.getMetadataFromDvObject( - dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset()); + String dataciteXml = getDataCiteXml(dv); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); try (PipedInputStream dataciteIn = new PipedInputStream(); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index cb729a9807a..04d16784876 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -69,10 +69,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); - DataCitation dc = new DataCitation(dv); - Map metadata = dc.getDataCiteMetadata(); - String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject( - dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset()); + String dataciteXml = getDataCiteXml(dv); String blobIdString = null; MessageDigest messageDigest = MessageDigest.getInstance("MD5"); try (PipedInputStream dataciteIn = new PipedInputStream(); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index d87c3011c15..1f838efdd8e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -62,10 +62,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); - DataCitation dc = new DataCitation(dv); - Map metadata = dc.getDataCiteMetadata(); - String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject( - dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset()); + String dataciteXml = getDataCiteXml(dv); FileUtils.writeStringToFile(new File(localPath+"/"+spaceName + "-datacite.v" + dv.getFriendlyVersionNumber()+".xml"), dataciteXml); From 07910eca3d84fe9335ff6cf6c940ac4c6bb260be Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Mar 2022 12:26:48 -0500 Subject: [PATCH 004/322] S3 archiver --- .../impl/S3SubmitToArchiveCommand.java | 235 ++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java new file mode 100644 index 00000000000..07ef5ebb475 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -0,0 +1,235 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import 
edu.harvard.iq.dataverse.util.bagit.BagGenerator; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.workflow.step.Failure; +import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.util.Map; +import java.util.logging.Logger; + +import javax.json.JsonObject; + +import org.apache.commons.codec.binary.Hex; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSCredentialsProviderChain; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.profile.ProfileCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.transfer.TransferManager; +import com.amazonaws.services.s3.transfer.TransferManagerBuilder; + +@RequiredPermissions(Permission.PublishDataset) +public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { + + private static final Logger logger = Logger.getLogger(S3SubmitToArchiveCommand.class.getName()); + private static final String S3_CONFIG = ":S3ArchivalConfig"; + private static final String S3_PROFILE = ":S3ArchivalProfile"; + + private static final Config config = ConfigProvider.getConfig(); + private AmazonS3 s3 = null; + private TransferManager tm = null; + + public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { + super(aRequest, version); + } + + 
@Override + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + logger.fine("In S3SubmitToArchiveCommand..."); + JsonObject configObject = null; + String profileName = requestedSettings.get(S3_PROFILE); + String bucketName = null; + logger.fine("Profile: " + profileName + " Config: " + configObject); + try { + configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); + bucketName = configObject.getString("bucket-name", null); + } catch (Exception e) { + logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); + } + if (configObject != null && profileName != null && bucketName != null) { + + s3 = createClient(configObject, profileName); + tm = TransferManagerBuilder.standard() + .withS3Client(s3) + .build(); + try { + + Dataset dataset = dv.getDataset(); + if (dataset.getLockFor(Reason.finalizePublication) == null) { + + String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); + String dataciteXml = getDataCiteXml(dv); + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8")); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + // Add datacite.xml file + ObjectMetadata om = new ObjectMetadata(); + om.setContentLength(dataciteIn.available()); + String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber()+".xml"; + tm.upload(new PutObjectRequest(bucketName, dcKey, digestInputStream, om)).waitForCompletion(); + String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + om = s3.getObjectMetadata(bucketName, dcKey); + if (!om.getContentMD5().equals(localchecksum)) { + logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + dcKey); + return new Failure("Error in transferring 
DataCite.xml file to S3", + "S3 Submission Failure: incomplete metadata transfer"); + } + + // Store BagIt file + String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; + String bagKey = spaceName + "/" + fileName; + // Add BagIt ZIP file + // Google uses MD5 as one way to verify the + // transfer + messageDigest = MessageDigest.getInstance("MD5"); + // Generate bag + BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setAuthenticationKey(token.getTokenString()); + if (bagger.generateBag(fileName, false)) { + File bagFile = bagger.getBagFile(fileName); + + try (FileInputStream in = new FileInputStream(bagFile); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) { + om = new ObjectMetadata(); + om.setContentLength(bagFile.length()); + + tm.upload(new PutObjectRequest(bucketName, bagKey, digestInputStream2, om)).waitForCompletion(); + localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + om = s3.getObjectMetadata(bucketName, bagKey); + + if (!om.getContentMD5().equals(localchecksum)) { + logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + fileName); + return new Failure("Error in transferring DataCite.xml file to S3", + "S3 Submission Failure: incomplete metadata transfer"); + } + } catch (RuntimeException rte) { + logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); + return new Failure("Error in generating Bag", + "S3 Submission Failure: archive file not created"); + } + + logger.fine("S3 Submission step: Content Transferred"); + + // Document the location of dataset archival copy location (actually the URL + // where you can + // view it as an admin) + + //Unsigned URL - gives location but not access without creds + dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); + } else { + logger.warning("Could not write local Bag file " + fileName); + return new Failure("S3 Archiver 
fail writing temp local bag"); + } + + } + } else { + logger.warning("S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); + return new Failure("Dataset locked"); + } + } catch (Exception e) { + logger.warning(e.getLocalizedMessage()); + e.printStackTrace(); + return new Failure("S3 Archiver Submission Failure", + e.getLocalizedMessage() + ": check log for details"); + + } + return WorkflowStepResult.OK; + } else { + return new Failure("GoogleCloud Submission not configured - no \":GoogleCloudBucket\" and/or \":GoogleCloudProject\"."); + } + } + + private AmazonS3 createClient(JsonObject configObject, String profileName) { + // get a standard client, using the standard way of configuration the credentials, etc. + AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); + + ClientConfiguration cc = new ClientConfiguration(); + Integer poolSize = configObject.getInt("connection-pool-size", 256); + cc.setMaxConnections(poolSize); + s3CB.setClientConfiguration(cc); + + /** + * Pass in a URL pointing to your S3 compatible storage. + * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + */ + String s3CEUrl = configObject.getString("custom-endpoint-url", ""); + /** + * Pass in a region to use for SigV4 signing of requests. + * Defaults to "dataverse" as it is not relevant for custom S3 implementations. + */ + String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse"); + + // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. + if (!s3CEUrl.isEmpty()) { + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + } + /** + * Pass in a boolean value if path style access should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. 
+ */ + Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false); + // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false + s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); + + /** + * Pass in a boolean value if payload signing should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3payloadSigning = configObject.getBoolean("payload-signing",false); + /** + * Pass in a boolean value if chunked encoding should not be used within the S3 client. + * Anything but case-insensitive "false" will lead to value of true, which is default value, too. + */ + Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding",true); + // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false + s3CB.setPayloadSigningEnabled(s3payloadSigning); + // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true + // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled + s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); + + /** + * Pass in a string value if this storage driver should use a non-default AWS S3 profile. + * The default is "default" which should work when only one profile exists. + */ + ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(profileName); + + // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env + // vars or system properties to provide these, but use the secrets config source provided by Payara. 
+ AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( + new BasicAWSCredentials( + config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""), + config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse("") + )); + + // Add both providers to chain - the first working provider will be used (so static credentials are the fallback) + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, staticCredentials); + s3CB.setCredentials(providerChain); + + // let's build the client :-) + AmazonS3 client = s3CB.build(); + return client; + } + +} From 70241596800b24600812020b8728186218d987bd Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Mar 2022 14:42:50 -0500 Subject: [PATCH 005/322] Don't create md5 hashes since S3 won't use them or create ones to compare with our local one. I'l probably add this back in the DRS archiver where we'll have other means to send the hashes. --- .../impl/S3SubmitToArchiveCommand.java | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 07ef5ebb475..2520ace16ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -84,18 +84,16 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t .replace('.', '-').toLowerCase(); String dataciteXml = getDataCiteXml(dv); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8")); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + try (ByteArrayInputStream 
dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8"))) { // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); om.setContentLength(dataciteIn.available()); String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber()+".xml"; - tm.upload(new PutObjectRequest(bucketName, dcKey, digestInputStream, om)).waitForCompletion(); - String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + tm.upload(new PutObjectRequest(bucketName, dcKey, dataciteIn, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, dcKey); - if (!om.getContentMD5().equals(localchecksum)) { - logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + dcKey); - return new Failure("Error in transferring DataCite.xml file to S3", - "S3 Submission Failure: incomplete metadata transfer"); + if(om==null) { + logger.warning("Could not write datacite xml to S3"); + return new Failure("S3 Archiver failed writing datacite xml file"); } // Store BagIt file @@ -111,18 +109,17 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (bagger.generateBag(fileName, false)) { File bagFile = bagger.getBagFile(fileName); - try (FileInputStream in = new FileInputStream(bagFile); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) { + try (FileInputStream in = new FileInputStream(bagFile)) { om = new ObjectMetadata(); om.setContentLength(bagFile.length()); - tm.upload(new PutObjectRequest(bucketName, bagKey, digestInputStream2, om)).waitForCompletion(); - localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + tm.upload(new PutObjectRequest(bucketName, bagKey, in, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, bagKey); - if (!om.getContentMD5().equals(localchecksum)) { - logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + fileName); - return new Failure("Error in 
transferring DataCite.xml file to S3", - "S3 Submission Failure: incomplete metadata transfer"); + if (om ==null) { + logger.severe("Error sending file to S3: " + fileName); + return new Failure("Error in transferring Bag file to S3", + "S3 Submission Failure: incomplete transfer"); } } catch (RuntimeException rte) { logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); From 6c97b38164e840db993e4a09a7efb070172ec06c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Mar 2022 14:43:13 -0500 Subject: [PATCH 006/322] Add QDR-developed version table addition for archiving --- src/main/webapp/dataset-versions.xhtml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 936c43d07a7..6cb8c11dff7 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -131,7 +131,7 @@ - + @@ -147,6 +147,17 @@ + + + + + + + + + + From 4f71ca4d3dd845082c0f56fde4675f83edc61672 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Mar 2022 15:23:32 -0500 Subject: [PATCH 007/322] initial copy --- .../impl/DRSSubmitToArchiveCommand.java | 235 ++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java new file mode 100644 index 00000000000..92f1c2ff71d --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -0,0 +1,235 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetLock.Reason; +import 
edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.util.bagit.BagGenerator; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.workflow.step.Failure; +import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.util.Map; +import java.util.logging.Logger; + +import javax.json.JsonObject; + +import org.apache.commons.codec.binary.Hex; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSCredentialsProviderChain; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.profile.ProfileCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.transfer.TransferManager; +import com.amazonaws.services.s3.transfer.TransferManagerBuilder; + +@RequiredPermissions(Permission.PublishDataset) +public class DRSSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { + + private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); + private static final String S3_CONFIG = ":S3ArchivalConfig"; + private 
static final String S3_PROFILE = ":S3ArchivalProfile"; + + private static final Config config = ConfigProvider.getConfig(); + private AmazonS3 s3 = null; + private TransferManager tm = null; + + public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { + super(aRequest, version); + } + + @Override + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + logger.fine("In DRSSubmitToArchiveCommand..."); + JsonObject configObject = null; + String profileName = requestedSettings.get(S3_PROFILE); + String bucketName = null; + logger.fine("Profile: " + profileName + " Config: " + configObject); + try { + configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); + bucketName = configObject.getString("bucket-name", null); + } catch (Exception e) { + logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); + } + if (configObject != null && profileName != null && bucketName != null) { + + s3 = createClient(configObject, profileName); + tm = TransferManagerBuilder.standard() + .withS3Client(s3) + .build(); + try { + + Dataset dataset = dv.getDataset(); + if (dataset.getLockFor(Reason.finalizePublication) == null) { + + String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); + String dataciteXml = getDataCiteXml(dv); + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8")); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + // Add datacite.xml file + ObjectMetadata om = new ObjectMetadata(); + om.setContentLength(dataciteIn.available()); + String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber()+".xml"; + tm.upload(new PutObjectRequest(bucketName, dcKey, digestInputStream, om)).waitForCompletion(); + String 
localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + om = s3.getObjectMetadata(bucketName, dcKey); + if (!om.getContentMD5().equals(localchecksum)) { + logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + dcKey); + return new Failure("Error in transferring DataCite.xml file to S3", + "S3 Submission Failure: incomplete metadata transfer"); + } + + // Store BagIt file + String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; + String bagKey = spaceName + "/" + fileName; + // Add BagIt ZIP file + // Google uses MD5 as one way to verify the + // transfer + messageDigest = MessageDigest.getInstance("MD5"); + // Generate bag + BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setAuthenticationKey(token.getTokenString()); + if (bagger.generateBag(fileName, false)) { + File bagFile = bagger.getBagFile(fileName); + + try (FileInputStream in = new FileInputStream(bagFile); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) { + om = new ObjectMetadata(); + om.setContentLength(bagFile.length()); + + tm.upload(new PutObjectRequest(bucketName, bagKey, digestInputStream2, om)).waitForCompletion(); + localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + om = s3.getObjectMetadata(bucketName, bagKey); + + if (!om.getContentMD5().equals(localchecksum)) { + logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + fileName); + return new Failure("Error in transferring DataCite.xml file to S3", + "S3 Submission Failure: incomplete metadata transfer"); + } + } catch (RuntimeException rte) { + logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); + return new Failure("Error in generating Bag", + "S3 Submission Failure: archive file not created"); + } + + logger.fine("S3 Submission step: Content Transferred"); + + // Document the location of dataset archival copy 
location (actually the URL + // where you can + // view it as an admin) + + //Unsigned URL - gives location but not access without creds + dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); + } else { + logger.warning("Could not write local Bag file " + fileName); + return new Failure("S3 Archiver fail writing temp local bag"); + } + + } + } else { + logger.warning("S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); + return new Failure("Dataset locked"); + } + } catch (Exception e) { + logger.warning(e.getLocalizedMessage()); + e.printStackTrace(); + return new Failure("S3 Archiver Submission Failure", + e.getLocalizedMessage() + ": check log for details"); + + } + return WorkflowStepResult.OK; + } else { + return new Failure("GoogleCloud Submission not configured - no \":GoogleCloudBucket\" and/or \":GoogleCloudProject\"."); + } + } + + private AmazonS3 createClient(JsonObject configObject, String profileName) { + // get a standard client, using the standard way of configuration the credentials, etc. + AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); + + ClientConfiguration cc = new ClientConfiguration(); + Integer poolSize = configObject.getInt("connection-pool-size", 256); + cc.setMaxConnections(poolSize); + s3CB.setClientConfiguration(cc); + + /** + * Pass in a URL pointing to your S3 compatible storage. + * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + */ + String s3CEUrl = configObject.getString("custom-endpoint-url", ""); + /** + * Pass in a region to use for SigV4 signing of requests. + * Defaults to "dataverse" as it is not relevant for custom S3 implementations. + */ + String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse"); + + // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. 
+ if (!s3CEUrl.isEmpty()) { + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + } + /** + * Pass in a boolean value if path style access should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false); + // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false + s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); + + /** + * Pass in a boolean value if payload signing should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3payloadSigning = configObject.getBoolean("payload-signing",false); + /** + * Pass in a boolean value if chunked encoding should not be used within the S3 client. + * Anything but case-insensitive "false" will lead to value of true, which is default value, too. + */ + Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding",true); + // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false + s3CB.setPayloadSigningEnabled(s3payloadSigning); + // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true + // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled + s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); + + /** + * Pass in a string value if this storage driver should use a non-default AWS S3 profile. + * The default is "default" which should work when only one profile exists. + */ + ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(profileName); + + // Try to retrieve credentials via Microprofile Config API, too. 
For production use, you should not use env + // vars or system properties to provide these, but use the secrets config source provided by Payara. + AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( + new BasicAWSCredentials( + config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""), + config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse("") + )); + + // Add both providers to chain - the first working provider will be used (so static credentials are the fallback) + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, staticCredentials); + s3CB.setCredentials(providerChain); + + // let's build the client :-) + AmazonS3 client = s3CB.build(); + return client; + } + +} From 041155d20d5840b04d0c4f508e116b663898dcc3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 4 Mar 2022 15:23:53 -0500 Subject: [PATCH 008/322] simple DRS archiver, refactor S3 archiver --- .../impl/DRSSubmitToArchiveCommand.java | 263 ++++++------------ .../impl/S3SubmitToArchiveCommand.java | 195 +++++++------ 2 files changed, 191 insertions(+), 267 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 92f1c2ff71d..52be7dbba6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -2,234 +2,135 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import 
edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; -import edu.harvard.iq.dataverse.util.bagit.BagGenerator; -import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.security.DigestInputStream; -import java.security.MessageDigest; +import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; import java.util.logging.Logger; +import javax.json.Json; import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; +import javax.json.JsonValue; -import org.apache.commons.codec.binary.Hex; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProviderChain; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.profile.ProfileCredentialsProvider; -import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.transfer.TransferManager; -import com.amazonaws.services.s3.transfer.TransferManagerBuilder; @RequiredPermissions(Permission.PublishDataset) -public class DRSSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { +public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = 
Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); - private static final String S3_CONFIG = ":S3ArchivalConfig"; - private static final String S3_PROFILE = ":S3ArchivalProfile"; + private static final String DRS_CONFIG = ":DRSArchivalConfig"; private static final Config config = ConfigProvider.getConfig(); private AmazonS3 s3 = null; private TransferManager tm = null; - + public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, + Map requestedSettings) { logger.fine("In DRSSubmitToArchiveCommand..."); - JsonObject configObject = null; - String profileName = requestedSettings.get(S3_PROFILE); - String bucketName = null; - logger.fine("Profile: " + profileName + " Config: " + configObject); + JsonObject drsConfigObject = null; + try { - configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); - bucketName = configObject.getString("bucket-name", null); + drsConfigObject = JsonUtil.getJsonObject(requestedSettings.get(DRS_CONFIG)); } catch (Exception e) { - logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } - if (configObject != null && profileName != null && bucketName != null) { - - s3 = createClient(configObject, profileName); - tm = TransferManagerBuilder.standard() - .withS3Client(s3) - .build(); - try { - - Dataset dataset = dv.getDataset(); - if (dataset.getLockFor(Reason.finalizePublication) == null) { - - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); - String dataciteXml = getDataCiteXml(dv); - MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (ByteArrayInputStream 
dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8")); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + if (drsConfigObject != null) { + Set collections = drsConfigObject.getJsonObject("collections").keySet(); + Dataset dataset = dv.getDataset(); + Dataverse ancestor = dataset.getOwner(); + String alias = ancestor.getAlias(); + while (ancestor != null && !collections.contains(alias)) { + ancestor = ancestor.getOwner(); + if (ancestor != null) { + alias = ancestor.getAlias(); + } else { + alias = null; + } + } + if (alias != null) { + JsonObject collectionConfig = drsConfigObject.getJsonObject("collections").getJsonObject(alias); + + WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); + + if (s3Result == WorkflowStepResult.OK) { + // Now contact DRS + JsonObjectBuilder job = Json.createObjectBuilder(drsConfigObject); + job.remove("collections"); + for (Entry entry : collectionConfig.entrySet()) { + job.add(entry.getKey(), entry.getValue()); + } + + String drsConfigString = JsonUtil.prettyPrint(job.build()); + try (ByteArrayInputStream configIn = new ByteArrayInputStream(drsConfigString.getBytes("UTF-8"))) { // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); - om.setContentLength(dataciteIn.available()); - String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber()+".xml"; - tm.upload(new PutObjectRequest(bucketName, dcKey, digestInputStream, om)).waitForCompletion(); - String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + om.setContentLength(configIn.available()); + String dcKey = getSpaceName(dataset) + "/drsConfig." 
+ getSpaceName(dataset) + "_v" + + dv.getFriendlyVersionNumber() + ".json"; + tm.upload(new PutObjectRequest(bucketName, dcKey, configIn, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, dcKey); - if (!om.getContentMD5().equals(localchecksum)) { - logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + dcKey); - return new Failure("Error in transferring DataCite.xml file to S3", - "S3 Submission Failure: incomplete metadata transfer"); - } - - // Store BagIt file - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; - String bagKey = spaceName + "/" + fileName; - // Add BagIt ZIP file - // Google uses MD5 as one way to verify the - // transfer - messageDigest = MessageDigest.getInstance("MD5"); - // Generate bag - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setAuthenticationKey(token.getTokenString()); - if (bagger.generateBag(fileName, false)) { - File bagFile = bagger.getBagFile(fileName); - - try (FileInputStream in = new FileInputStream(bagFile); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest);) { - om = new ObjectMetadata(); - om.setContentLength(bagFile.length()); - - tm.upload(new PutObjectRequest(bucketName, bagKey, digestInputStream2, om)).waitForCompletion(); - localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); - om = s3.getObjectMetadata(bucketName, bagKey); - - if (!om.getContentMD5().equals(localchecksum)) { - logger.severe(om.getContentMD5() + " not equal to " + localchecksum + " for " + fileName); - return new Failure("Error in transferring DataCite.xml file to S3", - "S3 Submission Failure: incomplete metadata transfer"); - } - } catch (RuntimeException rte) { - logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); - return new Failure("Error in generating Bag", - "S3 Submission Failure: archive file not created"); - } - - logger.fine("S3 Submission step: 
Content Transferred"); - - // Document the location of dataset archival copy location (actually the URL - // where you can - // view it as an admin) - - //Unsigned URL - gives location but not access without creds - dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); - } else { - logger.warning("Could not write local Bag file " + fileName); - return new Failure("S3 Archiver fail writing temp local bag"); + } catch (RuntimeException rte) { + logger.warning("Error creating DRS Config file during DRS archiving: " + rte.getMessage()); + return new Failure("Error in generating Config file", + "DRS Submission Failure: config file not created"); + } catch (InterruptedException e) { + logger.warning("DRS Archiver failure: " + e.getLocalizedMessage()); + e.printStackTrace(); + return new Failure("DRS Archiver fail in config transfer"); + } catch (UnsupportedEncodingException e1) { + logger.warning("UTF-8 not supported!"); + } catch (IOException e1) { + logger.warning("Failure creating ByteArrayInputStream from string!"); } - - } - } else { - logger.warning("S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); - return new Failure("Dataset locked"); + + logger.fine("DRS Submission step: Config Transferred"); + + // Document the location of dataset archival copy location (actually the URL + // where you can + // view it as an admin) + + // Unsigned URL - gives location but not access without creds + } else { + + logger.warning("DRS: S3 archiving failed - will not send config: " + getSpaceName(dataset) + "_v" + + dv.getFriendlyVersionNumber()); + return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); } - } catch (Exception e) { - logger.warning(e.getLocalizedMessage()); - e.printStackTrace(); - return new Failure("S3 Archiver Submission Failure", - e.getLocalizedMessage() + ": check log for details"); + } else { + logger.fine("DRS Archiver: No matching collection found - will not archive: " + 
getSpaceName(dataset) + + "_v" + dv.getFriendlyVersionNumber()); + return WorkflowStepResult.OK; } - return WorkflowStepResult.OK; - } else { - return new Failure("GoogleCloud Submission not configured - no \":GoogleCloudBucket\" and/or \":GoogleCloudProject\"."); - } - } - private AmazonS3 createClient(JsonObject configObject, String profileName) { - // get a standard client, using the standard way of configuration the credentials, etc. - AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); - - ClientConfiguration cc = new ClientConfiguration(); - Integer poolSize = configObject.getInt("connection-pool-size", 256); - cc.setMaxConnections(poolSize); - s3CB.setClientConfiguration(cc); - - /** - * Pass in a URL pointing to your S3 compatible storage. - * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html - */ - String s3CEUrl = configObject.getString("custom-endpoint-url", ""); - /** - * Pass in a region to use for SigV4 signing of requests. - * Defaults to "dataverse" as it is not relevant for custom S3 implementations. - */ - String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse"); - - // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. - if (!s3CEUrl.isEmpty()) { - s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + } else { + logger.warning(DRS_CONFIG + " not found"); + return new Failure("DRS Submission not configured - no " + DRS_CONFIG + " found."); } - /** - * Pass in a boolean value if path style access should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false); - // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. 
default = false - s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); - - /** - * Pass in a boolean value if payload signing should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - Boolean s3payloadSigning = configObject.getBoolean("payload-signing",false); - /** - * Pass in a boolean value if chunked encoding should not be used within the S3 client. - * Anything but case-insensitive "false" will lead to value of true, which is default value, too. - */ - Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding",true); - // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false - s3CB.setPayloadSigningEnabled(s3payloadSigning); - // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true - // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled - s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. - */ - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(profileName); - - // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env - // vars or system properties to provide these, but use the secrets config source provided by Payara. 
- AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""), - config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse("") - )); - - // Add both providers to chain - the first working provider will be used (so static credentials are the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, staticCredentials); - s3CB.setCredentials(providerChain); - - // let's build the client :-) - AmazonS3 client = s3CB.build(); - return client; + return WorkflowStepResult.OK; } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 2520ace16ed..e352caa6944 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -49,49 +49,49 @@ public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand imp private static final String S3_PROFILE = ":S3ArchivalProfile"; private static final Config config = ConfigProvider.getConfig(); - private AmazonS3 s3 = null; - private TransferManager tm = null; - + protected AmazonS3 s3 = null; + protected TransferManager tm = null; + private String spaceName = null; + protected String bucketName = null; + public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, + Map requestedSettings) { logger.fine("In S3SubmitToArchiveCommand..."); JsonObject configObject = null; String 
profileName = requestedSettings.get(S3_PROFILE); - String bucketName = null; + logger.fine("Profile: " + profileName + " Config: " + configObject); try { - configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); - bucketName = configObject.getString("bucket-name", null); + configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); + bucketName = configObject.getString("bucket-name", null); } catch (Exception e) { logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); } if (configObject != null && profileName != null && bucketName != null) { s3 = createClient(configObject, profileName); - tm = TransferManagerBuilder.standard() - .withS3Client(s3) - .build(); + tm = TransferManagerBuilder.standard().withS3Client(s3).build(); try { Dataset dataset = dv.getDataset(); if (dataset.getLockFor(Reason.finalizePublication) == null) { - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + spaceName = getSpaceName(dataset); String dataciteXml = getDataCiteXml(dv); - MessageDigest messageDigest = MessageDigest.getInstance("MD5"); try (ByteArrayInputStream dataciteIn = new ByteArrayInputStream(dataciteXml.getBytes("UTF-8"))) { // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); om.setContentLength(dataciteIn.available()); - String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber()+".xml"; + String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber() + + ".xml"; tm.upload(new PutObjectRequest(bucketName, dcKey, dataciteIn, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, dcKey); - if(om==null) { + if (om == null) { logger.warning("Could not write datacite xml to S3"); return new Failure("S3 Archiver failed writing datacite xml file"); } @@ -102,47 +102,48 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Add BagIt ZIP file // 
Google uses MD5 as one way to verify the // transfer - messageDigest = MessageDigest.getInstance("MD5"); - // Generate bag - BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setAuthenticationKey(token.getTokenString()); - if (bagger.generateBag(fileName, false)) { - File bagFile = bagger.getBagFile(fileName); - - try (FileInputStream in = new FileInputStream(bagFile)) { - om = new ObjectMetadata(); - om.setContentLength(bagFile.length()); - - tm.upload(new PutObjectRequest(bucketName, bagKey, in, om)).waitForCompletion(); - om = s3.getObjectMetadata(bucketName, bagKey); - - if (om ==null) { - logger.severe("Error sending file to S3: " + fileName); - return new Failure("Error in transferring Bag file to S3", - "S3 Submission Failure: incomplete transfer"); - } - } catch (RuntimeException rte) { - logger.severe("Error creating Bag during S3 archiving: " + rte.getMessage()); - return new Failure("Error in generating Bag", - "S3 Submission Failure: archive file not created"); - } - logger.fine("S3 Submission step: Content Transferred"); - - // Document the location of dataset archival copy location (actually the URL - // where you can - // view it as an admin) + // Generate bag + BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setAuthenticationKey(token.getTokenString()); + if (bagger.generateBag(fileName, false)) { + File bagFile = bagger.getBagFile(fileName); + + try (FileInputStream in = new FileInputStream(bagFile)) { + om = new ObjectMetadata(); + om.setContentLength(bagFile.length()); + + tm.upload(new PutObjectRequest(bucketName, bagKey, in, om)).waitForCompletion(); + om = s3.getObjectMetadata(bucketName, bagKey); + + if (om == null) { + logger.severe("Error sending file to S3: " + fileName); + return new Failure("Error in transferring Bag file to S3", + "S3 Submission Failure: incomplete transfer"); + } + } catch (RuntimeException rte) { + logger.severe("Error creating Bag during S3 
archiving: " + rte.getMessage()); + return new Failure("Error in generating Bag", + "S3 Submission Failure: archive file not created"); + } + + logger.fine("S3 Submission step: Content Transferred"); + + // Document the location of dataset archival copy location (actually the URL + // where you can + // view it as an admin) + + // Unsigned URL - gives location but not access without creds + dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); + } else { + logger.warning("Could not write local Bag file " + fileName); + return new Failure("S3 Archiver fail writing temp local bag"); + } - //Unsigned URL - gives location but not access without creds - dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); - } else { - logger.warning("Could not write local Bag file " + fileName); - return new Failure("S3 Archiver fail writing temp local bag"); } - - } - } else { - logger.warning("S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); + } else { + logger.warning( + "S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister"); return new Failure("Dataset locked"); } } catch (Exception e) { @@ -154,78 +155,100 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } return WorkflowStepResult.OK; } else { - return new Failure("GoogleCloud Submission not configured - no \":GoogleCloudBucket\" and/or \":GoogleCloudProject\"."); + return new Failure( + "S3 Submission not configured - no \":S3ArchivalProfile\" and/or \":S3ArchivalConfig\" or no bucket-name defined in config."); } } + protected String getSpaceName(Dataset dataset) { + if (spaceName == null) { + spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-').replace('.', '-') + .toLowerCase(); + } + return spaceName; + } + private AmazonS3 createClient(JsonObject configObject, String profileName) { - // get a standard client, using the standard way of configuration the credentials, 
etc. + // get a standard client, using the standard way of configuration the + // credentials, etc. AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); ClientConfiguration cc = new ClientConfiguration(); Integer poolSize = configObject.getInt("connection-pool-size", 256); cc.setMaxConnections(poolSize); s3CB.setClientConfiguration(cc); - + /** - * Pass in a URL pointing to your S3 compatible storage. - * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + * Pass in a URL pointing to your S3 compatible storage. For possible values see + * https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html */ String s3CEUrl = configObject.getString("custom-endpoint-url", ""); /** - * Pass in a region to use for SigV4 signing of requests. - * Defaults to "dataverse" as it is not relevant for custom S3 implementations. + * Pass in a region to use for SigV4 signing of requests. Defaults to + * "dataverse" as it is not relevant for custom S3 implementations. */ String s3CERegion = configObject.getString("custom-endpoint-region", "dataverse"); - // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. + // if the admin has set a system property (see below) we use this endpoint URL + // instead of the standard ones. if (!s3CEUrl.isEmpty()) { s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); } /** - * Pass in a boolean value if path style access should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + * Pass in a boolean value if path style access should be used within the S3 + * client. Anything but case-insensitive "true" will lead to value of false, + * which is default value, too. 
*/ Boolean s3pathStyleAccess = configObject.getBoolean("path-style-access", false); - // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false + // some custom S3 implementations require "PathStyleAccess" as they us a path, + // not a subdomain. default = false s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); /** - * Pass in a boolean value if payload signing should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + * Pass in a boolean value if payload signing should be used within the S3 + * client. Anything but case-insensitive "true" will lead to value of false, + * which is default value, too. */ - Boolean s3payloadSigning = configObject.getBoolean("payload-signing",false); + Boolean s3payloadSigning = configObject.getBoolean("payload-signing", false); /** - * Pass in a boolean value if chunked encoding should not be used within the S3 client. - * Anything but case-insensitive "false" will lead to value of true, which is default value, too. + * Pass in a boolean value if chunked encoding should not be used within the S3 + * client. Anything but case-insensitive "false" will lead to value of true, + * which is default value, too. */ - Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding",true); - // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false + Boolean s3chunkedEncoding = configObject.getBoolean("chunked-encoding", true); + // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. + // default = false s3CB.setPayloadSigningEnabled(s3payloadSigning); - // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. 
default = true - // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled + // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. + // default = true + // Boolean is inverted, otherwise setting + // dataverse.files..chunked-encoding=false would result in leaving Chunked + // Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. + * Pass in a string value if this storage driver should use a non-default AWS S3 + * profile. The default is "default" which should work when only one profile + * exists. */ ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(profileName); - // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env - // vars or system properties to provide these, but use the secrets config source provided by Payara. - AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials( + // Try to retrieve credentials via Microprofile Config API, too. For production + // use, you should not use env + // vars or system properties to provide these, but use the secrets config source + // provided by Payara. 
+ AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(new BasicAWSCredentials( config.getOptionalValue("dataverse.s3archiver.access-key", String.class).orElse(""), - config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse("") - )); - - // Add both providers to chain - the first working provider will be used (so static credentials are the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, staticCredentials); + config.getOptionalValue("dataverse.s3archiver.secret-key", String.class).orElse(""))); + + // Add both providers to chain - the first working provider will be used (so + // static credentials are the fallback) + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(profileCredentials, + staticCredentials); s3CB.setCredentials(providerChain); - + // let's build the client :-) - AmazonS3 client = s3CB.build(); + AmazonS3 client = s3CB.build(); return client; } From 1dbdc1b85d8c4e063582b2aa42e233efee594272 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 4 Mar 2022 16:41:35 -0500 Subject: [PATCH 009/322] fix validation of file --- .../edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 4 ++-- .../harvard/iq/dataverse/util/bagit/BagValidationJob.java | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 958e61f33e6..47f0287f18e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -903,8 +903,8 @@ public void incrementTotalDataSize(long inc) { totalDataSize += inc; } - public String getHashtype() { - return hashtype.toString(); + public ChecksumType getHashtype() { + return hashtype; } // Get's all "Has Part" children, standardized to send an array with 0,1, or diff 
--git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java index 7a32b96f4a0..fb5507c1b56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java @@ -25,6 +25,7 @@ import org.apache.commons.compress.archivers.zip.ZipFile; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFile.ChecksumType; import org.apache.commons.compress.utils.IOUtils; @@ -41,7 +42,7 @@ public class BagValidationJob implements Runnable { private String hash; private String name; - private static String hashtype; + private static ChecksumType hashtype; public BagValidationJob(String value, String key) throws IllegalStateException { if (zf == null || bagGenerator == null) { @@ -64,7 +65,7 @@ public void run() { if (hash.equals(realHash)) { log.fine("Valid hash for " + name); } else { - log.severe("Invalid " + bagGenerator.getHashtype() + " for " + name); + log.severe("Invalid " + bagGenerator.getHashtype().name() + " for " + name); log.fine("As sent: " + hash); log.fine("As calculated: " + realHash); } @@ -89,7 +90,7 @@ private String generateFileHash(String name, ZipFile zf) { } else if (hashtype.equals(DataFile.ChecksumType.MD5)) { realHash = DigestUtils.md5Hex(inputStream); } else { - log.warning("Unknown hash type: " + hashtype); + log.warning("Unknown hash type: " + hashtype.name()); } } catch (ZipException e) { From 7c8b18fd5a45d312fbe7fd06e930b61d0fa7950f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:29:00 -0500 Subject: [PATCH 010/322] Use separate ZipFile in executor, fix path issue breaking validation --- .../iq/dataverse/util/bagit/BagGenerator.java | 65 +++++++++++-------- .../util/bagit/BagValidationJob.java | 17 +++-- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 47f0287f18e..118b44e0b58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -359,6 +359,7 @@ public boolean generateBag(String bagName, boolean temp) { // Create an output stream backed by the file bagFileOS = new FileOutputStream(bagFile); if (generateBag(bagFileOS)) { + //The generateBag call sets this.bagName to the correct value validateBagFile(bagFile); if (usetemp) { logger.fine("Moving tmp zip"); @@ -384,7 +385,8 @@ public void validateBag(String bagId) { ZipFile zf = null; InputStream is = null; try { - zf = new ZipFile(getBagFile(bagId)); + File bagFile = getBagFile(bagId); + zf = new ZipFile(bagFile); ZipArchiveEntry entry = zf.getEntry(getValidName(bagId) + "/manifest-sha1.txt"); if (entry != null) { logger.info("SHA1 hashes used"); @@ -424,7 +426,7 @@ public void validateBag(String bagId) { } IOUtils.closeQuietly(is); logger.info("HashMap Map contains: " + checksumMap.size() + " entries"); - checkFiles(checksumMap, zf); + checkFiles(checksumMap, bagFile); } catch (IOException io) { logger.log(Level.SEVERE,"Could not validate Hashes", io); } catch (Exception e) { @@ -453,14 +455,14 @@ public File getBagFile(String bagID) throws Exception { private void validateBagFile(File bagFile) throws IOException { // Run a confirmation test - should verify all files and hashes - ZipFile zf = new ZipFile(bagFile); + // Check files calculates the hashes and file sizes and reports on // whether hashes are correct - checkFiles(checksumMap, zf); + checkFiles(checksumMap, bagFile); logger.info("Data Count: " + dataCount); logger.info("Data Size: " + totalDataSize); - zf.close(); + //zf.close(); } public static String getValidName(String bagName) { @@ -477,7 +479,7 @@ private void processContainer(JsonObject item, String 
currentPath) throws IOExce } else if (item.has(JsonLDTerm.schemaOrg("name").getLabel())) { title = item.get(JsonLDTerm.schemaOrg("name").getLabel()).getAsString(); } - + logger.fine("Adding " + title + "/ to path " + currentPath); currentPath = currentPath + title + "/"; int containerIndex = -1; try { @@ -553,6 +555,7 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce logger.warning("Duplicate/Collision: " + child.get("@id").getAsString() + " has SHA1 Hash: " + childHash + " in: " + bagID); } + logger.fine("Adding " + childPath + " with hash " + childHash + " to checksumMap"); checksumMap.put(childPath, childHash); } } @@ -696,29 +699,39 @@ private void createFileFromURL(final String relPath, final String uri) addEntry(archiveEntry, supp); } - private void checkFiles(HashMap shaMap, ZipFile zf) { + private void checkFiles(HashMap shaMap, File bagFile) { ExecutorService executor = Executors.newFixedThreadPool(numConnections); - BagValidationJob.setZipFile(zf); - BagValidationJob.setBagGenerator(this); - logger.fine("Validating hashes for zipped data files"); - int i = 0; - for (Entry entry : shaMap.entrySet()) { - BagValidationJob vj = new BagValidationJob(entry.getValue(), entry.getKey()); - executor.execute(vj); - i++; - if (i % 1000 == 0) { - logger.info("Queuing Hash Validations: " + i); - } - } - logger.fine("All Hash Validations Queued: " + i); - - executor.shutdown(); + ZipFile zf = null; try { - while (!executor.awaitTermination(10, TimeUnit.MINUTES)) { - logger.fine("Awaiting completion of hash calculations."); + zf = new ZipFile(bagFile); + + BagValidationJob.setZipFile(zf); + BagValidationJob.setBagGenerator(this); + logger.fine("Validating hashes for zipped data files"); + int i = 0; + for (Entry entry : shaMap.entrySet()) { + BagValidationJob vj = new BagValidationJob(bagName, entry.getValue(), entry.getKey()); + executor.execute(vj); + i++; + if (i % 1000 == 0) { + logger.info("Queuing Hash Validations: " + i); + } } - 
} catch (InterruptedException e) { - logger.log(Level.SEVERE,"Hash Calculations interrupted", e); + logger.fine("All Hash Validations Queued: " + i); + + executor.shutdown(); + try { + while (!executor.awaitTermination(10, TimeUnit.MINUTES)) { + logger.fine("Awaiting completion of hash calculations."); + } + } catch (InterruptedException e) { + logger.log(Level.SEVERE, "Hash Calculations interrupted", e); + } + } catch (IOException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + } finally { + IOUtils.closeQuietly(zf); } logger.fine("Hash Validations Completed"); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java index fb5507c1b56..7ac9fd701b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagValidationJob.java @@ -42,13 +42,15 @@ public class BagValidationJob implements Runnable { private String hash; private String name; + private String basePath; private static ChecksumType hashtype; - public BagValidationJob(String value, String key) throws IllegalStateException { + public BagValidationJob(String bagName, String value, String key) throws IllegalStateException { if (zf == null || bagGenerator == null) { throw new IllegalStateException( "Static Zipfile and BagGenerator must be set before creating ValidationJobs"); } + basePath=bagName; hash = value; name = key; @@ -61,7 +63,7 @@ public BagValidationJob(String value, String key) throws IllegalStateException { */ public void run() { - String realHash = generateFileHash(name, zf); + String realHash = generateFileHash(basePath + "/" + name, zf); if (hash.equals(realHash)) { log.fine("Valid hash for " + name); } else { @@ -73,12 +75,16 @@ public void run() { private String generateFileHash(String name, ZipFile zf) { + String realHash = null; + ZipArchiveEntry archiveEntry1 = zf.getEntry(name); + + 
if(archiveEntry1 != null) { // Error check - add file sizes to compare against supplied stats - + log.fine("Getting stream for " + name); long start = System.currentTimeMillis(); InputStream inputStream = null; - String realHash = null; + try { inputStream = zf.getInputStream(archiveEntry1); if (hashtype.equals(DataFile.ChecksumType.SHA1)) { @@ -105,6 +111,9 @@ private String generateFileHash(String name, ZipFile zf) { log.fine("Retrieve/compute time = " + (System.currentTimeMillis() - start) + " ms"); // Error check - add file sizes to compare against supplied stats bagGenerator.incrementTotalDataSize(archiveEntry1.getSize()); + } else { + log.warning("Entry " + name + " not found in zipped bag: not validated"); + } return realHash; } From 4892ab8419f3752e4fa6022efb8580a144b6beef Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:31:23 -0500 Subject: [PATCH 011/322] update commons-codec probably not required - good practice/helped in debugging to be able to check sourcecode online which is for the current version. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index dc3a0111bf5..a92ff1d259e 100644 --- a/pom.xml +++ b/pom.xml @@ -542,7 +542,7 @@ commons-codec commons-codec - 1.9 + 1.15 From f63bbd859278b8b5d28ba22c07aae370a8c12984 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:33:19 -0500 Subject: [PATCH 012/322] fix bag name, remove digest imports note that the write to file part of the bag generation already tests the hash values of the individual files internally. 
--- .../engine/command/impl/S3SubmitToArchiveCommand.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index e352caa6944..3009e422037 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -17,14 +17,11 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; -import java.security.DigestInputStream; -import java.security.MessageDigest; import java.util.Map; import java.util.logging.Logger; import javax.json.JsonObject; -import org.apache.commons.codec.binary.Hex; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -97,8 +94,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } // Store BagIt file - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; - String bagKey = spaceName + "/" + fileName; + String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber(); + String bagKey = spaceName + "/" + fileName + ".zip"; // Add BagIt ZIP file // Google uses MD5 as one way to verify the // transfer From 2a6042998302dc92170c86bbc32f127f6782a619 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:34:16 -0500 Subject: [PATCH 013/322] refactor, add isArchivable method --- .../impl/AbstractSubmitToArchiveCommand.java | 8 +-- .../impl/DRSSubmitToArchiveCommand.java | 53 ++++++++++++------- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index a235dd57d91..e919f81e6e9 
100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -2,8 +2,9 @@ import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.DataCitation; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -15,8 +16,6 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.nio.charset.Charset; -import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.logging.Logger; @@ -83,4 +82,7 @@ String getDataCiteXml(DatasetVersion dv) { dv.getDataset()); } + public static boolean isArchivable(Dataset dataset, SettingsWrapper settingsWrapper) { + return true; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 52be7dbba6e..e82fe66c8c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; @@ -25,13 +26,8 @@ 
import javax.json.JsonObjectBuilder; import javax.json.JsonValue; -import org.eclipse.microprofile.config.Config; -import org.eclipse.microprofile.config.ConfigProvider; - -import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.transfer.TransferManager; @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { @@ -39,10 +35,6 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; - private static final Config config = ConfigProvider.getConfig(); - private AmazonS3 s3 = null; - private TransferManager tm = null; - public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @@ -62,15 +54,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Set collections = drsConfigObject.getJsonObject("collections").keySet(); Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); - String alias = ancestor.getAlias(); - while (ancestor != null && !collections.contains(alias)) { - ancestor = ancestor.getOwner(); - if (ancestor != null) { - alias = ancestor.getAlias(); - } else { - alias = null; - } - } + String alias = getArchivableAncestor(ancestor, collections); + if (alias != null) { JsonObject collectionConfig = drsConfigObject.getJsonObject("collections").getJsonObject(alias); @@ -95,6 +80,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t om = s3.getObjectMetadata(bucketName, dcKey); } catch (RuntimeException rte) { logger.warning("Error creating DRS Config file during DRS archiving: " + rte.getMessage()); + rte.printStackTrace(); return 
new Failure("Error in generating Config file", "DRS Submission Failure: config file not created"); } catch (InterruptedException e) { @@ -133,4 +119,35 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } return WorkflowStepResult.OK; } + + private static String getArchivableAncestor(Dataverse ancestor, Set collections) { + String alias = ancestor.getAlias(); + while (ancestor != null && !collections.contains(alias)) { + ancestor = ancestor.getOwner(); + if (ancestor != null) { + alias = ancestor.getAlias(); + } else { + alias = null; + } + } + return null; + } + + public static boolean isArchivable(Dataset d, SettingsWrapper sw) { + JsonObject drsConfigObject = null; + + try { + String config = sw.get(DRS_CONFIG, null); + if(config!=null) { + drsConfigObject = JsonUtil.getJsonObject(config); + } + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + if (drsConfigObject != null) { + Set collections = drsConfigObject.getJsonObject("collections").keySet(); + return getArchivableAncestor(d.getOwner(),collections)!=null; + } + return false; + } } From 63cf130f82ef0dea579c3038fe4bd012c4eac038 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:35:17 -0500 Subject: [PATCH 014/322] Reflexive call of isArchivable method on archive provider class --- .../edu/harvard/iq/dataverse/DatasetPage.java | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 61720efafb2..8d31c3895ea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -62,6 +62,8 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.sql.Timestamp; 
import java.text.SimpleDateFormat; import java.time.Instant; @@ -5478,10 +5480,8 @@ public void refreshPaginator() { */ public void archiveVersion(Long id) { if (session.getUser() instanceof AuthenticatedUser) { - AuthenticatedUser au = ((AuthenticatedUser) session.getUser()); - DatasetVersion dv = datasetVersionService.retrieveDatasetVersionByVersionId(id).getDatasetVersion(); - String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { @@ -5505,6 +5505,24 @@ public void archiveVersion(Long id) { } } } + + boolean isArchiveable() { + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + try { + Class clazz = Class.forName(className); + + Method m = clazz.getMethod("isArchivable", Dataset.class, SettingsWrapper.class); + Object[] params = { dataset, settingsWrapper }; + return (Boolean) m.invoke(null, params); + } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException + | InvocationTargetException | NoSuchMethodException | SecurityException e) { + logger.warning("Failed to call is Archivable on configured archiver class: " + className); + e.printStackTrace(); + } + } + return false; + } private static Date getFileDateToCompare(FileMetadata fileMetadata) { DataFile datafile = fileMetadata.getDataFile(); From 7852cc4ccd0b5af64e381921bbab5b8fcfd28152 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 7 Mar 2022 13:37:03 -0500 Subject: [PATCH 015/322] only display archiving column when needed there are archived copies, or one can archive this dataset --- src/main/webapp/dataset-versions.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 6cb8c11dff7..c9c90d17619 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -147,12 +147,12 @@ - + - From e3da7576530ff9b371a5a5300a1f25e719511d81 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 9 Mar 2022 10:03:19 -0500 Subject: [PATCH 016/322] typos in per-collection display logic --- src/main/webapp/dataset-versions.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index c9c90d17619..e105ac30df7 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -147,12 +147,12 @@ - + - From 54557ada77ee912ded47ed3549f8b6dfbd4cd083 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 9 Mar 2022 17:29:22 -0500 Subject: [PATCH 017/322] Bug fix in refactored collection check --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index e82fe66c8c8..cb88f9e030e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -130,7 +130,7 @@ private static String getArchivableAncestor(Dataverse ancestor, Set coll alias = null; } } - return null; + return alias; } public static boolean isArchivable(Dataset d, SettingsWrapper sw) { From a04c1be0f08bd8adf6b87bab497cf73ea187cc55 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 9 Mar 2022 17:29:41 -0500 Subject: [PATCH 018/322] make isArchivable public so it can be used in .xhtml --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 18e051946e8..919a5c50666 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5506,12 +5506,11 @@ public void archiveVersion(Long id) { } } - boolean isArchiveable() { + public boolean isArchivable() { String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); if (className != null) { try { Class clazz = Class.forName(className); - Method m = clazz.getMethod("isArchivable", Dataset.class, SettingsWrapper.class); Object[] params = { dataset, settingsWrapper }; return (Boolean) m.invoke(null, params); From 29c95995106e6f6716d7e3f2f4564646eeb680a5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 18 Mar 2022 09:58:22 -0400 Subject: [PATCH 019/322] add method to get URL (same signature as Dataset in 3b - inherit?) 
--- src/main/java/edu/harvard/iq/dataverse/Dataverse.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index 342aaec187a..db5f9d172cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -5,6 +5,8 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; + import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; @@ -765,4 +767,8 @@ public boolean isAncestorOf( DvObject other ) { } return false; } + + public String getLocalURL() { + return SystemConfig.getDataverseSiteUrlStatic() + "/dataverse/" + this.getAlias(); + } } From 12e74d90742e93ff66225ab87afd54fbc8fee95c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 18 Mar 2022 09:58:46 -0400 Subject: [PATCH 020/322] change to PID URL, add owner info --- .../iq/dataverse/util/bagit/OREMap.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 38a04b36314..637ff2ccfff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.branding.BrandingUtil; @@ -85,7 +86,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except 
localContext.putIfAbsent(JsonLDNamespace.schema.getPrefix(), JsonLDNamespace.schema.getUrl()); Dataset dataset = version.getDataset(); - String id = dataset.getGlobalId().asString(); + String id = dataset.getGlobalId().toURL().toExternalForm(); JsonArrayBuilder fileArray = Json.createArrayBuilder(); // The map describes an aggregation JsonObjectBuilder aggBuilder = Json.createObjectBuilder(); @@ -213,7 +214,9 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except } aggBuilder.add(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel(), - BrandingUtil.getRootDataverseCollectionName()); + BrandingUtil.getInstallationBrandName()); + + aggBuilder.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(dataset.getOwner())); // The aggregation aggregates aggregatedresources (Datafiles) which each have // their own entry and metadata @@ -315,6 +318,17 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except } } + private JsonObjectBuilder getDataverseDescription(Dataverse dv) { + //Schema.org is already in local context, no updates needed as long as we only use chemaOrg and "@id" here + JsonObjectBuilder dvjob = Json.createObjectBuilder().add(JsonLDTerm.schemaOrg("name").getLabel(), dv.getCurrentName()).add("@id", dv.getLocalURL()); + addIfNotNull(dvjob, JsonLDTerm.schemaOrg("description"), dv.getDescription()); + Dataverse owner = dv.getOwner(); + if(owner!=null) { + dvjob.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(owner)); + } + return dvjob; + } + /* * Simple methods to only add an entry to JSON if the value of the term is * non-null. 
Methods created for string, JsonValue, boolean, and long From a53f94f0d9616e15a4608b150aea7159f7b361c0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sun, 3 Apr 2022 17:07:27 -0400 Subject: [PATCH 021/322] update to use DRS defined bucket param --- .../dataverse/engine/command/impl/S3SubmitToArchiveCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 3009e422037..391a2f7c94a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -65,7 +65,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.fine("Profile: " + profileName + " Config: " + configObject); try { configObject = JsonUtil.getJsonObject(requestedSettings.get(S3_CONFIG)); - bucketName = configObject.getString("bucket-name", null); + bucketName = configObject.getString("s3_bucket_name", null); } catch (Exception e) { logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); } From fcc78a5d9d774e60442bc0128188eb90664c2020 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sun, 3 Apr 2022 17:08:17 -0400 Subject: [PATCH 022/322] call ingest endpoint --- .../impl/DRSSubmitToArchiveCommand.java | 107 +++++++++++------- 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index cb88f9e030e..32e95ed3c3a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -13,9 +13,10 @@ import 
edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -26,14 +27,19 @@ import javax.json.JsonObjectBuilder; import javax.json.JsonValue; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; + private static String PENDING = "Pending"; public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); @@ -55,71 +61,92 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); - + if (alias != null) { JsonObject collectionConfig = drsConfigObject.getJsonObject("collections").getJsonObject(alias); WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add("status", "Failure"); + statusObject.add("message", "Bag not transferred"); + if 
(s3Result == WorkflowStepResult.OK) { + statusObject.add("status", "Attempted"); + statusObject.add("message", "Bag transferred"); + // Now contact DRS JsonObjectBuilder job = Json.createObjectBuilder(drsConfigObject); job.remove("collections"); + job.remove("DRSendpoint"); + String spaceName = getSpaceName(dataset); + job.add("package_id", spaceName + ".v" + dv.getFriendlyVersionNumber()); + + job.add("s3_path", spaceName); for (Entry entry : collectionConfig.entrySet()) { job.add(entry.getKey(), entry.getValue()); } String drsConfigString = JsonUtil.prettyPrint(job.build()); - try (ByteArrayInputStream configIn = new ByteArrayInputStream(drsConfigString.getBytes("UTF-8"))) { - // Add datacite.xml file - ObjectMetadata om = new ObjectMetadata(); - om.setContentLength(configIn.available()); - String dcKey = getSpaceName(dataset) + "/drsConfig." + getSpaceName(dataset) + "_v" - + dv.getFriendlyVersionNumber() + ".json"; - tm.upload(new PutObjectRequest(bucketName, dcKey, configIn, om)).waitForCompletion(); - om = s3.getObjectMetadata(bucketName, dcKey); - } catch (RuntimeException rte) { - logger.warning("Error creating DRS Config file during DRS archiving: " + rte.getMessage()); - rte.printStackTrace(); - return new Failure("Error in generating Config file", - "DRS Submission Failure: config file not created"); - } catch (InterruptedException e) { - logger.warning("DRS Archiver failure: " + e.getLocalizedMessage()); - e.printStackTrace(); - return new Failure("DRS Archiver fail in config transfer"); - } catch (UnsupportedEncodingException e1) { - logger.warning("UTF-8 not supported!"); - } catch (IOException e1) { - logger.warning("Failure creating ByteArrayInputStream from string!"); - } - - logger.fine("DRS Submission step: Config Transferred"); - // Document the location of dataset archival copy location (actually the URL - // where you can - // view it as an admin) + CloseableHttpClient client = HttpClients.createDefault(); + HttpPost ingestPost; + try { + 
ingestPost = new HttpPost(); + ingestPost.setURI(new URI(drsConfigObject.getString("DRSendpoint"))); + String body = drsConfigString; + logger.fine("Body: " + body); + ingestPost.setEntity(new StringEntity(body, "utf-8")); + ingestPost.setHeader("Content-Type", "application/json"); + + } catch (URISyntaxException e) { + return new Failure( + "LDNAnnounceDatasetVersion workflow step failed: unable to parse inbox in :LDNTarget setting."); + } + // execute + + try (CloseableHttpResponse response = client.execute(ingestPost)) { + int code = response.getStatusLine().getStatusCode(); + String responseBody = new String(response.getEntity().getContent().readAllBytes(), + StandardCharsets.UTF_8); + if (code >= 200 && code < 300) { + logger.fine("Status: " + code); + logger.fine("Response" + responseBody); + JsonObject responseObject = JsonUtil.getJsonObject(responseBody); + String status = responseObject.getString("status"); + if (!PENDING.equals(status)) { + logger.warning("Unexpected Status: " + status); + } else { + logger.fine("DRS Ingest succeded: " + responseObject.toString()); + statusObject.add("status", status); + statusObject.add("message", responseObject.getString("message")); + } + } + } catch (ClientProtocolException e2) { + e2.printStackTrace(); + } catch (IOException e2) { + e2.printStackTrace(); + } - // Unsigned URL - gives location but not access without creds } else { - logger.warning("DRS: S3 archiving failed - will not send config: " + getSpaceName(dataset) + "_v" + logger.warning("DRS: S3 archiving failed - will not call ingest: " + getSpaceName(dataset) + "_v" + dv.getFriendlyVersionNumber()); return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); } - + dv.setArchivalCopyLocation(statusObject.build().toString()); } else { logger.fine("DRS Archiver: No matching collection found - will not archive: " + getSpaceName(dataset) + "_v" + dv.getFriendlyVersionNumber()); return WorkflowStepResult.OK; } - } else { logger.warning(DRS_CONFIG + " 
not found"); return new Failure("DRS Submission not configured - no " + DRS_CONFIG + " found."); } return WorkflowStepResult.OK; } - + private static String getArchivableAncestor(Dataverse ancestor, Set collections) { String alias = ancestor.getAlias(); while (ancestor != null && !collections.contains(alias)) { @@ -138,15 +165,15 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { try { String config = sw.get(DRS_CONFIG, null); - if(config!=null) { - drsConfigObject = JsonUtil.getJsonObject(config); + if (config != null) { + drsConfigObject = JsonUtil.getJsonObject(config); } } catch (Exception e) { logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { Set collections = drsConfigObject.getJsonObject("collections").keySet(); - return getArchivableAncestor(d.getOwner(),collections)!=null; + return getArchivableAncestor(d.getOwner(), collections) != null; } return false; } From 8bd83cd00c97a839356316d5eb0fe8b98cac8be5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sun, 3 Apr 2022 19:09:27 -0400 Subject: [PATCH 023/322] reminder --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 32e95ed3c3a..101d31f4cc7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -89,6 +89,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String drsConfigString = JsonUtil.prettyPrint(job.build()); + + //TODO - ADD code to ignore self-signed cert CloseableHttpClient client = HttpClients.createDefault(); HttpPost ingestPost; try { From aee2ebb8a818404d129a679c38e52ec5a798952d Mon Sep 17 00:00:00 
2001 From: qqmyers Date: Tue, 5 Apr 2022 09:27:12 -0400 Subject: [PATCH 024/322] update Json format to match /ingest, add flag to trust cert --- .../impl/DRSSubmitToArchiveCommand.java | 99 +++++++++++++++---- 1 file changed, 79 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 101d31f4cc7..fba236457a6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -17,6 +17,9 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -26,20 +29,31 @@ import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.json.JsonValue; +import javax.net.ssl.SSLContext; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; +import org.apache.http.ssl.SSLContextBuilder; @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; - private 
static String PENDING = "Pending"; + private static final String FAILURE = "failure"; + private static final String PENDING = "pending"; + private static final String ADMIN_METADATA = "admin_metadata"; + private static final String S3_BUCKET_NAME = "s3_bucket_name"; + private static final String COLLECTIONS = "collections"; + private static final String PACKAGE_ID = "package_id"; + private static final String TRUST_CERT = "trust_cert"; public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); @@ -57,13 +71,15 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { - Set collections = drsConfigObject.getJsonObject("collections").keySet(); + Set collections = drsConfigObject.getJsonObject(COLLECTIONS).keySet(); Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); + String spaceName = getSpaceName(dataset); + String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); if (alias != null) { - JsonObject collectionConfig = drsConfigObject.getJsonObject("collections").getJsonObject(alias); + JsonObject collectionConfig = drsConfigObject.getJsonObject(COLLECTIONS).getJsonObject(alias); WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); @@ -77,21 +93,56 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Now contact DRS JsonObjectBuilder job = Json.createObjectBuilder(drsConfigObject); - job.remove("collections"); + JsonObjectBuilder amob = Json.createObjectBuilder(); + if (drsConfigObject.containsKey(ADMIN_METADATA)) { + amob = Json.createObjectBuilder(drsConfigObject.getJsonObject(ADMIN_METADATA)); + } + + boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); + job.remove(TRUST_CERT); + 
job.remove(COLLECTIONS); + job.remove(ADMIN_METADATA); job.remove("DRSendpoint"); - String spaceName = getSpaceName(dataset); - job.add("package_id", spaceName + ".v" + dv.getFriendlyVersionNumber()); + job.add(PACKAGE_ID, packageId); job.add("s3_path", spaceName); + if (collectionConfig.containsKey(S3_BUCKET_NAME)) { + job.add(S3_BUCKET_NAME, collectionConfig.get(S3_BUCKET_NAME)); + } + for (Entry entry : collectionConfig.entrySet()) { - job.add(entry.getKey(), entry.getValue()); + if (!entry.getKey().equals(S3_BUCKET_NAME)) { + amob.add(entry.getKey(), entry.getValue()); + } } + job.add(ADMIN_METADATA, amob); String drsConfigString = JsonUtil.prettyPrint(job.build()); - - //TODO - ADD code to ignore self-signed cert - CloseableHttpClient client = HttpClients.createDefault(); + // TODO - ADD code to ignore self-signed cert + CloseableHttpClient client = null; + if (trustCert) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + try { + SSLContext sslContext = SSLContextBuilder + .create() + .loadTrustMaterial(new TrustAllStrategy()) + .build(); + client = HttpClients.custom().setSSLContext(sslContext).setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).build(); + } catch (KeyManagementException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (NoSuchAlgorithmException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (KeyStoreException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + if(client == null) { + client = HttpClients.createDefault(); + } HttpPost ingestPost; try { ingestPost = new HttpPost(); @@ -111,18 +162,28 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t int code = response.getStatusLine().getStatusCode(); String responseBody = new String(response.getEntity().getContent().readAllBytes(), StandardCharsets.UTF_8); - if (code >= 200 && code < 300) { + if (code == 202) { logger.fine("Status: " + code); 
logger.fine("Response" + responseBody); JsonObject responseObject = JsonUtil.getJsonObject(responseBody); String status = responseObject.getString("status"); - if (!PENDING.equals(status)) { - logger.warning("Unexpected Status: " + status); - } else { - logger.fine("DRS Ingest succeded: " + responseObject.toString()); + switch (status) { + case PENDING: + logger.fine("DRS Ingest successfully started for: " + packageId + " : " + + responseObject.toString()); statusObject.add("status", status); statusObject.add("message", responseObject.getString("message")); + break; + case FAILURE: + logger.severe( + "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); + return new Failure("DRS Archiver fail in Ingest call"); + default: + logger.warning("Unexpected Status: " + status); } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); + return new Failure("DRS Archiver fail in Ingest call with status cvode: " + code); } } catch (ClientProtocolException e2) { e2.printStackTrace(); @@ -132,14 +193,12 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } else { - logger.warning("DRS: S3 archiving failed - will not call ingest: " + getSpaceName(dataset) + "_v" - + dv.getFriendlyVersionNumber()); + logger.warning("DRS: S3 archiving failed - will not call ingest: " + packageId); return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); } dv.setArchivalCopyLocation(statusObject.build().toString()); } else { - logger.fine("DRS Archiver: No matching collection found - will not archive: " + getSpaceName(dataset) - + "_v" + dv.getFriendlyVersionNumber()); + logger.fine("DRS Archiver: No matching collection found - will not archive: " + packageId); return WorkflowStepResult.OK; } } else { @@ -174,7 +233,7 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject 
!= null) { - Set collections = drsConfigObject.getJsonObject("collections").keySet(); + Set collections = drsConfigObject.getJsonObject(COLLECTIONS).keySet(); return getArchivableAncestor(d.getOwner(), collections) != null; } return false; From 44a52bdd46e32ee7f5d3732c26682a03b987b36f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 09:28:19 -0400 Subject: [PATCH 025/322] cleanup --- .../command/impl/DRSSubmitToArchiveCommand.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index fba236457a6..d8876b57405 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -35,7 +35,6 @@ import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustAllStrategy; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; @@ -124,11 +123,10 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (trustCert) { // use the TrustSelfSignedStrategy to allow Self Signed Certificates try { - SSLContext sslContext = SSLContextBuilder - .create() - .loadTrustMaterial(new TrustAllStrategy()) + SSLContext sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()) .build(); - client = HttpClients.custom().setSSLContext(sslContext).setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).build(); + client = HttpClients.custom().setSSLContext(sslContext) + .setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).build(); } catch 
(KeyManagementException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -139,8 +137,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // TODO Auto-generated catch block e.printStackTrace(); } - } - if(client == null) { + } + if (client == null) { client = HttpClients.createDefault(); } HttpPost ingestPost; From 48835b232dd212c7685a79e622a4dfb9329f4f50 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 09:50:03 -0400 Subject: [PATCH 026/322] align json config structure with api --- .../impl/DRSSubmitToArchiveCommand.java | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index d8876b57405..cd7bc672ef0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -50,6 +50,7 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final String PENDING = "pending"; private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; + private static final String S3_PATH = "s3_path"; private static final String COLLECTIONS = "collections"; private static final String PACKAGE_ID = "package_id"; private static final String TRUST_CERT = "trust_cert"; @@ -70,7 +71,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { - Set collections = drsConfigObject.getJsonObject(COLLECTIONS).keySet(); + JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA); + Set collections = 
adminMetadata.getJsonObject(COLLECTIONS).keySet(); Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); @@ -78,7 +80,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); if (alias != null) { - JsonObject collectionConfig = drsConfigObject.getJsonObject(COLLECTIONS).getJsonObject(alias); + JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); @@ -91,20 +93,20 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add("message", "Bag transferred"); // Now contact DRS - JsonObjectBuilder job = Json.createObjectBuilder(drsConfigObject); - JsonObjectBuilder amob = Json.createObjectBuilder(); - if (drsConfigObject.containsKey(ADMIN_METADATA)) { - amob = Json.createObjectBuilder(drsConfigObject.getJsonObject(ADMIN_METADATA)); - } - boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); - job.remove(TRUST_CERT); - job.remove(COLLECTIONS); - job.remove(ADMIN_METADATA); - job.remove("DRSendpoint"); + + JsonObjectBuilder job = Json.createObjectBuilder(); + + job.add(S3_BUCKET_NAME, adminMetadata.getString(S3_BUCKET_NAME)); + job.add(PACKAGE_ID, packageId); + job.add(S3_PATH, spaceName); - job.add("s3_path", spaceName); + //We start with the default admin_metadata + JsonObjectBuilder amob = Json.createObjectBuilder(adminMetadata); + //Remove collections and then override any params for the given alias + amob.remove(COLLECTIONS); + //Allow override of bucket name if (collectionConfig.containsKey(S3_BUCKET_NAME)) { job.add(S3_BUCKET_NAME, collectionConfig.get(S3_BUCKET_NAME)); } From f36791060fbd5b2293d9ae3c6c629c5679f3667f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 10:24:25 -0400 Subject: [PATCH 
027/322] update isArchivable logic to match json change --- .../impl/DRSSubmitToArchiveCommand.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index cd7bc672ef0..aa98fe957b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -94,19 +94,19 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Now contact DRS boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); - + JsonObjectBuilder job = Json.createObjectBuilder(); - + job.add(S3_BUCKET_NAME, adminMetadata.getString(S3_BUCKET_NAME)); job.add(PACKAGE_ID, packageId); job.add(S3_PATH, spaceName); - //We start with the default admin_metadata + // We start with the default admin_metadata JsonObjectBuilder amob = Json.createObjectBuilder(adminMetadata); - //Remove collections and then override any params for the given alias + // Remove collections and then override any params for the given alias amob.remove(COLLECTIONS); - //Allow override of bucket name + // Allow override of bucket name if (collectionConfig.containsKey(S3_BUCKET_NAME)) { job.add(S3_BUCKET_NAME, collectionConfig.get(S3_BUCKET_NAME)); } @@ -233,8 +233,14 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); } if (drsConfigObject != null) { - Set collections = drsConfigObject.getJsonObject(COLLECTIONS).keySet(); - return getArchivableAncestor(d.getOwner(), collections) != null; + JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA); + if (adminMetadata != null) { + JsonObject collectionObj = 
adminMetadata.getJsonObject(COLLECTIONS); + if (collectionObj != null) { + Set collections = collectionObj.keySet(); + return getArchivableAncestor(d.getOwner(), collections) != null; + } + } } return false; } From b9eb8fe65bfd26847abc453a6b2db09394fe4778 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Apr 2022 12:25:45 -0400 Subject: [PATCH 028/322] change log level to use on a Payara 5.2021.5 machine --- .../command/impl/DRSSubmitToArchiveCommand.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index aa98fe957b8..ecebf13bd8e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -62,7 +62,7 @@ public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion versi @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { - logger.fine("In DRSSubmitToArchiveCommand..."); + logger.info("In DRSSubmitToArchiveCommand..."); JsonObject drsConfigObject = null; try { @@ -148,7 +148,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t ingestPost = new HttpPost(); ingestPost.setURI(new URI(drsConfigObject.getString("DRSendpoint"))); String body = drsConfigString; - logger.fine("Body: " + body); + logger.info("Body: " + body); ingestPost.setEntity(new StringEntity(body, "utf-8")); ingestPost.setHeader("Content-Type", "application/json"); @@ -163,13 +163,13 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String responseBody = new String(response.getEntity().getContent().readAllBytes(), StandardCharsets.UTF_8); if (code == 202) { - logger.fine("Status: " + code); - 
logger.fine("Response" + responseBody); + logger.info("Status: " + code); + logger.info("Response" + responseBody); JsonObject responseObject = JsonUtil.getJsonObject(responseBody); String status = responseObject.getString("status"); switch (status) { case PENDING: - logger.fine("DRS Ingest successfully started for: " + packageId + " : " + logger.info("DRS Ingest successfully started for: " + packageId + " : " + responseObject.toString()); statusObject.add("status", status); statusObject.add("message", responseObject.getString("message")); @@ -198,7 +198,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } dv.setArchivalCopyLocation(statusObject.build().toString()); } else { - logger.fine("DRS Archiver: No matching collection found - will not archive: " + packageId); + logger.info("DRS Archiver: No matching collection found - will not archive: " + packageId); return WorkflowStepResult.OK; } } else { From f77b536da8549d1513a7ec4edd929953c7924e0a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 7 Apr 2022 15:00:37 -0400 Subject: [PATCH 029/322] first draft of status API --- .../edu/harvard/iq/dataverse/api/Admin.java | 103 ++++++++++++++++-- .../impl/AbstractSubmitToArchiveCommand.java | 9 ++ 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 719b4aeb1ba..d90a99aa674 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -44,6 +44,7 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; +import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; @@ -51,6 +52,7 @@ import javax.ws.rs.PUT; import javax.ws.rs.Path; import javax.ws.rs.PathParam; +import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import static 
edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -99,6 +101,8 @@ import java.io.IOException; import java.io.OutputStream; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.rolesToJson; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -1728,31 +1732,37 @@ public Response validateDataFileHashValue(@PathParam("fileId") String fileId) { } - @GET - @Path("/submitDataVersionToArchive/{id}/{version}") - public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, @PathParam("version") String versionNumber) { + @POST + @Path("/submitDatasetVersionToArchive/{id}/{version}") + public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { try { AuthenticatedUser au = findAuthenticatedUserOrDie(); - // Note - the user is being set in the session so it becomes part of the - // DataverseRequest and is sent to the back-end command where it is used to get - // the API Token which is then used to retrieve files (e.g. via S3 direct - // downloads) to create the Bag + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + // Note - the user is being set in the session so it becomes part of the + // DataverseRequest and is sent to the back-end command where it is used to get + // the API Token which is then used to retrieve files (e.g. via S3 direct + // downloads) to create the Bag session.setUser(au); // TODO: Stop using session. Use createDataverseRequest instead. 
Dataset ds = findDatasetOrDie(dsid); DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, + dvRequestService.getDataverseRequest(), dv); if (cmd != null) { new Thread(new Runnable() { public void run() { try { DatasetVersion dv = commandEngine.submit(cmd); if (dv.getArchivalCopyLocation() != null) { - logger.info("DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber + " submitted to Archive at: " - + dv.getArchivalCopyLocation()); + logger.info( + "DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber + + " submitted to Archive at: " + dv.getArchivalCopyLocation()); } else { logger.severe("Error submitting version due to conflict/error at Archive"); } @@ -1761,7 +1771,8 @@ public void run() { } } }).start(); - return ok("Archive submission using " + cmd.getClass().getCanonicalName() + " started. Processing can take significant time for large datasets. View log and/or check archive for results."); + return ok("Archive submission using " + cmd.getClass().getCanonicalName() + + " started. Processing can take significant time for large datasets. 
View log and/or check archive for results."); } else { logger.log(Level.SEVERE, "Could not find Archiver class: " + className); return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); @@ -1774,6 +1785,74 @@ public void run() { } } + @GET + @Produces(MediaType.APPLICATION_JSON) + @Path("/submitDataVersionToArchive/{id}/{version}/status") + public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + Dataset ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if (dv.getArchivalCopyLocation() == null) { + return error(Status.NO_CONTENT, "This dataset version has not been archived"); + } else { + JsonObject status = JsonUtil.getJsonObject(dv.getArchivalCopyLocation()); + return ok(status); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + } + + @PUT + @Consumes(MediaType.APPLICATION_JSON) + @Path("/submitDataVersionToArchive/{id}/{version}/status") + public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber, JsonObject update) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + if (update.containsKey(AbstractSubmitToArchiveCommand.STATUS) + && update.containsKey(AbstractSubmitToArchiveCommand.MESSAGE)) { + String status = update.getString(AbstractSubmitToArchiveCommand.STATUS); + if (status.equals(AbstractSubmitToArchiveCommand.PENDING) + || status.equals(AbstractSubmitToArchiveCommand.FAILURE) + || 
status.equals(AbstractSubmitToArchiveCommand.SUCCESS)) { + + try { + Dataset ds; + + ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if(dv==null) { + return error(Status.NOT_FOUND, "Dataset version not found"); + } + dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); + + } catch (WrappedResponse e) { + return error(Status.NOT_FOUND, "Dataset not found"); + } + } + } + return error(Status.BAD_REQUEST, "Unacceptable status format"); + } + + + /** * Iteratively archives all unarchived dataset versions @@ -1783,7 +1862,7 @@ public void run() { * lastestonly - only archive the latest versions * @return */ - @GET + @POST @Path("/archiveAllUnarchivedDatasetVersions") public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index e919f81e6e9..dad17df38c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -27,6 +27,15 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand requestedSettings = new HashMap(); private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); + //Status message required components + public static final String STATUS = "status"; + public static final String MESSAGE = "message"; + //Allowed Statuses + public static final String PENDING = "pending"; + public static final String SUCCESS = "success"; + public static final String FAILURE = "failure"; + + public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, 
DatasetVersion version) { super(aRequest, version.getDataset()); this.version = version; From 4a86f48347e7e8ff4496f7ed34f4a4c378273fb9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 11:05:33 -0400 Subject: [PATCH 030/322] revert changes from TDL --- src/main/java/propertyFiles/Bundle.properties | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index fb788d42664..9895cffe0e7 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -145,7 +145,7 @@ contact.header=Contact {0} contact.dataverse.header=Email Dataverse Contact contact.dataset.header=Email Dataset Contact contact.to=To -contact.support=TDL Dataverse Support +contact.support=Support contact.from=From contact.from.required=User email is required. contact.from.invalid=Email is invalid. @@ -317,9 +317,9 @@ login.System=Login System login.forgot.text=Forgot your password? login.builtin=Dataverse Account login.institution=Institutional Account -login.institution.blurb=Log in or sign up with your institutional account — learn more. If you are not affiliated with a TDR member institution (see dropdown menu), please use the Google Login option. +login.institution.blurb=Log in or sign up with your institutional account — more information about account creation. login.institution.support.blurbwithLink=Leaving your institution? Please contact {0} for assistance. -login.builtin.credential.usernameOrEmail=Admin ID +login.builtin.credential.usernameOrEmail=Username/Email login.builtin.credential.password=Password login.builtin.invalidUsernameEmailOrPassword=The username, email address, or password you entered is invalid. Need assistance accessing your account? login.signup.blurb=Sign up for a Dataverse account. 
@@ -335,12 +335,12 @@ login.button.orcid=Create or Connect your ORCID # authentication providers auth.providers.title=Other options auth.providers.tip=You can convert a Dataverse account to use one of the options above. More information about account creation. -auth.providers.title.builtin=Admin ID +auth.providers.title.builtin=Username/Email auth.providers.title.shib=Your Institution auth.providers.title.orcid=ORCID -auth.providers.title.google=Google (No TDR affiliation) +auth.providers.title.google=Google auth.providers.title.github=GitHub -auth.providers.blurb=Log in or sign up with your Google account — learn more. If you are not affiliated with a TDR member institution, please use the Google Login option. Having trouble? Please contact {3} for assistance. +auth.providers.blurb=Log in or sign up with your {0} account — more information about account creation. Having trouble? Please contact {3} for assistance. auth.providers.persistentUserIdName.orcid=ORCID iD auth.providers.persistentUserIdName.github=ID auth.providers.persistentUserIdTooltip.orcid=ORCID provides a persistent digital identifier that distinguishes you from other researchers. @@ -383,7 +383,7 @@ shib.welcomeExistingUserMessageDefaultInstitution=your institution shib.dataverseUsername=Dataverse Username shib.currentDataversePassword=Current Dataverse Password shib.accountInformation=Account Information -shib.offerToCreateNewAccount=Contact your TDR liaison to get help and training. Published content cannot be easily deleted. +shib.offerToCreateNewAccount=This information is provided by your institution and will be used to create your Dataverse account. shib.passwordRejected=Validation Error - Your account can only be converted if you provide the correct password for your existing account. If your existing account has been deactivated by an administrator, you cannot convert your account. 
# oauth2/firstLogin.xhtml From d286841fe5464a7a481a20d79247499d761342b4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 14:27:00 -0400 Subject: [PATCH 031/322] add canonicalization --- pom.xml | 16 ++- .../impl/DRSSubmitToArchiveCommand.java | 123 +++++++++++++----- .../impl/DRSSubmitToArchiveCommandTest.java | 93 +++++++++++++ 3 files changed, 197 insertions(+), 35 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java diff --git a/pom.xml b/pom.xml index 995e90b0029..6d75bdd39e0 100644 --- a/pom.xml +++ b/pom.xml @@ -50,7 +50,7 @@ --> - + @@ -502,7 +502,19 @@ google-cloud-storage - + + + + com.auth0 + java-jwt + 3.19.1 + + + + io.github.erdtman + java-json-canonicalization + 1.1 + diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index ecebf13bd8e..aa4d1255477 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; @@ -17,9 +18,17 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; +import java.security.KeyFactory; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; +import java.security.interfaces.RSAPrivateKey; +import 
java.security.interfaces.RSAPublicKey; +import java.security.spec.InvalidKeySpecException; +import java.security.spec.PKCS8EncodedKeySpec; +import java.time.Instant; +import java.util.Base64; +import java.util.Date; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -31,6 +40,7 @@ import javax.json.JsonValue; import javax.net.ssl.SSLContext; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; @@ -41,6 +51,13 @@ import org.apache.http.impl.client.HttpClients; import org.apache.http.ssl.SSLContextBuilder; +import org.erdtman.jcs.JsonCanonicalizer; + +import com.auth0.jwt.JWT; +import com.auth0.jwt.algorithms.Algorithm; +import com.auth0.jwt.exceptions.JWTCreationException; +import com.auth0.jwt.interfaces.DecodedJWT; + @RequiredPermissions(Permission.PublishDataset) public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { @@ -53,6 +70,9 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final String S3_PATH = "s3_path"; private static final String COLLECTIONS = "collections"; private static final String PACKAGE_ID = "package_id"; + + private static final String RSA_KEY = "dataverse.archiver.drs.rsa_key"; + private static final String TRUST_CERT = "trust_cert"; public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { @@ -147,50 +167,79 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t try { ingestPost = new HttpPost(); ingestPost.setURI(new URI(drsConfigObject.getString("DRSendpoint"))); + + byte[] encoded = Base64.getDecoder().decode(System.getProperty(RSA_KEY)); + + KeyFactory keyFactory = KeyFactory.getInstance("RSA"); + PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); + RSAPrivateKey privKey = (RSAPrivateKey) 
keyFactory.generatePrivate(keySpec); + RSAPublicKey publicKey; + /* + * If public key is needed: encoded = Base64.decodeBase64(publicKeyPEM); + * + * KeyFactory keyFactory = KeyFactory.getInstance("RS256"); X509EncodedKeySpec + * keySpec = new X509EncodedKeySpec(encoded); return (RSAPublicKey) + * keyFactory.generatePublic(keySpec); RSAPublicKey publicKey = new + * RSAPublicKey(System.getProperty(RS256_KEY)); + */ + Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); + String body = drsConfigString; + String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, 5); + logger.info("JWT: " + jwtString); + + ingestPost.setHeader("Authorization: Bearer", jwtString); + logger.info("Body: " + body); ingestPost.setEntity(new StringEntity(body, "utf-8")); ingestPost.setHeader("Content-Type", "application/json"); + try (CloseableHttpResponse response = client.execute(ingestPost)) { + int code = response.getStatusLine().getStatusCode(); + String responseBody = new String(response.getEntity().getContent().readAllBytes(), + StandardCharsets.UTF_8); + if (code == 202) { + logger.info("Status: " + code); + logger.info("Response" + responseBody); + JsonObject responseObject = JsonUtil.getJsonObject(responseBody); + String status = responseObject.getString("status"); + switch (status) { + case PENDING: + logger.info("DRS Ingest successfully started for: " + packageId + " : " + + responseObject.toString()); + statusObject.add("status", status); + statusObject.add("message", responseObject.getString("message")); + break; + case FAILURE: + logger.severe( + "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); + return new Failure("DRS Archiver fail in Ingest call"); + default: + logger.warning("Unexpected Status: " + status); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); + return new Failure("DRS Archiver fail in Ingest call with status cvode: " + code); + } + } catch 
(ClientProtocolException e2) { + e2.printStackTrace(); + } catch (IOException e2) { + e2.printStackTrace(); + } } catch (URISyntaxException e) { return new Failure( "LDNAnnounceDatasetVersion workflow step failed: unable to parse inbox in :LDNTarget setting."); + } catch (JWTCreationException exception) { + // Invalid Signing configuration / Couldn't convert Claims. } // execute - - try (CloseableHttpResponse response = client.execute(ingestPost)) { - int code = response.getStatusLine().getStatusCode(); - String responseBody = new String(response.getEntity().getContent().readAllBytes(), - StandardCharsets.UTF_8); - if (code == 202) { - logger.info("Status: " + code); - logger.info("Response" + responseBody); - JsonObject responseObject = JsonUtil.getJsonObject(responseBody); - String status = responseObject.getString("status"); - switch (status) { - case PENDING: - logger.info("DRS Ingest successfully started for: " + packageId + " : " - + responseObject.toString()); - statusObject.add("status", status); - statusObject.add("message", responseObject.getString("message")); - break; - case FAILURE: - logger.severe( - "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); - return new Failure("DRS Archiver fail in Ingest call"); - default: - logger.warning("Unexpected Status: " + status); - } - } else { - logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); - return new Failure("DRS Archiver fail in Ingest call with status cvode: " + code); - } - } catch (ClientProtocolException e2) { - e2.printStackTrace(); - } catch (IOException e2) { - e2.printStackTrace(); + catch (InvalidKeySpecException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (NoSuchAlgorithmException e) { +// TODO Auto-generated catch block + e.printStackTrace(); } - } else { logger.warning("DRS: S3 archiving failed - will not call ingest: " + packageId); @@ -208,6 +257,14 @@ public WorkflowStepResult 
performArchiveSubmission(DatasetVersion dv, ApiToken t return WorkflowStepResult.OK; } + public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { + String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); + String digest = DigestUtils.sha256Hex(canonicalBody); + return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) + .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) + .withKeyId("defaultDataverse").withClaim("bodySHA256Hash", digest).sign(algorithmRSA); + } + private static String getArchivableAncestor(Dataverse ancestor, Set collections) { String alias = ancestor.getAlias(); while (ancestor != null && !collections.contains(alias)) { diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java new file mode 100644 index 00000000000..bf6f4dd8a4e --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -0,0 +1,93 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.branding.BrandingUtil; + +import org.apache.commons.codec.digest.DigestUtils; +import org.junit.Assert; +import org.junit.Test; + +import com.auth0.jwt.JWT; +import com.auth0.jwt.algorithms.Algorithm; +import com.auth0.jwt.interfaces.DecodedJWT; + +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import java.security.KeyFactory; +import java.security.interfaces.RSAPrivateKey; +import java.security.interfaces.RSAPublicKey; +import java.security.spec.PKCS8EncodedKeySpec; +import java.time.Instant; +import java.util.Base64; +import java.util.Date; + +/** + * + * @author michael + */ +public class DRSSubmitToArchiveCommandTest { + + @Test + public void 
createJWT() throws CommandException { + + String privKeyString = "MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCzSwj+c/uiRz5A" + + "OiDWsV5pxJrdzlDRV2PKKwRGCzhv1MEPwQCvFp6wZRDgCE4EfpVUuByNInV1eOfr" + + "BjwIlxp8hv9RPYCAsPCFV46VLeZsr8FOfvqI6IswYqB3qwdi5NW+CuJRLgTFJP87" + + "X5GgoItVnE0/DxIuZobuaEEzPa8TV8kUvdehzxTlkMTay5J/USeyKsUjPozqgKtN" + + "4ScCWrQx2FXEuKoCg85wNgFRJHgSGBH07lNAYV2tOz+w0ToSNzKswNqhTpRl7W61" + + "gzDCFJu6IYreH9bH5eh/Z9BzjNOs16k0Ok2PmQhOhHYCT3fdkKogriSREVN5dlHi" + + "FV7eB577AgMBAAECggEAPGfLX+8zmDjogDsVVT/szzWt94zLLbyDollb1z1whjzn" + + "zqb31AWK8WMbjF8/6cO8DA77j5FMgYd6m3Q+RaajBdF1s6lE4ha68jHNl/Ue7P9J" + + "4WhmgDnYqzSPW8IDew4d9Sk1lqQqd0E/vIE2TyfHydAfNl+dgISKcUgur1TY52rb" + + "taldnMP44BoXSeKM1qMAE7tWXDQlRjDdcx2Vn6nKJ4iCC6490JSGaFpsoock9wkF" + + "Fi1euzVnvX3ksyioXHMZwzZ9ErCHsI+Px25xiroyloxeoj0zfcA8kZcC9vyoa9HF" + + "2p62iK6RM7JCQc7yMcSN2Fp8PzyHlOLgdI+8CKV4AQKBgQDYmVFenIbapLgN3uyW" + + "gPTgUQGdnLf2S1g1HHHw7+74aZuMKq20w8Ikv6qWMx07R05gm8yxQ1Z4ciLcEw2z" + + "KBurLte/t6ZAJXQ7wnbPyX1JPFQNxKJrPKq+FynnANrdPVgwUunmO9JJbsudU/cG" + + "WKaQiG0w5ltvXg1NY5i1doifawKBgQDT6HFxh31nGUySNRQloE9mpvbzT35ornvl" + + "0oMlCYX2M52C3/nH/rq30woP4hDMBlvq3V6blOzPHzQwlu4+4OKBqvxlAluYIoXP" + + "QD1vJhb7eti+mYnIWyQ6hnAhrg/WDxn69mixEson2EL68+WRawz61h3WbfKoivbe" + + "YP02G2uysQKBgBOPFLf0boED6tLl1HtqvbIb3od7BWmqOBbjsK5PHEc2UiOAHxt5" + + "qehjnmXdy7/0mnFC4GMJb5+Evv0cg1owPv9gRX88eDjGqQ5UayIsUbHxTq3HmdsR" + + "KWHs+Y2wmBLuXS5P7msp771N0fktAduC2denWiTWSF9wIMdiPQH16DRtAoGBAKs4" + + "ABmEKT4ZgfYMryervRwrQhPcIj5A5VkP2+kcJcKFd/pcMH15A7Mt8M5ekcXYSYKe" + + "tSeukBzWkJvGB+CEYl/1IRQYcJufIVERDdJ2C1HMs75lXp+ljMNBBu8frin+b7aI" + + "TJTuoqrJIW2VjeMOhSFTyi4NDmlCRy/tXArQ4xcxAoGAUppOsJZeF/1kPQIFwBkS" + + "bVuGxMscWKswHy6dXEq2VabVGBL8H33PkpJRBnw7S/f+8wvk9dX63NuTF6VYM546" + + "J73YadnpU82C+7OnaTTCDVPfXYgPFLpE9xKFKkRFacgUbEnvZ2i0zSUquH0RAyaK" + + "tJ0d/dnd5TQUccAZwT8Nrw0="; + + //Todo - not in pkcs8 form + String pubKeyString = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs0sI/nP7okc+QDog1rFe" + + 
"acSa3c5Q0VdjyisERgs4b9TBD8EArxaesGUQ4AhOBH6VVLgcjSJ1dXjn6wY8CJca" + + "fIb/UT2AgLDwhVeOlS3mbK/BTn76iOiLMGKgd6sHYuTVvgriUS4ExST/O1+RoKCL" + + "VZxNPw8SLmaG7mhBMz2vE1fJFL3Xoc8U5ZDE2suSf1EnsirFIz6M6oCrTeEnAlq0" + + "MdhVxLiqAoPOcDYBUSR4EhgR9O5TQGFdrTs/sNE6EjcyrMDaoU6UZe1utYMwwhSb" + + "uiGK3h/Wx+Xof2fQc4zTrNepNDpNj5kIToR2Ak933ZCqIK4kkRFTeXZR4hVe3gee" + + "+wIDAQAB"; + + byte[] encoded = Base64.getDecoder().decode(privKeyString); + try { + KeyFactory keyFactory = KeyFactory.getInstance("RSA"); + PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); + RSAPrivateKey privKey = (RSAPrivateKey) keyFactory.generatePrivate(keySpec); + RSAPublicKey publicKey; + /* + * If public key is needed: encoded = Base64.decodeBase64(publicKeyPEM); + * + * KeyFactory keyFactory = KeyFactory.getInstance("RSA"); X509EncodedKeySpec + * keySpec = new X509EncodedKeySpec(encoded); return (RSAPublicKey) + * keyFactory.generatePublic(keySpec); RSAPublicKey publicKey = new + * RSAPublicKey(System.getProperty(RS256_KEY)); + */ + Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); + String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), "{\"stuff\":\"important\"}", 5); + + System.out.println("JWT: " + token1); + DecodedJWT jwt = JWT.decode(token1); + System.out.println(jwt.getPayload()); + } catch (Exception e) { + System.out.println(e.getLocalizedMessage()); + Assert.fail(e.getLocalizedMessage()); + } + + } +} From cf9363211ef3e66842f9725f0d26ddd2160a1a76 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 14:33:54 -0400 Subject: [PATCH 032/322] catch exception, log canonical form --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index aa4d1255477..b1312f8f103 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -239,6 +239,9 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } catch (NoSuchAlgorithmException e) { // TODO Auto-generated catch block e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } } else { @@ -259,6 +262,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); + logger.fine("Canonical body: " + canonicalBody); String digest = DigestUtils.sha256Hex(canonicalBody); return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) From a1c4484b40e5aec8043907df1fe59df98d37a552 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 15:21:59 -0400 Subject: [PATCH 033/322] refactor, add initial archiving status display --- .../harvard/iq/dataverse/DatasetVersion.java | 41 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/Admin.java | 12 +++--- .../impl/AbstractSubmitToArchiveCommand.java | 9 ---- src/main/webapp/dataset-versions.xhtml | 15 ++++--- 4 files changed, 57 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index f211ccd0410..4007f7f1cbc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; 
import edu.harvard.iq.dataverse.util.DateUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.Serializable; @@ -26,6 +27,7 @@ import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonArrayBuilder; +import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.persistence.CascadeType; import javax.persistence.Column; @@ -93,6 +95,14 @@ public enum VersionState { public static final int ARCHIVE_NOTE_MAX_LENGTH = 1000; public static final int VERSION_NOTE_MAX_LENGTH = 1000; + //Archival copies: Status message required components + public static final String STATUS = "status"; + public static final String MESSAGE = "message"; + //Archival Copies: Allowed Statuses + public static final String PENDING = "pending"; + public static final String SUCCESS = "success"; + public static final String FAILURE = "failure"; + @Id @GeneratedValue(strategy = GenerationType.IDENTITY) private Long id; @@ -179,6 +189,8 @@ public enum VersionState { @Transient private DatasetVersionDifference dvd; + @Transient + private JsonObject archivalStatus; public Long getId() { return this.id; @@ -318,9 +330,38 @@ public void setArchiveNote(String note) { public String getArchivalCopyLocation() { return archivalCopyLocation; } + + public String getArchivalCopyLocationStatus() { + populateArchivalStatus(); + + if(archivalStatus!=null) { + return archivalStatus.getString(STATUS); + } + return null; + } + public String getArchivalCopyLocationMessage() { + populateArchivalStatus(); + if(archivalStatus!=null) { + return archivalStatus.getString(MESSAGE); + } + return null; + } + + private void populateArchivalStatus() { + if(archivalStatus ==null) { + if(archivalCopyLocation!=null) { + try { + archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation); + } catch(Exception e) { + logger.warning("DatasetVersion id: " + id + 
"has a non-JsonObject value, parsing error: " + e.getMessage()); + } + } + } + } public void setArchivalCopyLocation(String location) { this.archivalCopyLocation = location; + populateArchivalStatus(); } public String getDeaccessionLink() { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d90a99aa674..93a6abee9fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1825,12 +1825,12 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, } catch (WrappedResponse e1) { return error(Status.UNAUTHORIZED, "api key required"); } - if (update.containsKey(AbstractSubmitToArchiveCommand.STATUS) - && update.containsKey(AbstractSubmitToArchiveCommand.MESSAGE)) { - String status = update.getString(AbstractSubmitToArchiveCommand.STATUS); - if (status.equals(AbstractSubmitToArchiveCommand.PENDING) - || status.equals(AbstractSubmitToArchiveCommand.FAILURE) - || status.equals(AbstractSubmitToArchiveCommand.SUCCESS)) { + if (update.containsKey(DatasetVersion.STATUS) + && update.containsKey(DatasetVersion.MESSAGE)) { + String status = update.getString(DatasetVersion.STATUS); + if (status.equals(DatasetVersion.PENDING) + || status.equals(DatasetVersion.FAILURE) + || status.equals(DatasetVersion.SUCCESS)) { try { Dataset ds; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index dad17df38c6..e919f81e6e9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -27,15 +27,6 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand requestedSettings = new HashMap(); private static 
final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); - //Status message required components - public static final String STATUS = "status"; - public static final String MESSAGE = "message"; - //Allowed Statuses - public static final String PENDING = "pending"; - public static final String SUCCESS = "success"; - public static final String FAILURE = "failure"; - - public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version.getDataset()); this.version = version; diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index e105ac30df7..00462acc9c2 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -149,13 +149,18 @@ - - + + + - + + + + - - + + From 8075a7156d86c90197c16d7bf835da0cc14a184c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 18:43:47 -0400 Subject: [PATCH 034/322] revert TDL change --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 042933a3b67..ab5a915e7e9 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -129,7 +129,7 @@ - 5.10-tdl-dev + 5.10.1 11 UTF-8 From a10212177be59ea59e993593a2dec400f2288fb1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 18:44:04 -0400 Subject: [PATCH 035/322] fix typos/errors --- src/main/webapp/dataset-versions.xhtml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 00462acc9c2..70f53ea9a75 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -149,14 +149,14 @@ - + - - + + From 96bd083d488233c5b9837e625cd87ef94d779495 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 18:44:51 -0400 Subject: [PATCH 036/322] 
handle key with line breaks, add/update additional statuses for fails --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index b1312f8f103..83997c11d50 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -105,11 +105,12 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); JsonObjectBuilder statusObject = Json.createObjectBuilder(); - statusObject.add("status", "Failure"); + statusObject.add("status", DatasetVersion.FAILURE); statusObject.add("message", "Bag not transferred"); if (s3Result == WorkflowStepResult.OK) { - statusObject.add("status", "Attempted"); + //This will be overwritten if the further steps are successful + statusObject.add("status", DatasetVersion.FAILURE); statusObject.add("message", "Bag transferred"); // Now contact DRS @@ -168,7 +169,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t ingestPost = new HttpPost(); ingestPost.setURI(new URI(drsConfigObject.getString("DRSendpoint"))); - byte[] encoded = Base64.getDecoder().decode(System.getProperty(RSA_KEY)); + byte[] encoded = Base64.getDecoder().decode(System.getProperty(RSA_KEY).replaceAll("[\\r\\n]", "")); KeyFactory keyFactory = KeyFactory.getInstance("RSA"); PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); @@ -211,6 +212,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add("message", responseObject.getString("message")); break; case FAILURE: + 
statusObject.add("status", status); + statusObject.add("message", responseObject.getString("message")); logger.severe( "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); return new Failure("DRS Archiver fail in Ingest call"); From 3e860ea4aea7475279968ec3b0070e310977d83e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Apr 2022 18:59:25 -0400 Subject: [PATCH 037/322] add realistic body, print canonical form --- .../impl/DRSSubmitToArchiveCommandTest.java | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index bf6f4dd8a4e..0bfd8ac18f2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; import org.apache.commons.codec.digest.DigestUtils; +import org.erdtman.jcs.JsonCanonicalizer; import org.junit.Assert; import org.junit.Test; @@ -64,6 +65,32 @@ public void createJWT() throws CommandException { + "uiGK3h/Wx+Xof2fQc4zTrNepNDpNj5kIToR2Ak933ZCqIK4kkRFTeXZR4hVe3gee" + "+wIDAQAB"; + String fakeBody = "{\n" + + " \"s3_bucket_name\": \"dataverse-export-dev\",\n" + + " \"package_id\": \"doi-10-5072-fk2-e6cmkr.v1.18\",\n" + + " \"s3_path\": \"doi-10-5072-fk2-e6cmkr\",\n" + + " \"admin_metadata\": {\n" + + " \"accessFlag\": \"N\",\n" + + " \"contentModel\": \"opaque\",\n" + + " \"depositingSystem\": \"Harvard Dataverse\",\n" + + " \"firstGenerationInDrs\": \"unspecified\",\n" + + " \"objectRole\": \"CG:DATASET\",\n" + + " \"usageClass\": \"LOWUSE\",\n" + + " \"storageClass\": \"AR\",\n" + + " \"s3_bucket_name\": \"dataverse-export-dev\",\n" + + " \"ownerCode\": \"123\",\n" + + " \"billingCode\": 
\"456\",\n" + + " \"resourceNamePattern\": \"pattern\",\n" + + " \"urnAuthorityPath\": \"path\",\n" + + " \"depositAgent\": \"789\",\n" + + " \"depositAgentEmail\": \"someone@mailinator.com\",\n" + + " \"successEmail\": \"winner@mailinator.com\",\n" + + " \"failureEmail\": \"loser@mailinator.com\",\n" + + " \"successMethod\": \"method\",\n" + + " \"adminCategory\": \"root\"\n" + + " }\n" + + "}"; + byte[] encoded = Base64.getDecoder().decode(privKeyString); try { KeyFactory keyFactory = KeyFactory.getInstance("RSA"); @@ -77,9 +104,14 @@ public void createJWT() throws CommandException { * keySpec = new X509EncodedKeySpec(encoded); return (RSAPublicKey) * keyFactory.generatePublic(keySpec); RSAPublicKey publicKey = new * RSAPublicKey(System.getProperty(RS256_KEY)); + * + * */ + String canonicalBody = new JsonCanonicalizer(fakeBody).getEncodedString(); + System.out.println("Canonical form:"+ canonicalBody); + Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); - String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), "{\"stuff\":\"important\"}", 5); + String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), fakeBody, 5); System.out.println("JWT: " + token1); DecodedJWT jwt = JWT.decode(token1); From 24336e176eb43c1be911c5c4f1c89544071c7d20 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Apr 2022 11:06:23 -0400 Subject: [PATCH 038/322] change api names, fix ok response on status put --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 93a6abee9fa..807eb089c16 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1787,7 +1787,7 @@ public void run() { @GET 
@Produces(MediaType.APPLICATION_JSON) - @Path("/submitDataVersionToArchive/{id}/{version}/status") + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, @PathParam("version") String versionNumber) { @@ -1812,7 +1812,7 @@ public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, @PUT @Consumes(MediaType.APPLICATION_JSON) - @Path("/submitDataVersionToArchive/{id}/{version}/status") + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, @PathParam("version") String versionNumber, JsonObject update) { @@ -1842,6 +1842,7 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, return error(Status.NOT_FOUND, "Dataset version not found"); } dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); + return ok("Status updated"); } catch (WrappedResponse e) { return error(Status.NOT_FOUND, "Dataset not found"); @@ -1850,9 +1851,6 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, } return error(Status.BAD_REQUEST, "Unacceptable status format"); } - - - /** * Iteratively archives all unarchived dataset versions From 9cef6d4a16a8e5d204f512f16089e17588906420 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Apr 2022 11:06:39 -0400 Subject: [PATCH 039/322] store extra fields from ingest --- .../impl/DRSSubmitToArchiveCommand.java | 52 ++++++++++++------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 83997c11d50..d8cbfe5b114 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ 
-63,8 +63,6 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); private static final String DRS_CONFIG = ":DRSArchivalConfig"; - private static final String FAILURE = "failure"; - private static final String PENDING = "pending"; private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; private static final String S3_PATH = "s3_path"; @@ -111,7 +109,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (s3Result == WorkflowStepResult.OK) { //This will be overwritten if the further steps are successful statusObject.add("status", DatasetVersion.FAILURE); - statusObject.add("message", "Bag transferred"); + statusObject.add("message", "Bag transferred, ingest failed"); // Now contact DRS boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); @@ -203,26 +201,40 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.info("Status: " + code); logger.info("Response" + responseBody); JsonObject responseObject = JsonUtil.getJsonObject(responseBody); - String status = responseObject.getString("status"); - switch (status) { - case PENDING: - logger.info("DRS Ingest successfully started for: " + packageId + " : " - + responseObject.toString()); - statusObject.add("status", status); - statusObject.add("message", responseObject.getString("message")); - break; - case FAILURE: - statusObject.add("status", status); - statusObject.add("message", responseObject.getString("message")); - logger.severe( - "DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); - return new Failure("DRS Archiver fail in Ingest call"); - default: - logger.warning("Unexpected Status: " + status); + if (responseObject.containsKey(DatasetVersion.STATUS) + && responseObject.containsKey(DatasetVersion.MESSAGE)) { + 
String status = responseObject.getString(DatasetVersion.STATUS); + if (status.equals(DatasetVersion.PENDING) || status.equals(DatasetVersion.FAILURE) + || status.equals(DatasetVersion.SUCCESS)) { + statusObject.addAll(Json.createObjectBuilder(responseObject)); + switch (status) { + case DatasetVersion.PENDING: + logger.info("DRS Ingest successfully started for: " + packageId + " : " + + responseObject.toString()); + break; + case DatasetVersion.FAILURE: + logger.severe("DRS Ingest Failed for: " + packageId + " : " + + responseObject.toString()); + return new Failure("DRS Archiver fail in Ingest call"); + case DatasetVersion.SUCCESS: + // We don't expect this from DRS + logger.warning("Unexpected Status: " + status); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + " with returned status: " + + status); + return new Failure( + "DRS Archiver fail in Ingest call with returned status: " + status); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + + " - response does not include status and message"); + return new Failure( + "DRS Archiver fail in Ingest call \" - response does not include status and message"); } } else { logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); - return new Failure("DRS Archiver fail in Ingest call with status cvode: " + code); + return new Failure("DRS Archiver fail in Ingest call with status code: " + code); } } catch (ClientProtocolException e2) { e2.printStackTrace(); From f1887d5067c65a7ea52391a57e1daca831891afa Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Apr 2022 11:07:08 -0400 Subject: [PATCH 040/322] show basic archival state to those who can viewunpublisheddataset --- src/main/webapp/dataset-versions.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 70f53ea9a75..aaaaf8d4593 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ 
b/src/main/webapp/dataset-versions.xhtml @@ -147,7 +147,7 @@ - + From ee40121a87445ddf1c400ba0401e0b7d51ca5945 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 13 Apr 2022 10:32:00 -0400 Subject: [PATCH 041/322] Don't check permissions when dataset id is null (during create) Not sure why the dataset-version.xhtml is getting included at all on the dataset create page - should stop this instead of just avoiding a failure in the render logic for the "Archived" column. --- src/main/webapp/dataset-versions.xhtml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index aaaaf8d4593..37f7906c640 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -147,7 +147,8 @@ - + + From 2d5fc86360fdc434450a2fe74c276ba22aa98a49 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 13 Apr 2022 16:18:43 -0400 Subject: [PATCH 042/322] move archiving status api to datasets, fix merge/caching issues since api/admin is usually blocked, putting the status apis in api/datesets makes it easier for a remote archiving service to set status. Still keeping the calls to start archiving manually/start a batch archiving job in admin Also fixed display status caching issue and adjusted for the fact that Admin is a @Stateless bean and Datasets is not (the former appears to update the db when dv.setArchivalCopyLocation is set, the latter doesn't unless a merge is called.) 
--- .../harvard/iq/dataverse/DatasetVersion.java | 10 +-- .../dataverse/DatasetVersionServiceBean.java | 8 ++ .../edu/harvard/iq/dataverse/api/Admin.java | 67 +--------------- .../harvard/iq/dataverse/api/Datasets.java | 80 ++++++++++++++++++- 4 files changed, 93 insertions(+), 72 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 4007f7f1cbc..8d10f970786 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -332,7 +332,7 @@ public String getArchivalCopyLocation() { } public String getArchivalCopyLocationStatus() { - populateArchivalStatus(); + populateArchivalStatus(false); if(archivalStatus!=null) { return archivalStatus.getString(STATUS); @@ -340,15 +340,15 @@ public String getArchivalCopyLocationStatus() { return null; } public String getArchivalCopyLocationMessage() { - populateArchivalStatus(); + populateArchivalStatus(false); if(archivalStatus!=null) { return archivalStatus.getString(MESSAGE); } return null; } - private void populateArchivalStatus() { - if(archivalStatus ==null) { + private void populateArchivalStatus(boolean force) { + if(archivalStatus ==null || force) { if(archivalCopyLocation!=null) { try { archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation); @@ -361,7 +361,7 @@ private void populateArchivalStatus() { public void setArchivalCopyLocation(String location) { this.archivalCopyLocation = location; - populateArchivalStatus(); + populateArchivalStatus(true); } public String getDeaccessionLink() { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 150cd656aed..9c79646e7d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1211,4 
+1211,12 @@ public List getUnarchivedDatasetVersions(){ } } // end getUnarchivedDatasetVersions + /** + * Merges the passed datasetversion to the persistence context. + * @param ver the DatasetVersion whose new state we want to persist. + * @return The managed entity representing {@code ver}. + */ + public DatasetVersion merge( DatasetVersion ver ) { + return em.merge(ver); + } } // end class diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 807eb089c16..440d64985df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -18,6 +18,7 @@ import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.UserServiceBean; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; +import edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse; import edu.harvard.iq.dataverse.api.dto.RoleDTO; import edu.harvard.iq.dataverse.authorization.AuthenticatedUserDisplayInfo; import edu.harvard.iq.dataverse.authorization.AuthenticationProvider; @@ -1784,73 +1785,7 @@ public void run() { return error(Status.UNAUTHORIZED, "api key required"); } } - - @GET - @Produces(MediaType.APPLICATION_JSON) - @Path("/submitDatasetVersionToArchive/{id}/{version}/status") - public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, - @PathParam("version") String versionNumber) { - - try { - AuthenticatedUser au = findAuthenticatedUserOrDie(); - if (!au.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Superusers only."); - } - Dataset ds = findDatasetOrDie(dsid); - - DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); - if (dv.getArchivalCopyLocation() == null) { - return error(Status.NO_CONTENT, "This dataset version has not been archived"); - } else { - JsonObject status = JsonUtil.getJsonObject(dv.getArchivalCopyLocation()); - return ok(status); - } - 
} catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); - } - } - - @PUT - @Consumes(MediaType.APPLICATION_JSON) - @Path("/submitDatasetVersionToArchive/{id}/{version}/status") - public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, - @PathParam("version") String versionNumber, JsonObject update) { - - try { - AuthenticatedUser au = findAuthenticatedUserOrDie(); - - if (!au.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Superusers only."); - } - } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); - } - if (update.containsKey(DatasetVersion.STATUS) - && update.containsKey(DatasetVersion.MESSAGE)) { - String status = update.getString(DatasetVersion.STATUS); - if (status.equals(DatasetVersion.PENDING) - || status.equals(DatasetVersion.FAILURE) - || status.equals(DatasetVersion.SUCCESS)) { - - try { - Dataset ds; - - ds = findDatasetOrDie(dsid); - - DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); - if(dv==null) { - return error(Status.NOT_FOUND, "Dataset version not found"); - } - dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); - return ok("Status updated"); - } catch (WrappedResponse e) { - return error(Status.NOT_FOUND, "Dataset not found"); - } - } - } - return error(Status.BAD_REQUEST, "Unacceptable status format"); - } /** * Iteratively archives all unarchived dataset versions diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index e21396dd487..1bd8384d4b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -57,7 +57,7 @@ import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; - +import 
edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -86,6 +86,7 @@ import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.search.IndexServiceBean; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -217,6 +218,9 @@ public class Datasets extends AbstractApiBean { @Inject DataverseRoleServiceBean dataverseRoleService; + + @EJB + DatasetVersionServiceBean datasetversionService; /** * Used to consolidate the way we parse and handle dataset versions. @@ -3279,4 +3283,78 @@ public Response getCurationStates() throws WrappedResponse { csvSB.append("\n"); return ok(csvSB.toString(), MediaType.valueOf(FileUtil.MIME_TYPE_CSV), "datasets.status.csv"); } + + + @GET + @Produces(MediaType.APPLICATION_JSON) + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") + public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + Dataset ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if (dv.getArchivalCopyLocation() == null) { + return error(Status.NO_CONTENT, "This dataset version has not been archived"); + } else { + JsonObject status = JsonUtil.getJsonObject(dv.getArchivalCopyLocation()); + return ok(status); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key 
required"); + } + } + + @PUT + @Consumes(MediaType.APPLICATION_JSON) + @Path("/submitDatasetVersionToArchive/{id}/{version}/status") + public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber, JsonObject update) { + + logger.info(JsonUtil.prettyPrint(update)); + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + if (update.containsKey(DatasetVersion.STATUS) + && update.containsKey(DatasetVersion.MESSAGE)) { + String status = update.getString(DatasetVersion.STATUS); + if (status.equals(DatasetVersion.PENDING) + || status.equals(DatasetVersion.FAILURE) + || status.equals(DatasetVersion.SUCCESS)) { + + try { + Dataset ds; + + ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if(dv==null) { + return error(Status.NOT_FOUND, "Dataset version not found"); + } + dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); + dv = datasetversionService.merge(dv); + logger.info("location now: " + dv.getArchivalCopyLocation()); + logger.info("status now: " + dv.getArchivalCopyLocationStatus()); + logger.info("message now: " + dv.getArchivalCopyLocationMessage()); + + return ok("Status updated"); + + } catch (WrappedResponse e) { + return error(Status.NOT_FOUND, "Dataset not found"); + } + } + } + return error(Status.BAD_REQUEST, "Unacceptable status format"); + } } From 51c1ba8a569725bd0372156206f463eb2fa27df2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 27 Apr 2022 14:06:59 -0400 Subject: [PATCH 043/322] fix wrapped error handling --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index c58df903652..baa9644700e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3308,8 +3308,8 @@ public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, JsonObject status = JsonUtil.getJsonObject(dv.getArchivalCopyLocation()); return ok(status); } - } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); + } catch (WrappedResponse wr) { + return wr.getResponse(); } } @@ -3326,8 +3326,8 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, if (!au.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); } - } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); + } catch (WrappedResponse wr) { + return wr.getResponse(); } if (update.containsKey(DatasetVersion.STATUS) && update.containsKey(DatasetVersion.MESSAGE)) { @@ -3353,8 +3353,8 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, return ok("Status updated"); - } catch (WrappedResponse e) { - return error(Status.NOT_FOUND, "Dataset not found"); + } catch (WrappedResponse wr) { + return wr.getResponse(); } } } From 5b2936321e6628a544fb3a4d142e44a0729f2028 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 27 Apr 2022 14:07:39 -0400 Subject: [PATCH 044/322] add debug logging for 5.2021.5 --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 6adadb98429..db377090cf6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -355,6 +355,7 @@ private void populateArchivalStatus(boolean force) { archivalStatus = 
JsonUtil.getJsonObject(archivalCopyLocation); } catch(Exception e) { logger.warning("DatasetVersion id: " + id + "has a non-JsonObject value, parsing error: " + e.getMessage()); + logger.info(archivalCopyLocation); } } } From 9aac7e95c1a7b14b271b63b41ab032886259defc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 27 Apr 2022 14:12:10 -0400 Subject: [PATCH 045/322] fix header, error status, debug logging --- .../command/impl/DRSSubmitToArchiveCommand.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index d8cbfe5b114..391ecb7b1d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -109,7 +109,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (s3Result == WorkflowStepResult.OK) { //This will be overwritten if the further steps are successful statusObject.add("status", DatasetVersion.FAILURE); - statusObject.add("message", "Bag transferred, ingest failed"); + statusObject.add("message", "Bag transferred, DRS ingest call failed"); // Now contact DRS boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); @@ -187,7 +187,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, 5); logger.info("JWT: " + jwtString); - ingestPost.setHeader("Authorization: Bearer", jwtString); + ingestPost.setHeader("Authorization", "Bearer " + jwtString); logger.info("Body: " + body); ingestPost.setEntity(new StringEntity(body, "utf-8")); @@ -234,6 +234,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } else { 
logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); + logger.info("Status: " + code); + logger.info("Response" + responseBody); return new Failure("DRS Archiver fail in Ingest call with status code: " + code); } } catch (ClientProtocolException e2) { @@ -249,21 +251,21 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } // execute catch (InvalidKeySpecException e) { - // TODO Auto-generated catch block e.printStackTrace(); } catch (NoSuchAlgorithmException e) { -// TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); + } finally { + //Set status after success or failure + dv.setArchivalCopyLocation(statusObject.build().toString()); } } else { - logger.warning("DRS: S3 archiving failed - will not call ingest: " + packageId); + dv.setArchivalCopyLocation(statusObject.build().toString()); return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); } - dv.setArchivalCopyLocation(statusObject.build().toString()); + } else { logger.info("DRS Archiver: No matching collection found - will not archive: " + packageId); return WorkflowStepResult.OK; From 343155d4b15834721b0724b03ca34995d1b87ed9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 5 May 2022 11:00:42 -0400 Subject: [PATCH 046/322] adding configurable timeout --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 391ecb7b1d6..8f1805f2b91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -72,6 +72,7 @@ public class DRSSubmitToArchiveCommand 
extends S3SubmitToArchiveCommand implemen private static final String RSA_KEY = "dataverse.archiver.drs.rsa_key"; private static final String TRUST_CERT = "trust_cert"; + private static final String TIMEOUT = "timeout"; public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); @@ -113,7 +114,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Now contact DRS boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); - + int jwtTimeout = drsConfigObject.getInt(TIMEOUT, 5); JsonObjectBuilder job = Json.createObjectBuilder(); job.add(S3_BUCKET_NAME, adminMetadata.getString(S3_BUCKET_NAME)); @@ -184,7 +185,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); String body = drsConfigString; - String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, 5); + String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, jwtTimeout); logger.info("JWT: " + jwtString); ingestPost.setHeader("Authorization", "Bearer " + jwtString); From 98a013fea218bde39ad1e5f40b9f33ab35cba04e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 13:43:27 -0400 Subject: [PATCH 047/322] add delete archival status method --- .../harvard/iq/dataverse/api/Datasets.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index baa9644700e..93884cf3f2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3360,4 +3360,31 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, } return error(Status.BAD_REQUEST, "Unacceptable status format"); } + + @DELETE + @Produces(MediaType.APPLICATION_JSON) + 
@Path("/submitDatasetVersionToArchive/{id}/{version}/status") + public Response deleteDatasetVersionToArchiveStatus(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + Dataset ds = findDatasetOrDie(dsid); + + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); + if (dv == null) { + return error(Status.NOT_FOUND, "Dataset version not found"); + } + dv.setArchivalCopyLocation(null); + dv = datasetversionService.merge(dv); + + return ok("Status deleted"); + + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } } From 6255418d9cd93f57c4510d5b6fbba83bdb5b57e2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 13:44:02 -0400 Subject: [PATCH 048/322] add isSingleVersion option false by default, can be true for DRS Archiver --- .../impl/AbstractSubmitToArchiveCommand.java | 8 +++++++- .../command/impl/DRSSubmitToArchiveCommand.java | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 99ef4b811cd..321d51a0595 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -98,5 +98,11 @@ String getDataCiteXml(DatasetVersion dv) { public static boolean isArchivable(Dataset dataset, SettingsWrapper settingsWrapper) { return true; - } + } + + public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { + return false; + } + + } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 391ecb7b1d6..89b589f8a39 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -322,4 +322,21 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { } return false; } + + public static boolean isSingleVersion(SettingsWrapper sw) { + JsonObject drsConfigObject = null; + + try { + String config = sw.get(DRS_CONFIG, null); + if (config != null) { + drsConfigObject = JsonUtil.getJsonObject(config); + } + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + if (drsConfigObject != null) { + return drsConfigObject.getBoolean("single_version", false); + } + return false; + } } From 43382da166930af1a0b80f56d2d0216e6ecb7589 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 15:06:36 -0400 Subject: [PATCH 049/322] adjust so can call from api as well --- .../impl/AbstractSubmitToArchiveCommand.java | 6 ++++++ .../command/impl/DRSSubmitToArchiveCommand.java | 13 +++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 321d51a0595..9124cec751e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -100,9 +100,15 @@ public static boolean isArchivable(Dataset dataset, SettingsWrapper settingsWrap return true; } + //Check if the chosen archiver imposes single-version-only archiving - in a View context public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { return false; } 
+ + //Check if the chosen archiver imposes single-version-only archiving - in the API + public static boolean isSingleVersion(SettingsServiceBean settingsService) { + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 89b589f8a39..8f3a179a2d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; @@ -324,10 +325,18 @@ public static boolean isArchivable(Dataset d, SettingsWrapper sw) { } public static boolean isSingleVersion(SettingsWrapper sw) { - JsonObject drsConfigObject = null; + String config = sw.get(DRS_CONFIG, null); + return isSingleVersion(config); + } + public static boolean isSingleVersion(SettingsServiceBean ss) { + String config = ss.get(DRS_CONFIG, null); + return isSingleVersion(config); + } + + private static boolean isSingleVersion(String config) { + JsonObject drsConfigObject = null; try { - String config = sw.get(DRS_CONFIG, null); if (config != null) { drsConfigObject = JsonUtil.getJsonObject(config); } From 0b83efe8f07245d86dbd41411be3cba026a63418 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 15:21:05 -0400 Subject: [PATCH 050/322] API changes for single version semantics --- .../edu/harvard/iq/dataverse/api/Admin.java | 7 +++++ .../harvard/iq/dataverse/api/Datasets.java | 28 
++++++++++++++++++- .../iq/dataverse/util/ArchiverUtil.java | 24 ++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 678d563d7bb..b962136c557 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1752,6 +1752,13 @@ public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { + if(ArchiverUtil.onlySingleVersionArchiving(cmd.getClass(), settingsService)) { + for (DatasetVersion version : ds.getVersions()) { + if ((dv != version) && version.getArchivalCopyLocation() != null) { + return error(Status.CONFLICT, "Dataset already archived."); + } + } + } new Thread(new Runnable() { public void run() { try { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 93884cf3f2b..7a69a720971 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -100,6 +100,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.StringReader; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.net.URI; import java.sql.Timestamp; import java.text.MessageFormat; @@ -3300,7 +3302,7 @@ public Response getDatasetVersionToArchiveStatus(@PathParam("id") String dsid, return error(Response.Status.FORBIDDEN, "Superusers only."); } Dataset ds = findDatasetOrDie(dsid); - + DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); if (dv.getArchivalCopyLocation() == null) { return error(Status.NO_CONTENT, "This dataset version has not been archived"); @@ -3345,6 
+3347,14 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, if(dv==null) { return error(Status.NOT_FOUND, "Dataset version not found"); } + if (isSingleVersionArchiving()) { + for (DatasetVersion version : ds.getVersions()) { + if ((dv != version) && version.getArchivalCopyLocation() != null) { + return error(Status.CONFLICT, "Dataset already archived."); + } + } + } + dv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); dv = datasetversionService.merge(dv); logger.info("location now: " + dv.getArchivalCopyLocation()); @@ -3387,4 +3397,20 @@ public Response deleteDatasetVersionToArchiveStatus(@PathParam("id") String dsid return wr.getResponse(); } } + + private boolean isSingleVersionArchiving() { + String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + Class clazz; + try { + clazz = Class.forName(className).asSubclass(AbstractSubmitToArchiveCommand.class); + return ArchiverUtil.onlySingleVersionArchiving(clazz, settingsService); + } catch (ClassNotFoundException e) { + logger.warning(":ArchiverClassName does not refer to a known Archiver"); + } catch (ClassCastException cce) { + logger.warning(":ArchiverClassName does not refer to an Archiver class"); + } + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java index fc97f972f5c..31466470674 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ArchiverUtil.java @@ -1,11 +1,15 @@ package edu.harvard.iq.dataverse.util; import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.logging.Logger; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.SettingsWrapper; import 
edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; /** * Simple class to reflectively get an instance of the desired class for @@ -35,4 +39,24 @@ public static AbstractSubmitToArchiveCommand createSubmitToArchiveCommand(String } return null; } + + public static boolean onlySingleVersionArchiving(Class clazz, SettingsServiceBean settingsService) { + Method m; + try { + m = clazz.getMethod("isSingleVersion", SettingsServiceBean.class); + Object[] params = { settingsService }; + return (Boolean) m.invoke(null, params); + } catch (NoSuchMethodException e) { + e.printStackTrace(); + } catch (SecurityException e) { + e.printStackTrace(); + } catch (IllegalAccessException e) { + e.printStackTrace(); + } catch (IllegalArgumentException e) { + e.printStackTrace(); + } catch (InvocationTargetException e) { + e.printStackTrace(); + } + return (AbstractSubmitToArchiveCommand.isSingleVersion(settingsService)); + } } From 3f4043ebdefae5a7f2df14727153dd8dba65e94e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 May 2022 15:21:26 -0400 Subject: [PATCH 051/322] UI changes for single version and new bundle strings --- .../edu/harvard/iq/dataverse/DatasetPage.java | 37 +++++++++++++++++++ src/main/java/propertyFiles/Bundle.properties | 6 +++ src/main/webapp/dataset-versions.xhtml | 16 ++++---- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index cff0f34b816..5caf0427f38 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5567,6 +5567,43 @@ public boolean isArchivable() { } return false; } + + public boolean isVersionArchivable() { + // If this dataset isn't in an archivable collection retuyrn false + if (isArchivable()) 
{ + boolean checkForArchivalCopy = false; + // Otherwise, we need to know if the archiver is single-version-only + // If it is, we have to check for an existing archived version to answer the + // question + String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); + if (className != null) { + try { + Class clazz = Class.forName(className); + Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); + Object[] params = { settingsWrapper }; + checkForArchivalCopy = (Boolean) m.invoke(null, params); + } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException + | InvocationTargetException | NoSuchMethodException | SecurityException e) { + logger.warning("Failed to call is Archivable on configured archiver class: " + className); + e.printStackTrace(); + } + if (checkForArchivalCopy) { + // If we have to check (single version archiving), we can't allow archiving if + // one version is already archived (or attempted - any non-null status) + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.getArchivalCopyLocation() != null) { + return false; + } + } + } + // If we allow multiple versions or didn't find one that has had archiving run + // on it, we can archive, so return true + return true; + } + } + //not in an archivable collection + return false; + } private static Date getFileDateToCompare(FileMetadata fileMetadata) { DataFile datafile = fileMetadata.getDataFile(); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 9fa0fc71f3f..7bfcfbcbfa6 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1835,6 +1835,12 @@ file.dataFilesTab.versions.headers.summary=Summary file.dataFilesTab.versions.headers.contributors=Contributors file.dataFilesTab.versions.headers.contributors.withheld=Contributor name(s) withheld file.dataFilesTab.versions.headers.published=Published 
on +file.dataFilesTab.versions.headers.archived=Archival Status +file.dataFilesTab.versions.headers.archived.success=Archived +file.dataFilesTab.versions.headers.archived.pending=Pending +file.dataFilesTab.versions.headers.archived.failure=Failed +file.dataFilesTab.versions.headers.archived.notarchived=Not Archived +file.dataFilesTab.versions.headers.archived.submit=Submit file.dataFilesTab.versions.viewDiffBtn=View Differences file.dataFilesTab.versions.citationMetadata=Citation Metadata: file.dataFilesTab.versions.added=Added diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 37f7906c640..f4c80b43efe 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -148,20 +148,22 @@ - + - + - + - - - + + - + + + From 2f087c4287ec6e8a3b2b83336ec69f6a89eff9ef Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 13 May 2022 11:38:51 -0400 Subject: [PATCH 052/322] update display in unarchivable collections --- .../edu/harvard/iq/dataverse/DatasetPage.java | 17 +++++++++++------ src/main/webapp/dataset-versions.xhtml | 4 ++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 5caf0427f38..2f98e43dd93 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5569,7 +5569,7 @@ public boolean isArchivable() { } public boolean isVersionArchivable() { - // If this dataset isn't in an archivable collection retuyrn false + // If this dataset isn't in an archivable collection return false if (isArchivable()) { boolean checkForArchivalCopy = false; // Otherwise, we need to know if the archiver is single-version-only @@ -5590,11 +5590,7 @@ public boolean isVersionArchivable() { if (checkForArchivalCopy) { // If we have to check (single version archiving), we can't allow archiving if // one 
version is already archived (or attempted - any non-null status) - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.getArchivalCopyLocation() != null) { - return false; - } - } + return !isSomeVersionArchived(); } // If we allow multiple versions or didn't find one that has had archiving run // on it, we can archive, so return true @@ -5604,6 +5600,15 @@ public boolean isVersionArchivable() { //not in an archivable collection return false; } + + public boolean isSomeVersionArchived() { + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.getArchivalCopyLocation() != null) { + return true; + } + } + return false; + } private static Date getFileDateToCompare(FileMetadata fileMetadata) { DataFile datafile = fileMetadata.getDataFile(); diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index f4c80b43efe..6f144ec46d9 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -148,7 +148,7 @@ - + @@ -162,7 +162,7 @@ action="#{DatasetPage.archiveVersion(versionTab.id)}"> - + From baec83e6b8eb2bbfdce64e384704f2a89044217b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 13 May 2022 11:46:34 -0400 Subject: [PATCH 053/322] single version for command/workflow --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 8f3a179a2d7..b2a99ce6d44 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -99,6 +99,15 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); if (alias != null) { + if 
(drsConfigObject.getBoolean("single_version", false)) { + for (DatasetVersion version : dataset.getVersions()) { + if (version.getArchivalCopyLocation() != null) { + return new Failure("DRS Archiver fail: version " + version.getFriendlyVersionNumber() + + " already archived."); + } + } + } + JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); From 4212a2119ae0d1e5e9084ca9e0803bc039be2a97 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 13 May 2022 11:53:54 -0400 Subject: [PATCH 054/322] typo --- src/main/webapp/dataset-versions.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 6f144ec46d9..4d04546133e 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -162,7 +162,7 @@ action="#{DatasetPage.archiveVersion(versionTab.id)}"> - + From c3b3cb219f7aac841b1dae1e79059b0b3a477ff4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 10:47:26 -0400 Subject: [PATCH 055/322] update for #8592 semantic mapping update --- .../internalspi/LDNAnnounceDatasetVersionStep.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java index 7ce65359968..3388e54e5bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java @@ -155,13 +155,13 @@ HttpPost buildAnnouncement(boolean b, WorkflowContext ctxt, JsonObject target) t for (DatasetFieldType cdft : childTypes) { switch (cdft.getName()) { case "publicationURL": - publicationURL = 
OREMap.getTermFor(dft, cdft); + publicationURL = cdft.getJsonLDTerm(); break; case "publicationIDType": - publicationIDType = OREMap.getTermFor(dft, cdft); + publicationIDType = cdft.getJsonLDTerm(); break; case "publicationIDNumber": - publicationIDNumber = OREMap.getTermFor(dft, cdft); + publicationIDNumber = cdft.getJsonLDTerm(); break; } @@ -188,7 +188,7 @@ HttpPost buildAnnouncement(boolean b, WorkflowContext ctxt, JsonObject target) t default: if (jv != null) { includeLocalContext = true; - coarContext.add(OREMap.getTermFor(dft).getLabel(), jv); + coarContext.add(dft.getJsonLDTerm().getLabel(), jv); } } From 7dd6f2e71fb546e105e7492c35de1737721b3409 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 14:56:19 -0400 Subject: [PATCH 056/322] bug - related to sem api change --- src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 84423f60eca..4b31e5cf0a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -324,7 +324,7 @@ public JsonLDTerm getDescriptionTextTerm() { private JsonLDTerm getTermFor(String fieldTypeName) { //Could call datasetFieldService.findByName(fieldTypeName) - is that faster/prefereable? 
- for (DatasetField dsf : version.getDatasetFields()) { + for (DatasetField dsf : version.getFlatDatasetFields()) { DatasetFieldType dsft = dsf.getDatasetFieldType(); if (dsft.getName().equals(fieldTypeName)) { return dsft.getJsonLDTerm(); From 2104ec63ddd00ea4110de7e3fe568d2492d4b8f4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 14:57:21 -0400 Subject: [PATCH 057/322] bug - superadmin no version w/status display of archival status --- src/main/webapp/dataset-versions.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 4d04546133e..ddd305c50f7 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -162,7 +162,7 @@ action="#{DatasetPage.archiveVersion(versionTab.id)}"> - + From dfe70fe23ff8e59dba332692d20a02d6add77205 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 14:56:19 -0400 Subject: [PATCH 058/322] bug - related to sem api change --- src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 1bc43dae466..a295f264d66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -395,7 +395,7 @@ public JsonLDTerm getDescriptionTextTerm() { private JsonLDTerm getTermFor(String fieldTypeName) { //Could call datasetFieldService.findByName(fieldTypeName) - is that faster/prefereable? 
- for (DatasetField dsf : version.getDatasetFields()) { + for (DatasetField dsf : version.getFlatDatasetFields()) { DatasetFieldType dsft = dsf.getDatasetFieldType(); if (dsft.getName().equals(fieldTypeName)) { return dsft.getJsonLDTerm(); From 96bab6a2f6ddf15aedb922c05b88a1e3eb4a66eb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 May 2022 14:57:21 -0400 Subject: [PATCH 059/322] bug - superadmin no version w/status display of archival status --- src/main/webapp/dataset-versions.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 4d04546133e..ddd305c50f7 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -162,7 +162,7 @@ action="#{DatasetPage.archiveVersion(versionTab.id)}"> - + From f4ba94b8515597f1f785735b0e02bb00771a8ea6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 17 May 2022 09:09:31 -0400 Subject: [PATCH 060/322] fix status single version logic --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 7a69a720971..8b6d75d3629 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3349,7 +3349,7 @@ public Response setDatasetVersionToArchiveStatus(@PathParam("id") String dsid, } if (isSingleVersionArchiving()) { for (DatasetVersion version : ds.getVersions()) { - if ((dv != version) && version.getArchivalCopyLocation() != null) { + if ((!dv.equals(version)) && (version.getArchivalCopyLocation() != null)) { return error(Status.CONFLICT, "Dataset already archived."); } } From 631091debff4e2ccc2d17870eef8e48fd08c7145 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 26 May 2022 14:46:43 -0400 Subject: [PATCH 061/322] update test --- 
.../impl/DRSSubmitToArchiveCommandTest.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index 0bfd8ac18f2..64c5956f28f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -1,11 +1,13 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.branding.BrandingUtilTest; -import org.apache.commons.codec.digest.DigestUtils; import org.erdtman.jcs.JsonCanonicalizer; import org.junit.Assert; import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import com.auth0.jwt.JWT; import com.auth0.jwt.algorithms.Algorithm; @@ -16,9 +18,7 @@ import java.security.interfaces.RSAPrivateKey; import java.security.interfaces.RSAPublicKey; import java.security.spec.PKCS8EncodedKeySpec; -import java.time.Instant; import java.util.Base64; -import java.util.Date; /** * @@ -26,6 +26,16 @@ */ public class DRSSubmitToArchiveCommandTest { + + @BeforeAll + private static void setUpAll() { + BrandingUtilTest.setupMocks(); + } + @AfterAll + private static void tearDownAll() { + BrandingUtilTest.tearDownMocks(); + } + @Test public void createJWT() throws CommandException { From 8606475c3696fafedb022260618bd0dddff164ea Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 26 May 2022 14:57:20 -0400 Subject: [PATCH 062/322] hardcode brandname in test --- .../command/impl/DRSSubmitToArchiveCommandTest.java | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java 
b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index 64c5956f28f..62135287d18 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -26,16 +26,6 @@ */ public class DRSSubmitToArchiveCommandTest { - - @BeforeAll - private static void setUpAll() { - BrandingUtilTest.setupMocks(); - } - @AfterAll - private static void tearDownAll() { - BrandingUtilTest.tearDownMocks(); - } - @Test public void createJWT() throws CommandException { @@ -121,7 +111,7 @@ public void createJWT() throws CommandException { System.out.println("Canonical form:"+ canonicalBody); Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); - String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), fakeBody, 5); + String token1 = DRSSubmitToArchiveCommand.createJWTString(algorithmRSA, "InstallationBrandName", fakeBody, 5); System.out.println("JWT: " + token1); DecodedJWT jwt = JWT.decode(token1); From 0c515209572567a12ec3bc49fa5c389366bf14fe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 3 Jun 2022 11:48:27 -0400 Subject: [PATCH 063/322] restore bagger improvements --- .../iq/dataverse/util/bagit/BagGenerator.java | 102 +++++++++++------- 1 file changed, 64 insertions(+), 38 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index a6ee60198c3..27bf96c3e71 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -47,6 +47,7 @@ import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.text.WordUtils; import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.config.CookieSpecs; import 
org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -58,7 +59,7 @@ import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import org.apache.http.ssl.SSLContextBuilder; - +import org.apache.http.util.EntityUtils; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; @@ -89,7 +90,8 @@ public class BagGenerator { private int timeout = 60; private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000).build(); + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).build(); protected CloseableHttpClient client; private PoolingHttpClientConnectionManager cm = null; @@ -278,7 +280,8 @@ public boolean generateBag(OutputStream outputStream) throws Exception { } createFileFromString(manifestName, sha1StringBuffer.toString()); } else { - logger.warning("No Hash values sent - Bag File does not meet BagIT specification requirement"); + logger.warning("No Hash values (no files?) 
sending empty manifest to nominally comply with BagIT specification requirement"); + createFileFromString("manifest-md5.txt", ""); } // bagit.txt - Required by spec createFileFromString("bagit.txt", "BagIt-Version: 1.0\r\nTag-File-Character-Encoding: UTF-8"); @@ -463,7 +466,6 @@ private void validateBagFile(File bagFile) throws IOException { logger.info("Data Count: " + dataCount); logger.info("Data Size: " + totalDataSize); - //zf.close(); } public static String getValidName(String bagName) { @@ -1003,46 +1005,70 @@ private HttpGet createNewGetRequest(URI url, String returnType) { return request; } - InputStreamSupplier getInputStreamSupplier(final String uri) { + InputStreamSupplier getInputStreamSupplier(final String uriString) { return new InputStreamSupplier() { public InputStream get() { - int tries = 0; - while (tries < 5) { - try { - logger.fine("Get # " + tries + " for " + uri); - HttpGet getMap = createNewGetRequest(new URI(uri), null); - logger.finest("Retrieving " + tries + ": " + uri); - CloseableHttpResponse response; - //Note - if we ever need to pass an HttpClientContext, we need a new one per thread. 
- response = client.execute(getMap); - if (response.getStatusLine().getStatusCode() == 200) { - logger.finest("Retrieved: " + uri); - return response.getEntity().getContent(); - } - logger.fine("Status: " + response.getStatusLine().getStatusCode()); - tries++; - - } catch (ClientProtocolException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // Retry if this is a potentially temporary error such - // as a timeout - tries++; - logger.log(Level.WARNING,"Attempt# " + tries + " : Unable to retrieve file: " + uri, e); - if (tries == 5) { - logger.severe("Final attempt failed for " + uri); + try { + URI uri = new URI(uriString); + + int tries = 0; + while (tries < 5) { + + logger.fine("Get # " + tries + " for " + uriString); + HttpGet getFile = createNewGetRequest(uri, null); + logger.finest("Retrieving " + tries + ": " + uriString); + CloseableHttpResponse response = null; + try { + response = client.execute(getFile); + // Note - if we ever need to pass an HttpClientContext, we need a new one per + // thread. 
+ int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode == 200) { + logger.finest("Retrieved: " + uri); + return response.getEntity().getContent(); + } + logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString + + " : " + statusCode); + if (statusCode < 500) { + logger.fine("Will not retry for 40x errors"); + tries += 5; + } else { + tries++; + } + // Error handling + if (response != null) { + try { + EntityUtils.consumeQuietly(response.getEntity()); + response.close(); + } catch (IOException io) { + logger.warning( + "Exception closing response after status: " + statusCode + " on " + uri); + } + } + } catch (ClientProtocolException e) { + tries += 5; + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // Retry if this is a potentially temporary error such + // as a timeout + tries++; + logger.log(Level.WARNING, "Attempt# " + tries + " : Unable to retrieve file: " + uriString, + e); + if (tries == 5) { + logger.severe("Final attempt failed for " + uriString); + } + e.printStackTrace(); } - e.printStackTrace(); - } catch (URISyntaxException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); + } + + } catch (URISyntaxException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } - logger.severe("Could not read: " + uri); + logger.severe("Could not read: " + uriString); return null; } }; From df40225a1e108b9922678e5be84681701439a31f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 3 Jun 2022 11:48:27 -0400 Subject: [PATCH 064/322] restore bagger improvements --- .../iq/dataverse/util/bagit/BagGenerator.java | 102 +++++++++++------- 1 file changed, 64 insertions(+), 38 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index a6ee60198c3..27bf96c3e71 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -47,6 +47,7 @@ import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.text.WordUtils; import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -58,7 +59,7 @@ import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import org.apache.http.ssl.SSLContextBuilder; - +import org.apache.http.util.EntityUtils; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; @@ -89,7 +90,8 @@ public class BagGenerator { private int timeout = 60; private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000).build(); + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).build(); protected CloseableHttpClient client; private PoolingHttpClientConnectionManager cm = null; @@ -278,7 +280,8 @@ public boolean generateBag(OutputStream outputStream) throws Exception { } createFileFromString(manifestName, sha1StringBuffer.toString()); } else { - logger.warning("No Hash values sent - Bag File does not meet BagIT specification requirement"); + logger.warning("No Hash values (no files?) 
sending empty manifest to nominally comply with BagIT specification requirement"); + createFileFromString("manifest-md5.txt", ""); } // bagit.txt - Required by spec createFileFromString("bagit.txt", "BagIt-Version: 1.0\r\nTag-File-Character-Encoding: UTF-8"); @@ -463,7 +466,6 @@ private void validateBagFile(File bagFile) throws IOException { logger.info("Data Count: " + dataCount); logger.info("Data Size: " + totalDataSize); - //zf.close(); } public static String getValidName(String bagName) { @@ -1003,46 +1005,70 @@ private HttpGet createNewGetRequest(URI url, String returnType) { return request; } - InputStreamSupplier getInputStreamSupplier(final String uri) { + InputStreamSupplier getInputStreamSupplier(final String uriString) { return new InputStreamSupplier() { public InputStream get() { - int tries = 0; - while (tries < 5) { - try { - logger.fine("Get # " + tries + " for " + uri); - HttpGet getMap = createNewGetRequest(new URI(uri), null); - logger.finest("Retrieving " + tries + ": " + uri); - CloseableHttpResponse response; - //Note - if we ever need to pass an HttpClientContext, we need a new one per thread. 
- response = client.execute(getMap); - if (response.getStatusLine().getStatusCode() == 200) { - logger.finest("Retrieved: " + uri); - return response.getEntity().getContent(); - } - logger.fine("Status: " + response.getStatusLine().getStatusCode()); - tries++; - - } catch (ClientProtocolException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // Retry if this is a potentially temporary error such - // as a timeout - tries++; - logger.log(Level.WARNING,"Attempt# " + tries + " : Unable to retrieve file: " + uri, e); - if (tries == 5) { - logger.severe("Final attempt failed for " + uri); + try { + URI uri = new URI(uriString); + + int tries = 0; + while (tries < 5) { + + logger.fine("Get # " + tries + " for " + uriString); + HttpGet getFile = createNewGetRequest(uri, null); + logger.finest("Retrieving " + tries + ": " + uriString); + CloseableHttpResponse response = null; + try { + response = client.execute(getFile); + // Note - if we ever need to pass an HttpClientContext, we need a new one per + // thread. 
+ int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode == 200) { + logger.finest("Retrieved: " + uri); + return response.getEntity().getContent(); + } + logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString + + " : " + statusCode); + if (statusCode < 500) { + logger.fine("Will not retry for 40x errors"); + tries += 5; + } else { + tries++; + } + // Error handling + if (response != null) { + try { + EntityUtils.consumeQuietly(response.getEntity()); + response.close(); + } catch (IOException io) { + logger.warning( + "Exception closing response after status: " + statusCode + " on " + uri); + } + } + } catch (ClientProtocolException e) { + tries += 5; + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // Retry if this is a potentially temporary error such + // as a timeout + tries++; + logger.log(Level.WARNING, "Attempt# " + tries + " : Unable to retrieve file: " + uriString, + e); + if (tries == 5) { + logger.severe("Final attempt failed for " + uriString); + } + e.printStackTrace(); } - e.printStackTrace(); - } catch (URISyntaxException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); + } + + } catch (URISyntaxException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } - logger.severe("Could not read: " + uri); + logger.severe("Could not read: " + uriString); return null; } }; From d0163b5ae8772158b12a4e0185093c580d3a0cee Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 3 Jun 2022 13:15:52 -0400 Subject: [PATCH 065/322] get non-URL form of PID (now that OREMap uses the URL) --- .../edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 27bf96c3e71..2a3a34507b1 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -73,6 +73,7 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; public class BagGenerator { @@ -206,7 +207,9 @@ public boolean generateBag(OutputStream outputStream) throws Exception { // The oremapObject is javax.json.JsonObject and we need com.google.gson.JsonObject for the aggregation object aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString()); - bagID = aggregation.get("@id").getAsString() + "v." + String pidUrlString = aggregation.get("@id").getAsString(); + String pidString = GlobalId.parse(pidUrlString).get().asString(); + bagID = pidString + "v." + aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString(); logger.info("Generating Bag: " + bagID); From be58313d8c30af98e8779687f9cff62499f7df36 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 3 Jun 2022 13:40:08 -0400 Subject: [PATCH 066/322] Fix parsing - convert url to local pid form --- .../edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 2a3a34507b1..51e7ffd63de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -208,7 +208,13 @@ public boolean generateBag(OutputStream outputStream) throws Exception { aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString()); String pidUrlString = aggregation.get("@id").getAsString(); - String pidString 
= GlobalId.parse(pidUrlString).get().asString(); + String pidString=pidUrlString; + //ToDo - put this conversion in GlobalId + if(pidUrlString.startsWith(GlobalId.DOI_RESOLVER_URL)) { + pidString = pidUrlString.replace(GlobalId.DOI_RESOLVER_URL, (GlobalId.DOI_PROTOCOL + ":")); + } else if(pidUrlString.startsWith(GlobalId.HDL_RESOLVER_URL)) { + pidString = pidUrlString.replace(GlobalId.HDL_RESOLVER_URL, (GlobalId.HDL_PROTOCOL + ":")); + } bagID = pidString + "v." + aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString(); From a9d9b4fc7c11e6c4e56391b06ae28aa9179afbc6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 8 Jun 2022 15:15:00 -0400 Subject: [PATCH 067/322] fix display width in dataset md edit mode --- src/main/webapp/metadataFragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index 0104615025c..324fd2e0b84 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -315,7 +315,7 @@ - +
From c6aa44951beb54a55070536d48fac55cab22e80b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 8 Jun 2022 15:15:15 -0400 Subject: [PATCH 068/322] flyway for instructions column in template --- .../db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql diff --git a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql new file mode 100644 index 00000000000..ee58d91333d --- /dev/null +++ b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql @@ -0,0 +1 @@ +ALTER TABLE template ADD COLUMN IF NOT EXISTS instructions TEXT; From 125d45818257426e0cb5b10314982acf44b996cb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 13 Jun 2022 21:36:04 -0400 Subject: [PATCH 069/322] update flyway --- .../V5.11.0.2__hdc-3b2-template-instructions.sql | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql index ee58d91333d..df1d3068159 100644 --- a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql +++ b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql @@ -1 +1,14 @@ ALTER TABLE template ADD COLUMN IF NOT EXISTS instructions TEXT; + +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS template_id BIGINT; + +DO $$ +BEGIN + + BEGIN + ALTER TABLE dataset ADD CONSTRAINT fx_dataset_template_id FOREIGN KEY (template_id) REFERENCES template(id); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table constraint fk_dataset_template_id already exists'; + END; + +END $$; From 6f57d11aa09423f0d9b82360d7cafe672caf643b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 13 Jun 2022 23:37:38 -0400 Subject: [PATCH 070/322] OK when 
metadata doesn't exist and won't send message --- .../workflow/internalspi/LDNAnnounceDatasetVersionStep.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java index 3388e54e5bf..5b570fe2e96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java @@ -81,6 +81,10 @@ public WorkflowStepResult run(WorkflowContext context) { } catch (URISyntaxException e) { return new Failure("LDNAnnounceDatasetVersion workflow step failed: unable to parse inbox in :LDNTarget setting."); } + if(announcement==null) { + logger.info(context.getDataset().getGlobalId().asString() + "does not have metadata required to send LDN message. Nothing sent."); + return OK; + } // execute try (CloseableHttpResponse response = client.execute(announcement)) { int code = response.getStatusLine().getStatusCode(); @@ -114,7 +118,7 @@ public void rollback(WorkflowContext context, Failure reason) { throw new UnsupportedOperationException("Not supported yet."); // This class does not need to resume. 
} - HttpPost buildAnnouncement(boolean b, WorkflowContext ctxt, JsonObject target) throws URISyntaxException { + HttpPost buildAnnouncement(boolean qb, WorkflowContext ctxt, JsonObject target) throws URISyntaxException { // First check that we have what is required DatasetVersion dv = ctxt.getDataset().getReleasedVersion(); From 3d7683ff094303c177fab0e610f8c31086eebd85 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 14:56:42 -0400 Subject: [PATCH 071/322] update package_id --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 7 ++++++- .../engine/command/impl/S3SubmitToArchiveCommand.java | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 80969c80c3e..7ded0f785ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -97,7 +97,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); String spaceName = getSpaceName(dataset); - String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); + String packageId = getFileName(spaceName, dv); if (alias != null) { if (drsConfigObject.getBoolean("single_version", false)) { @@ -288,6 +288,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t return WorkflowStepResult.OK; } + @Override + protected String getFileName(String spaceName, DatasetVersion dv) { + return spaceName + (".v" + dv.getFriendlyVersionNumber()).replace('.', '_'); + } + public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { 
String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); logger.fine("Canonical body: " + canonicalBody); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 391a2f7c94a..af7dc86bf4b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -94,7 +94,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } // Store BagIt file - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber(); + String fileName = getFileName(spaceName, dv); + String bagKey = spaceName + "/" + fileName + ".zip"; // Add BagIt ZIP file // Google uses MD5 as one way to verify the @@ -157,6 +158,10 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } + protected String getFileName(String spaceName, DatasetVersion dv) { + return spaceName + ".v" + dv.getFriendlyVersionNumber(); + } + protected String getSpaceName(Dataset dataset) { if (spaceName == null) { spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-').replace('.', '-') From 18d3f5d999157ddb492993cabf977d7aa596e49a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 16:15:42 -0400 Subject: [PATCH 072/322] allow datacite filename override --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 6 ++++++ .../engine/command/impl/S3SubmitToArchiveCommand.java | 7 +++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 7ded0f785ed..f185359d32e 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -292,6 +292,12 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t protected String getFileName(String spaceName, DatasetVersion dv) { return spaceName + (".v" + dv.getFriendlyVersionNumber()).replace('.', '_'); } + + @Override + protected String getDataCiteFileName(String spaceName, DatasetVersion dv) { + return spaceName + ("_datacite.v" + dv.getFriendlyVersionNumber()).replace('.','_'); + } + public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index af7dc86bf4b..a18f99f4bee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -84,8 +84,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); om.setContentLength(dataciteIn.available()); - String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber() - + ".xml"; + String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; tm.upload(new PutObjectRequest(bucketName, dcKey, dataciteIn, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, dcKey); if (om == null) { @@ -158,6 +157,10 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } + protected String getDataCiteFileName(String spaceName, DatasetVersion dv) { + 
return spaceName + "_datacite.v" + dv.getFriendlyVersionNumber(); + } + protected String getFileName(String spaceName, DatasetVersion dv) { return spaceName + ".v" + dv.getFriendlyVersionNumber(); } From 9d4d815e3decc2b652845b0fb63e5d8a739537db Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 17:02:05 -0400 Subject: [PATCH 073/322] fix missing field check --- .../workflow/internalspi/LDNAnnounceDatasetVersionStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java index 5b570fe2e96..3478d9398f0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/LDNAnnounceDatasetVersionStep.java @@ -130,7 +130,7 @@ HttpPost buildAnnouncement(boolean qb, WorkflowContext ctxt, JsonObject target) } Set reqFields = fields.keySet(); for (DatasetField df : dvf) { - if (reqFields.contains(df.getDatasetFieldType().getName())) { + if(!df.isEmpty() && reqFields.contains(df.getDatasetFieldType().getName())) { fields.put(df.getDatasetFieldType().getName(), df); } } From 9f3fadbcbf1bf3d6c246a0e693015462eb90de89 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Jul 2022 16:38:02 -0400 Subject: [PATCH 074/322] missing property --- src/main/java/propertyFiles/Bundle.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index c43f2c1ede9..4f5b1201e35 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -266,6 +266,7 @@ notification.typeDescription.WORKFLOW_SUCCESS=External workflow run has succeede notification.typeDescription.WORKFLOW_FAILURE=External workflow run has failed 
notification.typeDescription.STATUSUPDATED=Status of dataset has been updated notification.typeDescription.DATASETCREATED=Dataset was created by user +notification.typeDescription.DATASETMENTIONED=Dataset was referenced in remote system groupAndRoles.manageTips=Here is where you can access and manage all the groups you belong to, and the roles you have been assigned. user.message.signup.label=Create Account From 867c8b85778659dfd5f9435be7029e72256d2c95 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 15:35:46 +0200 Subject: [PATCH 075/322] fix(jpa): introduce real defaults for DataSourceDefinition via MPCONFIG. #7980 Previously, with Dataverse software 5.3, the option to configure the database connection has been moved into the codebase. Admins can set details via MicroProfile Config. With updating to Payara 5.2021.4, we can provide default values for the connection details. Before, this had been tried with adding them to META-INF/microprofile-config.properties. However, this is not possible due to the timing of resource creation in the application server vs. reading the properties file. IQSS/dataverse#7980 --- .../edu/harvard/iq/dataverse/util/DataSourceProducer.java | 8 ++++++-- .../resources/META-INF/microprofile-config.properties | 6 +----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 630f192890b..322542c10d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -16,9 +16,13 @@ // HINT: PGSimpleDataSource would work too, but as we use a connection pool, go with a javax.sql.ConnectionPoolDataSource // HINT: PGXADataSource is unnecessary (no distributed transactions used) and breaks ingest. 
className = "org.postgresql.ds.PGConnectionPoolDataSource", - user = "${MPCONFIG=dataverse.db.user}", + + // BEWARE: as this resource is created before defaults are read from META-INF/microprofile-config.properties, + // defaults must be provided in this Payara-proprietary manner. + user = "${MPCONFIG=dataverse.db.user:dataverse}", password = "${MPCONFIG=dataverse.db.password}", - url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host}:${MPCONFIG=dataverse.db.port}/${MPCONFIG=dataverse.db.name}", + url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}", + // If we ever need to change these pool settings, we need to remove this class and create the resource // from web.xml. We can use MicroProfile Config in there for these values, impossible to do in the annotation. // diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 09d71dfbf3a..9e5d126d305 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -1,5 +1 @@ -# DATABASE -dataverse.db.host=localhost -dataverse.db.port=5432 -dataverse.db.user=dataverse -dataverse.db.name=dataverse +# Entries use key=value From 9e16306fb6731493b778abcbf933f1cd20bcb099 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 16:49:19 +0200 Subject: [PATCH 076/322] feat(jpa): add advanced pool config options to DataSourceDefinition #7980 --- .../iq/dataverse/util/DataSourceProducer.java | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 322542c10d6..52860971243 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -34,18 +34,36 @@ maxPoolSize = 100, // "The number of seconds that a physical connection should remain unused in the pool before the connection is closed for a connection pool. " // Payara DataSourceDefinitionDeployer default value = 300 (seconds) - maxIdleTime = 300) -// It's possible to add additional properties like this... -// -//properties = { -// "fish.payara.log-jdbc-calls=true" -//}) -// -// ... but at this time we don't think we need any. The full list -// of properties can be found at https://docs.payara.fish/community/docs/5.2021.6/documentation/payara-server/jdbc/advanced-connection-pool-properties.html#full-list-of-properties -// -// All these properties cannot be configured via MPCONFIG as Payara doesn't support this (yet). To be enhanced. -// See also https://github.com/payara/Payara/issues/5024 + maxIdleTime = 300, + + // Set more options via MPCONFIG, including defaults where applicable. + // TODO: Future versions of Payara might support setting integer properties like pool size, + // idle times, etc in a Payara-propietary way. 
See https://github.com/payara/Payara/pull/5272 + properties = { + // The following options are documented here: + // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html + "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", + "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method}", + "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name}", + "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname}", + "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", + "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", + "fish.payara.connection-leak-reclaim=${MPCONFIG=dataverse.db.connection-leak-reclaim:false}", + "fish.payara.connection-creation-retry-attempts=${MPCONFIG=dataverse.db.connection-creation-retry-attempts:0}", + "fish.payara.connection-creation-retry-interval-in-seconds=${MPCONFIG=dataverse.db.connection-creation-retry-interval-in-seconds:10}", + "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", + "fish.payara.lazy-connection-enlistment=${MPCONFIG=dataverse.db.lazy-connection-enlistment:false}", + "fish.payara.lazy-connection-association=${MPCONFIG=dataverse.db.lazy-connection-association:false}", + "fish.payara.pooling=${MPCONFIG=dataverse.db.pooling:true}", + "fish.payara.statement-cache-size=${MPCONFIG=dataverse.db.statement-cache-size:0}", + "fish.payara.match-connections=${MPCONFIG=dataverse.db.match-connections:true}", + "fish.payara.max-connection-usage-count=${MPCONFIG=dataverse.db.max-connection-usage-count:0}", + "fish.payara.statement-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-leak-timeout-in-seconds:0}", + 
"fish.payara.statement-leak-reclaim=${MPCONFIG=dataverse.db.statement-leak-reclaim:false}", + "fish.payara.statement-cache-type=${MPCONFIG=dataverse.db.statement-cache-type}", + "fish.payara.slow-query-threshold-in-seconds=${MPCONFIG=dataverse.db.slow-query-threshold-in-seconds:-1}", + "fish.payara.log-jdbc-calls=${MPCONFIG=dataverse.db.log-jdbc-calls:false}" + }) public class DataSourceProducer { @Resource(lookup = "java:app/jdbc/dataverse") From 11d6258a034f78a0505007cc24c34cad19d6e59e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 19:57:25 +0200 Subject: [PATCH 077/322] docs(jpa): add extensive docs about database connection configuration. #7980 --- .../source/installation/config.rst | 193 +++++++++++++----- 1 file changed, 142 insertions(+), 51 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a70dcd4e8db..14c52ed0a35 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,6 +238,148 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. 
Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. + + +Database Persistence +-------------------- + +The Dataverse software uses a PostgreSQL server and a Solr Search Index to store objects users create. +You can configure basic and advanced settings of the PostgreSQL database connection with the help of +MicroProfile Config API. + +Basic Database Settings ++++++++++++++++++++++++ + +1. Any of these settings can be set via system properties (see :ref:`jvm-options`), environment variables or other + MicroProfile Config mechanisms supported by the appserver. + `See Payara docs for supported sources `_. +2. Remember to protect your secrets. For passwords, use an environment variable (bare minimum), a password alias named the same + as the key (OK) or use the "dir config source" of Payara (best). + + Alias creation example: + + .. code-block:: shell + + echo "AS_ADMIN_ALIASPASSWORD=changeme" > /tmp/p.txt + asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.db.password + rm /tmp/p.txt + +3. Environment variables follow the key, replacing any dot, colon, dash, etc into an underscore "_" and all uppercase + letters. Example: ``dataverse.db.host`` -> ``DATAVERSE_DB_HOST`` + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.host + - The PostgreSQL server to connect to. + - ``localhost`` + * - dataverse.db.port + - The PostgreSQL server port to connect to. 
+ - ``5432`` + * - dataverse.db.user + - The PostgreSQL user name to connect with. + - | ``dataverse`` + | (installer sets to ``dvnapp``) + * - dataverse.db.password + - The PostgreSQL users password to connect with. + + **Please note the safety advisory above.** + - *No default* + * - dataverse.db.name + - The PostgreSQL database name to use for the Dataverse installation. + - | ``dataverse`` + | (installer sets to ``dvndb``) + +Advanced Database Settings +++++++++++++++++++++++++++ + +The following options are useful in many scenarios. You might be interested in debug output during development or +monitoring performance in production. + +You can find more details within the +`Payara docs on Advanced Connection Pool Configuration `_. + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.is-connection-validation-required + - ``true``: Validate connections, allow server to reconnect in case of failure + - false + * - dataverse.db.connection-validation-method + - | The method of connection validation: + | ``table|autocommit|meta-data|custom-validation`` + - *No default* + * - dataverse.db.validation-table-name + - The name of the table used for validation if the validation method is set to ``table`` + - *No default* + * - dataverse.db.validation-classname + - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation`` + - *No default* + * - dataverse.db.validate-atmost-once-period-in-seconds + - Specifies the time interval in seconds between successive requests to validate a connection at most once. + - ``0`` (disabled) + * - dataverse.db.connection-leak-timeout-in-seconds + - Specify timeout when connections count as "leaked". + - ``0`` (disabled) + * - dataverse.db.connection-leak-reclaim + - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. 
+ - ``false`` + * - dataverse.db.connection-creation-retry-attempts + - Number of attempts to create a new connection. + - ``0`` (no retries) + * - dataverse.db.connection-creation-retry-interval-in-seconds + - Time interval between retries while attempting to create a connection. Effective when "Creation Retry Attempts" is ``> 0``. + - ``10`` + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) + * - dataverse.db.lazy-connection-enlistment + - Enlist a resource to the transaction only when it is actually used in a method + - ``false`` + * - dataverse.db.lazy-connection-association + - Connections are lazily associated when an operation is performed on them + - ``false`` + * - dataverse.db.pooling + - When set to false, disables connection pooling for the pool + - ``true`` (enabled) + * - dataverse.db.statement-cache-size + - Caching is enabled when set to a positive non-zero value (for example, 10) + - ``0`` + * - dataverse.db.match-connections + - Turns connection matching for the pool on or off + - ``true`` + * - dataverse.db.max-connection-usage-count + - Connections will be reused by the pool for the specified number of times, after which they will be closed. + - ``0`` (disabled) + * - dataverse.db.statement-leak-timeout-in-seconds + - Specifiy timeout when statements should be considered to be "leaked" + - ``0`` (disabled) + * - dataverse.db.statement-leak-reclaim + - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs + - ``false`` + * - dataverse.db.statement-cache-type + - + - + * - dataverse.db.slow-query-threshold-in-seconds + - SQL queries that exceed this time in seconds will be logged. 
+ - ``-1`` (disabled) + * - dataverse.db.log-jdbc-calls + - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL + - ``false`` + + + + File Storage: Using a Local Filesystem and/or Swift and/or object stores ------------------------------------------------------------------------ @@ -1373,57 +1515,6 @@ dataverse.auth.password-reset-timeout-in-minutes Users have 60 minutes to change their passwords by default. You can adjust this value here. -dataverse.db.name -+++++++++++++++++ - -The PostgreSQL database name to use for the Dataverse installation. - -Defaults to ``dataverse`` (but the installer sets it to ``dvndb``). - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_NAME``. - -dataverse.db.user -+++++++++++++++++ - -The PostgreSQL user name to connect with. - -Defaults to ``dataverse`` (but the installer sets it to ``dvnapp``). - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_USER``. - -dataverse.db.password -+++++++++++++++++++++ - -The PostgreSQL users password to connect with. - -Preferrably use a JVM alias, as passwords in environment variables aren't safe. - -.. code-block:: shell - - echo "AS_ADMIN_ALIASPASSWORD=change-me-super-secret" > /tmp/password.txt - asadmin create-password-alias --passwordfile /tmp/password.txt dataverse.db.password - rm /tmp/password.txt - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PASSWORD``. - -dataverse.db.host -+++++++++++++++++ - -The PostgreSQL server to connect to. - -Defaults to ``localhost``. - -Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_HOST``. - -dataverse.db.port -+++++++++++++++++ - -The PostgreSQL server port to connect to. - -Defaults to ``5432``, the default PostgreSQL port. - -Can also be set via *MicroProfile Config API* sources, e.g. 
the environment variable ``DATAVERSE_DB_PORT``. - dataverse.rserve.host +++++++++++++++++++++ From 8df07cf21c85827e64c9114dd553757299249cbf Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 20:41:33 +0200 Subject: [PATCH 078/322] feat(jpa): remove exotic database connection options. #7980 As requested by @pdurbin, the long list was quite overwhelming. It's now damped down to 12 options in 3 subsubsections of the docs. --- .../source/installation/config.rst | 67 ++++++++++--------- .../iq/dataverse/util/DataSourceProducer.java | 14 ++-- 2 files changed, 38 insertions(+), 43 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 14c52ed0a35..69ec666ff16 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -301,8 +301,13 @@ Advanced Database Settings The following options are useful in many scenarios. You might be interested in debug output during development or monitoring performance in production. -You can find more details within the -`Payara docs on Advanced Connection Pool Configuration `_. +You can find more details within the Payara docs: + +- `User Guide: Connection Pool Configuration `_ +- `Tech Doc: Advanced Connection Pool Configuration `_. + +Connection Validation +^^^^^^^^^^^^^^^^^^^^^ .. list-table:: :widths: 15 60 25 @@ -328,48 +333,45 @@ You can find more details within the * - dataverse.db.validate-atmost-once-period-in-seconds - Specifies the time interval in seconds between successive requests to validate a connection at most once. - ``0`` (disabled) + +Connection & Statement Leaks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default * - dataverse.db.connection-leak-timeout-in-seconds - Specify timeout when connections count as "leaked". 
- ``0`` (disabled) * - dataverse.db.connection-leak-reclaim - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. - ``false`` - * - dataverse.db.connection-creation-retry-attempts - - Number of attempts to create a new connection. - - ``0`` (no retries) - * - dataverse.db.connection-creation-retry-interval-in-seconds - - Time interval between retries while attempting to create a connection. Effective when "Creation Retry Attempts" is ``> 0``. - - ``10`` - * - dataverse.db.statement-timeout-in-seconds - - Timeout property of a connection to enable termination of abnormally long running queries. - - ``-1`` (disabled) - * - dataverse.db.lazy-connection-enlistment - - Enlist a resource to the transaction only when it is actually used in a method - - ``false`` - * - dataverse.db.lazy-connection-association - - Connections are lazily associated when an operation is performed on them - - ``false`` - * - dataverse.db.pooling - - When set to false, disables connection pooling for the pool - - ``true`` (enabled) - * - dataverse.db.statement-cache-size - - Caching is enabled when set to a positive non-zero value (for example, 10) - - ``0`` - * - dataverse.db.match-connections - - Turns connection matching for the pool on or off - - ``true`` - * - dataverse.db.max-connection-usage-count - - Connections will be reused by the pool for the specified number of times, after which they will be closed. - - ``0`` (disabled) * - dataverse.db.statement-leak-timeout-in-seconds - Specifiy timeout when statements should be considered to be "leaked" - ``0`` (disabled) * - dataverse.db.statement-leak-reclaim - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs - ``false`` - * - dataverse.db.statement-cache-type - - - - + +Logging & Slow Performance +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
list-table:: + :widths: 15 60 25 + :header-rows: 1 + :align: left + + * - MPCONFIG Key + - Description + - Default + * - dataverse.db.statement-timeout-in-seconds + - Timeout property of a connection to enable termination of abnormally long running queries. + - ``-1`` (disabled) * - dataverse.db.slow-query-threshold-in-seconds - SQL queries that exceed this time in seconds will be logged. - ``-1`` (disabled) @@ -379,7 +381,6 @@ You can find more details within the - File Storage: Using a Local Filesystem and/or Swift and/or object stores ------------------------------------------------------------------------ diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 52860971243..75b892064f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -42,25 +42,19 @@ properties = { // The following options are documented here: // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html + // VALIDATION "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method}", "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name}", "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname}", "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", + // LEAK DETECTION "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", "fish.payara.connection-leak-reclaim=${MPCONFIG=dataverse.db.connection-leak-reclaim:false}", - 
"fish.payara.connection-creation-retry-attempts=${MPCONFIG=dataverse.db.connection-creation-retry-attempts:0}", - "fish.payara.connection-creation-retry-interval-in-seconds=${MPCONFIG=dataverse.db.connection-creation-retry-interval-in-seconds:10}", - "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", - "fish.payara.lazy-connection-enlistment=${MPCONFIG=dataverse.db.lazy-connection-enlistment:false}", - "fish.payara.lazy-connection-association=${MPCONFIG=dataverse.db.lazy-connection-association:false}", - "fish.payara.pooling=${MPCONFIG=dataverse.db.pooling:true}", - "fish.payara.statement-cache-size=${MPCONFIG=dataverse.db.statement-cache-size:0}", - "fish.payara.match-connections=${MPCONFIG=dataverse.db.match-connections:true}", - "fish.payara.max-connection-usage-count=${MPCONFIG=dataverse.db.max-connection-usage-count:0}", "fish.payara.statement-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-leak-timeout-in-seconds:0}", "fish.payara.statement-leak-reclaim=${MPCONFIG=dataverse.db.statement-leak-reclaim:false}", - "fish.payara.statement-cache-type=${MPCONFIG=dataverse.db.statement-cache-type}", + // LOGGING, SLOWNESS, PERFORMANCE + "fish.payara.statement-timeout-in-seconds=${MPCONFIG=dataverse.db.statement-timeout-in-seconds:-1}", "fish.payara.slow-query-threshold-in-seconds=${MPCONFIG=dataverse.db.slow-query-threshold-in-seconds:-1}", "fish.payara.log-jdbc-calls=${MPCONFIG=dataverse.db.log-jdbc-calls:false}" }) From 59dbdd84b32691b4cc9c1de0c362794aef24c804 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 30 Jun 2021 20:54:10 +0200 Subject: [PATCH 079/322] docs(jpa): add release note for 5.6 about #7980 --- doc/release-notes/7980-enhanced-dsd.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 doc/release-notes/7980-enhanced-dsd.md diff --git a/doc/release-notes/7980-enhanced-dsd.md b/doc/release-notes/7980-enhanced-dsd.md new file mode 100644 index 00000000000..6a86a2c4b37 --- 
/dev/null +++ b/doc/release-notes/7980-enhanced-dsd.md @@ -0,0 +1,10 @@ +### Default Values for Database Connections fixed + +Introduced in Dataverse release 5.3 a regression might have hit you: +the announced default values for the database connection never actually worked. + +With the update to Payara 5.2022.3 it was possible to introduce working +defaults. The documentation has been changed accordingly. + +Together with this change, you can now enable advanced connection pool +configurations useful for debugging and monitoring. See the docs for details. \ No newline at end of file From 8a1ee7e3af9a8d7e470c75249d421a4e292d532a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 23:42:54 +0200 Subject: [PATCH 080/322] feat(jpa): add JDBC connection parameter setting #7980 The avoid hacky parameter additions via the database name, this commit adds support for adding parameters to the JDBC URL. It defaults to empty (no parameters). --- doc/sphinx-guides/source/installation/config.rst | 4 ++++ .../edu/harvard/iq/dataverse/util/DataSourceProducer.java | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 69ec666ff16..269b9eeee55 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -294,6 +294,10 @@ Basic Database Settings - The PostgreSQL database name to use for the Dataverse installation. - | ``dataverse`` | (installer sets to ``dvndb``) + * - dataverse.db.parameters + - Connection parameters, see `Postgres JDBC docs `_ + Note: you don't need to provide the initial "?". 
+ - *Empty string* Advanced Database Settings ++++++++++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 75b892064f5..4cb0f49190b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -21,7 +21,7 @@ // defaults must be provided in this Payara-proprietary manner. user = "${MPCONFIG=dataverse.db.user:dataverse}", password = "${MPCONFIG=dataverse.db.password}", - url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}", + url = "jdbc:postgresql://${MPCONFIG=dataverse.db.host:localhost}:${MPCONFIG=dataverse.db.port:5432}/${MPCONFIG=dataverse.db.name:dataverse}?${MPCONFIG=dataverse.db.parameters:}", // If we ever need to change these pool settings, we need to remove this class and create the resource // from web.xml. We can use MicroProfile Config in there for these values, impossible to do in the annotation. From e81b20c38c1f23bdd7c6cf2a347ddbf72d7db450 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 23:45:02 +0200 Subject: [PATCH 081/322] fix(jpa): make advanced JDBC options not log warnings #7980 With the addition of the advanced (but proprietary, Payara-only) settings for database connection monitoring, the non-present default for connection validation triggered unnecessary log clutter. Adding an empty default makes these go away and is inline with the default of Payara. 
--- doc/sphinx-guides/source/installation/config.rst | 6 +++--- .../edu/harvard/iq/dataverse/util/DataSourceProducer.java | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 269b9eeee55..04c17298a97 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -327,13 +327,13 @@ Connection Validation * - dataverse.db.connection-validation-method - | The method of connection validation: | ``table|autocommit|meta-data|custom-validation`` - - *No default* + - *Empty string* * - dataverse.db.validation-table-name - The name of the table used for validation if the validation method is set to ``table`` - - *No default* + - *Empty string* * - dataverse.db.validation-classname - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation`` - - *No default* + - *Empty string* * - dataverse.db.validate-atmost-once-period-in-seconds - Specifies the time interval in seconds between successive requests to validate a connection at most once. 
- ``0`` (disabled) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 4cb0f49190b..800c05ae6dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -44,9 +44,9 @@ // https://docs.payara.fish/community/docs/documentation/payara-server/jdbc/advanced-connection-pool-properties.html // VALIDATION "fish.payara.is-connection-validation-required=${MPCONFIG=dataverse.db.is-connection-validation-required:false}", - "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method}", - "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name}", - "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname}", + "fish.payara.connection-validation-method=${MPCONFIG=dataverse.db.connection-validation-method:}", + "fish.payara.validation-table-name=${MPCONFIG=dataverse.db.validation-table-name:}", + "fish.payara.validation-classname=${MPCONFIG=dataverse.db.validation-classname:}", "fish.payara.validate-atmost-once-period-in-seconds=${MPCONFIG=dataverse.db.validate-atmost-once-period-in-seconds:0}", // LEAK DETECTION "fish.payara.connection-leak-timeout-in-seconds=${MPCONFIG=dataverse.db.connection-leak-timeout-in-seconds:0}", From b6a526215d014de83d9399cfb345996847b97575 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 6 Sep 2022 17:00:27 -0400 Subject: [PATCH 082/322] cleanup differences with develop --- .../java/edu/harvard/iq/dataverse/api/Admin.java | 2 -- .../edu/harvard/iq/dataverse/api/Datasets.java | 3 --- .../V5.11.0.2__hdc-3b2-template-instructions.sql | 14 -------------- .../impl/DRSSubmitToArchiveCommandTest.java | 15 +++++---------- 4 files changed, 5 insertions(+), 29 deletions(-) delete mode 100644 src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 4fc2d9d88e3..ef08444af69 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -102,8 +102,6 @@ import java.io.IOException; import java.io.OutputStream; -import edu.harvard.iq.dataverse.util.json.JsonUtil; - import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.rolesToJson; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 492a12540f0..92cee9fffc9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -58,7 +58,6 @@ import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -101,8 +100,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.StringReader; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; import java.net.URI; import java.sql.Timestamp; import java.text.MessageFormat; diff --git a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql b/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql deleted file mode 100644 index df1d3068159..00000000000 --- a/src/main/resources/db/migration/V5.11.0.2__hdc-3b2-template-instructions.sql +++ /dev/null @@ -1,14 +0,0 @@ -ALTER TABLE template ADD COLUMN IF NOT EXISTS 
instructions TEXT; - -ALTER TABLE dataset ADD COLUMN IF NOT EXISTS template_id BIGINT; - -DO $$ -BEGIN - - BEGIN - ALTER TABLE dataset ADD CONSTRAINT fx_dataset_template_id FOREIGN KEY (template_id) REFERENCES template(id); - EXCEPTION - WHEN duplicate_object THEN RAISE NOTICE 'Table constraint fk_dataset_template_id already exists'; - END; - -END $$; diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index 62135287d18..ec5f4b8c69d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -1,14 +1,8 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.branding.BrandingUtil; -import edu.harvard.iq.dataverse.branding.BrandingUtilTest; - import org.erdtman.jcs.JsonCanonicalizer; import org.junit.Assert; import org.junit.Test; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; - import com.auth0.jwt.JWT; import com.auth0.jwt.algorithms.Algorithm; import com.auth0.jwt.interfaces.DecodedJWT; @@ -20,12 +14,12 @@ import java.security.spec.PKCS8EncodedKeySpec; import java.util.Base64; -/** - * - * @author michael - */ + public class DRSSubmitToArchiveCommandTest { + /* Simple test of JWT encode/decode functionality + * + */ @Test public void createJWT() throws CommandException { @@ -118,6 +112,7 @@ public void createJWT() throws CommandException { System.out.println(jwt.getPayload()); } catch (Exception e) { System.out.println(e.getLocalizedMessage()); + //Any exception is a failure, otherwise decoding worked. 
Assert.fail(e.getLocalizedMessage()); } From c4bc6c4879541f134f09b8af2478da531d285a3a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 6 Sep 2022 17:09:45 -0400 Subject: [PATCH 083/322] more cleanup --- .../engine/command/impl/DRSSubmitToArchiveCommandTest.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index ec5f4b8c69d..9cc9fae67ba 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -10,7 +10,7 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import java.security.KeyFactory; import java.security.interfaces.RSAPrivateKey; -import java.security.interfaces.RSAPublicKey; +//import java.security.interfaces.RSAPublicKey; import java.security.spec.PKCS8EncodedKeySpec; import java.util.Base64; @@ -50,7 +50,6 @@ public void createJWT() throws CommandException { + "J73YadnpU82C+7OnaTTCDVPfXYgPFLpE9xKFKkRFacgUbEnvZ2i0zSUquH0RAyaK" + "tJ0d/dnd5TQUccAZwT8Nrw0="; - //Todo - not in pkcs8 form String pubKeyString = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs0sI/nP7okc+QDog1rFe" + "acSa3c5Q0VdjyisERgs4b9TBD8EArxaesGUQ4AhOBH6VVLgcjSJ1dXjn6wY8CJca" + "fIb/UT2AgLDwhVeOlS3mbK/BTn76iOiLMGKgd6sHYuTVvgriUS4ExST/O1+RoKCL" @@ -90,7 +89,7 @@ public void createJWT() throws CommandException { KeyFactory keyFactory = KeyFactory.getInstance("RSA"); PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); RSAPrivateKey privKey = (RSAPrivateKey) keyFactory.generatePrivate(keySpec); - RSAPublicKey publicKey; + //RSAPublicKey publicKey; /* * If public key is needed: encoded = Base64.decodeBase64(publicKeyPEM); * From d74f9d780106d9f474236e942f28e9ca14bcd405 Mon Sep 17 00:00:00 2001 From: 
qqmyers Date: Wed, 7 Sep 2022 10:29:01 -0400 Subject: [PATCH 084/322] Change setting name --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index ecea6f1dcb2..89666f02db2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -61,7 +61,7 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); - private static final String DRS_CONFIG = ":DRSArchivalConfig"; + private static final String DRS_CONFIG = ":DRSArchiverConfig"; private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; private static final String S3_PATH = "s3_path"; From 82c9ec8652c236600674871e18807e72c7bf45df Mon Sep 17 00:00:00 2001 From: noobyu6 <1044510784@qq.com> Date: Sat, 10 Sep 2022 03:00:54 +0800 Subject: [PATCH 085/322] modify class field to local variable --- .../datadeposit/SWORDv2ContainerServlet.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SWORDv2ContainerServlet.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SWORDv2ContainerServlet.java index d8ba8eec4ca..441186cc63f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SWORDv2ContainerServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SWORDv2ContainerServlet.java @@ -17,9 +17,11 @@ public class SWORDv2ContainerServlet extends SwordServlet { ContainerManagerImpl containerManagerImpl; @Inject 
StatementManagerImpl statementManagerImpl; - private ContainerManager cm; + // this field can be replaced by local variable +// private ContainerManager cm; private ContainerAPI api; - private StatementManager sm; + // this field can be replaced by local variable +// private StatementManager sm; private final ReentrantLock lock = new ReentrantLock(); @@ -28,13 +30,15 @@ public void init() throws ServletException { super.init(); // load the container manager implementation - this.cm = containerManagerImpl; - - // load the statement manager implementation - this.sm = statementManagerImpl; +// this.cm = containerManagerImpl; + ContainerManager cm = containerManagerImpl; + // load the statement manager implementation +// this.sm = statementManagerImpl; + StatementManager sm = statementManagerImpl; // initialise the underlying servlet processor - this.api = new ContainerAPI(this.cm, this.sm, this.config); +// this.api = new ContainerAPI(this.cm, this.sm, this.config); + this.api = new ContainerAPI(cm, sm, this.config); } @Override From f7e7e4aed8e2e089ac7ce55bb583795230d6849e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 18:22:08 +0200 Subject: [PATCH 086/322] refactor(settings): replace lookups of dataverse.files.directory with MPCONFIG #7000 - Adding dataverse.files.directory equivalent to JvmSettings - Remove all System.getPropert("dataverse.files.directory") or similar - Add default with /tmp/dataverse via microprofile-config.properties as formerly seen at FileUtil and Dataset only - Refactor SwordConfigurationImpl to reuse the NoSuchElementException thrown by MPCONFIG - Refactor GoogleCloudSubmitToArchiveCommand to use the JvmSettings.lookup and create file stream in try-with-resources --- .../edu/harvard/iq/dataverse/Dataset.java | 9 ++-- .../iq/dataverse/EditDatafilesPage.java | 7 ++- .../datadeposit/SwordConfigurationImpl.java | 52 +++++++++---------- .../filesystem/FileRecordJobListener.java | 7 ++- 
.../importer/filesystem/FileRecordReader.java | 9 ++-- .../GoogleCloudSubmitToArchiveCommand.java | 31 +++++------ .../impl/ImportFromFileSystemCommand.java | 48 +++++++++-------- .../iq/dataverse/settings/JvmSettings.java | 4 ++ .../harvard/iq/dataverse/util/FileUtil.java | 8 ++- .../iq/dataverse/util/SystemConfig.java | 5 -- .../META-INF/microprofile-config.properties | 3 ++ 11 files changed, 94 insertions(+), 89 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a4f82d41bac..e2f00d0b54b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -33,6 +33,8 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -528,11 +530,8 @@ private Collection getCategoryNames() { @Deprecated public Path getFileSystemDirectory() { Path studyDir = null; - - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); if (this.getAlternativePersistentIndentifiers() != null && !this.getAlternativePersistentIndentifiers().isEmpty()) { for (AlternativePersistentIdentifier api : this.getAlternativePersistentIndentifiers()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index f53e2377a69..a895c90dabe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.ingest.IngestUtil; import 
edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -2425,10 +2426,8 @@ public boolean isTemporaryPreviewAvailable(String fileSystemId, String mimeType) return false; } - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.isEmpty()) { - filesRootDirectory = "/tmp/files"; - } + // Retrieve via MPCONFIG. Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String fileSystemName = filesRootDirectory + "/temp/" + fileSystemId; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java index ce5f9415fcc..1e506c6a0b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api.datadeposit; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.util.Arrays; @@ -86,37 +87,32 @@ public boolean storeAndCheckBinary() { @Override public String getTempDirectory() { - String tmpFileDir = System.getProperty(SystemConfig.FILES_DIRECTORY); - if (tmpFileDir != null) { - String swordDirString = tmpFileDir + File.separator + "sword"; - File swordDirFile = new File(swordDirString); - /** - * @todo Do we really need this check? 
It seems like we do because - * if you create a dataset via the native API and then later try to - * upload a file via SWORD, the directory defined by - * dataverse.files.directory may not exist and we get errors deep in - * the SWORD library code. Could maybe use a try catch in the doPost - * method of our SWORDv2MediaResourceServlet. - */ - if (swordDirFile.exists()) { + // will throw a runtime exception when not found + String tmpFileDir = JvmSettings.FILES_DIRECTORY.lookup(); + + String swordDirString = tmpFileDir + File.separator + "sword"; + File swordDirFile = new File(swordDirString); + /** + * @todo Do we really need this check? It seems like we do because + * if you create a dataset via the native API and then later try to + * upload a file via SWORD, the directory defined by + * dataverse.files.directory may not exist and we get errors deep in + * the SWORD library code. Could maybe use a try catch in the doPost + * method of our SWORDv2MediaResourceServlet. + */ + if (swordDirFile.exists()) { + return swordDirString; + } else { + boolean mkdirSuccess = swordDirFile.mkdirs(); + if (mkdirSuccess) { + logger.info("Created directory " + swordDirString); return swordDirString; } else { - boolean mkdirSuccess = swordDirFile.mkdirs(); - if (mkdirSuccess) { - logger.info("Created directory " + swordDirString); - return swordDirString; - } else { - String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); - logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); - } + String msgForSwordUsers = ("Could not determine or create SWORD temp directory. 
Check logs for details."); + logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); + // sadly, must throw RunTimeException to communicate with SWORD user + throw new RuntimeException(msgForSwordUsers); } - } else { - String msgForSwordUsers = ("JVM option \"" + SystemConfig.FILES_DIRECTORY + "\" not defined. Check logs for details."); - logger.severe(msgForSwordUsers); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 6b82a665c17..ecb998c66af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -57,6 +57,7 @@ import javax.inject.Named; import javax.servlet.http.HttpServletRequest; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import java.io.FileReader; @@ -433,8 +434,10 @@ private void loadChecksumManifest() { manifest = checksumManifest; getJobLogger().log(Level.INFO, "Checksum manifest = " + manifest + " (FileSystemImportJob.xml property)"); } - // construct full path - String manifestAbsolutePath = System.getProperty("dataverse.files.directory") + + // Construct full path - retrieve base dir via MPCONFIG. 
+ // (Has sane default /tmp/dataverse from META-INF/microprofile-config.properties) + String manifestAbsolutePath = JvmSettings.FILES_DIRECTORY.lookup() + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index b3d3a7107a6..e3b67e9b0d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.filefilter.NotFileFilter; import org.apache.commons.io.filefilter.WildcardFileFilter; @@ -96,9 +97,11 @@ public void init() { @Override public void open(Serializable checkpoint) throws Exception { - - directory = new File(System.getProperty("dataverse.files.directory") - + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); + + // Retrieve via MPCONFIG. 
Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String baseDir = JvmSettings.FILES_DIRECTORY.lookup(); + + directory = new File(baseDir + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 5d017173685..da2701a41e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -1,16 +1,27 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageException; +import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import org.apache.commons.codec.binary.Hex; +import javax.json.Json; +import javax.json.JsonObjectBuilder; 
+import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PipedInputStream; @@ -21,17 +32,6 @@ import java.util.Map; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonObjectBuilder; - -import org.apache.commons.codec.binary.Hex; -import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageException; -import com.google.cloud.storage.StorageOptions; - @RequiredPermissions(Permission.PublishDataset) public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -56,10 +56,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); - try { - FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + System.getProperty("file.separator") + "googlecloudkey.json"); + String cloudKeyFile = JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "googlecloudkey.json"; + + try (FileInputStream cloudKeyStream = new FileInputStream(cloudKeyFile)) { storage = StorageOptions.newBuilder() - .setCredentials(ServiceAccountCredentials.fromStream(fis)) + .setCredentials(ServiceAccountCredentials.fromStream(cloudKeyStream)) .setProjectId(projectName) .build() .getService(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index 64beba82450..5f31ea756eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -12,17 +12,20 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import java.io.File; -import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; + import javax.batch.operations.JobOperator; import javax.batch.operations.JobSecurityException; import javax.batch.operations.JobStartException; import javax.batch.runtime.BatchRuntime; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import java.io.File; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @RequiredPermissions(Permission.EditDataset) public class ImportFromFileSystemCommand extends AbstractCommand { @@ -69,18 +72,20 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { logger.info(error); throw new IllegalCommandException(error, this); } - File directory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier()); - // TODO: - // The above goes directly to the filesystem directory configured by the - // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementation (#6488). - // We probably want package files to be able to use specific stores instead. 
- // More importantly perhaps, the approach above does not take into account - // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer - // needs to be used to name the storage directory, instead of the main/current - // persistent identifier above. + + File directory = new File( + String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier())); + + // TODO: The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementation (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. if (!isValidDirectory(directory)) { String error = "Dataset directory is invalid. " + directory; logger.info(error); @@ -93,11 +98,10 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(error, this); } - File uploadDirectory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier() - + File.separator + uploadFolder); - // TODO: - // see the comment above. + File uploadDirectory = new File(String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier(), uploadFolder)); + + // TODO: see the comment above. 
if (!isValidDirectory(uploadDirectory)) { String error = "Upload folder is not a valid directory."; logger.info(error); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..12e5e311278 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,10 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // FILES SETTINGS + SCOPE_FILES(PREFIX, "files"), + FILES_DIRECTORY(SCOPE_FILES, "directory"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 893c62b3cb0..a2c55d41613 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; @@ -1389,11 +1390,8 @@ public static boolean canIngestAsTabular(String mimeType) { } public static String getFilesTempDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String filesTempDirectory = filesRootDirectory + "/temp"; if (!Files.exists(Paths.get(filesTempDirectory))) { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..e9313e70218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -78,11 +78,6 @@ public class SystemConfig { */ public static final String SITE_URL = "dataverse.siteUrl"; - /** - * A JVM option for where files are stored on the file system. - */ - public static final String FILES_DIRECTORY = "dataverse.files.directory"; - /** * Some installations may not want download URLs to their files to be * available in Schema.org JSON-LD output. diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..ab219071767 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,9 @@ dataverse.version=${project.version} dataverse.build= +# FILES +dataverse.files.directory=/tmp/dataverse + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 From 5c2c7022ad9f11234b0e33ddaf3a0aa2696ab154 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 22:27:30 +0200 Subject: [PATCH 087/322] docs(settings): provide more detail for dataverse.files.directory --- doc/sphinx-guides/source/api/native-api.rst | 2 ++ doc/sphinx-guides/source/installation/config.rst | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 339a291bf4d..6dd1bbab728 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -552,6 +552,8 @@ You should expect an HTTP 200 ("OK") response and JSON indicating the database I .. 
note:: Only a Dataverse installation account with superuser permissions is allowed to include files when creating a dataset via this API. Adding files this way only adds their file metadata to the database, you will need to manually add the physical files to the file system. +.. _api-import-dataset: + Import a Dataset into a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..89329ea3821 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -274,6 +274,8 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. +.. _storage-files-dir: + File Storage ++++++++++++ @@ -1404,7 +1406,19 @@ dataverse.siteUrl dataverse.files.directory +++++++++++++++++++++++++ -This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\<id\>.directory options set the permanent data storage locations.) +Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number +of purposes: + +1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before + shipping to the final storage destination. +2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer + to final storage location and/or ingest. +3. ``/<authority>/<identifier>`` data location for file system imports, see + :ref:`api-import-dataset`. +4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. + +This directory might also be used for permanent storage of data, but this setting is independent from +:ref:`storage-files-dir` configuration. 
dataverse.auth.password-reset-timeout-in-minutes ++++++++++++++++++++++++++++++++++++++++++++++++ From d7ab9f6e5359356db3b01ab9e6f87347cf117fe7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 15:11:01 +0200 Subject: [PATCH 088/322] style: replace system prop 'file.separator' with File.separator --- .../batch/jobs/importer/filesystem/FileRecordJobListener.java | 3 ++- .../batch/jobs/importer/filesystem/FileRecordReader.java | 2 +- .../java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index ecb998c66af..7837474fc27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -60,6 +60,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; +import java.io.File; import java.io.FileReader; import java.io.IOException; import java.sql.Timestamp; @@ -80,7 +81,7 @@ @Dependent public class FileRecordJobListener implements ItemReadListener, StepListener, JobListener { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; private static final UserNotification.Type notifyType = UserNotification.Type.FILESYSTEMIMPORT; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index e3b67e9b0d2..a4f8ffd2378 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -55,7 +55,7 @@ @Dependent public class FileRecordReader extends AbstractItemReader { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; @Inject JobContext jobContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java index 4a778dc7abb..a2f76ca953d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java @@ -154,8 +154,8 @@ public static Logger getJobLogger(String jobId) { try { Logger jobLogger = Logger.getLogger("job-"+jobId); FileHandler fh; - String logDir = System.getProperty("com.sun.aas.instanceRoot") + System.getProperty("file.separator") - + "logs" + System.getProperty("file.separator") + "batch-jobs" + System.getProperty("file.separator"); + String logDir = System.getProperty("com.sun.aas.instanceRoot") + File.separator + + "logs" + File.separator + "batch-jobs" + File.separator; checkCreateLogDirectory( logDir ); fh = new FileHandler(logDir + "job-" + jobId + ".log"); logger.log(Level.INFO, "JOB LOG: " + logDir + "job-" + jobId + ".log"); From e26092a596ecf5b92b831e38b5459eec0371b4a5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 3 Oct 2022 11:07:06 -0400 Subject: [PATCH 089/322] files may not have pids --- .../edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java | 4 +++- .../java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 0189faf6598..50c8c4098a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -513,7 +513,9 @@ public JsonArray fileDownloads(String yyyymm, Dataverse d, boolean uniqueCounts) for (Object[] result : results) { JsonObjectBuilder job = Json.createObjectBuilder(); job.add(MetricsUtil.ID, (int) result[0]); - job.add(MetricsUtil.PID, (String) result[1]); + if(result[1]!=null) { + job.add(MetricsUtil.PID, (String) result[1]); + } job.add(MetricsUtil.COUNT, (long) result[2]); jab.add(job); } diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java index 90b61bcb29c..72d8f5402bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsUtil.java @@ -227,7 +227,9 @@ public static JsonArray timeSeriesByIDAndPIDToJson(List results) { JsonObjectBuilder job = Json.createObjectBuilder(); job.add(MetricsUtil.DATE, date); job.add(ID, id); - job.add(PID, pids.get(id)); + if(pids.get(id)!=null) { + job.add(PID, pids.get(id)); + } job.add(COUNT, totals.get(id)); jab.add(job); } From 0376a72f9a632e7256c1618acb7ab701d19442d1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 3 Oct 2022 11:17:59 -0400 Subject: [PATCH 090/322] handle missing PIDs in CSV --- src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 339de904f9e..39d1d332884 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -2098,7 +2098,9 @@ public static String jsonArrayOfObjectsToCSV(JsonArray jsonArray, String... 
head JsonObject jo = (JsonObject) jv; String[] values = new String[headers.length]; for (int i = 0; i < headers.length; i++) { - values[i] = jo.get(headers[i]).toString(); + if(jo.containsKey(headers[i])) { + values[i] = jo.get(headers[i]).toString(); + } } csvSB.append("\n").append(String.join(",", values)); }); From d518f93dabbd0b3ab037b9344e4138d32f8a845f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 3 Oct 2022 11:22:00 -0400 Subject: [PATCH 091/322] minor doc update --- doc/sphinx-guides/source/api/metrics.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst index 6a878d73a98..f1eb1f88c71 100755 --- a/doc/sphinx-guides/source/api/metrics.rst +++ b/doc/sphinx-guides/source/api/metrics.rst @@ -72,7 +72,7 @@ Return Formats There are a number of API calls that provide time series, information reported per item (e.g. per dataset, per file, by subject, by category, and by file Mimetype), or both (time series per item). Because these calls all report more than a single number, the API provides two optional formats for the return that can be selected by specifying an HTTP Accept Header for the desired format: -* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or it's id/pid (for a dataset or datafile). For timeseries per-item, the objects also include a date. In all cases, the response is a single array. +* application/json - a JSON array of objects. For time-series, the objects include key/values for the ``date`` and ``count`` for that month. For per-item calls, the objects include the item (e.g. for a subject), or its id/pid (for a dataset or datafile (which may or may not have a PID)). For timeseries per-item, the objects also include a date. In all cases, the response is a single array. 
* Example: ``curl -H 'Accept:application/json' https://demo.dataverse.org/api/info/metrics/downloads/monthly`` @@ -120,7 +120,7 @@ Example: ``curl https://demo.dataverse.org/api/info/metrics/makeDataCount/viewsT Endpoint Table -------------- -The following table lists the available metrics endpoints (not including the Make Data Counts endpoints a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them. +The following table lists the available metrics endpoints (not including the Make Data Counts endpoints for a single dataset which are part of the :doc:`/api/native-api`) along with additional notes about them. .. csv-table:: Metrics Endpoints From 531fe8eef706bf0026b1cd29dc71d6ec7af4431c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:37:39 -0400 Subject: [PATCH 092/322] add test debug logging --- .../engine/command/impl/DRSSubmitToArchiveCommandTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java index 9cc9fae67ba..a0e79268e3d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommandTest.java @@ -110,7 +110,8 @@ public void createJWT() throws CommandException { DecodedJWT jwt = JWT.decode(token1); System.out.println(jwt.getPayload()); } catch (Exception e) { - System.out.println(e.getLocalizedMessage()); + System.out.println(e.getClass() + e.getLocalizedMessage()); + e.printStackTrace(); //Any exception is a failure, otherwise decoding worked. 
Assert.fail(e.getLocalizedMessage()); } From 2d451dcb7de34772fdc536369abd12550559e81c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 5 Oct 2022 14:41:24 -0400 Subject: [PATCH 093/322] handle test case with no BrandingUtil --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 89666f02db2..f23033f09fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -305,7 +305,10 @@ public static String createJWTString(Algorithm algorithmRSA, String installation String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); logger.fine("Canonical body: " + canonicalBody); String digest = DigestUtils.sha256Hex(canonicalBody); - return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) + if(installationBrandName==null) { + installationBrandName = BrandingUtil.getInstallationBrandName(); + } + return JWT.create().withIssuer(installationBrandName).withIssuedAt(Date.from(Instant.now())) .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) .withKeyId("defaultDataverse").withClaim("bodySHA256Hash", digest).sign(algorithmRSA); } From b59f4835074518fc8374e4f86b4a8f36dc3ccb58 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 6 Oct 2022 11:40:51 +0200 Subject: [PATCH 094/322] dataset files cleanup --- .../harvard/iq/dataverse/api/Datasets.java | 60 +++++++++++++++++ .../iq/dataverse/dataaccess/FileAccessIO.java | 41 ++++++++++++ .../dataverse/dataaccess/InputStreamIO.java | 12 +++- .../dataaccess/RemoteOverlayAccessIO.java | 9 +++ .../iq/dataverse/dataaccess/S3AccessIO.java | 65 
++++++++++++++++++- .../iq/dataverse/dataaccess/StorageIO.java | 4 ++ .../dataverse/dataaccess/SwiftAccessIO.java | 57 ++++++++++++++-- 7 files changed, 241 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aff543e643c..0cdb2b3a73f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -66,6 +66,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; @@ -2502,6 +2503,65 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } // end: addFileToDataset + /** + * Clean storage of a Dataset + * + * @param idSupplied + * @return + */ + @GET + @Path("{id}/cleanStorage") + public Response cleanStorage(@PathParam("id") String idSupplied) { + // get user and dataset + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + Dataset dataset; + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // check permissions + if (!permissionSvc.permissionsFor(createDataverseRequest(authUser), dataset).contains(Permission.EditDataset)) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "Access denied!"); + } + + List deleted = new ArrayList<>(); + Set files = new HashSet(); + try { + for (DatasetVersion dv : dataset.getVersions()) { + for (FileMetadata f 
: dv.getFileMetadatas()) { + String storageIdentifier = f.getDataFile().getStorageIdentifier(); + String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); + String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName + files.add(locationParts[locationParts.length-1]); + } + } + StorageIO datasetIO = DataAccess.getStorageIO(dataset); + List allDatasetFiles = datasetIO.listAllFiles(); + for (String f : allDatasetFiles) { + if (!files.contains(f)) { + datasetIO.deleteFile(f); + deleted.add(f); + } + } + } catch (IOException ex) { + logger.log(Level.SEVERE, null, ex); + return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! See administrator!"); + } + + return ok("Found: " + files.stream().collect(Collectors.joining(", ")) + "\n" + "Deleted: " + deleted.stream().collect(Collectors.joining(", "))); + + } + private void msg(String m) { //System.out.println(m); logger.fine(m); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d5f00b9868f..2bb3abf03a6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -683,4 +683,45 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { } return true; } + + public List listAllFiles() throws IOException { + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This FileAccessIO object hasn't been properly initialized."); + } + + Path datasetDirectoryPath = Paths.get(dataset.getAuthorityForFileStorage(), dataset.getIdentifierForFileStorage()); + if (datasetDirectoryPath == null) { + throw new IOException("Could not determine the filesystem directory of the dataset."); + } + + DirectoryStream dirStream = Files.newDirectoryStream(Paths.get(this.getFilesRootDirectory(), 
datasetDirectoryPath.toString())); + + List res = new ArrayList<>(); + if (dirStream != null) { + for (Path filePath : dirStream) { + res.add(filePath.getFileName().toString()); + } + dirStream.close(); + } + + return res; + } + + @Override + public void deleteFile(String fileName) throws IOException { + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This FileAccessIO object hasn't been properly initialized."); + } + + Path datasetDirectoryPath = Paths.get(dataset.getAuthorityForFileStorage(), dataset.getIdentifierForFileStorage()); + if (datasetDirectoryPath == null) { + throw new IOException("Could not determine the filesystem directory of the dataset."); + } + + Path p = Paths.get(this.getFilesRootDirectory(), datasetDirectoryPath.toString(), fileName); + Files.delete(p); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index c9796d24b27..1235b386fe9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -159,5 +159,15 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); } - + @Override + public List listAllFiles() throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + + + @Override + public void deleteFile(String fileName) throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 
c8e42349318..b7fb4c86c7c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -631,4 +631,13 @@ public static String getBaseStoreIdFor(String driverId) { return System.getProperty("dataverse.files." + driverId + ".base-store"); } + @Override + public List listAllFiles() throws IOException { + return baseStore.listAllFiles(); + } + + @Override + public void deleteFile(String fileName) throws IOException { + baseStore.deleteFile(fileName); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 3c9cef04980..3796d7f0ce9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1307,4 +1307,67 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { } -} + + @Override + public List listAllFiles() throws IOException { + if (!this.canWrite()) { + open(); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This S3AccessIO object hasn't been properly initialized."); + } + String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + + List ret = new ArrayList<>(); + ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix); + ObjectListing storedFilesList = null; + try { + storedFilesList = s3.listObjects(req); + } catch (SdkClientException sce) { + throw new IOException ("S3 listObjects: failed to get a listing for " + prefix); + } + if (storedFilesList == null) { + return ret; + } + List storedFilesSummary = storedFilesList.getObjectSummaries(); + try { + while (storedFilesList.isTruncated()) { + logger.fine("S3 listObjects: going to next page of list"); + storedFilesList = 
s3.listNextBatchOfObjects(storedFilesList); + if (storedFilesList != null) { + storedFilesSummary.addAll(storedFilesList.getObjectSummaries()); + } + } + } catch (AmazonClientException ase) { + //logger.warning("Caught an AmazonServiceException in S3AccessIO.listObjects(): " + ase.getMessage()); + throw new IOException("S3AccessIO: Failed to get objects for listing."); + } + + for (S3ObjectSummary item : storedFilesSummary) { + String fileName = item.getKey().substring(prefix.length()); + ret.add(fileName); + } + return ret; + } + + @Override + public void deleteFile(String fileName) throws IOException { + if (!this.canWrite()) { + open(); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This S3AccessIO object hasn't been properly initialized."); + } + String prefix = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + + try { + DeleteObjectRequest dor = new DeleteObjectRequest(bucketName, prefix + fileName); + s3.deleteObject(dor); + } catch (AmazonClientException ase) { + logger.warning("S3AccessIO: Unable to delete object " + ase.getMessage()); + } + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 90e4a54dbe8..0e42a84795c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -622,4 +622,8 @@ protected static boolean usesStandardNamePattern(String identifier) { return m.find(); } + public abstract List listAllFiles() throws IOException; + + public abstract void deleteFile(String fileName) throws IOException; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index b1725b040a3..5a376cb8d91 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -864,13 +864,16 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException } } + private String getSwiftContainerName(Dataset dataset) { + String authorityNoSlashes = dataset.getAuthorityForFileStorage().replace("/", swiftFolderPathSeparator); + return dataset.getProtocolForFileStorage() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + + swiftFolderPathSeparator + dataset.getIdentifierForFileStorage(); + } + @Override public String getSwiftContainerName() { if (dvObject instanceof DataFile) { - String authorityNoSlashes = this.getDataFile().getOwner().getAuthorityForFileStorage().replace("/", swiftFolderPathSeparator); - return this.getDataFile().getOwner().getProtocolForFileStorage() + swiftFolderPathSeparator - + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + - swiftFolderPathSeparator + this.getDataFile().getOwner().getIdentifierForFileStorage(); + return getSwiftContainerName(this.getDataFile().getOwner()); } return null; } @@ -893,5 +896,49 @@ public static String calculateRFC2104HMAC(String data, String key) mac.init(signingKey); return toHexString(mac.doFinal(data.getBytes())); } - + + @Override + public List listAllFiles() throws IOException { + if (!this.canWrite()) { + open(DataAccessOption.WRITE_ACCESS); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This SwiftAccessIO object hasn't been properly initialized."); + } + String prefix = getSwiftContainerName(dataset) + swiftFolderPathSeparator; + + Collection items; + String lastItemName = null; + List ret = new ArrayList<>(); + + while ((items = this.swiftContainer.list(prefix, lastItemName, LIST_PAGE_LIMIT)) != null && items.size() > 0) { + for (StoredObject item : items) { + lastItemName = item.getName().substring(prefix.length()); + 
ret.add(lastItemName); + } + } + + return ret; + } + + @Override + public void deleteFile(String fileName) throws IOException { + if (!this.canWrite()) { + open(DataAccessOption.WRITE_ACCESS); + } + Dataset dataset = this.getDataset(); + if (dataset == null) { + throw new IOException("This SwiftAccessIO object hasn't been properly initialized."); + } + String prefix = getSwiftContainerName(dataset) + swiftFolderPathSeparator; + + StoredObject fileObject = this.swiftContainer.getObject(prefix + fileName); + + if (!fileObject.exists()) { + throw new FileNotFoundException("SwiftAccessIO/Direct Access: " + fileName + " does not exist"); + } + + fileObject.delete(); + } } From c47d2aeaff51967b79a641c3e246640541333c40 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 10 Oct 2022 14:50:21 -0400 Subject: [PATCH 095/322] possible fix for curate issue when a file is the dataset thumb --- .../impl/CuratePublishedDatasetVersionCommand.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index 772b6205b02..4e86f5c60dd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -99,6 +99,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { logger.severe("Draft version of dataset: " + tempDataset.getId() + " has: " + newFileCount + " while last published version has " + pubFileCount); throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasetversion.update.failure"), this); } + Long thumbId = null; + if(tempDataset.getThumbnailFile()!=null) { + thumbId = tempDataset.getThumbnailFile().getId(); + }; for (FileMetadata publishedFmd : pubFmds) { DataFile dataFile = 
publishedFmd.getDataFile(); FileMetadata draftFmd = dataFile.getLatestFileMetadata(); @@ -136,6 +140,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { for (DataFileCategory cat : tempDataset.getCategories()) { cat.getFileMetadatas().remove(draftFmd); } + //And any thumbnail reference + if(publishedFmd.getDataFile().getId()==thumbId) { + tempDataset.setThumbnailFile(publishedFmd.getDataFile()); + } } // Update modification time on the published version and the dataset From fe6a1ebd2448f64f232b044c5e1cb4ff18e73eb9 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 13:04:57 -0400 Subject: [PATCH 096/322] truncate descriptions in schema.org header and export Nominally schema.org requires description to be text (not an array as we had) and Google requires a description <5000 chars. --- .../harvard/iq/dataverse/DatasetVersion.java | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..4163b4058be 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -842,12 +842,21 @@ public String getDescriptionPlainText() { return MarkupChecker.stripAllTags(getDescription()); } - public List getDescriptionsPlainText() { - List plainTextDescriptions = new ArrayList<>(); + /* This method is (only) used in creating schema.org json-jd where Google requires a text description <5000 chars. + * + * @returns - a single string composed of all descriptions (joined with \n if more than one) truncated with a trailing '...' 
if >=5000 chars + */ + public String getDescriptionsPlainTextTruncated() { + List plainTextDescriptions = new ArrayList(); + for (String htmlDescription : getDescriptions()) { plainTextDescriptions.add(MarkupChecker.stripAllTags(htmlDescription)); } - return plainTextDescriptions; + String description = String.join("\\n", plainTextDescriptions); + if(description.length()>=5000) { + description = description.substring(0, (description.substring(0,4997).lastIndexOf(" "))) + "..."; + } + return description; } /** @@ -1859,16 +1868,8 @@ public String getJsonLd() { job.add("dateModified", this.getPublicationDateAsString()); job.add("version", this.getVersionNumber().toString()); - JsonArrayBuilder descriptionsArray = Json.createArrayBuilder(); - List descriptions = this.getDescriptionsPlainText(); - for (String description : descriptions) { - descriptionsArray.add(description); - } - /** - * In Dataverse 4.8.4 "description" was a single string but now it's an - * array. - */ - job.add("description", descriptionsArray); + String description = this.getDescriptionsPlainTextTruncated(); + job.add("description", description); /** * "keywords" - contains subject(s), datasetkeyword(s) and topicclassification(s) From 542a5f13d6904d0fddd9e9e93ab5a3db78d91e7f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 18:38:42 -0400 Subject: [PATCH 097/322] fix description text - string not json array --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..f130e742488 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -147,7 +147,7 @@ public void testExportDataset() throws Exception { assertEquals("1955-11-05", 
json2.getString("datePublished")); assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); - assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getJsonArray("description").getString(0)); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getString("description")); assertEquals("Bird is the word.", json2.getJsonArray("description").getString(1)); assertEquals(2, json2.getJsonArray("description").size()); assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0)); From 578ff16331baa86ee8c56bd550c3eb80e4c39905 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 18:44:38 -0400 Subject: [PATCH 098/322] more fixes --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index f130e742488..76cee7d65e8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -147,9 +147,7 @@ public void testExportDataset() throws Exception { assertEquals("1955-11-05", json2.getString("datePublished")); assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); - assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getString("description")); - assertEquals("Bird is the word.", json2.getJsonArray("description").getString(1)); - assertEquals(2, json2.getJsonArray("description").size()); + assertEquals("Darwin's finches (also known as the Galápagos finches) 
are a group of about fifteen species of passerine birds.\\nBird is the word.", json2.getString("description")); assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0)); assertEquals("tcTerm1", json2.getJsonArray("keywords").getString(1)); assertEquals("KeywordTerm1", json2.getJsonArray("keywords").getString(2)); From 67ecd7a2dd2a0a2e71497e4015ded0f03ad8c69e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 19:11:29 -0400 Subject: [PATCH 099/322] fix linefeed in descriptions --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 4163b4058be..78055cee22a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -852,7 +852,7 @@ public String getDescriptionsPlainTextTruncated() { for (String htmlDescription : getDescriptions()) { plainTextDescriptions.add(MarkupChecker.stripAllTags(htmlDescription)); } - String description = String.join("\\n", plainTextDescriptions); + String description = String.join("\n", plainTextDescriptions); if(description.length()>=5000) { description = description.substring(0, (description.substring(0,4997).lastIndexOf(" "))) + "..."; } diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 76cee7d65e8..06f07404d6a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -147,7 +147,7 @@ public void testExportDataset() throws Exception { assertEquals("1955-11-05", json2.getString("datePublished")); 
assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); - assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.\\nBird is the word.", json2.getString("description")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.\nBird is the word.", json2.getString("description")); assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0)); assertEquals("tcTerm1", json2.getJsonArray("keywords").getString(1)); assertEquals("KeywordTerm1", json2.getJsonArray("keywords").getString(2)); From 99e276c51e84680bcc82e473a7f1fa61e9a4b1e8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 13:54:07 -0400 Subject: [PATCH 100/322] specify URL for citations/related publications --- .../harvard/iq/dataverse/DatasetVersion.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..41b3cba8bd8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1892,11 +1892,16 @@ public String getJsonLd() { job.add("keywords", keywords); /** - * citation: (multiple) related publication citation and URLs, if - * present. + * citation: (multiple) related publication citation and URLs, if present. * - * In Dataverse 4.8.4 "citation" was an array of strings but now it's an - * array of objects. + * Schema.org allows text or a CreativeWork object. Google recommends text with + * either the full citation or the PID URL. This code adds an object if we have + * the citation text for the work and/or an entry in the URL field (i.e. + * https://doi.org/...) 
The URL is reported as the 'url' field while the + * citation text (which would normally include the name) is reported as 'name' + * since there doesn't appear to be a better field ('text', which was used + * previously, is the actual text of the creative work). + * */ List relatedPublications = getRelatedPublications(); if (!relatedPublications.isEmpty()) { @@ -1911,11 +1916,11 @@ public String getJsonLd() { JsonObjectBuilder citationEntry = Json.createObjectBuilder(); citationEntry.add("@type", "CreativeWork"); if (pubCitation != null) { - citationEntry.add("text", pubCitation); + citationEntry.add("name", pubCitation); } if (pubUrl != null) { citationEntry.add("@id", pubUrl); - citationEntry.add("identifier", pubUrl); + citationEntry.add("url", pubUrl); } if (addToArray) { jsonArrayBuilder.add(citationEntry); From 59f9ed7a2db5d4709dd2fc8f74ea13e577d351ce Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 11:46:58 -0400 Subject: [PATCH 101/322] add description truncation test --- .../export/SchemaDotOrgExporterTest.java | 159 ++++---- .../json/dataset-long-description.json | 362 ++++++++++++++++++ 2 files changed, 453 insertions(+), 68 deletions(-) create mode 100644 src/test/resources/json/dataset-long-description.json diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 06f07404d6a..98da4008de9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -10,16 +10,18 @@ import static edu.harvard.iq.dataverse.util.SystemConfig.FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; import 
java.io.ByteArrayOutputStream; import java.io.File; +import java.io.IOException; import java.io.PrintWriter; -import java.io.StringReader; import java.net.URI; import java.nio.file.Files; import java.nio.file.Paths; import java.sql.Timestamp; +import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -28,15 +30,14 @@ import java.util.List; import java.util.Set; import java.util.logging.Logger; -import javax.json.Json; import javax.json.JsonObject; -import javax.json.JsonReader; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.mockito.Mockito; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * For docs see {@link SchemaDotOrgExporter}. @@ -62,75 +63,19 @@ public static void tearDownClass() { /** * Test of exportDataset method, of class SchemaDotOrgExporter. + * @throws IOException + * @throws JsonParseException + * @throws ParseException + * */ @Test - public void testExportDataset() throws Exception { + public void testExportDataset() throws JsonParseException, ParseException, IOException { File datasetVersionJson = new File("src/test/resources/json/dataset-finch2.json"); String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); - License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0/"), URI.create("/resources/images/cc0.png"), true); - license.setDefault(true); - - JsonReader jsonReader1 = Json.createReader(new StringReader(datasetVersionAsJson)); - JsonObject json1 = jsonReader1.readObject(); - JsonParser jsonParser = new JsonParser(datasetFieldTypeSvc, null, settingsService, licenseService); - DatasetVersion version = 
jsonParser.parseDatasetVersion(json1.getJsonObject("datasetVersion")); - version.setVersionState(DatasetVersion.VersionState.RELEASED); - SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); - Date publicationDate = dateFmt.parse("19551105"); - version.setReleaseTime(publicationDate); - version.setVersionNumber(1l); - TermsOfUseAndAccess terms = new TermsOfUseAndAccess(); - terms.setLicense(license); - version.setTermsOfUseAndAccess(terms); - - Dataset dataset = new Dataset(); - dataset.setProtocol("doi"); - dataset.setAuthority("10.5072/FK2"); - dataset.setIdentifier("IMK5A4"); - dataset.setPublicationDate(new Timestamp(publicationDate.getTime())); - version.setDataset(dataset); - Dataverse dataverse = new Dataverse(); - dataverse.setName("LibraScholar"); - dataset.setOwner(dataverse); - System.setProperty(SITE_URL, "https://librascholar.org"); - boolean hideFileUrls = false; - if (hideFileUrls) { - System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); - } - FileMetadata fmd = new FileMetadata(); - DataFile dataFile = new DataFile(); - dataFile.setId(42l); - dataFile.setFilesize(1234); - dataFile.setContentType("text/plain"); - dataFile.setProtocol("doi"); - dataFile.setAuthority("10.5072/FK2"); - dataFile.setIdentifier("7V5MPI"); - fmd.setDatasetVersion(version); - fmd.setDataFile(dataFile); - fmd.setLabel("README.md"); - fmd.setDescription("README file."); - List fileMetadatas = new ArrayList<>(); - fileMetadatas.add(fmd); - dataFile.setFileMetadatas(fileMetadatas);; - dataFile.setOwner(dataset); - version.setFileMetadatas(fileMetadatas); - - ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - if(json1 == null) logger.fine("Json null"); - if(version == null) logger.fine("ver null"); - if(byteArrayOutputStream == null) logger.fine("bytarr null"); - if(schemaDotOrgExporter == null) logger.fine("sdoe" + " null"); - try { - schemaDotOrgExporter.exportDataset(version, json1, byteArrayOutputStream); - } catch (Exception 
e) { - e.printStackTrace(); - } - String jsonLd = byteArrayOutputStream.toString(); - String prettyJson = JsonUtil.prettyPrint(jsonLd); - logger.fine("schema.org JSON-LD: " + prettyJson); - JsonReader jsonReader2 = Json.createReader(new StringReader(jsonLd)); - JsonObject json2 = jsonReader2.readObject(); + JsonObject json = JsonUtil.getJsonObject(datasetVersionAsJson); + JsonObject json2 = createExportFromJson(json); + assertEquals("http://schema.org", json2.getString("@context")); assertEquals("Dataset", json2.getString("@type")); assertEquals("https://doi.org/10.5072/FK2/IMK5A4", json2.getString("@id")); @@ -187,8 +132,86 @@ public void testExportDataset() throws Exception { assertEquals("https://librascholar.org/api/access/datafile/42", json2.getJsonArray("distribution").getJsonObject(0).getString("contentUrl")); assertEquals(1, json2.getJsonArray("distribution").size()); try (PrintWriter printWriter = new PrintWriter("/tmp/dvjsonld.json")) { - printWriter.println(prettyJson); + printWriter.println(JsonUtil.prettyPrint(json2)); + } + + } + + /** + * Test description truncation in exportDataset method, of class SchemaDotOrgExporter. 
+ * @throws IOException + * @throws JsonParseException + * @throws ParseException + * + */ + @Test + public void testExportDescriptionTruncation() throws JsonParseException, ParseException, IOException { + File datasetVersionJson = new File("src/test/resources/json/dataset-long-description.json"); + String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + + JsonObject json = JsonUtil.getJsonObject(datasetVersionAsJson); + JsonObject json2 = createExportFromJson(json); + + assertTrue(json2.getString("description").endsWith("at...")); + } + + private JsonObject createExportFromJson(JsonObject json) throws JsonParseException, ParseException { + License license = new License("CC0 1.0", "You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.", URI.create("http://creativecommons.org/publicdomain/zero/1.0/"), URI.create("/resources/images/cc0.png"), true); + license.setDefault(true); + JsonParser jsonParser = new JsonParser(datasetFieldTypeSvc, null, settingsService, licenseService); + DatasetVersion version = jsonParser.parseDatasetVersion(json.getJsonObject("datasetVersion")); + version.setVersionState(DatasetVersion.VersionState.RELEASED); + SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); + Date publicationDate = dateFmt.parse("19551105"); + version.setReleaseTime(publicationDate); + version.setVersionNumber(1l); + TermsOfUseAndAccess terms = new TermsOfUseAndAccess(); + terms.setLicense(license); + version.setTermsOfUseAndAccess(terms); + + Dataset dataset = new Dataset(); + dataset.setProtocol("doi"); + dataset.setAuthority("10.5072/FK2"); + dataset.setIdentifier("IMK5A4"); + dataset.setPublicationDate(new Timestamp(publicationDate.getTime())); + version.setDataset(dataset); + Dataverse dataverse = new Dataverse(); + dataverse.setName("LibraScholar"); + dataset.setOwner(dataverse); + System.setProperty(SITE_URL, 
"https://librascholar.org"); + boolean hideFileUrls = false; + if (hideFileUrls) { + System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); + } + + FileMetadata fmd = new FileMetadata(); + DataFile dataFile = new DataFile(); + dataFile.setId(42l); + dataFile.setFilesize(1234); + dataFile.setContentType("text/plain"); + dataFile.setProtocol("doi"); + dataFile.setAuthority("10.5072/FK2"); + dataFile.setIdentifier("7V5MPI"); + fmd.setDatasetVersion(version); + fmd.setDataFile(dataFile); + fmd.setLabel("README.md"); + fmd.setDescription("README file."); + List fileMetadatas = new ArrayList<>(); + fileMetadatas.add(fmd); + dataFile.setFileMetadatas(fileMetadatas); + ; + dataFile.setOwner(dataset); + version.setFileMetadatas(fileMetadatas); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + if(schemaDotOrgExporter == null) logger.fine("sdoe" + " null"); + try { + schemaDotOrgExporter.exportDataset(version, json, byteArrayOutputStream); + } catch (Exception e) { + e.printStackTrace(); } + String jsonLdStr = byteArrayOutputStream.toString(); + return JsonUtil.getJsonObject(jsonLdStr); } /** diff --git a/src/test/resources/json/dataset-long-description.json b/src/test/resources/json/dataset-long-description.json new file mode 100644 index 00000000000..a6e5c291322 --- /dev/null +++ b/src/test/resources/json/dataset-long-description.json @@ -0,0 +1,362 @@ +{ + "datasetVersion": { + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "Darwin's Finches", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorIdentifierScheme": { + "typeName": "authorIdentifierScheme", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ORCID" + }, + "authorIdentifier": { + "typeName": "authorIdentifier", + "multiple": false, + "typeClass": 
"primitive", + "value": "0000-0002-1825-0097" + }, + "authorAffiliation": { + "value": "Birds Inc.", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { + "datasetContactEmail": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value": "finch@mailinator.com" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ + { + "dsDescriptionValue": { + "value": "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc, quis gravida magna mi a libero. 
Fusce vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed, nonummy id, metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; In ac dui quis mi consectetuer lacinia. Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget, imperdiet nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. Praesent adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy metus. Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis vestibulum. Nullam nulla eros, ultricies sit amet, nonummy id, imperdiet feugiat, pede. Sed lectus. Donec mollis hendrerit risus. Phasellus nec sem in justo pellentesque facilisis. Etiam imperdiet imperdiet orci. Nunc nec neque. Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi. Curabitur ligula sapien, tincidunt non, euismod vitae, posuere imperdiet, leo. Maecenas malesuada. Praesent congue erat at massa. Sed cursus turpis vitae tortor. Donec posuere vulputate arcu. Phasellus accumsan cursus velit. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Sed aliquam, nisi quis porttitor congue, elit erat euismod orci, ac placerat dolor lectus quis orci. Phasellus consectetuer vestibulum elit. Aenean tellus metus, bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla pede sit amet augue. In turpis. Pellentesque posuere. Praesent turpis. Aenean posuere, tortor sed cursus feugiat, nunc augue blandit nunc, eu sollicitudin urna dolor sagittis lacus. Donec elit libero, sodales nec, volutpat a, suscipit non, turpis. Nullam sagittis. Suspendisse pulvinar, augue ac venenatis condimentum, sem libero volutpat nibh, nec pellentesque velit pede quis nunc. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Fusce id purus. 
Ut varius tincidunt libero. Phasellus dolor. Maecenas vestibulum mollis diam. Pellentesque ut neque. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. In dui magna, posuere eget, vestibulum et, tempor auctor, justo. In ac felis quis tortor malesuada pretium. Pellentesque auctor neque nec urna. Proin sapien ipsum, porta a, auctor quis, euismod ut, mi. Aenean viverra rhoncus pede. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Ut non enim eleifend felis pretium feugiat. Vivamus quis mi. Phasellus a est. Phasellus magna. In hac habitasse platea dictumst. Curabitur at lacus ac velit ornare lobortis. Curabitur a felis in nunc fringilla tristique. Morbi mattis ullamcorper velit. Phasellus gravida semper nisi. Nullam vel sem. Pellentesque libero tortor, tincidunt et, tincidunt eget, semper nec, quam. Sed hendrerit. Morbi ac felis. Nunc egestas, augue at pellentesque laoreet, felis eros vehicula leo, at malesuada velit leo quis pede. Donec interdum, metus et hendrerit aliquet, dolor diam sagittis ligula, eget egestas libero turpis vel mi. Nunc nulla. Fusce risus nisl, viverra et, tempor et, pretium in, sapien. Donec venenatis vulputate lorem. Morbi nec metus. Phasellus blandit leo ut odio. Maecenas ullamcorper, dui et placerat feugiat, eros pede varius nisi, condimentum viverra felis nunc et lorem. Sed magna purus, fermentum eu, tincidunt eu, varius ut, felis. In auctor lobortis lacus. Quisque libero metus, condimentum nec, tempor a, commodo mollis, magna. 
Vestibulum ullamcorper mauris at ligul beyond 5000 chars", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + }, + { + "typeName": "keyword", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm1" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary1" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL1.org" + } + }, + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm2" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary2" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL2.org" + } + }, + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "keywords, with, commas" + } + } + ] + }, + { + "typeName": "topicClassification", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "topicClassValue": { + "typeName": "topicClassValue", + "multiple": false, + "typeClass": "primitive", + "value": "tcTerm1" + }, + "topicClassVocab": { + "typeName": "topicClassVocab", + "multiple": false, + "typeClass": "primitive", + "value": "tcVocab1" + }, + "topicClassVocabURI": { + "typeName": "topicClassVocabURI", + "multiple": false, + "typeClass": 
"primitive", + "value": "http://example.com/tcTerm1" + } + } + ] + }, + { + "typeName": "contributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Collector" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "Holmes, Sherlock" + } + }, + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Funder" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "National Science Foundation" + } + }, + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Collector" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "Watson, John" + } + } + ] + }, + { + "typeName": "grantNumber", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "grantNumberAgency": { + "typeName": "grantNumberAgency", + "multiple": false, + "typeClass": "primitive", + "value": "National Institutes of Health" + }, + "grantNumberValue": { + "typeName": "grantNumberValue", + "multiple": false, + "typeClass": "primitive", + "value": "1245" + } + } + ] + }, + { + "typeName": "publication", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "Finch, Fiona 2018. \"The Finches.\" American Ornithological Journal 60 (4): 990-1005." 
+ }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "doi" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "10.5072/FK2/RV16HK" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "https://doi.org/10.5072/FK2/RV16HK" + } + } + ] + }, + { + "typeName": "timePeriodCovered", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "2002" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "2005" + } + }, + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "2001-10-01" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "2015-11-15" + } + } + ] + } + ], + "displayName": "Citation Metadata", + "name": "citation" + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "name": "geospatial", + "fields": [ + { + "typeName": "geographicCoverage", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "city": { + "typeName": "city", + "multiple": false, + "typeClass": "primitive", + "value": "Columbus" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "Ohio" + }, + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "United States" + }, + "otherGeographicCoverage": { + "typeName": "otherGeographicCoverage", + "multiple": false, + "typeClass": "primitive", + "value": "North America" + } + }, + { + "country": { + "typeName": "country", + "multiple": false, 
+ "typeClass": "controlledVocabulary", + "value": "United States" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "Wisconsin" + } + } + ] + } + ] + } + } + } +} From 8fbce47b7345cbeb96e46c4353376ca0c518dd01 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 11:53:23 -0400 Subject: [PATCH 102/322] release notes --- doc/release-notes/7349-1-schema.org-updates.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7349-1-schema.org-updates.md diff --git a/doc/release-notes/7349-1-schema.org-updates.md b/doc/release-notes/7349-1-schema.org-updates.md new file mode 100644 index 00000000000..2934a596001 --- /dev/null +++ b/doc/release-notes/7349-1-schema.org-updates.md @@ -0,0 +1,3 @@ +The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. + +Backward compatibility - descriptions are now joined and truncated to less than 5K characters. 
\ No newline at end of file From e5993804d258043139adae35afb05a46185aea81 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 18:58:23 -0400 Subject: [PATCH 103/322] restore citation/identifier, add test for url --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 1 + .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 1 + 2 files changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 41b3cba8bd8..e09c458915c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1920,6 +1920,7 @@ public String getJsonLd() { } if (pubUrl != null) { citationEntry.add("@id", pubUrl); + citationEntry.add("identifier", pubUrl); citationEntry.add("url", pubUrl); } if (addToArray) { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..8083d4a8851 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -160,6 +160,7 @@ public void testExportDataset() throws Exception { assertEquals("Finch, Fiona 2018. 
\"The Finches.\" American Ornithological Journal 60 (4): 990-1005.", json2.getJsonArray("citation").getJsonObject(0).getString("text")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("@id")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("identifier")); + assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("url")); assertEquals("2002/2005", json2.getJsonArray("temporalCoverage").getString(0)); assertEquals("2001-10-01/2015-11-15", json2.getJsonArray("temporalCoverage").getString(1)); assertEquals(null, json2.getString("schemaVersion", null)); From 286b0825c601addbca463dd4cd02bc571e6f67cf Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 12:21:47 -0400 Subject: [PATCH 104/322] fix text->name test --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 8083d4a8851..c0dfcd9e34d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -157,7 +157,7 @@ public void testExportDataset() throws Exception { // This dataset, for example, has multiple keywords separated by commas: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/24034&version=2.0 assertEquals("keywords, with, commas", json2.getJsonArray("keywords").getString(4)); assertEquals("CreativeWork", json2.getJsonArray("citation").getJsonObject(0).getString("@type")); - assertEquals("Finch, Fiona 2018. 
\"The Finches.\" American Ornithological Journal 60 (4): 990-1005.", json2.getJsonArray("citation").getJsonObject(0).getString("text")); + assertEquals("Finch, Fiona 2018. \"The Finches.\" American Ornithological Journal 60 (4): 990-1005.", json2.getJsonArray("citation").getJsonObject(0).getString("name")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("@id")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("identifier")); assertEquals("https://doi.org/10.5072/FK2/RV16HK", json2.getJsonArray("citation").getJsonObject(0).getString("url")); From c28b4b3c277fceb13cd05f0bbe2a80f5fff11d7b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 12:40:40 -0400 Subject: [PATCH 105/322] release notes --- doc/release-notes/7349-2-schema.org-updates.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7349-2-schema.org-updates.md diff --git a/doc/release-notes/7349-2-schema.org-updates.md b/doc/release-notes/7349-2-schema.org-updates.md new file mode 100644 index 00000000000..41f2dfb766a --- /dev/null +++ b/doc/release-notes/7349-2-schema.org-updates.md @@ -0,0 +1,3 @@ +The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. + +Backward compatibility - the "citation"/"text" key has been replaced by a "citation"/"name" key. 
\ No newline at end of file From 80059c9cb65cc042550f2f3fd470198ed337636e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:16:43 -0400 Subject: [PATCH 106/322] redo of schema.org file changes --- .../java/edu/harvard/iq/dataverse/DatasetVersion.java | 8 +++----- .../iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..314e06149ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -2012,7 +2012,7 @@ public String getJsonLd() { } fileObject.add("@type", "DataDownload"); fileObject.add("name", fileMetadata.getLabel()); - fileObject.add("fileFormat", fileMetadata.getDataFile().getContentType()); + fileObject.add("encodingFormat", fileMetadata.getDataFile().getContentType()); fileObject.add("contentSize", fileMetadata.getDataFile().getFilesize()); fileObject.add("description", fileMetadata.getDescription()); fileObject.add("@id", filePidUrlAsString); @@ -2021,10 +2021,8 @@ public String getJsonLd() { if (hideFilesBoolean != null && hideFilesBoolean.equals("true")) { // no-op } else { - if (FileUtil.isPubliclyDownloadable(fileMetadata)) { - String nullDownloadType = null; - fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); - } + String nullDownloadType = null; + fileObject.add("contentUrl", dataverseSiteUrl + FileUtil.getFileDownloadUrlPath(nullDownloadType, fileMetadata.getDataFile().getId(), false, fileMetadata.getId())); } fileArray.add(fileObject); } diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..f5bc5fd97d0 
100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -181,7 +181,7 @@ public void testExportDataset() throws Exception { assertEquals(2, json2.getJsonArray("spatialCoverage").size()); assertEquals("DataDownload", json2.getJsonArray("distribution").getJsonObject(0).getString("@type")); assertEquals("README.md", json2.getJsonArray("distribution").getJsonObject(0).getString("name")); - assertEquals("text/plain", json2.getJsonArray("distribution").getJsonObject(0).getString("fileFormat")); + assertEquals("text/plain", json2.getJsonArray("distribution").getJsonObject(0).getString("encodingFormat")); assertEquals(1234, json2.getJsonArray("distribution").getJsonObject(0).getInt("contentSize")); assertEquals("README file.", json2.getJsonArray("distribution").getJsonObject(0).getString("description")); assertEquals("https://doi.org/10.5072/FK2/7V5MPI", json2.getJsonArray("distribution").getJsonObject(0).getString("@id")); From d93df2ae32f1b00a7283f298b6de54d9008cd5b6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:29:11 -0400 Subject: [PATCH 107/322] release note --- doc/release-notes/7349-3-schema.org-updates.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7349-3-schema.org-updates.md diff --git a/doc/release-notes/7349-3-schema.org-updates.md b/doc/release-notes/7349-3-schema.org-updates.md new file mode 100644 index 00000000000..6a9c5e2b9b0 --- /dev/null +++ b/doc/release-notes/7349-3-schema.org-updates.md @@ -0,0 +1,3 @@ +The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. + +Backward compatibility - file entries now have the mimetype reported as 'encodingFormat' rather than 'fileFormat' to better conform with the Schema.org specification for DataDownload entries. 
\ No newline at end of file From 0b375cfb38c5303c18045a80aaf347977414b8ef Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 17:43:55 -0400 Subject: [PATCH 108/322] add type for person/org, add sameas, fix affiliation --- .../harvard/iq/dataverse/DatasetVersion.java | 50 +++++++++---- .../iq/dataverse/util/PersonOrOrgUtil.java | 72 +++++++++++++++++++ 2 files changed, 107 insertions(+), 15 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..64371148254 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.util.MarkupChecker; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.branding.BrandingUtil; @@ -1802,27 +1803,46 @@ public String getJsonLd() { for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) { JsonObjectBuilder author = Json.createObjectBuilder(); String name = datasetAuthor.getName().getDisplayValue(); + String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); DatasetField authorAffiliation = datasetAuthor.getAffiliation(); String affiliation = null; if (authorAffiliation != null) { affiliation = datasetAuthor.getAffiliation().getDisplayValue(); } - // We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization". 
- //author.add("@type", "Person"); - author.add("name", name); - // We are aware that the following error is thrown by https://search.google.com/structured-data/testing-tool - // "The property affiliation is not recognized by Google for an object of type Thing." - // Someone at Google has said this is ok. - // This logic could be moved into the `if (authorAffiliation != null)` block above. - if (!StringUtil.isEmpty(affiliation)) { - author.add("affiliation", affiliation); - } - String identifierAsUrl = datasetAuthor.getIdentifierAsUrl(); - if (identifierAsUrl != null) { - // It would be valid to provide an array of identifiers for authors but we have decided to only provide one. - author.add("@id", identifierAsUrl); - author.add("identifier", identifierAsUrl); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, (identifierAsUrl==null)); + String givenName= entity.getString("givenName"); + String familyName= entity.getString("familyName"); + + if (entity.getBoolean("isPerson")) { + // Person + author.add("@type", "Person"); + if (givenName != null) { + author.add("givenName", givenName); + } + if (familyName != null) { + author.add("familyName", familyName); + } + if (!StringUtil.isEmpty(affiliation)) { + author.add("affiliation", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } + //Currently all possible identifier URLs are for people not Organizations + if(identifierAsUrl != null) { + author.add("sameas", identifierAsUrl); + //Legacy - not sure if these are still useful + author.add("@id", identifierAsUrl); + author.add("identifier", identifierAsUrl); + + } + } else { + // Organization + author.add("@type", "Organization"); + if (!StringUtil.isEmpty(affiliation)) { + author.add("parentOrganization", Json.createObjectBuilder().add("@type", "Organization").add("name", affiliation)); + } } + // Both cases + author.add("name", entity.getString("name")); + //And add to the array authors.add(author); } JsonArray 
authorsArray = authors.build(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java new file mode 100644 index 00000000000..8d767d2e535 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -0,0 +1,72 @@ +package edu.harvard.iq.dataverse.util; + +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; + +import edu.harvard.iq.dataverse.export.openaire.Cleanup; +import edu.harvard.iq.dataverse.export.openaire.FirstNames; +import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; + +/** + * + * @author qqmyers + * + * Adapted from earlier code in OpenAireExportUtil + * + * Implements an algorithm derived from code at DataCite to determine + * whether a name is that of a Person or Organization and, if the + * former, to pull out the given and family names. + */ + +public class PersonOrOrgUtil { + + public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied) { + name = Cleanup.normalize(name); + + String givenName = null; + String familyName = null; + // Datacite algorithm, + // https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 + boolean isOrganization = Organizations.getInstance().isOrganization(name); + if (name.contains(",")) { + givenName = FirstNames.getInstance().getFirstName(name); + // contributorName=, + if (givenName != null && !isOrganization) { + // givenName ok + isOrganization = false; + // contributor_map.put("nameType", "Personal"); + if (!name.replaceFirst(",", "").contains(",")) { + // contributorName=, + String[] fullName = name.split(", "); + givenName = fullName[1]; + familyName = fullName[0]; + } + } else if (isOrganization || organizationIfTied) { + isOrganization = true; + } + + } else { + givenName = FirstNames.getInstance().getFirstName(name); + + if (givenName != null && !isOrganization) { 
+ isOrganization = false; + if (givenName.length() + 1 < name.length()) { + familyName = name.substring(givenName.length() + 1); + } + } else { + // default + if (isOrganization || organizationIfTied) { + isOrganization = true; + } + } + } + JsonObjectBuilder job = new NullSafeJsonBuilder(); + job.add("fullname", name); + job.add("givenName", givenName); + job.add("familyName", familyName); + job.add("isPerson", !isOrganization); + return job.build(); + + } +} From 5bd58d8f4390fc4eed31ba8f64835b527ca939fb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 18 Oct 2022 18:02:01 -0400 Subject: [PATCH 109/322] typo --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 64371148254..2aca5cc9705 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1841,7 +1841,7 @@ public String getJsonLd() { } } // Both cases - author.add("name", entity.getString("name")); + author.add("name", entity.getString("fullname")); //And add to the array authors.add(author); } From 63cd77d2a834221889125fbda952bd193e44d099 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:34:46 -0400 Subject: [PATCH 110/322] capitalization --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 2aca5cc9705..8aaf0d2fd89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1827,7 +1827,7 @@ public String getJsonLd() { } //Currently all possible identifier URLs are for people not Organizations if(identifierAsUrl != null) { - 
author.add("sameas", identifierAsUrl); + author.add("sameAs", identifierAsUrl); //Legacy - not sure if these are still useful author.add("@id", identifierAsUrl); author.add("identifier", identifierAsUrl); From 8084fb8796700eb56fdbcc17a6b792946875a5f0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:37:45 -0400 Subject: [PATCH 111/322] update tests --- .../iq/dataverse/export/SchemaDotOrgExporterTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..2327de43ca4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -137,13 +137,15 @@ public void testExportDataset() throws Exception { assertEquals("https://doi.org/10.5072/FK2/IMK5A4", json2.getString("identifier")); assertEquals("Darwin's Finches", json2.getString("name")); assertEquals("Finch, Fiona", json2.getJsonArray("creator").getJsonObject(0).getString("name")); - assertEquals("Birds Inc.", json2.getJsonArray("creator").getJsonObject(0).getString("affiliation")); + assertEquals("Birds Inc.", json2.getJsonArray("creator").getJsonObject(0).getJsonObject("affiliation").getString("name")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("@id")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("identifier")); + assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("creator").getJsonObject(0).getString("sameAs")); assertEquals("Finch, Fiona", json2.getJsonArray("author").getJsonObject(0).getString("name")); - assertEquals("Birds Inc.", json2.getJsonArray("author").getJsonObject(0).getString("affiliation")); + assertEquals("Birds Inc.", 
json2.getJsonArray("author").getJsonObject(0).getJsonObject("affiliation").getString("name")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("@id")); assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("identifier")); + assertEquals("https://orcid.org/0000-0002-1825-0097", json2.getJsonArray("author").getJsonObject(0).getString("sameAs")); assertEquals("1955-11-05", json2.getString("datePublished")); assertEquals("1955-11-05", json2.getString("dateModified")); assertEquals("1", json2.getString("version")); From 489d0e36e9b81c095b7387522d95b92516c00b69 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 13:40:43 -0400 Subject: [PATCH 112/322] legacy test issue --- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 2327de43ca4..68bab7c8bb4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -170,7 +170,7 @@ public void testExportDataset() throws Exception { assertEquals("LibraScholar", json2.getJsonObject("includedInDataCatalog").getString("name")); assertEquals("https://librascholar.org", json2.getJsonObject("includedInDataCatalog").getString("url")); assertEquals("Organization", json2.getJsonObject("publisher").getString("@type")); - assertEquals("LibraScholar", json2.getJsonObject("provider").getString("name")); + assertEquals("LibraScholar", json2.getJsonObject("publisher").getString("name")); assertEquals("Organization", json2.getJsonObject("provider").getString("@type")); assertEquals("LibraScholar", json2.getJsonObject("provider").getString("name")); 
assertEquals("Organization", json2.getJsonArray("funder").getJsonObject(0).getString("@type")); From c3260a5009c99f0765d012d9ce20ef27048cb738 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:41:21 -0400 Subject: [PATCH 113/322] change fullname -> fullName --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- .../java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 8aaf0d2fd89..8e9a0950b2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1841,7 +1841,7 @@ public String getJsonLd() { } } // Both cases - author.add("name", entity.getString("fullname")); + author.add("name", entity.getString("fullName")); //And add to the array authors.add(author); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 8d767d2e535..add5c8285ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -62,7 +62,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } JsonObjectBuilder job = new NullSafeJsonBuilder(); - job.add("fullname", name); + job.add("fullName", name); job.add("givenName", givenName); job.add("familyName", familyName); job.add("isPerson", !isOrganization); From 3ddc7960f24a63bf322d90befd71c3c440ab3101 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:41:31 -0400 Subject: [PATCH 114/322] note todos --- .../export/openaire/OpenAireExportUtil.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java 
b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49fe203b96d..bea3858a60e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -256,7 +256,10 @@ public static void writeCreatorsElement(XMLStreamWriter xmlw, DatasetVersionDTO creator_map.put("nameType", "Personal"); nameType_check = true; } - + // ToDo - the algorithm to determine if this is a Person or Organization here + // has been abstracted into a separate + // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here + // to avoid duplication/variants of the algorithm creatorName = Cleanup.normalize(creatorName); // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 if (creatorName.contains(",")) { @@ -706,6 +709,11 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu boolean nameType_check = false; Map contributor_map = new HashMap(); + // ToDo - the algorithm to determine if this is a Person or Organization here + // has been abstracted into a separate + // edu.harvard.iq.dataverse.util.PersonOrOrgUtil class that could be used here + // to avoid duplication/variants of the algorithm + contributorName = Cleanup.normalize(contributorName); // Datacite algorithm, https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 if (contributorName.contains(",")) { @@ -717,6 +725,9 @@ public static void writeContributorElement(XMLStreamWriter xmlw, String contribu // givenName ok contributor_map.put("nameType", "Personal"); nameType_check = true; + // re: the above toDo - the ("ContactPerson".equals(contributorType) && + // !isValidEmailAddress(contributorName)) clause in the next line could/should + // be sent as the OrgIfTied boolean parameter } else if (isOrganization || ("ContactPerson".equals(contributorType) && !isValidEmailAddress(contributorName))) { 
contributor_map.put("nameType", "Organizational"); } From 05ea63aa98a7c896fbfbbfa00eb4c6755bd317ad Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:42:27 -0400 Subject: [PATCH 115/322] add tests same examples as in OrganizationTest but using the extracted algorithm and also checking given/family name in relevant cases --- .../dataverse/util/PersonOrOrgUtilTest.java | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java new file mode 100644 index 00000000000..32c72e9497c --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -0,0 +1,92 @@ +package edu.harvard.iq.dataverse.util; + +import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + +import org.junit.Ignore; +import org.junit.Test; +import static org.junit.Assert.*; + +import javax.json.JsonObject; + +public class PersonOrOrgUtilTest { + + public PersonOrOrgUtilTest() { + } + + @Test + public void testOrganizationSimpleName() { + verifyIsOrganization("IBM"); + verifyIsOrganization("Harvard University"); + } + + @Test + public void testOrganizationCOMPLEXName() { + verifyIsOrganization("The Institute for Quantitative Social Science"); + verifyIsOrganization("Council on Aging"); + verifyIsOrganization("The Ford Foundation"); + verifyIsOrganization("United Nations Economic and Social Commission for Asia and the Pacific (UNESCAP)"); + verifyIsOrganization("Michael J. Fox Foundation for Parkinson's Research"); + } + + @Test + public void testOrganizationComaOrDash() { + verifyIsOrganization("Digital Archive of Massachusetts Anti-Slavery and Anti-Segregation Petitions, Massachusetts Archives, Boston MA"); + verifyIsOrganization("U.S. 
Department of Commerce, Bureau of the Census, Geography Division"); + verifyIsOrganization("Harvard Map Collection, Harvard College Library"); + verifyIsOrganization("Geographic Data Technology, Inc. (GDT)"); + } + + @Ignore + @Test + public void testOrganizationES() { + //Spanish recognition is not enabled - see export/Organization.java + verifyIsOrganization("Compañía de San Fernando"); + } + + /** + * Name is composed of: + * + */ + @Test + public void testName() { + verifyIsPerson("Jorge Mario Bergoglio", "Jorge Mario", "Bergoglio"); + verifyIsPerson("Bergoglio", null, null); + verifyIsPerson("Francesco Cadili", "Francesco", "Cadili"); + // This Philip Seymour Hoffman example is from ShibUtilTest. + verifyIsPerson("Philip Seymour Hoffman", "Philip Seymour", "Hoffman"); + + // test Smith (is also a name) + verifyIsPerson("John Smith", "John", "Smith"); + // resolved using hint file + verifyIsPerson("Guido van Rossum", "Guido", "van Rossum"); + // test only name + verifyIsPerson("Francesco", "Francesco", null); + // test only family name + verifyIsPerson("Cadili", null, null); + } + + private void verifyIsOrganization(String fullName) { + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false); + System.out.println(JsonUtil.prettyPrint(obj)); + assertEquals(obj.getString("fullName"),fullName); + assertFalse(obj.getBoolean("isPerson")); + + } + + private void verifyIsPerson(String fullName, String givenName, String familyName) { + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false); + System.out.println(JsonUtil.prettyPrint(obj)); + assertEquals(obj.getString("fullName"),fullName); + assertTrue(obj.getBoolean("isPerson")); + assertEquals(obj.containsKey("givenName"), givenName != null); + if(obj.containsKey("givenName") && givenName != null) { + assertEquals(obj.getString("givenName"),givenName); + } + assertEquals(obj.containsKey("familyName"), familyName != null); + if(obj.containsKey("familyName") && familyName != null) { 
+ assertEquals(obj.getString("familyName"),familyName); + } + } + + } From 6ca9f7099698bcfe08f6fbc98379f3f989d6a283 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:44:02 -0400 Subject: [PATCH 116/322] don't send giveName for orgs it does not appear to be useful given the tests in PersonOrOrgUtilTest --- .../java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index add5c8285ae..468949e8a40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -44,6 +44,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } else if (isOrganization || organizationIfTied) { isOrganization = true; + givenName=null; } } else { @@ -58,6 +59,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati // default if (isOrganization || organizationIfTied) { isOrganization = true; + givenName=null; } } } From a5fafd079d64ed334fa45fa238765815bd262f05 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 19 Oct 2022 17:48:12 -0400 Subject: [PATCH 117/322] release note --- doc/release-notes/7349-4-schema.org-updates.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/7349-4-schema.org-updates.md diff --git a/doc/release-notes/7349-4-schema.org-updates.md b/doc/release-notes/7349-4-schema.org-updates.md new file mode 100644 index 00000000000..1247471f137 --- /dev/null +++ b/doc/release-notes/7349-4-schema.org-updates.md @@ -0,0 +1,3 @@ +The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. 
+ +Backward compatibility - author/creators now have an @type of Person or Organization and any affiliation (affiliation for Person, parentOrganization for Organization) is now an object of @type Organization \ No newline at end of file From f222160d16705b99f1e942037fc68828732f9934 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 20 Oct 2022 18:06:22 -0400 Subject: [PATCH 118/322] bugfix for no givenName/familyName from algorithm --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 8e9a0950b2a..278ab246fcf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1810,8 +1810,8 @@ public String getJsonLd() { affiliation = datasetAuthor.getAffiliation().getDisplayValue(); } JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, (identifierAsUrl==null)); - String givenName= entity.getString("givenName"); - String familyName= entity.getString("familyName"); + String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; + String familyName= entity.containsKey("familyName")? 
entity.getString("familyName"):null; if (entity.getBoolean("isPerson")) { // Person From 41c30d9de4970b57f8547dbb443d594aefc92e9e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 21 Oct 2022 15:22:30 -0400 Subject: [PATCH 119/322] add assumeCommaInPersonName and tests --- .../harvard/iq/dataverse/DatasetVersion.java | 2 +- .../iq/dataverse/util/PersonOrOrgUtil.java | 51 ++++++++++++++----- .../dataverse/util/PersonOrOrgUtilTest.java | 14 ++++- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 278ab246fcf..1204d1dd4f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1811,7 +1811,7 @@ public String getJsonLd() { } JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, (identifierAsUrl==null)); String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; - String familyName= entity.containsKey("familyName")? entity.getString("familyName"):null; + String familyName= entity.containsKey("familyName") ? entity.getString("familyName"):null; if (entity.getBoolean("isPerson")) { // Person diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 468949e8a40..b8089422fcd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -17,18 +17,37 @@ * Implements an algorithm derived from code at DataCite to determine * whether a name is that of a Person or Organization and, if the * former, to pull out the given and family names. + * + * Adds a parameter that can improve accuracy, e.g. for curated + * repositories, allowing the code to assume that all Person entries are + * in , order. 
+ * + * Possible ToDo - one could also allow local configuration of specific + * words that will automatically categorize one-off cases that the + * algorithm would otherwise mis-categorize. For example, the code + * appears to not recognize names ending in "Project" as an + * Organization. + * */ public class PersonOrOrgUtil { + static boolean assumeCommaInPersonName = false; + + static { + setAssumeCommaInPersonName(Boolean.parseBoolean(System.getProperty("dataverse.personOrOrg.assumeCommaInPersonName", "false"))); + } + public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied) { name = Cleanup.normalize(name); String givenName = null; String familyName = null; - // Datacite algorithm, + // adapted from a Datacite algorithm, // https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 boolean isOrganization = Organizations.getInstance().isOrganization(name); + // ToDo - could add a check of stop words to handle problem cases, i.e. if name + // contains something in that list, it is an org if (name.contains(",")) { givenName = FirstNames.getInstance().getFirstName(name); // contributorName=, @@ -48,18 +67,21 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } else { - givenName = FirstNames.getInstance().getFirstName(name); - - if (givenName != null && !isOrganization) { - isOrganization = false; - if (givenName.length() + 1 < name.length()) { - familyName = name.substring(givenName.length() + 1); - } + if (assumeCommaInPersonName) { + isOrganization = true; } else { - // default - if (isOrganization || organizationIfTied) { - isOrganization = true; - givenName=null; + givenName = FirstNames.getInstance().getFirstName(name); + + if (givenName != null && !isOrganization) { + isOrganization = false; + if (givenName.length() + 1 < name.length()) { + familyName = name.substring(givenName.length() + 1); + } + } else { + // default + if (isOrganization || organizationIfTied) { + 
isOrganization = true; + } } } } @@ -71,4 +93,9 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati return job.build(); } + + public static void setAssumeCommaInPersonName(boolean assume) { + assumeCommaInPersonName = assume; + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java index 32c72e9497c..dbda622b536 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -30,7 +30,19 @@ public void testOrganizationCOMPLEXName() { } @Test - public void testOrganizationComaOrDash() { + public void testOrganizationAcademicName() { + + verifyIsOrganization("John Smith Center"); + verifyIsOrganization("John Smith Group"); + //An example the base algorithm doesn't handle: + PersonOrOrgUtil.setAssumeCommaInPersonName(true); + verifyIsOrganization("John Smith Project"); + PersonOrOrgUtil.setAssumeCommaInPersonName(false); + } + + + @Test + public void testOrganizationCommaOrDash() { verifyIsOrganization("Digital Archive of Massachusetts Anti-Slavery and Anti-Segregation Petitions, Massachusetts Archives, Boston MA"); verifyIsOrganization("U.S. 
Department of Commerce, Bureau of the Census, Geography Division"); verifyIsOrganization("Harvard Map Collection, Harvard College Library"); From d5d365589f627bedd529cbb93be5af33ae63e560 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 21 Oct 2022 17:02:44 -0400 Subject: [PATCH 120/322] update docs/release note --- doc/release-notes/7349-4-schema.org-updates.md | 2 ++ doc/sphinx-guides/source/installation/config.rst | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/release-notes/7349-4-schema.org-updates.md b/doc/release-notes/7349-4-schema.org-updates.md index 1247471f137..2c78243dc29 100644 --- a/doc/release-notes/7349-4-schema.org-updates.md +++ b/doc/release-notes/7349-4-schema.org-updates.md @@ -1,3 +1,5 @@ The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. +New jvm-option: dataverse.personOrOrg.assumeCommaInPersonName, default is false + Backward compatibility - author/creators now have an @type of Person or Organization and any affiliation (affiliation for Person, parentOrganization for Organization) is now an object of @type Organization \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f2de9d5702f..3e01f372c9b 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1627,8 +1627,6 @@ By default, download URLs to files will be included in Schema.org JSON-LD output ``./asadmin create-jvm-options '-Ddataverse.files.hide-schema-dot-org-download-urls=true'`` -Please note that there are other reasons why download URLs may not be included for certain files such as if a guestbook entry is required or if the file is restricted. - For more on Schema.org JSON-LD, see the :doc:`/admin/metadataexport` section of the Admin Guide. .. 
_useripaddresssourceheader: @@ -1658,6 +1656,14 @@ This setting is useful in cases such as running your Dataverse installation behi "HTTP_FORWARDED", "HTTP_VIA", "REMOTE_ADDR" + +dataverse.personOrOrg.assumeCommaInPersonName ++++++++++++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata export and the Schema.org metadata included in DatasetPages tries to infer whether each entry in the Author field is a Person or Organization. If you are sure that +users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this true to always assume entries without a comma are for Organizations. The default is false. .. _:ApplicationServerSettings: From ebb138042f8b9134482a0a2119b9008f76ab80a1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 09:57:04 -0400 Subject: [PATCH 121/322] added org Phrases for DANS vs creating a second PR --- .../harvard/iq/dataverse/DatasetVersion.java | 2 +- .../iq/dataverse/util/PersonOrOrgUtil.java | 82 +++++++++++++++---- .../iq/dataverse/util/json/JsonUtil.java | 6 ++ .../dataverse/util/PersonOrOrgUtilTest.java | 18 +++- 4 files changed, 90 insertions(+), 18 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 1204d1dd4f1..c374204f73f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1809,7 +1809,7 @@ public String getJsonLd() { if (authorAffiliation != null) { affiliation = datasetAuthor.getAffiliation().getDisplayValue(); } - JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, (identifierAsUrl==null)); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl==null)); String givenName= entity.containsKey("givenName") ? 
entity.getString("givenName"):null; String familyName= entity.containsKey("familyName") ? entity.getString("familyName"):null; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index b8089422fcd..497cc689983 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -1,11 +1,18 @@ package edu.harvard.iq.dataverse.util; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +import javax.json.JsonArray; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import javax.json.JsonString; import edu.harvard.iq.dataverse.export.openaire.Cleanup; import edu.harvard.iq.dataverse.export.openaire.FirstNames; import edu.harvard.iq.dataverse.export.openaire.Organizations; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; /** @@ -18,36 +25,63 @@ * whether a name is that of a Person or Organization and, if the * former, to pull out the given and family names. * - * Adds a parameter that can improve accuracy, e.g. for curated - * repositories, allowing the code to assume that all Person entries are - * in , order. + * Adds parameters that can improve accuracy: + * + * * e.g. for curated repositories, allowing the code to assume that all + * Person entries are in , order. * - * Possible ToDo - one could also allow local configuration of specific - * words that will automatically categorize one-off cases that the - * algorithm would otherwise mis-categorize. For example, the code - * appears to not recognize names ending in "Project" as an - * Organization. + * * allow local configuration of specific words/phrases that will + * automatically categorize one-off cases that the algorithm would + * otherwise mis-categorize. 
For example, the code appears to not + * recognize names ending in "Project" as an Organization. * */ public class PersonOrOrgUtil { + private static final Logger logger = Logger.getLogger(PersonOrOrgUtil.class.getCanonicalName()); + static boolean assumeCommaInPersonName = false; + static List orgPhrases; static { setAssumeCommaInPersonName(Boolean.parseBoolean(System.getProperty("dataverse.personOrOrg.assumeCommaInPersonName", "false"))); + setOrgPhraseArray(System.getProperty("dataverse.personOrOrg.orgPhraseArray", null)); } - public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied) { + /** + * This method tries to determine if a name belongs to a person or an + * organization and, if it is a person, what the given and family names are. The + * core algorithm is adapted from a Datacite algorithm, see + * https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 + * + * @param name + * - the name to test + * @param organizationIfTied + * - if a given name isn't found, should the name be assumed to be + * from an organization. This could be a generic true/false or + * information from some non-name aspect of the entity, e.g. which + * field is in use, or whether a .edu email exists, etc. + * @param isPerson + * - if this is known to be a person due to other info (i.e. they + * have an ORCID). In this case the algorithm is just looking for + * given/family names. + * @return + */ + public static JsonObject getPersonOrOrganization(String name, boolean organizationIfTied, boolean isPerson) { name = Cleanup.normalize(name); String givenName = null; String familyName = null; - // adapted from a Datacite algorithm, - // https://github.com/IQSS/dataverse/issues/2243#issuecomment-358615313 - boolean isOrganization = Organizations.getInstance().isOrganization(name); - // ToDo - could add a check of stop words to handle problem cases, i.e. 
if name - // contains something in that list, it is an org + + boolean isOrganization = !isPerson && Organizations.getInstance().isOrganization(name); + if (!isOrganization) { + for (String phrase : orgPhrases) { + if (name.contains(phrase)) { + isOrganization = true; + } + } + } if (name.contains(",")) { givenName = FirstNames.getInstance().getFirstName(name); // contributorName=, @@ -63,7 +97,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } else if (isOrganization || organizationIfTied) { isOrganization = true; - givenName=null; + givenName = null; } } else { @@ -94,6 +128,24 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } + // Public for testing + public static void setOrgPhraseArray(String phraseArray) { + orgPhrases = new ArrayList(); + if (!StringUtil.isEmpty(phraseArray)) { + try { + JsonArray phrases = JsonUtil.getJsonArray(phraseArray); + phrases.forEach(val -> { + JsonString strVal = (JsonString) val; + orgPhrases.add(strVal.getString()); + }); + } catch (Exception e) { + logger.warning("Could not parse Org phrase list"); + } + } + + } + + // Public for testing public static void setAssumeCommaInPersonName(boolean assume) { assumeCommaInPersonName = assume; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index f4a3c635f8b..21ff0e03773 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -63,4 +63,10 @@ public static javax.json.JsonObject getJsonObject(String serializedJson) { return Json.createReader(rdr).readObject(); } } + + public static javax.json.JsonArray getJsonArray(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readArray(); + } + } } diff --git 
a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java index dbda622b536..b22f18ca787 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -27,6 +27,16 @@ public void testOrganizationCOMPLEXName() { verifyIsOrganization("The Ford Foundation"); verifyIsOrganization("United Nations Economic and Social Commission for Asia and the Pacific (UNESCAP)"); verifyIsOrganization("Michael J. Fox Foundation for Parkinson's Research"); + // The next example is one known to be asserted to be a Person without an entry + // in the OrgWordArray + // So we test with it in the array and then when the array is empty to verify + // the array works, resetting the array works, and the problem still exists in + // the underlying algorithm + PersonOrOrgUtil.setOrgPhraseArray("[\"Portable\"]"); + verifyIsOrganization("Portable Antiquities of the Netherlands"); + PersonOrOrgUtil.setOrgPhraseArray(null); + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization("Portable Antiquities of the Netherlands", false, false); + assertTrue(obj.getBoolean("isPerson")); } @Test @@ -79,7 +89,7 @@ public void testName() { } private void verifyIsOrganization(String fullName) { - JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false); + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false, false); System.out.println(JsonUtil.prettyPrint(obj)); assertEquals(obj.getString("fullName"),fullName); assertFalse(obj.getBoolean("isPerson")); @@ -87,7 +97,11 @@ private void verifyIsOrganization(String fullName) { } private void verifyIsPerson(String fullName, String givenName, String familyName) { - JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false); + verifyIsPerson(fullName, givenName, familyName, false); + } + + private void verifyIsPerson(String fullName, String 
givenName, String familyName, boolean isPerson) { + JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false, isPerson); System.out.println(JsonUtil.prettyPrint(obj)); assertEquals(obj.getString("fullName"),fullName); assertTrue(obj.getBoolean("isPerson")); From 4dcd8ed8e68807fd0381170f8260b381383b3171 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 11:14:34 -0400 Subject: [PATCH 122/322] fix affiliation value (no parens) --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index c374204f73f..b7eca85e95b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1807,7 +1807,7 @@ public String getJsonLd() { DatasetField authorAffiliation = datasetAuthor.getAffiliation(); String affiliation = null; if (authorAffiliation != null) { - affiliation = datasetAuthor.getAffiliation().getDisplayValue(); + affiliation = datasetAuthor.getAffiliation().getValue(); } JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl==null)); String givenName= entity.containsKey("givenName") ? 
entity.getString("givenName"):null; From 4e6f065f272d0f74cbba19e19c04e4344cccc8fb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 11:17:16 -0400 Subject: [PATCH 123/322] use brandname for catalog --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 30815c43381..15d97dde55f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1957,13 +1957,14 @@ public String getJsonLd() { job.add("license",DatasetUtil.getLicenseURI(this)); } + String installationBrandName = BrandingUtil.getInstallationBrandName(); + job.add("includedInDataCatalog", Json.createObjectBuilder() .add("@type", "DataCatalog") - .add("name", BrandingUtil.getRootDataverseCollectionName()) + .add("name", installationBrandName) .add("url", SystemConfig.getDataverseSiteUrlStatic()) ); - - String installationBrandName = BrandingUtil.getInstallationBrandName(); + /** * Both "publisher" and "provider" are included but they have the same * values. Some services seem to prefer one over the other. 
From 0184b3d9afce7d83db4c6b0bb6e5956f0daa8b4b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 12:19:23 -0400 Subject: [PATCH 124/322] logic fix --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index b7eca85e95b..061712f6864 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1809,7 +1809,7 @@ public String getJsonLd() { if (authorAffiliation != null) { affiliation = datasetAuthor.getAffiliation().getValue(); } - JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl==null)); + JsonObject entity = PersonOrOrgUtil.getPersonOrOrganization(name, false, (identifierAsUrl!=null)); String givenName= entity.containsKey("givenName") ? entity.getString("givenName"):null; String familyName= entity.containsKey("familyName") ? 
entity.getString("familyName"):null; From 545a295764e71f63dc0b3d6480805801f1ef51f6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 12:40:05 -0400 Subject: [PATCH 125/322] comma check shouldn't override isPerson --- .../java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 497cc689983..bacbb705721 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -101,7 +101,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati } } else { - if (assumeCommaInPersonName) { + if (assumeCommaInPersonName && !isPerson) { isOrganization = true; } else { givenName = FirstNames.getInstance().getFirstName(name); From ab2326c38aef3f76d1ee824606fcad8c73bc2944 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 13:18:29 -0400 Subject: [PATCH 126/322] always set givenName null for Org --- src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index bacbb705721..3a8088aac77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -115,6 +115,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati // default if (isOrganization || organizationIfTied) { isOrganization = true; + givenName=null; } } } From 0d541064d17d4b8d64d61db617e0d541613ec711 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 14:24:46 -0400 Subject: [PATCH 127/322] optimize - break out of loop when done --- 
src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index 3a8088aac77..da33fc9597e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -79,6 +79,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati for (String phrase : orgPhrases) { if (name.contains(phrase)) { isOrganization = true; + break; } } } From 1d935fe580284384328f8374c9f223f71916c4c6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 14:55:40 -0400 Subject: [PATCH 128/322] documentation of new options --- doc/sphinx-guides/source/admin/metadataexport.rst | 10 ++++++++++ doc/sphinx-guides/source/installation/config.rst | 11 ++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 78b8c8ce223..200c3a3e342 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -57,3 +57,13 @@ Downloading Metadata via API ---------------------------- The :doc:`/api/native-api` section of the API Guide explains how end users can download the metadata formats above via API. + +Exporter Configuration +---------------------- + +Two exporters - Schema.org JSONLD and OpenAire - use an algorithm to determine whether an author, or contact, name belongs to a person or organization. While the algorithm works well, there are cases in which it makes mistakes, usually inferring that an organization is a person. + +The Dataverse software implements two jvm-options that can be used to tune the algorithm: + +- :ref:`dataverse.personOrOrg.assumeCommaInPersonName` - boolean, default false. 
If true, Dataverse will assume any name without a comma must be an organization. This may be most useful for curated Dataverse instances that enforce the "family name, given name" convention. +- :ref:`dataverse.personOrOrg.orgPhraseArray` - a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 3e01f372c9b..5d4d29271f9 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1662,9 +1662,18 @@ dataverse.personOrOrg.assumeCommaInPersonName Please note that this setting is experimental. -The Schema.org metadata export and the Schema.org metadata included in DatasetPages tries to infer whether each entry in the Author field is a Person or Organization. If you are sure that +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you are sure that users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this true to always assume entries without a comma are for Organizations. The default is false. +dataverse.personOrOrg.orgPhraseArray +++++++++++++++++++++++++++++++++++++ + +Please note that this setting is experimental. + +The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. +If you have examples where an orgization name is being inferred to belong to a person, you can use this setting to force it to be recognized as an organization. 
+The value is expected to be a JsonArray of strings. Any name that contains one of the strings is assumed to be an organization. For example, "Project" is a word that is not otherwise associated with being an organization. + .. _:ApplicationServerSettings: Application Server Settings From a5ae4d782c63ba71a72f0da1748b7f62e1904434 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 28 Oct 2022 14:59:59 -0400 Subject: [PATCH 129/322] add labels --- doc/sphinx-guides/source/installation/config.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5d4d29271f9..96397b707ff 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1657,6 +1657,8 @@ This setting is useful in cases such as running your Dataverse installation behi "HTTP_VIA", "REMOTE_ADDR" +.. _dataverse.personOrOrg.assumeCommaInPersonName: + dataverse.personOrOrg.assumeCommaInPersonName +++++++++++++++++++++++++++++++++++++++++++++ @@ -1665,6 +1667,8 @@ Please note that this setting is experimental. The Schema.org metadata and OpenAIRE exports and the Schema.org metadata included in DatasetPages try to infer whether each entry in the various fields (e.g. Author, Contributor) is a Person or Organization. If you are sure that users are following the guidance to add people in the recommended family name, given name order, with a comma, you can set this true to always assume entries without a comma are for Organizations. The default is false. +.. 
_dataverse.personOrOrg.orgPhraseArray: + dataverse.personOrOrg.orgPhraseArray ++++++++++++++++++++++++++++++++++++ From 7274d1f61bdc9f2d4ae9c6e935b802e258fb9002 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 28 Oct 2022 15:34:10 -0400 Subject: [PATCH 130/322] Add quotes around CVoc term URIs on advanced search page --- .../iq/dataverse/search/AdvancedSearchPage.java | 3 ++- .../edu/harvard/iq/dataverse/search/SearchUtil.java | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java index a7a89def449..ef37569ac54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java @@ -111,7 +111,8 @@ private String constructDatasetQuery() { List queryStrings = new ArrayList<>(); for (DatasetFieldType dsfType : metadataFieldList) { if (dsfType.getSearchValue() != null && !dsfType.getSearchValue().equals("")) { - queryStrings.add(constructQuery(dsfType.getSolrField().getNameSearchable(), dsfType.getSearchValue())); + //CVoc fields return term URIs - add quotes around them to avoid solr breaking them into individual search words + queryStrings.add(constructQuery(dsfType.getSolrField().getNameSearchable(), dsfType.getSearchValue(), getCVocConf().containsKey(dsfType.getId()))); } else if (dsfType.getListValues() != null && !dsfType.getListValues().isEmpty()) { List listQueryStrings = new ArrayList<>(); for (String value : dsfType.getListValues()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java index c226d77f885..dedb5457173 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java @@ -117,6 +117,10 @@ public static String 
determineFinalQuery(String userSuppliedQuery) { } public static String constructQuery(String solrField, String userSuppliedQuery) { + return constructQuery(solrField, userSuppliedQuery, false); + } + + public static String constructQuery(String solrField, String userSuppliedQuery, boolean addQuotes) { StringBuilder queryBuilder = new StringBuilder(); String delimiter = "[\"]+"; @@ -134,7 +138,12 @@ public static String constructQuery(String solrField, String userSuppliedQuery) } else { StringTokenizer st = new StringTokenizer(userSuppliedQuery); while (st.hasMoreElements()) { - queryStrings.add(solrField + ":" + st.nextElement()); + String nextElement = (String) st.nextElement(); + //Entries such as URIs will get tokenized into individual words by solr unless they are in quotes + if(addQuotes) { + nextElement = "\"" + nextElement + "\""; + } + queryStrings.add(solrField + ":" + nextElement); } } } From b8327eec4897bfef2366525651e6174d6b580cda Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Nov 2022 09:16:28 -0400 Subject: [PATCH 131/322] 9121 fix for search display with ext. CVoc --- src/main/webapp/search-include-fragment.xhtml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index f70356aa393..a6e344afb8c 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -594,7 +594,15 @@
- + + + + + + + + +
From 7d9327edbf194049c1233b12fed6c0ade8dc518d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Nov 2022 17:39:55 -0400 Subject: [PATCH 132/322] Refactored permissions checks and fixed workflow token access --- .../edu/harvard/iq/dataverse/api/Access.java | 334 +++++------------- 1 file changed, 96 insertions(+), 238 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index abeedf23b59..321b3ebfab6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -187,9 +187,6 @@ public class Access extends AbstractApiBean { @Inject MakeDataCountLoggingServiceBean mdcLogService; - - private static final String API_KEY_HEADER = "X-Dataverse-key"; - //@EJB // TODO: @@ -197,23 +194,19 @@ public class Access extends AbstractApiBean { @Path("datafile/bundle/{fileId}") @GET @Produces({"application/zip"}) - public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { GuestbookResponse gbr = null; DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - // This will throw a ForbiddenException if 
access isn't authorized: - checkAuthorization(df, apiToken); + checkAuthorization(df); if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); guestbookResponseService.save(gbr); MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); @@ -278,7 +271,7 @@ private DataFile findDataFileOrDieWrapper(String fileId){ @Path("datafile/{fileId:.+}") @GET @Produces({"application/xml"}) - public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { // check first if there's a trailing slash, and chop it: while (fileId.lastIndexOf('/') == fileId.length() - 1) { @@ -303,20 +296,16 @@ public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs throw new NotFoundException(errorMessage); // (nobody should ever be using this API on a harvested DataFile)! 
} - - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - + + // This will throw a ForbiddenException if access isn't authorized: + checkAuthorization(df); + if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); } - - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); - + DownloadInfo dInfo = new DownloadInfo(df); logger.fine("checking if thumbnails are supported on this file."); @@ -532,11 +521,10 @@ public String tabularDatafileMetadataDDI(@PathParam("fileId") String fileId, @Q @Path("datafile/{fileId}/auxiliary") @GET public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, null, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, null, uriInfo, headers, response); } /* * GET method for retrieving a list auxiliary files associated with @@ -547,26 +535,21 @@ public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, @GET public Response listDatafileMetadataAuxByOrigin(@PathParam("fileId") String fileId, @PathParam("origin") String origin, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, origin, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, origin, uriInfo, headers, response); } - private Response listAuxiliaryFiles(String fileId, String origin, String apiToken, UriInfo 
uriInfo, HttpHeaders headers, HttpServletResponse response) { + private Response listAuxiliaryFiles(String fileId, String origin, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - List auxFileList = auxiliaryFileService.findAuxiliaryFiles(df, origin); if (auxFileList == null || auxFileList.isEmpty()) { throw new NotFoundException("No Auxiliary files exist for datafile " + fileId + (origin==null ? "": " and the specified origin")); } - boolean isAccessAllowed = isAccessAuthorized(df, apiToken); + boolean isAccessAllowed = isAccessAuthorized(df); JsonArrayBuilder jab = Json.createArrayBuilder(); auxFileList.forEach(auxFile -> { if (isAccessAllowed || auxFile.getIsPublic()) { @@ -594,17 +577,12 @@ private Response listAuxiliaryFiles(String fileId, String origin, String apiToke public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId, @PathParam("formatTag") String formatTag, @PathParam("formatVersion") String formatVersion, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - DownloadInfo dInfo = new DownloadInfo(df); boolean publiclyAvailable = false; @@ -654,7 +632,7 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId // as defined for the DataFile itself), and will throw a ForbiddenException // if access is denied: if (!publiclyAvailable) { - checkAuthorization(df, apiToken); + checkAuthorization(df); } return downloadInstance; @@ -670,16 +648,16 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId @POST @Consumes("text/plain") 
@Produces({ "application/zip" }) - public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } @Path("dataset/{id}") @GET @Produces({"application/zip"}) - public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); @@ -693,7 +671,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: - return downloadDatafiles(fileIds, true, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, true, uriInfo, headers, response); } } @@ -714,7 +692,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist } String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); - return 
downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -763,7 +741,7 @@ public Command handleLatestPublished() { if (dsv.isDraft()) { gbrecs = true; } - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -784,11 +762,11 @@ private static String getFileIdsAsCommaSeparated(List fileMetadata @Path("datafiles/{fileIds}") @GET @Produces({"application/zip"}) - public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } - private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { final long 
zipDownloadSizeLimit = systemConfig.getZipDownloadLimit(); logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); @@ -810,11 +788,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon String customZipServiceUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); boolean useCustomZipService = customZipServiceUrl != null; - String apiToken = (apiTokenParam == null || apiTokenParam.equals("")) - ? headers.getHeaderString(API_KEY_HEADER) - : apiTokenParam; - - User apiTokenUser = findAPITokenUser(apiToken); //for use in adding gb records if necessary + User apiTokenUser = findAPITokenUser(); //for use in adding gb records if necessary Boolean getOrig = false; for (String key : uriInfo.getQueryParameters().keySet()) { @@ -827,7 +801,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); + redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); } catch (WebApplicationException wae) { throw wae; } @@ -859,7 +833,7 @@ public void write(OutputStream os) throws IOException, logger.fine("token: " + fileIdParams[i]); Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); } catch (NumberFormatException nfe) { fileId = null; } @@ -867,7 +841,7 @@ public void write(OutputStream os) throws IOException, logger.fine("attempting to look up file id " + fileId); DataFile file = dataFileService.find(fileId); if (file != null) { - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); 
//downloadInstance.addDataFile(file); @@ -1436,8 +1410,8 @@ public Response requestFileAccess(@PathParam("id") String fileToRequestAccessId, List args = Arrays.asList(wr.getLocalizedMessage()); return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.failure.noUser", args)); } - - if (isAccessAuthorized(dataFile, getRequestApiKey())) { + //Already have access + if (isAccessAuthorized(dataFile)) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.invalidRequest")); } @@ -1708,15 +1682,15 @@ public Response rejectFileAccess(@PathParam("id") String fileToRequestAccessId, // checkAuthorization is a convenience method; it calls the boolean method // isAccessAuthorized(), the actual workhorse, tand throws a 403 exception if not. - private void checkAuthorization(DataFile df, String apiToken) throws WebApplicationException { + private void checkAuthorization(DataFile df) throws WebApplicationException { - if (!isAccessAuthorized(df, apiToken)) { + if (!isAccessAuthorized(df)) { throw new ForbiddenException(); } } - private boolean isAccessAuthorized(DataFile df, String apiToken) { + private boolean isAccessAuthorized(DataFile df) { // First, check if the file belongs to a released Dataset version: boolean published = false; @@ -1787,37 +1761,41 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } } - if (!restricted && !embargoed) { - // And if they are not published, they can still be downloaded, if the user + + + //The one case where we don't need to check permissions + if (!restricted && !embargoed && published) { + // If they are not published, they can still be downloaded, if the user // has the permission to view unpublished versions! 
(this case will // be handled below) - if (published) { - return true; - } + return true; } + //For permissions check decide if we have a session user, or an API user User user = null; /** * Authentication/authorization: - * - * note that the fragment below - that retrieves the session object - * and tries to find the user associated with the session - is really - * for logging/debugging purposes only; for practical purposes, it - * would be enough to just call "permissionService.on(df).has(Permission.DownloadFile)" - * and the method does just that, tries to authorize for the user in - * the current session (or guest user, if no session user is available): */ - if (session != null) { + User apiTokenUser = null; + //If we get a non-GuestUser from findUserOrDie, use it. Otherwise, check the session + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + } + + if ((apiTokenUser instanceof GuestUser) && session != null) { if (session.getUser() != null) { - if (session.getUser().isAuthenticated()) { - user = session.getUser(); - } else { + user = session.getUser(); + apiTokenUser=null; + //Fine logging + if (!session.getUser().isAuthenticated()) { logger.fine("User associated with the session is not an authenticated user."); if (session.getUser() instanceof PrivateUrlUser) { logger.fine("User associated with the session is a PrivateUrlUser user."); - user = session.getUser(); } if (session.getUser() instanceof GuestUser) { logger.fine("User associated with the session is indeed a guest user."); @@ -1829,154 +1807,41 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } else { logger.fine("Session is null."); } - - User apiTokenUser = null; - - if ((apiToken != null)&&(apiToken.length()!=64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - 
logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (apiTokenUser == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - } + //If we don't have a user, nothing more to do. (Note session could have returned GuestUser) + if (user == null && apiTokenUser == null) { + logger.warning("Unable to find a user via session or with a token."); + return false; } - - // OK, let's revisit the case of non-restricted files, this time in - // an unpublished version: + + // OK, let's revisit the case of non-restricted files, this time in + // an unpublished version: // (if (published) was already addressed above) - - if (!restricted && !embargoed) { + + DataverseRequest dvr = null; + if (apiTokenUser != null) { + dvr = createDataverseRequest(apiTokenUser); + } else { + // used in JSF context, user may be Guest + dvr = dvRequestService.getDataverseRequest(); + } + if (!published) { // and restricted or embargoed (implied by earlier processing) // If the file is not published, they can still download the file, if the user // has the permission to view unpublished versions: - - if ( user != null ) { - // used in JSF context - if (permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - // it's not unthinkable, that a null user (i.e., guest user) could be given - // the ViewUnpublished permission! 
- logger.log(Level.FINE, "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", user.getIdentifier()); - return true; - } - } - - if (apiTokenUser != null) { - // used in an API context - if (permissionService.requestOn( createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - // last option - guest user in either contexts - // Guset user is impled by the code above. - if ( permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset) ) { + if (permissionService.requestOn(dvr, df.getOwner()).has(Permission.ViewUnpublishedDataset)) { + // it's not unthinkable, that a GuestUser could be given + // the ViewUnpublished permission! + logger.log(Level.FINE, + "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", + dvr.getUser().getIdentifier()); return true; } - - } else { - - // OK, this is a restricted and/or embargoed file. - - boolean hasAccessToRestrictedBySession = false; - boolean hasAccessToRestrictedByToken = false; - - if (permissionService.on(df).has(Permission.DownloadFile)) { - // Note: PermissionServiceBean.on(Datafile df) will obtain the - // User from the Session object, just like in the code fragment - // above. That's why it's not passed along as an argument. 
- hasAccessToRestrictedBySession = true; - } else if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df).has(Permission.DownloadFile)) { - hasAccessToRestrictedByToken = true; - } - - if (hasAccessToRestrictedBySession || hasAccessToRestrictedByToken) { - if (published) { - if (hasAccessToRestrictedBySession) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, published datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, published datafile."); - } - } else { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, published datafile.", apiTokenUser.getIdentifier()); - } - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - // Note that the code below does not allow a case where it is the - // session user that has the permission on the file, and the API token - // user with the ViewUnpublished permission, or vice versa! 
- if (hasAccessToRestrictedBySession) { - if (permissionService.on(df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, unpublished datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, unpublished datafile."); - } - return true; - } - } else { - if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - } - } - } + } else { // published and restricted and/or embargoed - - if ((apiToken != null)) { - // Will try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling user = findUserOrDie()..."); - user = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (user == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - return false; - } - - - //Doesn't this ~duplicate logic above - if so, if there's a way to get here, I think it still works for embargoed files (you only get access if you have download permissions, and, if not published, also view unpublished) - if (permissionService.requestOn(createDataverseRequest(user), df).has(Permission.DownloadFile)) { - if (published) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the datafile.", user.getIdentifier()); - //Same case as line 1809 (and part of 1708 though when published you don't need the DownloadFile permission) - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to 
view - // unpublished versions: - if (permissionService.requestOn(createDataverseRequest(user), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the (unpublished) datafile.", user.getIdentifier()); - //Same case as line 1843? - return true; - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the (unpublished) datafile.", user.getIdentifier()); - } - } - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the datafile.", user.getIdentifier()); + if (permissionService.requestOn(dvr, df).has(Permission.DownloadFile)) { + return true; } - - return false; - } - + } if (user != null) { logger.log(Level.FINE, "Session-based auth: user {0} has NO access rights on the requested datafile.", user.getIdentifier()); } @@ -1984,37 +1849,30 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { if (apiTokenUser != null) { logger.log(Level.FINE, "Token-based auth: user {0} has NO access rights on the requested datafile.", apiTokenUser.getIdentifier()); } - - if (user == null && apiTokenUser == null) { - logger.fine("Unauthenticated access: No guest access to the datafile."); - } - return false; } - private User findAPITokenUser(String apiToken) { + private User findAPITokenUser() { User apiTokenUser = null; - - if ((apiToken != null) && (apiToken.length() != 64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - return apiTokenUser; - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - return null; + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + if(apiTokenUser instanceof GuestUser) { + if(session!=null && session.getUser()!=null) { + 
//The apiTokenUser, if set, will override the sessionUser in permissions calcs, so set it to null if we have a session user + apiTokenUser=null; + } } - + return apiTokenUser; + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + return null; } - return apiTokenUser; } - private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { String zipServiceKey = null; Timestamp timestamp = null; @@ -2031,7 +1889,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, for (int i = 0; i < fileIdParams.length; i++) { Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); validIdCount++; } catch (NumberFormatException nfe) { fileId = null; @@ -2040,7 +1898,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, DataFile file = dataFileService.find(fileId); if (file != null) { validFileCount++; - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); if (donotwriteGBResponse != true && file.isReleased()) { GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); From d10b15439f09ed5929b934777caaba299152040f Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 4 Nov 2022 16:55:24 +0100 Subject: [PATCH 133/322] added documentation --- doc/release-notes/9130-cleanup-storage.md | 3 +++ doc/sphinx-guides/source/api/native-api.rst | 28 
+++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 doc/release-notes/9130-cleanup-storage.md diff --git a/doc/release-notes/9130-cleanup-storage.md b/doc/release-notes/9130-cleanup-storage.md new file mode 100644 index 00000000000..71387a92db2 --- /dev/null +++ b/doc/release-notes/9130-cleanup-storage.md @@ -0,0 +1,3 @@ +### Support for cleaning up files in datasets' storage + +Experimental feature: all the files stored in the Dataset storage location that are not in the file list of that Dataset can be removed with the new native API call (/api/datasets/$id/cleanStorage). \ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 6d68d648cb3..260c5cc7765 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1511,6 +1511,34 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' +.. _cleanup-storage-api: + +Cleanup storage of a Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your Dataverse installation has been configured to support direct uploads, or in some other situations, +you could end up with some files in the storage of a dataset that are not linked to that dataset directly. Most commonly, this could +happen when an upload fails in the middle of a transfer, i.e. if a user does a UI direct upload and leaves the page without hitting cancel or save, +Dataverse doesn't know and doesn't clean up the files. 
Similarly in the direct upload API, if the final /addFiles call isn't done, the files are abandoned. + +You might also want to remove cached export files or some temporary files, thumbnails, etc. + +All the files stored in the Dataset storage location that are not in the file list of that Dataset can be removed, as shown in the example below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/:persistentId/cleanStorage?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X GET https://demo.dataverse.org/api/datasets/:persistentId/cleanStorage?persistentId=doi:10.5072/FK2/J8SJZB + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 9ba760d67456cfebc369d7f7d83e2d2dc7f3c505 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 4 Nov 2022 18:23:16 +0100 Subject: [PATCH 134/322] replaced listAllFiles and deleteFile with cleanUp in the StorageIO interface --- .../edu/harvard/iq/dataverse/api/Datasets.java | 13 ++++++------- .../iq/dataverse/dataaccess/FileAccessIO.java | 16 +++++++++++++--- .../iq/dataverse/dataaccess/InputStreamIO.java | 9 ++------- .../dataaccess/RemoteOverlayAccessIO.java | 10 +++------- .../iq/dataverse/dataaccess/S3AccessIO.java | 17 +++++++++++++---- .../iq/dataverse/dataaccess/StorageIO.java | 5 ++--- .../iq/dataverse/dataaccess/SwiftAccessIO.java | 18 ++++++++++++++---- 7 files changed, 53 insertions(+), 35 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 0cdb2b3a73f..38017514575 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -114,6 +114,7 @@ import java.time.LocalDateTime; import java.util.*; import java.util.concurrent.*; +import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.Map.Entry; @@ -2546,13 +2547,11 @@ public Response cleanStorage(@PathParam("id") String idSupplied) { } } StorageIO datasetIO = DataAccess.getStorageIO(dataset); - List allDatasetFiles = datasetIO.listAllFiles(); - for (String f : allDatasetFiles) { - if (!files.contains(f)) { - datasetIO.deleteFile(f); - deleted.add(f); - } - } + Predicate filter = f -> { + return !files.contains(f); + }; + + deleted.addAll(datasetIO.cleanUp(filter)); } catch (IOException ex) { logger.log(Level.SEVERE, null, ex); return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! See administrator!"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 2bb3abf03a6..cc72a9cfb02 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -33,9 +33,11 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; // Dataverse imports: import edu.harvard.iq.dataverse.DataFile; @@ -684,7 +686,7 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - public List listAllFiles() throws IOException { + private List listAllFiles() throws IOException { Dataset dataset = this.getDataset(); if (dataset == null) { throw new IOException("This FileAccessIO object hasn't been properly initialized."); @@ -708,8 +710,7 @@ public List listAllFiles() throws IOException { return 
res; } - @Override - public void deleteFile(String fileName) throws IOException { + private void deleteFile(String fileName) throws IOException { Dataset dataset = this.getDataset(); if (dataset == null) { throw new IOException("This FileAccessIO object hasn't been properly initialized."); @@ -724,4 +725,13 @@ public void deleteFile(String fileName) throws IOException { Files.delete(p); } + @Override + public List cleanUp(Predicate filter) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index 1235b386fe9..2a867bddcac 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -14,6 +14,7 @@ import java.nio.channels.WritableByteChannel; import java.nio.file.Path; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; /** @@ -160,14 +161,8 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { } @Override - public List listAllFiles() throws IOException { + public List cleanUp(Predicate filter) throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); } - - - @Override - public void deleteFile(String fileName) throws IOException { - throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index b7fb4c86c7c..22373fdfee0 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -24,6 +24,7 @@ import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; import org.apache.http.Header; @@ -630,14 +631,9 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { public static String getBaseStoreIdFor(String driverId) { return System.getProperty("dataverse.files." + driverId + ".base-store"); } - - @Override - public List listAllFiles() throws IOException { - return baseStore.listAllFiles(); - } @Override - public void deleteFile(String fileName) throws IOException { - baseStore.deleteFile(fileName); + public List cleanUp(Predicate filter) throws IOException { + return baseStore.cleanUp(filter); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 3796d7f0ce9..8dc93361375 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -60,7 +60,10 @@ import java.util.HashMap; import java.util.List; import java.util.Random; +import java.util.function.Predicate; import java.util.logging.Logger; +import java.util.stream.Collectors; + import org.apache.commons.io.IOUtils; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -1308,8 +1311,7 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { - @Override - public List listAllFiles() throws IOException { + private List listAllFiles() throws IOException { if (!this.canWrite()) { open(); } @@ -1351,8 +1353,7 @@ public List listAllFiles() throws IOException { return ret; } - @Override - public void deleteFile(String fileName) 
throws IOException { + private void deleteFile(String fileName) throws IOException { if (!this.canWrite()) { open(); } @@ -1370,4 +1371,12 @@ public void deleteFile(String fileName) throws IOException { } } + @Override + public List cleanUp(Predicate filter) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 0e42a84795c..54e457ffab6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -39,6 +39,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -622,8 +623,6 @@ protected static boolean usesStandardNamePattern(String identifier) { return m.find(); } - public abstract List listAllFiles() throws IOException; - - public abstract void deleteFile(String fileName) throws IOException; + public abstract List cleanUp(Predicate filter) throws IOException; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 5a376cb8d91..8857b054108 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -22,7 +22,10 @@ import java.util.Formatter; import java.util.List; import java.util.Properties; +import java.util.function.Predicate; import java.util.logging.Logger; +import java.util.stream.Collectors; + import javax.crypto.Mac; import javax.crypto.spec.SecretKeySpec; import org.javaswift.joss.client.factory.AccountFactory; @@ -897,8 +900,7 
@@ public static String calculateRFC2104HMAC(String data, String key) return toHexString(mac.doFinal(data.getBytes())); } - @Override - public List listAllFiles() throws IOException { + private List listAllFiles() throws IOException { if (!this.canWrite()) { open(DataAccessOption.WRITE_ACCESS); } @@ -922,8 +924,7 @@ public List listAllFiles() throws IOException { return ret; } - @Override - public void deleteFile(String fileName) throws IOException { + private void deleteFile(String fileName) throws IOException { if (!this.canWrite()) { open(DataAccessOption.WRITE_ACCESS); } @@ -941,4 +942,13 @@ public void deleteFile(String fileName) throws IOException { fileObject.delete(); } + + @Override + public List cleanUp(Predicate filter) throws IOException { + List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + for (String f : toDelete) { + this.deleteFile(f); + } + return toDelete; + } } From 00170695b920c2f5accdeb1cdcb367b6c892ab1b Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 4 Nov 2022 18:34:28 +0100 Subject: [PATCH 135/322] better filter for files to delete --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 38017514575..901b6cf17bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2548,7 +2548,7 @@ public Response cleanStorage(@PathParam("id") String idSupplied) { } StorageIO datasetIO = DataAccess.getStorageIO(dataset); Predicate filter = f -> { - return !files.contains(f); + return files.stream().noneMatch(x -> f.startsWith(x)); }; deleted.addAll(datasetIO.cleanUp(filter)); From f0ac872828d3a48cf74e74052ae1b3767afb264a Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 7 Nov 2022 13:59:16 +0100 Subject: [PATCH 136/322] updated 
filter: exlude export files --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 901b6cf17bf..1646dacd8b2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2548,7 +2548,7 @@ public Response cleanStorage(@PathParam("id") String idSupplied) { } StorageIO datasetIO = DataAccess.getStorageIO(dataset); Predicate filter = f -> { - return files.stream().noneMatch(x -> f.startsWith(x)); + return f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); }; deleted.addAll(datasetIO.cleanUp(filter)); From bcaeb9fd58f38fdcd8cc1587e763a80c17b55048 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 7 Nov 2022 14:05:35 +0100 Subject: [PATCH 137/322] bugfix in filter --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 1646dacd8b2..295a802c6a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2512,7 +2512,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, */ @GET @Path("{id}/cleanStorage") - public Response cleanStorage(@PathParam("id") String idSupplied) { + public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dryrun") Boolean dryrun) { // get user and dataset User authUser; try { @@ -2548,7 +2548,7 @@ public Response cleanStorage(@PathParam("id") String idSupplied) { } StorageIO datasetIO = DataAccess.getStorageIO(dataset); Predicate filter = f -> { - return f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); + return 
!f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); }; deleted.addAll(datasetIO.cleanUp(filter)); From 503b9a36ca409e4bf89659cc877b62415d4ef33a Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 7 Nov 2022 14:06:35 +0100 Subject: [PATCH 138/322] added dryrun query parameter --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 295a802c6a0..0ce3a4043a8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2551,7 +2551,11 @@ public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dr return !f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); }; - deleted.addAll(datasetIO.cleanUp(filter)); + if (dryrun != null && dryrun.booleanValue()) { + deleted.addAll(files.stream().filter(filter).collect(Collectors.toList())); + } else { + deleted.addAll(datasetIO.cleanUp(filter)); + } } catch (IOException ex) { logger.log(Level.SEVERE, null, ex); return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error! See administrator!"); From 256a948564dc0ba9aa389f8ca0d79c915d37c320 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 28 Nov 2022 09:21:05 -0500 Subject: [PATCH 139/322] reverse unintended changes --- src/main/java/propertyFiles/Bundle.properties | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index b19e80020ba..8fd37b167b9 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -232,12 +232,12 @@ notification.access.revoked.datafile=You have been removed from a role in {0}. 
notification.checksumfail=One or more files in your upload failed checksum validation for dataset {1}. Please re-run the upload script. If the problem persists, please contact support. notification.ingest.completed=Your Dataset {2} has one or more tabular files that completed the tabular ingest process. These files will be available for download in their original formats and other formats for enhanced archival purposes after you publish the dataset. The archival .tab files are displayed in the file table. Please see the guides for more information about ingest and support for tabular files. notification.ingest.completedwitherrors=Your Dataset {2} has one or more tabular files that have been uploaded successfully but are not supported for tabular ingest. After you publish the dataset, these files will not have additional archival features. Please see the guides for more information about ingest and support for tabular files.

Files with incomplete ingest:{5} -notification.mail.import.filesystem=Globus transfer to Dataset {2} ({0}/dataset.xhtml?persistentId={1}) was successful. File(s) have been uploaded and verified. +notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded and verified. notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

{3}
notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.

{3}
notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.

{3}
notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.

{3}
-notification.import.filesystem=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. +notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. notification.globus.download.completed=Globus transfer from the dataset {1} was successful. notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} is complete with errors. From 7a80d7e69388d0a0a35ee72ea60442f11154b24a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 7 Dec 2022 13:24:13 -0500 Subject: [PATCH 140/322] #9211 fix render logic display with TOA OR restricted files --- src/main/webapp/dataset-license-terms.xhtml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 1cbf297bf89..8172efac67f 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -236,7 +236,8 @@
-
saveAndAddFilesToDataset(DatasetVersion version, try { // FITS is the only type supported for metadata // extraction, as of now. -- L.A. 4.0 + // Consider adding other formats such as NetCDF/HDF5. dataFile.setContentType("application/fits"); metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); } catch (IOException mex) { @@ -565,7 +575,58 @@ public int compare(DataFile d1, DataFile d2) { return sb.toString(); } - + // Note: There is another method called extractMetadata for FITS files. + public void extractMetadata(Dataset dataset, AuthenticatedUser user) { + for (DataFile dataFile : dataset.getFiles()) { + Path pathToLocalDataFile = null; + try { + pathToLocalDataFile = dataFile.getStorageIO().getFileSystemPath(); + } catch (IOException ex) { + logger.info("Exception calling dataAccess.getFileSystemPath: " + ex); + } + InputStream inputStream = null; + if (pathToLocalDataFile != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(pathToLocalDataFile.toString())) { + if (netcdfFile != null) { + // TODO: What should we pass as a URL to toNcml()? + String ncml = netcdfFile.toNcml("FIXME_URL"); + inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); + } else { + logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + " (null returned)."); + } + } catch (IOException ex) { + logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + ". Exception caught: " + ex); + } + } else { + logger.info("pathToLocalDataFile is null! Are you on S3? 
Metadata extraction from NetCDF/HDF5 is not yet available."); + // As a tabular file, we'll probably need to download the NetCDF/HDF5 files from S3 and then try to extra the metadata, + // unless we can get some sort of S3 interface working: + // https://docs.unidata.ucar.edu/netcdf-java/current/userguide/dataset_urls.html#object-stores + // If we need to download the file and extract only some of the bytes (hopefully the first bytes) here's the spec for NetCDF: + // https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html + } + if (inputStream != null) { + // TODO: What should the tag be? + String formatTag = "ncml"; + // TODO: What should the version be? + String formatVersion = "0.1"; + // TODO: What should the origin be? + String origin = "myOrigin"; + boolean isPublic = true; + // TODO: What should the type be? + String type = "myType"; + // TODO: Does NcML have its own content type? (MIME type) + MediaType mediaType = new MediaType("text", "xml"); + try { + AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType); + logger.info("Aux file extracted from NetCDF/HDF5 file saved: " + auxFile); + } catch (Exception ex) { + logger.info("exception throw calling processAuxiliaryFile: " + ex); + } + } + } + } + public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { /* logger.info("Skipping summary statistics and UNF."); @@ -1159,6 +1220,7 @@ public boolean fileMetadataExtractable(DataFile dataFile) { * extractMetadata: * framework for extracting metadata from uploaded files. The results will * be used to populate the metadata of the Dataset to which the file belongs. + * Note that another method called extractMetadata creates aux files from data files. 
*/ public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException { boolean ingestSuccessful = false; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java new file mode 100644 index 00000000000..a83af514935 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java @@ -0,0 +1,57 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.response.Response; +import java.io.IOException; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.OK; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NetcdfIT { + + @BeforeClass + public static void setUp() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testNmclFromNetcdf() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "src/test/resources/netcdf/madis-raob"; + + Response uploadFile = 
UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); + String tag = "ncml"; + String version = "0.1"; + + Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + //downloadNcml.prettyPrint(); // long output + downloadNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + } +} From 18b058a088217f615744d79e3c2b8d8ebdbd41d2 Mon Sep 17 00:00:00 2001 From: Anthony Reyes Date: Mon, 12 Dec 2022 13:17:24 -0800 Subject: [PATCH 142/322] Update dataverse.xhtml Added contentTruncateForDataverse() to allow dataverse descriptions to have read more/read less buttons. --- src/main/webapp/dataverse.xhtml | 44 ++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/dataverse.xhtml b/src/main/webapp/dataverse.xhtml index 572bcf40300..98dc7664753 100644 --- a/src/main/webapp/dataverse.xhtml +++ b/src/main/webapp/dataverse.xhtml @@ -592,7 +592,7 @@
- +
@@ -828,7 +828,49 @@ $(document).ready(function () { initCarousel(); popoverHTML('#{bundle.htmlAllowedTitle}','#{bundle.htmlAllowedTags}'); + contentTruncateForDataverse(); }); + function contentTruncateForDataverse(){ + // SELECTOR ID FROM PARAMETERS + $('#dataverseDesc').each(function () { + + // add responsive img class to limit width to that of container + $(this).find('img').attr('class', 'img-responsive'); + + // find container height + var containerHeight = $(this).outerHeight(); + + if (containerHeight > 250) { + // ADD A MAX-HEIGHT TO CONTAINER + $(this).css({'max-height':'250px','overflow-y':'hidden','position':'relative'}); + + // BTN LABEL TEXT, ARIA ATTR'S, FROM BUNDLE VIA PARAMETERS + var readMoreBtn = ''; + var moreBlock = '
' + readMoreBtn + '
'; + var readLessBtn = ''; + var lessBlock = '
' + readLessBtn + '
'; + + // add "Read full desc [+]" btn, background fade + $(this).append(moreBlock); + + // show full description in summary block on "Read full desc [+]" btn click + $(document).on('click', 'button.desc-more-link', function() { + $(this).tooltip('hide').parent('div').parent('div').css({'max-height':'none','overflow-y':'visible','position':'relative'}); + $(this).parent('div.more-block').replaceWith(lessBlock); + $('.less-block button').tooltip(); + }); + + // trucnate description in summary block on "Collapse desc [-]" btn click + $(document).on('click', 'button.desc-less-link', function() { + $(this).tooltip('hide').parent('div').parent('div').css({'max-height':'250px','overflow-y':'hidden','position':'relative'}); + $(this).parent('div.less-block').replaceWith(moreBlock); + $('html, body').animate({scrollTop: $('#' + truncSelector).offset().top - 60}, 500); + $('.more-block button').tooltip(); + }); + } + }); + } + function initCarousel() { var owl1 = $("#featuredDataversesList"); owl1.owlCarousel({ From fb333e6e7c7b9e0ede1053190008c3b621ae04dd Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 13 Dec 2022 20:31:53 +0100 Subject: [PATCH 143/322] revert(metadata): remove experimental workflow metadata from Solr schema #9224 --- conf/solr/8.11.1/schema.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 655cf1bc3cc..9d12f0dd9d0 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -266,9 +266,6 @@ - - - @@ -506,9 +503,6 @@ - - - From 3e59d53e25c1ecfe2dcadc216d49620d1be1649d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 13 Dec 2022 20:44:42 +0100 Subject: [PATCH 144/322] doc: add release note about removing workflow metadata from Solr schema #9224 --- .../9224-remove-workflow-fields-from-index.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 doc/release-notes/9224-remove-workflow-fields-from-index.md diff --git 
a/doc/release-notes/9224-remove-workflow-fields-from-index.md b/doc/release-notes/9224-remove-workflow-fields-from-index.md new file mode 100644 index 00000000000..4b324ce7b57 --- /dev/null +++ b/doc/release-notes/9224-remove-workflow-fields-from-index.md @@ -0,0 +1,12 @@ +# Optional: remove Workflow Schema fields from Solr index + +In Dataverse 5.12 we added a new experimental metadata schema block for workflow deposition. +We included the fields within the standard Solr schema we provide. With this version, we +removed it from the schema. If you are deploying the block to your installation, make sure to +update your index. + +If you already added these fields, you can delete them from your index when not using the schema. +Make sure to [reindex after changing the schema](https://guides.dataverse.org/en/latest/admin/solr-search-index.html?highlight=reindex#reindex-in-place). + +Remember: depending on the size of your installation, reindexing may take serious time to complete. +You should do this in off-hours. \ No newline at end of file From b604495321486cb5008227a9cc4c232f73994045 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 13 Dec 2022 20:49:15 +0100 Subject: [PATCH 145/322] doc(metadata): add hint about adding experimental metadata blocks to Solr schema #9224 --- doc/sphinx-guides/source/user/appendix.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index b05459b6aaf..4fcbc004c66 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -38,6 +38,9 @@ Unlike supported metadata, experimental metadata is not enabled by default in a - `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. 
+Please note: these custom metadata schemas are not included in the Solr schema for indexation by default, you will need +to add them as necessary for your custom metadata blocks. See "Update the Solr Schema" in :doc:`../admin/metadatacustomization`. + See Also ~~~~~~~~ From bea65947277153fea567c9b918b12a571dd6f316 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 13 Dec 2022 21:00:24 +0100 Subject: [PATCH 146/322] Update doc/sphinx-guides/source/user/appendix.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/user/appendix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index 4fcbc004c66..2a7baf712cd 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -38,7 +38,7 @@ Unlike supported metadata, experimental metadata is not enabled by default in a - `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. -Please note: these custom metadata schemas are not included in the Solr schema for indexation by default, you will need +Please note: these custom metadata schemas are not included in the Solr schema for indexing by default, you will need to add them as necessary for your custom metadata blocks. See "Update the Solr Schema" in :doc:`../admin/metadatacustomization`. 
See Also From f1764c5c1cd8e80b9c5cf71dbc2a165dea88f54f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Dec 2022 10:40:59 -0500 Subject: [PATCH 147/322] Update 7349-3-schema.org-updates.md --- doc/release-notes/7349-3-schema.org-updates.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/7349-3-schema.org-updates.md b/doc/release-notes/7349-3-schema.org-updates.md index 6a9c5e2b9b0..dc18fe2a24a 100644 --- a/doc/release-notes/7349-3-schema.org-updates.md +++ b/doc/release-notes/7349-3-schema.org-updates.md @@ -1,3 +1,3 @@ The Schema.org metadata export and the schema.org metadata embedded in dataset pages has been updated to improve compliance with Schema.org's schema and Google's recommendations. -Backward compatibility - file entries now have the mimetype reported as 'encodingFormat' rather than 'fileFormat' to better conform with the Schema.org specification for DataDownload entries. \ No newline at end of file +Backward compatibility - file entries now have the mimetype reported as 'encodingFormat' rather than 'fileFormat' to better conform with the Schema.org specification for DataDownload entries. Download URLs are now sent for all files unless the dataverse.files.hide-schema-dot-org-download-urls setting is set to true. 
From 438b86cf7a04e3be44c84883f2842680bd98450a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 14 Dec 2022 15:33:41 -0500 Subject: [PATCH 148/322] extract NcML earlier, from temp file during upload #9153 --- .../harvard/iq/dataverse/AuxiliaryFile.java | 5 +- .../dataverse/AuxiliaryFileServiceBean.java | 23 ++++- .../edu/harvard/iq/dataverse/DatasetPage.java | 1 - .../iq/dataverse/EditDatafilesPage.java | 1 - .../datadeposit/MediaResourceManagerImpl.java | 1 - .../datasetutility/AddReplaceFileHelper.java | 2 - .../dataverse/ingest/IngestServiceBean.java | 93 ++++++++----------- src/main/java/propertyFiles/Bundle.properties | 1 + .../harvard/iq/dataverse/api/NetcdfIT.java | 2 +- 9 files changed, 65 insertions(+), 64 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java index a7a89934f47..344032ef5e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -55,7 +55,10 @@ public class AuxiliaryFile implements Serializable { private String formatTag; private String formatVersion; - + + /** + * The application/entity that created the auxiliary file. + */ private String origin; private boolean isPublic; diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 76c91382868..05f3e209632 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -70,9 +70,13 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { * @param type how to group the files such as "DP" for "Differentially * @param mediaType user supplied content type (MIME type) * Private Statistics". 
- * @return success boolean - returns whether the save was successful + * @param save boolean - true to save immediately, false to let the cascade + * do persist to the database. + * @return an AuxiliaryFile with an id when save=true (assuming no + * exceptions) or an AuxiliaryFile without an id that will be persisted + * later through the cascade. */ - public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType, boolean save) { StorageIO storageIO = null; AuxiliaryFile auxFile = new AuxiliaryFile(); @@ -114,7 +118,14 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile auxFile.setType(type); auxFile.setDataFile(dataFile); auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); - auxFile = save(auxFile); + if (save) { + auxFile = save(auxFile); + } else { + if (dataFile.getAuxiliaryFiles() == null) { + dataFile.setAuxiliaryFiles(new ArrayList<>()); + } + dataFile.getAuxiliaryFiles().add(auxFile); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage()); throw new InternalServerErrorException(); @@ -129,7 +140,11 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } return auxFile; } - + + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + return processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, true); + } + public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) 
{ Query query = em.createNamedQuery("AuxiliaryFile.lookupAuxiliaryFile"); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index b538aaca2c6..6e71f6c5042 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3733,7 +3733,6 @@ public String save() { // Call Ingest Service one more time, to // queue the data ingest jobs for asynchronous execution: ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) session.getUser()); - ingestService.extractMetadata(dataset, (AuthenticatedUser) session.getUser()); //After dataset saved, then persist prov json data if(systemConfig.isProvCollectionEnabled()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index d045126a3aa..fc8df8681af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1225,7 +1225,6 @@ public String save() { // queue the data ingest jobs for asynchronous execution: if (mode == FileEditMode.UPLOAD) { ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) session.getUser()); - ingestService.extractMetadata(dataset, (AuthenticatedUser) session.getUser()); } if (FileEditMode.EDIT == mode && Referrer.FILE == referrer && fileMetadatas.size() > 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index e8d25bb4148..5491024c73c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -373,7 +373,6 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au } 
ingestService.startIngestJobsForDataset(dataset, user); - ingestService.extractMetadata(dataset, user); ReceiptGenerator receiptGenerator = new ReceiptGenerator(); String baseUrl = urlManager.getHostnamePlusBaseUrlPath(uri); diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 5277d014430..febbb249a91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1932,7 +1932,6 @@ private boolean step_100_startIngestJobs(){ // start the ingest! ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); msg("post ingest start"); - ingestService.extractMetadata(dataset, dvRequest.getAuthenticatedUser()); } return true; } @@ -2146,7 +2145,6 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); - ingestService.extractMetadata(dataset, (AuthenticatedUser) authUser); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index e261efce642..b5934c1167f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -241,6 +241,45 @@ public List saveAndAddFilesToDataset(DatasetVersion version, savedSuccess = true; logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); + // TODO: reformat this file to remove the many tabs added in cc08330 + InputStream inputStream = null; + if (tempLocationPath != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(tempLocationPath.toString())) { + if (netcdfFile != null) { + // For now, empty string. 
What should we pass as a URL to toNcml()? The filename (including the path) most commonly at https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_cookbook.html + // With an empty string the XML will show 'location="file:"'. + String ncml = netcdfFile.toNcml(""); + inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); + } else { + logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + " (null returned)."); + } + } catch (IOException ex) { + logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + ". Exception caught: " + ex); + } + } else { + logger.info("tempLocationPath is null for file id " + dataFile.getId() + ". Can't extract NcML."); + } + if (inputStream != null) { + // If you change NcML, you must also change the previewer. + String formatTag = "NcML"; + // 0.1 is arbitrary. It's our first attempt to put out NcML so we're giving it a low number. + // If you bump the number here, be sure the bump the number in the previewer as well. + // We could use 2.2 here since that's the current version of NcML. + String formatVersion = "0.1"; + String origin = "netcdf-java"; + boolean isPublic = true; + // See also file.auxfiles.types.NcML in Bundle.properties. Used to group aux files in UI. 
+ String type = "NcML"; + // XML because NcML doesn't have its own MIME/content type at https://www.iana.org/assignments/media-types/media-types.xhtml + MediaType mediaType = new MediaType("text", "xml"); + try { + AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, false); + logger.fine ("Aux file extracted from NetCDF/HDF5 file saved to storage (but not to the database yet) from file id " + dataFile.getId()); + } catch (Exception ex) { + logger.info("exception throw calling processAuxiliaryFile: " + ex); + } + } + } catch (IOException ioex) { logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); } finally { @@ -302,6 +341,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // Any necessary post-processing: // performPostProcessingTasks(dataFile); } else { + System.out.println("driver is not tmp"); try { StorageIO dataAccess = DataAccess.getStorageIO(dataFile); //Populate metadata @@ -575,58 +615,6 @@ public int compare(DataFile d1, DataFile d2) { return sb.toString(); } - // Note: There is another method called extractMetadata for FITS files. - public void extractMetadata(Dataset dataset, AuthenticatedUser user) { - for (DataFile dataFile : dataset.getFiles()) { - Path pathToLocalDataFile = null; - try { - pathToLocalDataFile = dataFile.getStorageIO().getFileSystemPath(); - } catch (IOException ex) { - logger.info("Exception calling dataAccess.getFileSystemPath: " + ex); - } - InputStream inputStream = null; - if (pathToLocalDataFile != null) { - try ( NetcdfFile netcdfFile = NetcdfFiles.open(pathToLocalDataFile.toString())) { - if (netcdfFile != null) { - // TODO: What should we pass as a URL to toNcml()? 
- String ncml = netcdfFile.toNcml("FIXME_URL"); - inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); - } else { - logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + " (null returned)."); - } - } catch (IOException ex) { - logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + ". Exception caught: " + ex); - } - } else { - logger.info("pathToLocalDataFile is null! Are you on S3? Metadata extraction from NetCDF/HDF5 is not yet available."); - // As a tabular file, we'll probably need to download the NetCDF/HDF5 files from S3 and then try to extra the metadata, - // unless we can get some sort of S3 interface working: - // https://docs.unidata.ucar.edu/netcdf-java/current/userguide/dataset_urls.html#object-stores - // If we need to download the file and extract only some of the bytes (hopefully the first bytes) here's the spec for NetCDF: - // https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html - } - if (inputStream != null) { - // TODO: What should the tag be? - String formatTag = "ncml"; - // TODO: What should the version be? - String formatVersion = "0.1"; - // TODO: What should the origin be? - String origin = "myOrigin"; - boolean isPublic = true; - // TODO: What should the type be? - String type = "myType"; - // TODO: Does NcML have its own content type? 
(MIME type) - MediaType mediaType = new MediaType("text", "xml"); - try { - AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType); - logger.info("Aux file extracted from NetCDF/HDF5 file saved: " + auxFile); - } catch (Exception ex) { - logger.info("exception throw calling processAuxiliaryFile: " + ex); - } - } - } - } - public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { /* logger.info("Skipping summary statistics and UNF."); @@ -1220,7 +1208,6 @@ public boolean fileMetadataExtractable(DataFile dataFile) { * extractMetadata: * framework for extracting metadata from uploaded files. The results will * be used to populate the metadata of the Dataset to which the file belongs. - * Note that another method called extractMetadata creates aux files from data files. */ public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException { boolean ingestSuccessful = false; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index b19e80020ba..0ec81cb7d6b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2007,6 +2007,7 @@ file.remotelyStored=This file is stored remotely - click for more info file.auxfiles.download.header=Download Auxiliary Files # These types correspond to the AuxiliaryFile.Type enum. 
file.auxfiles.types.DP=Differentially Private Statistics +file.auxfiles.types.NcML=XML from NetCDF/HDF5 (NcML) # Add more types here file.auxfiles.unspecifiedTypes=Other Auxiliary Files diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java index a83af514935..74179b98833 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java @@ -45,7 +45,7 @@ public void testNmclFromNetcdf() throws IOException { uploadFile.then().assertThat().statusCode(OK.getStatusCode()); long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); - String tag = "ncml"; + String tag = "NcML"; String version = "0.1"; Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); From 0e00766eec1d3cb043c6863b10856be877eb6da7 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 14 Dec 2022 15:59:46 -0500 Subject: [PATCH 149/322] add NetcdfIT to list of tests #9153 --- tests/integration-tests.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 6e6668d45af..1e9110be2de 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT 
+DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT From 9edaf595480aeae85185e90d24d06d064bf0dc55 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 20 Dec 2022 10:13:52 -0500 Subject: [PATCH 150/322] add "requirements" and "auxFilesExist" to external tools #9153 The use case is an external tool that operates on aux files pulled out of NetCDF/HDF5 files. --- .../root/external-tools/auxFileTool.json | 26 ++++ .../source/api/external-tools.rst | 14 +- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../edu/harvard/iq/dataverse/FilePage.java | 15 ++- .../edu/harvard/iq/dataverse/api/TestApi.java | 4 +- .../dataverse/externaltools/ExternalTool.java | 22 +++- .../ExternalToolServiceBean.java | 45 ++++++- .../V5.13.0.3__9153-extract-metadata.sql | 1 + .../iq/dataverse/api/ExternalToolsIT.java | 121 ++++++++++++++++++ .../ExternalToolServiceBeanTest.java | 68 +++++++++- 10 files changed, 306 insertions(+), 12 deletions(-) create mode 100644 doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json create mode 100644 src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json new file mode 100644 index 00000000000..b188520dabb --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json @@ -0,0 +1,26 @@ +{ + "displayName": "AuxFileViewer", + "description": "Show an auxiliary file from a dataset file.", + "toolName": 
"auxPreviewer", + "scope": "file", + "types": [ + "preview" + ], + "toolUrl": "https://example.com/AuxFileViewer.html", + "toolParameters": { + "queryParameters": [ + { + "fileid": "{fileId}" + } + ] + }, + "requirements": { + "auxFilesExist": [ + { + "formatTag": "myFormatTag", + "formatVersion": "0.1" + } + ] + }, + "contentType": "application/foobar" +} diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst index 4f6c9a8015c..eec9944338f 100644 --- a/doc/sphinx-guides/source/api/external-tools.rst +++ b/doc/sphinx-guides/source/api/external-tools.rst @@ -53,15 +53,21 @@ External tools must be expressed in an external tool manifest file, a specific J Examples of Manifests +++++++++++++++++++++ -Let's look at two examples of external tool manifests (one at the file level and one at the dataset level) before we dive into how they work. +Let's look at a few examples of external tool manifests (both at the file level and at the dataset level) before we dive into how they work. + +.. _tools-for-files: External Tools for Files ^^^^^^^^^^^^^^^^^^^^^^^^ -:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level both an "explore" tool and a "preview" tool that operates on tabular files: +:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level (both an "explore" tool and a "preview" tool) that operates on tabular files: .. literalinclude:: ../_static/installation/files/root/external-tools/fabulousFileTool.json +:download:`auxFileTool.json <../_static/installation/files/root/external-tools/auxFileTool.json>` is a file level preview tool that operates on auxiliary files associated with a data file (note the "requirements" section): + +.. 
literalinclude:: ../_static/installation/files/root/external-tools/auxFileTool.json + External Tools for Datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -113,6 +119,10 @@ Terminology allowedApiCalls httpMethod Which HTTP method the specified callback uses such as ``GET`` or ``POST``. allowedApiCalls timeOut For non-public datasets and datafiles, how many minutes the signed URLs given to the tool should be valid for. Must be an integer. + + requirements **Resources your tool needs to function.** For now, the only requirement you can specify is that one or more auxiliary files exist (see auxFilesExist in the :ref:`tools-for-files` example). Currently, requirements only apply to preview tools. If the requirements are not met, the preview tool is not shown. + + auxFilesExist **An array containing formatTag and formatVersion pairs** for each auxiliary file that your tool needs to download to function properly. For example, a required aux file could have a ``formatTag`` of "NcML" and a ``formatVersion`` of "1.0". See also :doc:`/developers/aux-file-support`. toolName A **name** of an external tool that is used to differentiate between external tools and also used in bundle.properties for localization in the Dataverse installation web interface. For example, the toolName for Data Explorer is ``explorer``. For the Data Curation Tool the toolName is ``dct``. This is an optional parameter in the manifest JSON file. 
=========================== ========== diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 6e71f6c5042..8bb1167afcd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5490,7 +5490,7 @@ public List getCachedToolsForDataFile(Long fileId, ExternalTool.Ty return cachedTools; } DataFile dataFile = datafileService.find(fileId); - cachedTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile); + cachedTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); cachedToolsByFileId.put(fileId, cachedTools); //add to map so we don't have to do the lifting again return cachedTools; } diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 85eb79d2ddc..228db0a7584 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -39,6 +39,7 @@ import edu.harvard.iq.dataverse.util.JsfHelper; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.IOException; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -57,6 +58,9 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonValue; import javax.validation.ConstraintViolation; import org.primefaces.PrimeFaces; @@ -125,6 +129,8 @@ public class FilePage implements java.io.Serializable { ExternalToolServiceBean externalToolService; @EJB PrivateUrlServiceBean privateUrlService; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; @Inject DataverseRequestServiceBean dvRequestService; @@ -285,8 +291,15 @@ public void 
setDatasetVersionId(Long datasetVersionId) { this.datasetVersionId = datasetVersionId; } + // findPreviewTools would be a better name private List sortExternalTools(){ - List retList = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + List retList = new ArrayList<>(); + List previewTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + for (ExternalTool previewTool : previewTools) { + if (externalToolService.meetsRequirements(previewTool, file)) { + retList.add(previewTool); + } + } Collections.sort(retList, CompareExternalToolName); return retList; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java index b532fbd4154..42caa95b9f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java @@ -63,7 +63,9 @@ public Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null); JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); - tools.add(toolToJson); + if (externalToolService.meetsRequirements(tool, dataFile)) { + tools.add(toolToJson); + } } return ok(tools); } catch (WrappedResponse wr) { diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index 1789b7a90c3..0a238eb5198 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -39,6 +39,7 @@ public class ExternalTool implements Serializable { public static final String CONTENT_TYPE = 
"contentType"; public static final String TOOL_NAME = "toolName"; public static final String ALLOWED_API_CALLS = "allowedApiCalls"; + public static final String REQUIREMENTS = "requirements"; @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @@ -103,6 +104,15 @@ public class ExternalTool implements Serializable { @Column(nullable = true, columnDefinition = "TEXT") private String allowedApiCalls; + /** + * When non-null, the tool has indicated that it has certain requirements + * that must be met before it should be shown to the user. This + * functionality was added for tools that operate on aux files rather than + * data files so "auxFilesExist" is one of the possible values. + */ + @Column(nullable = true, columnDefinition = "TEXT") + private String requirements; + /** * This default constructor is only here to prevent this error at * deployment: @@ -118,10 +128,10 @@ public ExternalTool() { } public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType) { - this(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, null); + this(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, null, null); } - public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls) { + public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls, String requirements) { this.displayName = displayName; this.toolName = toolName; this.description = description; @@ -131,6 +141,7 @@ public ExternalTool(String displayName, String toolName, String description, Lis this.toolParameters = toolParameters; this.contentType = 
contentType; this.allowedApiCalls = allowedApiCalls; + this.requirements = requirements; } public enum Type { @@ -326,5 +337,12 @@ public void setAllowedApiCalls(String allowedApiCalls) { this.allowedApiCalls = allowedApiCalls; } + public String getRequirements() { + return requirements; + } + + public void setRequirements(String requirements) { + this.requirements = requirements; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index a65ad2427ba..f38cd7301ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse.externaltools; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -30,6 +32,8 @@ import static edu.harvard.iq.dataverse.externaltools.ExternalTool.*; import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.ejb.EJB; +import javax.json.JsonValue; @Stateless @Named @@ -40,6 +44,9 @@ public class ExternalToolServiceBean { @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + public List findAll() { TypedQuery typedQuery = em.createQuery("SELECT OBJECT(o) FROM ExternalTool AS o ORDER BY o.id", ExternalTool.class); return typedQuery.getResultList(); @@ -133,13 +140,13 @@ public ExternalTool save(ExternalTool externalTool) { * file supports The list of tools is passed in so it doesn't hit the * database each time */ - public static List findExternalToolsByFile(List allExternalTools, DataFile file) { + public List findExternalToolsByFile(List 
allExternalTools, DataFile file) { List externalTools = new ArrayList<>(); //Map tabular data to it's mimetype (the isTabularData() check assures that this code works the same as before, but it may need to change if tabular data is split into subtypes with differing mimetypes) final String contentType = file.isTabularData() ? DataFileServiceBean.MIME_TYPE_TSV_ALT : file.getContentType(); allExternalTools.forEach((externalTool) -> { - //Match tool and file type - if (contentType.equals(externalTool.getContentType())) { + //Match tool and file type, then check requirements + if (contentType.equals(externalTool.getContentType()) && meetsRequirements(externalTool, file)) { externalTools.add(externalTool); } }); @@ -147,6 +154,31 @@ public static List findExternalToolsByFile(List allE return externalTools; } + public boolean meetsRequirements(ExternalTool externalTool, DataFile dataFile) { + String requirements = externalTool.getRequirements(); + if (requirements == null) { + logger.fine("Data file id" + dataFile.getId() + ": no requirements for tool id " + externalTool.getId()); + return true; + } + boolean meetsRequirements = true; + JsonObject requirementsObj = JsonUtil.getJsonObject(requirements); + JsonArray auxFilesExist = requirementsObj.getJsonArray("auxFilesExist"); + for (JsonValue jsonValue : auxFilesExist) { + String formatTag = jsonValue.asJsonObject().getString("formatTag"); + String formatVersion = jsonValue.asJsonObject().getString("formatVersion"); + AuxiliaryFile auxFile = auxiliaryFileService.lookupAuxiliaryFile(dataFile, formatTag, formatVersion); + if (auxFile == null) { + logger.fine("Data file id" + dataFile.getId() + ": cannot find required aux file. formatTag=" + formatTag + ". formatVersion=" + formatVersion); + meetsRequirements = false; + break; + } else { + logger.fine("Data file id" + dataFile.getId() + ": found required aux file. formatTag=" + formatTag + ". 
formatVersion=" + formatVersion); + meetsRequirements = true; + } + } + return meetsRequirements; + } + public static ExternalTool parseAddExternalToolManifest(String manifest) { if (manifest == null || manifest.isEmpty()) { @@ -170,6 +202,7 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { JsonObject toolParametersObj = jsonObject.getJsonObject(TOOL_PARAMETERS); JsonArray queryParams = toolParametersObj.getJsonArray("queryParameters"); JsonArray allowedApiCallsArray = jsonObject.getJsonArray(ALLOWED_API_CALLS); + JsonObject requirementsObj = jsonObject.getJsonObject(REQUIREMENTS); boolean allRequiredReservedWordsFound = false; if (scope.equals(Scope.FILE)) { @@ -227,8 +260,12 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { if(allowedApiCallsArray !=null) { allowedApiCalls = allowedApiCallsArray.toString(); } + String requirements = null; + if (requirementsObj != null) { + requirements = requirementsObj.toString(); + } - return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, allowedApiCalls); + return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, allowedApiCalls, requirements); } private static String getRequiredTopLevelField(JsonObject jsonObject, String key) { diff --git a/src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql b/src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql new file mode 100644 index 00000000000..48230d21032 --- /dev/null +++ b/src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql @@ -0,0 +1 @@ +ALTER TABLE externaltool ADD COLUMN IF NOT EXISTS requirements TEXT; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 5508a6c57dc..cdebeddb7bc 100644 --- 
a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -3,8 +3,11 @@ import com.jayway.restassured.RestAssured; import com.jayway.restassured.path.json.JsonPath; import com.jayway.restassured.response.Response; +import java.io.File; import java.io.IOException; import java.io.StringReader; +import java.nio.file.Path; +import java.nio.file.Paths; import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; @@ -442,4 +445,122 @@ public void createToolSpreadsheetViewer() { .statusCode(OK.getStatusCode()); } + @Test + public void testFileLevelToolWithAuxFileReq() throws IOException { + + // Delete all external tools before testing. + Response getTools = UtilIT.getExternalTools(); + getTools.prettyPrint(); + getTools.then().assertThat() + .statusCode(OK.getStatusCode()); + String body = getTools.getBody().asString(); + JsonReader bodyObject = Json.createReader(new StringReader(body)); + JsonArray tools = bodyObject.readObject().getJsonArray("data"); + for (int i = 0; i < tools.size(); i++) { + JsonObject tool = tools.getJsonObject(i); + int id = tool.getInt("id"); + Response deleteExternalTool = UtilIT.deleteExternalTool(id); + deleteExternalTool.prettyPrint(); + } + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + 
createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + + // Not really an HDF5 file. Just random bytes. But the file extension makes it detected as HDF5. + Path pathToFalseHdf5 = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "false.hdf5"); + byte[] bytes = {1, 2, 3, 4, 5}; + java.nio.file.Files.write(pathToFalseHdf5, bytes); + + Response uploadFalseHdf5 = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFalseHdf5.toString(), apiToken); + uploadFalseHdf5.prettyPrint(); + uploadFalseHdf5.then().assertThat() + .statusCode(OK.getStatusCode()); + + Integer falseHdf5 = JsonPath.from(uploadFalseHdf5.getBody().asString()).getInt("data.files[0].dataFile.id"); + + String pathToTrueHdf5 = "src/test/resources/hdf/hdf5/vlen_string_dset"; + Response uploadTrueHdf5 = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTrueHdf5, apiToken); + uploadTrueHdf5.prettyPrint(); + uploadTrueHdf5.then().assertThat() + .statusCode(OK.getStatusCode()); + + Integer trueHdf5 = JsonPath.from(uploadTrueHdf5.getBody().asString()).getInt("data.files[0].dataFile.id"); + + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add("displayName", "HDF5 Tool"); + job.add("description", "Operates on HDF5 files"); + job.add("types", Json.createArrayBuilder().add("preview")); + job.add("scope", "file"); + job.add("contentType", "application/x-hdf5"); + job.add("toolUrl", "/dataexplore/dataverse-previewers/previewers/v1.3/TextPreview.html"); + job.add("toolParameters", Json.createObjectBuilder() + .add("queryParameters", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("fileid", "{fileId}") + .build()) + .add(Json.createObjectBuilder() + .add("siteUrl", "{siteUrl}") + .build()) + .add(Json.createObjectBuilder() + .add("key", "{apiToken}") + .build()) + .build()) + .build()); + job.add("requirements", Json.createObjectBuilder() + .add("auxFilesExist", 
Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("formatTag", "NcML") + .add("formatVersion", "0.1") + ) + ) + ); + Response addExternalTool = UtilIT.addExternalTool(job.build()); + addExternalTool.prettyPrint(); + addExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.displayName", CoreMatchers.equalTo("HDF5 Tool")); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + Response getTool = UtilIT.getExternalTool(toolId); + getTool.prettyPrint(); + getTool.then().assertThat() + .body("data.scope", CoreMatchers.equalTo("file")) + .statusCode(OK.getStatusCode()); + + // No tools for false HDF5 file. Aux file couldn't be extracted. Doesn't meet requirements. + Response getToolsForFalseHdf5 = UtilIT.getExternalToolsForFile(falseHdf5.toString(), "preview", apiToken); + getToolsForFalseHdf5.prettyPrint(); + getToolsForFalseHdf5.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data", Matchers.hasSize(0)); + + // The tool shows for a true HDF5 file. The NcML aux file is available. Requirements met. 
+ Response getToolsForTrueHdf5 = UtilIT.getExternalToolsForFile(trueHdf5.toString(), "preview", apiToken); + getToolsForTrueHdf5.prettyPrint(); + getToolsForTrueHdf5.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].displayName", CoreMatchers.equalTo("HDF5 Tool")) + .body("data[0].scope", CoreMatchers.equalTo("file")) + .body("data[0].contentType", CoreMatchers.equalTo("application/x-hdf5")); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 74e10d67352..631c22d959b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java @@ -19,7 +19,10 @@ public class ExternalToolServiceBeanTest { + private final ExternalToolServiceBean externalToolService; + public ExternalToolServiceBeanTest() { + this.externalToolService = new ExternalToolServiceBean(); } @Test @@ -49,7 +52,7 @@ public void testfindAll() { ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); List externalTools = new ArrayList<>(); externalTools.add(externalTool); - List availableExternalTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile); + List availableExternalTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); assertEquals(availableExternalTools.size(), 1); } @@ -544,4 +547,67 @@ protected static ExternalTool getAllowedApiCallsTool() { return ExternalToolServiceBean.parseAddExternalToolManifest(tool); } + + @Test + public void testParseAddFileToolRequireAuxFile() { + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add("displayName", "AwesomeTool"); + job.add("toolName", "explorer"); + job.add("description", "This tool is awesome."); + job.add("types", 
Json.createArrayBuilder().add("explore")); + job.add("scope", "file"); + job.add("hasPreviewMode", "false"); + job.add("toolUrl", "http://awesometool.com"); + job.add("toolParameters", Json.createObjectBuilder() + .add("queryParameters", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("filePid", "{filePid}") + .build()) + .add(Json.createObjectBuilder() + .add("key", "{apiToken}") + .build()) + .add(Json.createObjectBuilder() + .add("fileMetadataId", "{fileMetadataId}") + .build()) + .add(Json.createObjectBuilder() + .add("dvLocale", "{localeCode}") + .build()) + .build()) + .build()); + job.add("requirements", Json.createObjectBuilder() + .add("auxFilesExist", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("formatTag", "NcML") + .add("formatVersion", "0.1") + ) + ) + ); + job.add(ExternalTool.CONTENT_TYPE, DataFileServiceBean.MIME_TYPE_TSV_ALT); + String tool = job.build().toString(); + ExternalTool externalTool = ExternalToolServiceBean.parseAddExternalToolManifest(tool); + assertEquals("AwesomeTool", externalTool.getDisplayName()); + assertEquals("explorer", externalTool.getToolName()); + assertEquals("{\"auxFilesExist\":[{\"formatTag\":\"NcML\",\"formatVersion\":\"0.1\"}]}", externalTool.getRequirements()); + /* + DataFile dataFile = new DataFile(); + dataFile.setId(42l); + dataFile.setGlobalId(new GlobalId("doi:10.5072/FK2/RMQT6J/G9F1A1")); + FileMetadata fmd = new FileMetadata(); + fmd.setId(2L); + DatasetVersion dv = new DatasetVersion(); + Dataset ds = new Dataset(); + dv.setDataset(ds); + fmd.setDatasetVersion(dv); + List fmdl = new ArrayList(); + fmdl.add(fmd); + dataFile.setFileMetadatas(fmdl); + ApiToken apiToken = new ApiToken(); + apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, "fr"); + String toolUrl = externalToolHandler.getToolUrlWithQueryParams(); + System.out.println("result: " + 
toolUrl); + assertEquals("http://awesometool.com?filePid=doi:10.5072/FK2/RMQT6J/G9F1A1&key=7196b5ce-f200-4286-8809-03ffdbc255d7&fileMetadataId=2&dvLocale=fr", toolUrl); +*/ + } + } From d2e14f06bb57da8a9ec8383b178c7bca8f94d148 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 20 Dec 2022 10:18:49 -0500 Subject: [PATCH 151/322] remove cruft #9153 --- .../ExternalToolServiceBeanTest.java | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 631c22d959b..3885c9b358c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java @@ -588,26 +588,6 @@ public void testParseAddFileToolRequireAuxFile() { assertEquals("AwesomeTool", externalTool.getDisplayName()); assertEquals("explorer", externalTool.getToolName()); assertEquals("{\"auxFilesExist\":[{\"formatTag\":\"NcML\",\"formatVersion\":\"0.1\"}]}", externalTool.getRequirements()); - /* - DataFile dataFile = new DataFile(); - dataFile.setId(42l); - dataFile.setGlobalId(new GlobalId("doi:10.5072/FK2/RMQT6J/G9F1A1")); - FileMetadata fmd = new FileMetadata(); - fmd.setId(2L); - DatasetVersion dv = new DatasetVersion(); - Dataset ds = new Dataset(); - dv.setDataset(ds); - fmd.setDatasetVersion(dv); - List fmdl = new ArrayList(); - fmdl.add(fmd); - dataFile.setFileMetadatas(fmdl); - ApiToken apiToken = new ApiToken(); - apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); - ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, "fr"); - String toolUrl = externalToolHandler.getToolUrlWithQueryParams(); - System.out.println("result: " + toolUrl); - 
assertEquals("http://awesometool.com?filePid=doi:10.5072/FK2/RMQT6J/G9F1A1&key=7196b5ce-f200-4286-8809-03ffdbc255d7&fileMetadataId=2&dvLocale=fr", toolUrl); -*/ } } From 6b0b40abd3b1acfc4239d3cbbed41e314a6325f3 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 22 Dec 2022 09:37:40 -0500 Subject: [PATCH 152/322] #8339 prelim checkin --- .../edu/harvard/iq/dataverse/api/Files.java | 45 +++++++++++++++++++ .../iq/dataverse/util/json/JsonPrinter.java | 21 ++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index af0f6be6d32..21379f0c286 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -452,6 +452,51 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, .build(); } + @GET + @Path("{id}") + public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { + DataverseRequest req; + try { + req = createDataverseRequest(findUserOrDie()); + } catch (Exception e) { + return error(BAD_REQUEST, "Error attempting to request information. Maybe a bad API token?"); + } + final DataFile df; + try { + df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + } catch (Exception e) { + return error(BAD_REQUEST, "Error attempting get the requested data file."); + } + FileMetadata fm; + + if(null != getDraft && getDraft) { + try { + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + } catch (WrappedResponse w) { + return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset." 
); + } + if(null == fm) { + return error(BAD_REQUEST, "No draft availabile for this dataset"); + } + } else { + fm = df.getLatestPublishedFileMetadata(); + MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + mdcLogService.logEntry(entry); + } + + String jsonString = fm.asGsonObject(true).toString(); + + return Response + .status(Response.Status.OK) + .entity(jsonString) + .type(MediaType.TEXT_PLAIN) //Our plain text string is already json + .build(); + + /* + curl "http://localhost:8080/api/datasets/:persistentId/versions/2.0?persistentId=doi:10.5072/FK2/SDHST6" + */ + } + @GET @Path("{id}/metadata") public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index dc547f2e52c..0ebd7f18f15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -460,7 +460,7 @@ public static JsonArrayBuilder jsonFileMetadatas(Collection fmds) return filesArr; } - + public static JsonObjectBuilder json(DatasetDistributor dist) { return jsonObjectBuilder() .add("displayOrder", dist.getDisplayOrder()) @@ -631,6 +631,24 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { } JsonObjectBuilder embargo = df.getEmbargo() != null ? 
JsonPrinter.json(df.getEmbargo()) : null; + + /* + Dataset + {"label":"file241.txt","restricted":false,"version":1,"datasetVersionId":751, + "dataFile":{"id":1618,"persistentId":"","pidURL":"","filename":"file241.txt", + "contentType":"text/plain","filesize":28,"description":"file description for file 241","storageIdentifier":"file://185354db7ae-85e105e89721", + "rootDataFileId":-1, + "checksum":{"type":"SHA-1","value":"585831cecbaf7e2de25e46799475edc6619cb73c"}, + "creationDate":"2022-12-21"}}, + + File + + +{"label":"file241.txt", + "description":"file description for file 241","restricted":false, + "categories":["Custom Tag for file 241","Data"],"id":892}% + + */ return jsonObjectBuilder() .add("id", df.getId()) @@ -640,6 +658,7 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { .add("contentType", df.getContentType()) .add("filesize", df.getFilesize()) .add("description", df.getDescription()) + .add("categories", getFileCategories(fileMetadata)) .add("embargo", embargo) //.add("released", df.isReleased()) //.add("restricted", df.isRestricted()) From e4efe4ac3601b078ee2d08f157be30362ca2cc41 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 22 Dec 2022 11:56:40 -0500 Subject: [PATCH 153/322] typo --- src/main/java/edu/harvard/iq/dataverse/api/Access.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 47ff1e94303..3634bf3b4ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -1771,7 +1771,7 @@ private boolean isAccessAuthorized(DataFile df) { return true; } - //For permissions check decide if we havce a session user, or an API user + //For permissions check decide if we have a session user, or an API user User user = null; /** From 49ab1618e6457388ac1b493e158badddfea0ee02 Mon Sep 17 00:00:00 2001 From: qqmyers Date: 
Thu, 22 Dec 2022 12:41:22 -0500 Subject: [PATCH 154/322] include comments from PR --- .../edu/harvard/iq/dataverse/api/Access.java | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 3634bf3b4ae..3bd0a19672b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -206,10 +206,11 @@ public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released + //This calls findUserOrDie which will retrieve the key param or api token header, or the workflow token header. User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); guestbookResponseService.save(gbr); - MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); mdcLogService.logEntry(entry); } @@ -1779,7 +1780,16 @@ private boolean isAccessAuthorized(DataFile df) { */ User apiTokenUser = null; - //If we get a non-GuestUser from findUserOrDie, use it. Otherwise, check the session + + /* + * The logic looks for an apitoken authenticated user and uses it if it exists. + * If not, and a session user exists, we use that. If the apitoken method + * indicates a GuestUser, we will use that if there's no session. + * + * This is currently the only API call that supports sessions. If the rest of + * the API is opened up, the custom logic here wouldn't be needed. 
+ */ + try { logger.fine("calling apiTokenUser = findUserOrDie()..."); apiTokenUser = findUserOrDie(); @@ -1813,9 +1823,12 @@ private boolean isAccessAuthorized(DataFile df) { return false; } - // OK, let's revisit the case of non-restricted files, this time in - // an unpublished version: - // (if (published) was already addressed above) + /* + * Since published and not restricted/embargoed is handled above, the main split + * now is whether it is published or not. If it's published, the only case left + * is with restricted/embargoed. With unpublished, both the restricted/embargoed + * and not restricted/embargoed both get handled the same way. + */ DataverseRequest dvr = null; if (apiTokenUser != null) { @@ -1828,6 +1841,7 @@ private boolean isAccessAuthorized(DataFile df) { // If the file is not published, they can still download the file, if the user // has the permission to view unpublished versions: + // This line handles all three authenticated session user, token user, and guest cases. if (permissionService.requestOn(dvr, df.getOwner()).has(Permission.ViewUnpublishedDataset)) { // it's not unthinkable, that a GuestUser could be given // the ViewUnpublished permission! @@ -1837,7 +1851,7 @@ private boolean isAccessAuthorized(DataFile df) { return true; } } else { // published and restricted and/or embargoed - + // This line also handles all three authenticated session user, token user, and guest cases. if (permissionService.requestOn(dvr, df).has(Permission.DownloadFile)) { return true; } @@ -1859,6 +1873,11 @@ private User findAPITokenUser() { try { logger.fine("calling apiTokenUser = findUserOrDie()..."); apiTokenUser = findUserOrDie(); + /* + * The idea here is to not let a guest user returned from findUserOrDie (which + * happens when there is no key/token, and which we want if there's no session) + * from overriding an authenticated session user. 
+ */ if(apiTokenUser instanceof GuestUser) { if(session!=null && session.getUser()!=null) { //The apiTokenUser, if set, will override the sessionUser in permissions calcs, so set it to null if we have a session user From 24a0b3e4b437c7896e35ad9a94683d578809d32d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 22 Dec 2022 14:11:06 -0500 Subject: [PATCH 155/322] add NcML previewer to guides (merged upstream) #9153 Merged: https://github.com/gdcc/dataverse-previewers/pull/18 --- .../source/_static/admin/dataverse-external-tools.tsv | 2 +- doc/sphinx-guides/source/user/dataset-management.rst | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index fd1f0f27bc5..16623a6aeec 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -1,5 +1,5 @@ Tool Type Scope Description Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse. Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide `_. -File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, and ZipFiles - allowing them to be viewed without downloading the file. 
The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers +File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions. 
diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index e891ca72880..0c9c7c9e3c7 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -177,11 +177,15 @@ File Handling Certain file types in the Dataverse installation are supported by additional functionality, which can include downloading in different formats, previews, file-level metadata preservation, file-level data citation; and exploration through data visualization and analysis. See the sections below for information about special functionality for specific file types. +.. _file-previews: + File Previews ------------- Dataverse installations can add previewers for common file types uploaded by their research communities. The previews appear on the file page. If a preview tool for a specific file type is available, the preview will be created and will display automatically, after terms have been agreed to or a guestbook entry has been made, if necessary. File previews are not available for restricted files unless they are being accessed using a Private URL. See also :ref:`privateurl`. +Installation of previewers is explained in the :doc:`/admin/external-tools` section of in the Admin Guide. + Tabular Data Files ------------------ @@ -302,7 +306,7 @@ Metadata found in the header section of `Flexible Image Transport System (FITS) NetCDF and HDF5 --------------- -For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) A previewer for these NcML files is available (see :ref:`file-previews`). .. 
_NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html From 54db081b3c8ad69cb7bc73d679afcd005a9ecadc Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 3 Jan 2023 08:18:36 -0700 Subject: [PATCH 156/322] call out future versions --- doc/sphinx-guides/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index 0cd01b8a5a7..de4c8107fbb 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -6,7 +6,7 @@ Dataverse Documentation v. |version| ==================================== -These documentation guides are for the |version| version of Dataverse. To find guides belonging to previous versions, :ref:`guides_versions` has a list of all available versions. +These documentation guides are for the |version| version of Dataverse. To find guides belonging to previous or future versions, :ref:`guides_versions` has a list of all available versions. .. toctree:: :glob: From 1202dc72610e1272faaa2e4a81952c3be6e219a6 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 3 Jan 2023 08:26:09 -0700 Subject: [PATCH 157/322] add link to development documentation --- doc/sphinx-guides/source/versions.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index e0a344de9a1..138e7516ae1 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -4,8 +4,9 @@ Dataverse Software Documentation Versions ========================================= -This list provides a way to refer to the documentation for previous versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. +This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. 
In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. +- `develop Git branch `__ - 5.12.1 - `5.12 `__ - `5.11.1 `__ From 057469639c953834c3783d279aaa2a9838f8cbe8 Mon Sep 17 00:00:00 2001 From: Patrick Carlson Date: Tue, 3 Jan 2023 08:56:55 -0700 Subject: [PATCH 158/322] add link and comment about Github action building documentation for each PR --- doc/sphinx-guides/source/developers/documentation.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/developers/documentation.rst b/doc/sphinx-guides/source/developers/documentation.rst index b20fd112533..c89ed6e3b75 100755 --- a/doc/sphinx-guides/source/developers/documentation.rst +++ b/doc/sphinx-guides/source/developers/documentation.rst @@ -22,6 +22,8 @@ That's it! Thank you for your contribution! Your pull request will be added manu Please see https://github.com/IQSS/dataverse/pull/5857 for an example of a quick fix that was merged (the "Files changed" tab shows how a typo was fixed). +Preview your documentation changes which will be built automatically as part of your pull request in Github. It will show up as a check entitled: `docs/readthedocs.org:dataverse-guide — Read the Docs build succeeded!`. For example, this PR built to https://dataverse-guide--9249.org.readthedocs.build/en/9249/. + If you would like to read more about the Dataverse Project's use of GitHub, please see the :doc:`version-control` section. For bug fixes and features we request that you create an issue before making a pull request but this is not at all necessary for quick fixes to the documentation. .. 
_admin: https://github.com/IQSS/dataverse/tree/develop/doc/sphinx-guides/source/admin From affff399dae8914a17cb6139f8b452bd10c519bc Mon Sep 17 00:00:00 2001 From: Philipp Conzett Date: Wed, 4 Jan 2023 16:19:27 +0100 Subject: [PATCH 159/322] Make productionPlace multiple and facetable #9253 --- scripts/api/data/metadatablocks/citation.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 1b1ff0ae819..bdcc6956f61 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -43,7 +43,7 @@ producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation producerLogoURL Logo URL The URL of the producer's logo https:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 42 FALSE FALSE FALSE FALSE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 42 FALSE FALSE TRUE TRUE FALSE FALSE citation contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation From ff61a9a9600a390a87acbb99be62ec2ecbb9dacb Mon Sep 17 00:00:00 2001 From: Philipp Conzett Date: Thu, 5 Jan 2023 06:38:37 +0100 Subject: [PATCH 160/322] Added release notes for PR #9254 Please feel free not use these notes and just list the change under Complete List of Changes. 
--- doc/release-notes/9253-productionPlace.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 doc/release-notes/9253-productionPlace.md diff --git a/doc/release-notes/9253-productionPlace.md b/doc/release-notes/9253-productionPlace.md new file mode 100644 index 00000000000..d43ad04b398 --- /dev/null +++ b/doc/release-notes/9253-productionPlace.md @@ -0,0 +1,5 @@ +## Metadata field Production Place now repeatable and facetable +This enhancement allows depositors to define multiple instances of the metadata field Production Place in the Citation Metadata block. + +## Major Use Cases and Infrastructure Enhancements +* Data contained in a dataset may have been produced at multiple places. Making the field Production Place repeatable will make it possible to reflect this fact in the dataset metadata. Making the field facetable will allow us to customize Dataverse collections more appropriately. (Issue #9253, PR #9254) \ No newline at end of file From 36f3d39284dc68a43a45b5d1a3db93db0d4fd044 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 5 Jan 2023 12:19:15 -0500 Subject: [PATCH 161/322] add release note #1249 --- doc/release-notes/1249-collapse_dataverse_description.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/1249-collapse_dataverse_description.md diff --git a/doc/release-notes/1249-collapse_dataverse_description.md b/doc/release-notes/1249-collapse_dataverse_description.md new file mode 100644 index 00000000000..8fe933005de --- /dev/null +++ b/doc/release-notes/1249-collapse_dataverse_description.md @@ -0,0 +1 @@ +Long descriptions for collections are now truncated but can be expanded to read the full description. 
From e2066c854c534193b9fa9651a6a02bae82857e07 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 5 Jan 2023 16:09:55 -0500 Subject: [PATCH 162/322] add extractNcml API endpoint #9153 --- doc/release-notes/9153-extract-metadata.md | 2 + doc/sphinx-guides/source/api/native-api.rst | 41 ++++++ .../source/user/dataset-management.rst | 2 + .../edu/harvard/iq/dataverse/api/Files.java | 21 +++ .../dataverse/ingest/IngestServiceBean.java | 139 +++++++++++++----- .../harvard/iq/dataverse/api/NetcdfIT.java | 125 ++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 18 ++- 7 files changed, 304 insertions(+), 44 deletions(-) diff --git a/doc/release-notes/9153-extract-metadata.md b/doc/release-notes/9153-extract-metadata.md index ce4cc714805..be21c5ed739 100644 --- a/doc/release-notes/9153-extract-metadata.md +++ b/doc/release-notes/9153-extract-metadata.md @@ -1 +1,3 @@ For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML (XML) format and save it as an auxiliary file. + +An "extractNcml" API endpoint has been added, especially for installations with existing NetCDF and HDF5 files. After upgrading, they can iterate through these files and try to extract an NcML file. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 76ca38fdc70..40011a7d175 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2248,6 +2248,47 @@ Currently the following methods are used to detect file types: - The file extension (e.g. ".ipybn") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. - The file name (e.g. "Dockerfile") is used, defined in a file called ``MimeTypeDetectionByFileName.properties``. +.. 
_extractNcml: + +Extract NcML +~~~~~~~~~~~~ + +As explained in the :ref:`netcdf-and-hdf5` section of the User Guide, when those file types are uploaded, an attempt is made to extract an NcML file from them and store it as an auxiliary file. + +This happens automatically but superusers can also manually trigger this NcML extraction process with the API endpoint below. + +Note that "true" will be returned if an NcML file was created. "false" will be returned if there was an error or if the NcML file already exists (check server.log for details). + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/extractNcml" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/extractNcml" + +A curl example using a PID: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/extractNcml?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/extractNcml?persistentId=doi:10.5072/FK2/AAA000" + Replacing Files ~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 0c9c7c9e3c7..1da31707749 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -303,6 +303,8 @@ Astronomy (FITS) Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. +.. _netcdf-and-hdf5: + NetCDF and HDF5 --------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index af0f6be6d32..6cdbcf82c1b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -625,6 +625,27 @@ public Response redetectDatafile(@PathParam("id") String id, @QueryParam("dryRun } } + @Path("{id}/extractNcml") + @POST + public Response extractNcml(@PathParam("id") String id) { + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + // We can always make a command in the future if there's a need + // for non-superusers to call this API. 
+ return error(Response.Status.FORBIDDEN, "This API call can be used by superusers only"); + } + DataFile dataFileIn = findDataFileOrDie(id); + java.nio.file.Path tempLocationPath = null; + boolean successOrFail = ingestService.extractMetadataNcml(dataFileIn, tempLocationPath); + NullSafeJsonBuilder result = NullSafeJsonBuilder.jsonObjectBuilder() + .add("result", successOrFail); + return ok(result); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + /** * Attempting to run metadata export, for all the formats for which we have * metadata Exporters. diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b5934c1167f..f3fc56a54aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -242,43 +242,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); // TODO: reformat this file to remove the many tabs added in cc08330 - InputStream inputStream = null; - if (tempLocationPath != null) { - try ( NetcdfFile netcdfFile = NetcdfFiles.open(tempLocationPath.toString())) { - if (netcdfFile != null) { - // For now, empty string. What should we pass as a URL to toNcml()? The filename (including the path) most commonly at https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_cookbook.html - // With an empty string the XML will show 'location="file:"'. - String ncml = netcdfFile.toNcml(""); - inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); - } else { - logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + " (null returned)."); - } - } catch (IOException ex) { - logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + ". 
Exception caught: " + ex); - } - } else { - logger.info("tempLocationPath is null for file id " + dataFile.getId() + ". Can't extract NcML."); - } - if (inputStream != null) { - // If you change NcML, you must also change the previewer. - String formatTag = "NcML"; - // 0.1 is arbitrary. It's our first attempt to put out NcML so we're giving it a low number. - // If you bump the number here, be sure the bump the number in the previewer as well. - // We could use 2.2 here since that's the current version of NcML. - String formatVersion = "0.1"; - String origin = "netcdf-java"; - boolean isPublic = true; - // See also file.auxfiles.types.NcML in Bundle.properties. Used to group aux files in UI. - String type = "NcML"; - // XML because NcML doesn't have its own MIME/content type at https://www.iana.org/assignments/media-types/media-types.xhtml - MediaType mediaType = new MediaType("text", "xml"); - try { - AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, false); - logger.fine ("Aux file extracted from NetCDF/HDF5 file saved to storage (but not to the database yet) from file id " + dataFile.getId()); - } catch (Exception ex) { - logger.info("exception throw calling processAuxiliaryFile: " + ex); - } - } + extractMetadataNcml(dataFile, tempLocationPath); } catch (IOException ioex) { logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); @@ -392,7 +356,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, try { // FITS is the only type supported for metadata // extraction, as of now. -- L.A. 4.0 - // Consider adding other formats such as NetCDF/HDF5. + // Note that extractMetadataNcml() is used for NetCDF/HDF5. 
dataFile.setContentType("application/fits"); metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); } catch (IOException mex) { @@ -1255,7 +1219,104 @@ public boolean extractMetadata(String tempFileLocation, DataFile dataFile, Datas return ingestSuccessful; } - + /** + * @param dataFile The DataFile from which to attempt NcML extraction + * (NetCDF or HDF5 format) + * @param tempLocationPath Null if the file is already saved to permanent + * storage. Otherwise, the path to the temp location of the files, as during + * initial upload. + * @return True if the Ncml files was created. False on any error or if the + * NcML file already exists. + */ + public boolean extractMetadataNcml(DataFile dataFile, Path tempLocationPath) { + boolean ncmlFileCreated = false; + logger.fine("extractMetadataNcml: dataFileIn: " + dataFile + ". tempLocationPath: " + tempLocationPath); + InputStream inputStream = null; + String dataFileLocation = null; + if (tempLocationPath != null) { + // This file was just uploaded and hasn't been saved to S3 or local storage. + dataFileLocation = tempLocationPath.toString(); + } else { + // This file is already on S3 or local storage. + File tempFile = null; + File localFile; + StorageIO storageIO; + try { + storageIO = dataFile.getStorageIO(); + storageIO.open(); + if (storageIO.isLocalFile()) { + localFile = storageIO.getFileSystemPath().toFile(); + dataFileLocation = localFile.getAbsolutePath(); + logger.fine("extractMetadataNcml: file is local. 
Path: " + dataFileLocation); + } else { + // Need to create a temporary local file: + tempFile = File.createTempFile("tempFileExtractMetadataNcml", ".tmp"); + try ( ReadableByteChannel targetFileChannel = (ReadableByteChannel) storageIO.getReadChannel(); FileChannel tempFileChannel = new FileOutputStream(tempFile).getChannel();) { + tempFileChannel.transferFrom(targetFileChannel, 0, storageIO.getSize()); + } + dataFileLocation = tempFile.getAbsolutePath(); + logger.fine("extractMetadataNcml: file is on S3. Downloaded and saved to temp path: " + dataFileLocation); + } + } catch (IOException ex) { + logger.info("While attempting to extract NcML, could not use storageIO for data file id " + dataFile.getId() + ". Exception: " + ex); + } + } + if (dataFileLocation != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(dataFileLocation)) { + logger.fine("trying to open " + dataFileLocation); + if (netcdfFile != null) { + // For now, empty string. What should we pass as a URL to toNcml()? The filename (including the path) most commonly at https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_cookbook.html + // With an empty string the XML will show 'location="file:"'. + String ncml = netcdfFile.toNcml(""); + inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); + } else { + logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + " (null returned)."); + } + } catch (IOException ex) { + logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + ". Exception caught: " + ex); + } + } else { + logger.info("dataFileLocation is null for file id " + dataFile.getId() + ". Can't extract NcML."); + } + if (inputStream != null) { + // If you change NcML, you must also change the previewer. + String formatTag = "NcML"; + // 0.1 is arbitrary. It's our first attempt to put out NcML so we're giving it a low number. + // If you bump the number here, be sure the bump the number in the previewer as well. 
+ // We could use 2.2 here since that's the current version of NcML. + String formatVersion = "0.1"; + String origin = "netcdf-java"; + boolean isPublic = true; + // See also file.auxfiles.types.NcML in Bundle.properties. Used to group aux files in UI. + String type = "NcML"; + // XML because NcML doesn't have its own MIME/content type at https://www.iana.org/assignments/media-types/media-types.xhtml + MediaType mediaType = new MediaType("text", "xml"); + try { + // Let the cascade do the save if the file isn't yet on permanent storage. + boolean callSave = false; + if (tempLocationPath == null) { + callSave = true; + // Check for an existing NcML file + logger.fine("Checking for existing NcML aux file for file id " + dataFile.getId()); + AuxiliaryFile existingAuxiliaryFile = auxiliaryFileService.lookupAuxiliaryFile(dataFile, formatTag, formatVersion); + if (existingAuxiliaryFile != null) { + logger.fine("Aux file already exists for NetCDF/HDF5 file for file id " + dataFile.getId()); + return false; + } + } + AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, callSave); + logger.fine("Aux file extracted from NetCDF/HDF5 file saved to storage (but not to the database yet) from file id " + dataFile.getId()); + ncmlFileCreated = true; + } catch (Exception ex) { + logger.info("exception throw calling processAuxiliaryFile: " + ex); + } + } else { + logger.info("extractMetadataNcml: input stream is null! 
dataFileLocation was " + dataFileLocation); + } + + return ncmlFileCreated; + } + private void processDatasetMetadata(FileMetadataIngest fileMetadataIngest, DatasetVersion editVersion) throws IOException { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java index 74179b98833..9716e7aca13 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java @@ -3,9 +3,16 @@ import com.jayway.restassured.RestAssured; import com.jayway.restassured.path.json.JsonPath; import com.jayway.restassured.response.Response; +import java.io.File; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.FORBIDDEN; +import static javax.ws.rs.core.Response.Status.NOT_FOUND; import static javax.ws.rs.core.Response.Status.OK; +import org.hamcrest.CoreMatchers; +import static org.hamcrest.CoreMatchers.equalTo; import org.junit.BeforeClass; import org.junit.Test; @@ -53,5 +60,123 @@ public void testNmclFromNetcdf() throws IOException { downloadNcml.then().assertThat() .statusCode(OK.getStatusCode()) .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + + Response deleteNcml = UtilIT.deleteAuxFile(fileId, tag, version, apiToken); + deleteNcml.prettyPrint(); + deleteNcml.then().assertThat().statusCode(OK.getStatusCode()); + + Response downloadNcmlShouldFail = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldFail.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); + + Response extractNcml = UtilIT.extractNcml(fileId, apiToken); + extractNcml.prettyPrint(); + extractNcml.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response downloadNcmlShouldWork = 
UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldWork.then().assertThat() + .statusCode(OK.getStatusCode()); + } + + @Test + public void testNmclFromNetcdfErrorChecking() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createUserRandom = UtilIT.createRandomUser(); + createUserRandom.then().assertThat().statusCode(OK.getStatusCode()); + String apiTokenRandom = UtilIT.getApiTokenFromResponse(createUserRandom); + + String apiTokenNull = null; + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "src/test/resources/netcdf/madis-raob"; + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); + String tag = "NcML"; + String version = "0.1"; + + Response downloadNcmlFail = UtilIT.downloadAuxFile(fileId, tag, version, apiTokenNull); + downloadNcmlFail.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()); + + Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, 
apiToken); + downloadNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + + Response deleteNcml = UtilIT.deleteAuxFile(fileId, tag, version, apiToken); + deleteNcml.prettyPrint(); + deleteNcml.then().assertThat().statusCode(OK.getStatusCode()); + + Response downloadNcmlShouldFail = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldFail.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + Response extractNcmlFailRandomUser = UtilIT.extractNcml(fileId, apiTokenRandom); + extractNcmlFailRandomUser.prettyPrint(); + extractNcmlFailRandomUser.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()); + + UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); + + Response extractNcml = UtilIT.extractNcml(fileId, apiToken); + extractNcml.prettyPrint(); + extractNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(true)); + + Response downloadNcmlShouldWork = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldWork.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response extractNcmlFailExistsAlready = UtilIT.extractNcml(fileId, apiToken); + extractNcmlFailExistsAlready.prettyPrint(); + extractNcmlFailExistsAlready.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(false)); + + Path pathToTxt = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "file.txt"); + String contentOfTxt = "Just a text file. 
Don't expect NcML out!"; + java.nio.file.Files.write(pathToTxt, contentOfTxt.getBytes()); + + Response uploadFileTxt = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTxt.toString(), apiToken); + uploadFileTxt.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("file.txt")); + + long fileIdTxt = JsonPath.from(uploadFileTxt.body().asString()).getLong("data.files[0].dataFile.id"); + + Response extractNcmlFailText = UtilIT.extractNcml(fileIdTxt, apiToken); + extractNcmlFailText.prettyPrint(); + extractNcmlFailText.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(false)); + + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 12ccaf2caff..36dce2978fa 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -743,10 +743,11 @@ static Response uploadAuxFile(Long fileId, String pathToFile, String formatTag, } static Response downloadAuxFile(Long fileId, String formatTag, String formatVersion, String apiToken) { - Response response = given() - .header(API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/access/datafile/" + fileId + "/auxiliary/" + formatTag + "/" + formatVersion); - return response; + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/access/datafile/" + fileId + "/auxiliary/" + formatTag + "/" + formatVersion); } static Response listAuxFilesByOrigin(Long fileId, String origin, String apiToken) { @@ -1170,7 +1171,14 @@ public static Response uningestFile(Long fileId, String apiToken) { .post("/api/files/" + fileId + "/uningest/?key=" + apiToken); return uningestFileResponse; } - + + public static Response extractNcml(Long fileId, String apiToken) { + Response response = given() 
+ .header(API_TOKEN_HTTP_HEADER, apiToken) + .post("/api/files/" + fileId + "/extractNcml"); + return response; + } + //I don't understand why this blows up when I remove the key public static Response getDataFileMetadata(Long fileId, String apiToken) { Response fileResponse = given() From 268fb3ef784903350de4609e3d68f7259aecdeee Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 5 Jan 2023 16:43:39 -0500 Subject: [PATCH 163/322] #8339 add tests/handle draft --- .../edu/harvard/iq/dataverse/api/Files.java | 78 ++++++++++--------- .../iq/dataverse/util/json/JsonPrinter.java | 3 +- .../edu/harvard/iq/dataverse/api/FilesIT.java | 29 +++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 6 ++ 4 files changed, 80 insertions(+), 36 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 21379f0c286..de3c79b4a7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -43,6 +43,7 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import java.io.InputStream; @@ -55,6 +56,7 @@ import javax.ejb.EJB; import javax.inject.Inject; import javax.json.Json; +import javax.json.JsonObjectBuilder; import javax.json.JsonReader; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.Consumes; @@ -454,47 +456,53 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, @GET @Path("{id}") - public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { + public 
Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { DataverseRequest req; + try { + req = createDataverseRequest(findUserOrDie()); + } catch (Exception e) { + return error(BAD_REQUEST, "Error attempting to request information. Maybe a bad API token?"); + } + final DataFile df; + try { + df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + } catch (Exception e) { + return error(BAD_REQUEST, "Error attempting get the requested data file."); + } + FileMetadata fm; + //first get latest published + //if not available get draft if permissible + try { + + fm = df.getLatestPublishedFileMetadata(); + + } catch (UnsupportedOperationException e) { try { - req = createDataverseRequest(findUserOrDie()); - } catch (Exception e) { - return error(BAD_REQUEST, "Error attempting to request information. Maybe a bad API token?"); + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + } catch (WrappedResponse w) { + return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); } - final DataFile df; - try { - df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); - } catch (Exception e) { - return error(BAD_REQUEST, "Error attempting get the requested data file."); + if (null == fm) { + return error(BAD_REQUEST, "No draft availabile for this dataset"); } - FileMetadata fm; - - if(null != getDraft && getDraft) { - try { - fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, findDataFileOrDie(fileIdOrPersistentId))); - } catch (WrappedResponse w) { - return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset." 
); - } - if(null == fm) { - return error(BAD_REQUEST, "No draft availabile for this dataset"); - } - } else { - fm = df.getLatestPublishedFileMetadata(); - MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, df); - mdcLogService.logEntry(entry); - } - - String jsonString = fm.asGsonObject(true).toString(); - - return Response + } + + try { + MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + mdcLogService.logEntry(entry); + + } catch (UnsupportedOperationException e) { + // Don't write mdc if on a draft + } + + JsonObjectBuilder job = JsonPrinter.json(fm); + javax.json.JsonObject jsonObject = job.build(); + String jsonString = jsonObject.toString(); + return Response .status(Response.Status.OK) .entity(jsonString) - .type(MediaType.TEXT_PLAIN) //Our plain text string is already json - .build(); - - /* - curl "http://localhost:8080/api/datasets/:persistentId/versions/2.0?persistentId=doi:10.5072/FK2/SDHST6" - */ + .type(MediaType.TEXT_PLAIN) + .build(); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 0ebd7f18f15..f8c85164e8a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -460,7 +460,7 @@ public static JsonArrayBuilder jsonFileMetadatas(Collection fmds) return filesArr; } - + public static JsonObjectBuilder json(DatasetDistributor dist) { return jsonObjectBuilder() .add("displayOrder", dist.getDisplayOrder()) @@ -579,6 +579,7 @@ public static JsonObjectBuilder json(FileMetadata fmd) { // in a sense that there's no longer the category field in the // fileMetadata object; but there are now multiple, oneToMany file // categories - and we probably need to 
export them too!) -- L.A. 4.5 + // DONE: catgegories by name .add("description", fmd.getDescription()) .add("label", fmd.getLabel()) // "label" is the filename .add("restricted", fmd.isRestricted()) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 950260d1400..3dbeecc82a4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -1384,6 +1384,35 @@ public void testDataSizeInDataverse() throws InterruptedException { } + @Test + public void testGetFileInfo(){ + + + Response createUser = UtilIT.createRandomUser(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + Response makeSuperUser = UtilIT.makeSuperUser(username); + String dataverseAlias = createDataverseGetAlias(apiToken); + Integer datasetId = createDatasetGetId(dataverseAlias, apiToken); + + msg("Add tabular file"); + String pathToFile = "scripts/search/data/tabular/stata13-auto-withstrls.dta"; + Response addResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + + String dataFileId = addResponse.getBody().jsonPath().getString("data.files[0].dataFile.id"); + msgt("datafile id: " + dataFileId); + + addResponse.prettyPrint(); + + Response getFileDataResponse = UtilIT.getFileData(dataFileId, apiToken); + + msgt("after get file data response: " ); + getFileDataResponse.prettyPrint(); + + getFileDataResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + } + @Test public void testValidateDDI_issue6027() throws InterruptedException { msgt("testValidateDDI_issue6027"); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 12ccaf2caff..4cd1bf04f5e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -997,6 +997,12 @@ static Response getFileMetadata(String fileIdOrPersistentId, String optionalForm .urlEncodingEnabled(false) .get("/api/access/datafile/" + idInPath + "/metadata" + optionalFormatInPath + optionalQueryParam); } + + static Response getFileData(String fileId, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/files/" + fileId ); + } static Response testIngest(String fileName, String fileType) { return given() From 072f0273243c58b74c0ea9e5141c0797a634cf50 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 6 Jan 2023 09:33:29 -0500 Subject: [PATCH 164/322] #8339 clean up some code --- src/main/java/edu/harvard/iq/dataverse/api/Files.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index de3c79b4a7f..44b7b54c547 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -478,7 +478,7 @@ public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Conte } catch (UnsupportedOperationException e) { try { - fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); } catch (WrappedResponse w) { return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); } @@ -487,13 +487,10 @@ public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Conte } } - try { + if (fm.getDatasetVersion().isReleased()) { MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, df); mdcLogService.logEntry(entry); - - } catch 
(UnsupportedOperationException e) { - // Don't write mdc if on a draft - } + } JsonObjectBuilder job = JsonPrinter.json(fm); javax.json.JsonObject jsonObject = job.build(); From a2f5d12d428d9169e382509bce6148c1f2594625 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 6 Jan 2023 12:29:56 -0500 Subject: [PATCH 165/322] make productionPlace multiple in schema.xml #9254 --- conf/solr/8.11.1/schema.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 655cf1bc3cc..b89b28f3f41 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -350,7 +350,7 @@ - + From a2676e7e0dd0dc91c354ea7c54ea1eeaf525b96a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 6 Jan 2023 14:24:20 -0500 Subject: [PATCH 166/322] #8339 fix response/add tests --- .../edu/harvard/iq/dataverse/api/Files.java | 13 ++++-- .../edu/harvard/iq/dataverse/api/FilesIT.java | 46 ++++++++++++++++--- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 44b7b54c547..5594d3139b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -14,6 +14,7 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.TermsOfUseAndAccessValidator; import edu.harvard.iq.dataverse.UserNotificationServiceBean; +import static edu.harvard.iq.dataverse.api.AbstractApiBean.STATUS_OK; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; @@ -44,6 +45,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import static 
edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import java.io.InputStream; @@ -495,11 +497,12 @@ public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Conte JsonObjectBuilder job = JsonPrinter.json(fm); javax.json.JsonObject jsonObject = job.build(); String jsonString = jsonObject.toString(); - return Response - .status(Response.Status.OK) - .entity(jsonString) - .type(MediaType.TEXT_PLAIN) - .build(); + + return Response.ok(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", json(fm)).build()) + .type(MediaType.APPLICATION_JSON) + .build(); } @GET diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 3dbeecc82a4..a373ee694c2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -1385,8 +1385,7 @@ public void testDataSizeInDataverse() throws InterruptedException { } @Test - public void testGetFileInfo(){ - + public void testGetFileInfo() { Response createUser = UtilIT.createRandomUser(); String username = UtilIT.getUsernameFromResponse(createUser); @@ -1395,6 +1394,9 @@ public void testGetFileInfo(){ String dataverseAlias = createDataverseGetAlias(apiToken); Integer datasetId = createDatasetGetId(dataverseAlias, apiToken); + createUser = UtilIT.createRandomUser(); + String apiTokenRegular = UtilIT.getApiTokenFromResponse(createUser); + msg("Add tabular file"); String pathToFile = "scripts/search/data/tabular/stata13-auto-withstrls.dta"; Response addResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); @@ -1403,14 +1405,44 @@ public void testGetFileInfo(){ msgt("datafile id: " + dataFileId); addResponse.prettyPrint(); - + Response getFileDataResponse = UtilIT.getFileData(dataFileId, apiToken); - - msgt("after get file data 
response: " ); + getFileDataResponse.prettyPrint(); - - getFileDataResponse.then().assertThat() + getFileDataResponse.then().assertThat() + .body("data.label", equalTo("stata13-auto-withstrls.dta")) + .body("data.dataFile.filename", equalTo("stata13-auto-withstrls.dta")) .statusCode(OK.getStatusCode()); + + getFileDataResponse = UtilIT.getFileData(dataFileId, apiTokenRegular); + getFileDataResponse.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + + // ------------------------- + // Publish dataverse and dataset + // ------------------------- + msg("Publish dataverse and dataset"); + Response publishDataversetResp = UtilIT.publishDataverseViaSword(dataverseAlias, apiToken); + publishDataversetResp.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response publishDatasetResp = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken); + publishDatasetResp.then().assertThat() + .statusCode(OK.getStatusCode()); + //regular user should get to see file data + getFileDataResponse = UtilIT.getFileData(dataFileId, apiTokenRegular); + getFileDataResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + //cleanup + Response destroyDatasetResponse = UtilIT.destroyDataset(datasetId, apiToken); + assertEquals(200, destroyDatasetResponse.getStatusCode()); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + + Response deleteUserResponse = UtilIT.deleteUser(username); + assertEquals(200, deleteUserResponse.getStatusCode()); } @Test From 99dbbbb3ba09ed2dce969317d417871172ebcb6b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 9 Jan 2023 09:24:31 -0500 Subject: [PATCH 167/322] #8339 add doc for get file json api --- doc/sphinx-guides/source/api/native-api.rst | 53 +++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 
76ca38fdc70..e6f4b8d2584 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2051,6 +2051,59 @@ The response is a JSON object described in the :doc:`/api/external-tools` sectio Files ----- +Get JSON Representation of a File +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. + +Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB*: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + + curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + +You may get its draft version if you pass an api token with view draft permissions: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + + curl -H "X-Dataverse-key:$API_TOKEN" http://$SERVER/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + + +|CORS| Show the file whose id is passed: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=408730 + + curl $SERVER_URL/api/file/$ID + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl https://demo.dataverse.org/api/files/408730 + +The file id can be extracted from the response retrieved from the API which uses the persistent identifier (``/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER``). + Adding Files ~~~~~~~~~~~~ From 3b6d17c8e5cebe16e4623364f439f05374138fef Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 9 Jan 2023 15:25:36 +0100 Subject: [PATCH 168/322] docs(config): adapt wording and order for dataverse.files.directory as per review --- .../source/installation/config.rst | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index d46ec3ca3d5..946d580bbde 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -263,7 +263,9 @@ To support multiple stores, a Dataverse installation now requires an id, type, a Out of the box, a Dataverse installation is configured to use local file storage in the 'file' store by default. You can add additional stores and, as a superuser, configure specific Dataverse collections to use them (by editing the 'General Information' for the Dataverse collection as described in the :doc:`/admin/dataverses-datasets` section). -Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored (in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored +(in the /temp subdir of that directory), independent of the location of any 'file' store defined above. +(See also the option reference: :ref:`dataverse.files.directory`) If you wish to change which store is used by default, you'll need to delete the existing default storage driver and set a new one using jvm options. 
@@ -1495,6 +1497,8 @@ protocol, host, and port number and should not include a trailing slash. https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. +.. _dataverse.files.directory: + .. _dataverse.files.directory: dataverse.files.directory @@ -1503,16 +1507,15 @@ dataverse.files.directory Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number of purposes: -1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before +1. ``//`` is the subdirectory layout when using the target + directory as a :ref:`permanent file storage `. The DCM feature for :doc:`../developers/big-data-support` + is able to trigger imports for externally uploaded files from this area under certain conditions. +2. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before shipping to the final storage destination. 2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer to final storage location and/or ingest. -3. ``//`` data location for file system imports, see - :ref:`api-import-dataset`. 4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. - -This directory might also be used for permanent storage of data, but this setting is independent from -:ref:`storage-files-dir` configuration. + This location is deprecated and might be refactored into a distinct setting in the future. .. _dataverse.files.uploads: @@ -3116,7 +3119,7 @@ For example: ``curl -X PUT -d "This content needs to go through an additional review by the Curation Team before it can be published." 
http://localhost:8080/api/admin/settings/:DatasetMetadataValidationFailureMsg`` - + :ExternalValidationAdminOverride ++++++++++++++++++++++++++++++++ From 8750cfe0cda0a6db9a356719f42dc24e87581b41 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 9 Jan 2023 15:36:53 +0100 Subject: [PATCH 169/322] doc(config): remove some merge artifact and fix enumeration --- doc/sphinx-guides/source/installation/config.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index c0de6e43a98..03dcbb51624 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1521,9 +1521,6 @@ protocol, host, and port number and should not include a trailing slash. - We are absolutely aware that it's confusing to have both ``dataverse.fqdn`` and ``dataverse.siteUrl``. https://github.com/IQSS/dataverse/issues/6636 is about resolving this confusion. - -.. _dataverse.files.directory: - .. _dataverse.files.directory: dataverse.files.directory @@ -1537,7 +1534,7 @@ of purposes: is able to trigger imports for externally uploaded files from this area under certain conditions. 2. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before shipping to the final storage destination. -2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer +3. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer to final storage location and/or ingest. 4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. This location is deprecated and might be refactored into a distinct setting in the future. 
From 2a1b9e26c67146a60f1da0342e16bb5d4950919a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 9 Jan 2023 09:40:10 -0500 Subject: [PATCH 170/322] #8339 remove junk --- .../iq/dataverse/util/json/JsonPrinter.java | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index f8c85164e8a..0b0c50a9cb1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -632,24 +632,6 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { } JsonObjectBuilder embargo = df.getEmbargo() != null ? JsonPrinter.json(df.getEmbargo()) : null; - - /* - Dataset - {"label":"file241.txt","restricted":false,"version":1,"datasetVersionId":751, - "dataFile":{"id":1618,"persistentId":"","pidURL":"","filename":"file241.txt", - "contentType":"text/plain","filesize":28,"description":"file description for file 241","storageIdentifier":"file://185354db7ae-85e105e89721", - "rootDataFileId":-1, - "checksum":{"type":"SHA-1","value":"585831cecbaf7e2de25e46799475edc6619cb73c"}, - "creationDate":"2022-12-21"}}, - - File - - -{"label":"file241.txt", - "description":"file description for file 241","restricted":false, - "categories":["Custom Tag for file 241","Data"],"id":892}% - - */ return jsonObjectBuilder() .add("id", df.getId()) From b7aecf56e67f13515150a7d5d3b344450d5ce7c3 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 10 Jan 2023 10:38:54 +0100 Subject: [PATCH 171/322] docs(config): change wording for dataverse.files.directory option as per review --- doc/sphinx-guides/source/installation/config.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 03dcbb51624..5f6a3a9aee8 100644 
--- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1529,15 +1529,18 @@ dataverse.files.directory Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number of purposes: -1. ``//`` is the subdirectory layout when using the target - directory as a :ref:`permanent file storage `. The DCM feature for :doc:`../developers/big-data-support` - is able to trigger imports for externally uploaded files from this area under certain conditions. -2. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before +1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before shipping to the final storage destination. -3. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer +2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer to final storage location and/or ingest. -4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. +3. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. This location is deprecated and might be refactored into a distinct setting in the future. +4. The experimental DCM feature for :doc:`../developers/big-data-support` is able to trigger imports for externally + uploaded files in a directory tree at ``//`` + under certain conditions. This directory may also be used by file stores for :ref:`permanent file storage `, + but this is controlled by other, store-specific settings. + +Defaults to ``/tmp/dataverse``. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_DIRECTORY``. .. 
_dataverse.files.uploads: From 8cead71fe610e434fbbecd5cccdfe0aae8ef2eac Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 10 Jan 2023 15:36:45 -0500 Subject: [PATCH 172/322] cleanup (remove extraneous println) #9153 --- .../java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index f3fc56a54aa..9c6acd964c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -305,7 +305,6 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // Any necessary post-processing: // performPostProcessingTasks(dataFile); } else { - System.out.println("driver is not tmp"); try { StorageIO dataAccess = DataAccess.getStorageIO(dataFile); //Populate metadata From c59bacee2e9608d897e9c243b9d548b05b4327d2 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 11 Jan 2023 10:01:48 -0500 Subject: [PATCH 173/322] remove cruft #8339 --- src/main/java/edu/harvard/iq/dataverse/api/Files.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 0508f5263c7..5c9cec90eb2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -490,7 +490,6 @@ public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Conte JsonObjectBuilder job = JsonPrinter.json(fm); javax.json.JsonObject jsonObject = job.build(); - String jsonString = jsonObject.toString(); return Response.ok(Json.createObjectBuilder() .add("status", STATUS_OK) From d5a86439a44b5b4dfd561d8eac73e915f394612d Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 13 Jan 2023 09:34:22 -0500 Subject: [PATCH 174/322] #8724 show child ds 
in linking dv --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index e2f2b3adcfd..09cd8a72f0c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1497,6 +1497,7 @@ private List findAllLinkingDataverses(DvObject dvObject){ dataset = (Dataset) dvObject; linkingDataverses = dsLinkingService.findLinkingDataverses(dataset.getId()); ancestorList = dataset.getOwner().getOwners(); + ancestorList.add(dataset.getOwner()); //to show dataset in linking dv when parent dv is linked } if(dvObject.isInstanceofDataverse()){ dv = (Dataverse) dvObject; From 2efd8a4e12967916032e315eb69fdecbd51a738b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 13 Jan 2023 16:14:25 -0500 Subject: [PATCH 175/322] A proof-of-concept quick implementation of "custom headers in OAI calls", #9231 --- .../iq/dataverse/api/HarvestingClients.java | 42 +-- .../harvest/client/HarvestingClient.java | 100 +------ .../client/oai/CustomJdkHttpXoaiClient.java | 259 ++++++++++++++++++ .../harvest/client/oai/OaiHandler.java | 49 +++- .../iq/dataverse/util/json/JsonParser.java | 1 + .../iq/dataverse/util/json/JsonPrinter.java | 29 +- 6 files changed, 355 insertions(+), 125 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java index b75cb687c62..9aea3adab8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java @@ -15,6 +15,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import 
edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import javax.json.JsonObjectBuilder; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import java.io.IOException; @@ -88,7 +89,7 @@ public Response harvestingClients(@QueryParam("key") String apiKey) throws IOExc } if (retrievedHarvestingClient != null) { - hcArr.add(harvestingConfigAsJson(retrievedHarvestingClient)); + hcArr.add(JsonPrinter.json(retrievedHarvestingClient)); } } @@ -136,7 +137,7 @@ public Response harvestingClient(@PathParam("nickName") String nickName, @QueryP } try { - return ok(harvestingConfigAsJson(retrievedHarvestingClient)); + return ok(JsonPrinter.json(retrievedHarvestingClient)); } catch (Exception ex) { logger.warning("Unknown exception caught while trying to format harvesting client config as json: "+ex.getMessage()); return error( Response.Status.BAD_REQUEST, @@ -216,7 +217,7 @@ public Response createHarvestingClient(String jsonBody, @PathParam("nickName") S DataverseRequest req = createDataverseRequest(findUserOrDie()); harvestingClient = execCommand(new CreateHarvestingClientCommand(req, harvestingClient)); - return created( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient)); + return created( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -268,6 +269,8 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S } // Go through the supported editable fields and update the client accordingly: + // TODO: We may want to reevaluate whether we really want/need *all* + // of these fields to be editable. 
if (newHarvestingClient.getHarvestingUrl() != null) { harvestingClient.setHarvestingUrl(newHarvestingClient.getHarvestingUrl()); @@ -287,10 +290,13 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S if (newHarvestingClient.getHarvestStyle() != null) { harvestingClient.setHarvestStyle(newHarvestingClient.getHarvestStyle()); } + if (newHarvestingClient.getCustomHttpHeaders() != null) { + harvestingClient.setCustomHttpHeaders(newHarvestingClient.getCustomHttpHeaders()); + } // TODO: Make schedule configurable via this API too. harvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient)); - return ok( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient)); + return ok( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); // harvestingConfigAsJson(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -390,32 +396,4 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } return this.accepted(); } - - /* Auxiliary, helper methods: */ - - public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { - if (harvestingConfig == null) { - return null; - } - - - return jsonObjectBuilder().add("nickName", harvestingConfig.getName()). - add("dataverseAlias", harvestingConfig.getDataverse().getAlias()). - add("type", harvestingConfig.getHarvestType()). - add("style", harvestingConfig.getHarvestStyle()). - add("harvestUrl", harvestingConfig.getHarvestingUrl()). - add("archiveUrl", harvestingConfig.getArchiveUrl()). - add("archiveDescription",harvestingConfig.getArchiveDescription()). - add("metadataFormat", harvestingConfig.getMetadataPrefix()). - add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). - add("schedule", harvestingConfig.isScheduled() ? 
harvestingConfig.getScheduleDescription() : "none"). - add("status", harvestingConfig.isHarvestingNow() ? "inProgress" : "inActive"). - add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? "N/A" : harvestingConfig.getLastHarvestTime().toString()). - add("lastResult", harvestingConfig.getLastResult()). - add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()). - add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? "N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()). - add("lastDatasetsHarvested", harvestingConfig.getLastHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getLastHarvestedDatasetCount().toString()). - add("lastDatasetsDeleted", harvestingConfig.getLastDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getLastDeletedDatasetCount().toString()). - add("lastDatasetsFailed", harvestingConfig.getLastFailedDatasetCount() == null ? "N/A" : harvestingConfig.getLastFailedDatasetCount().toString()); - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index aeb010fad6d..d27ddc41b7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -234,6 +234,16 @@ public void setMetadataPrefix(String metadataPrefix) { this.metadataPrefix = metadataPrefix; } + private String customHttpHeaders; + + public String getCustomHttpHeaders() { + return customHttpHeaders; + } + + public void setCustomHttpHeaders(String customHttpHeaders) { + this.customHttpHeaders = customHttpHeaders; + } + // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 // TODO: should it be @OrderBy("startTime")? -- L.A. 
4.4 @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @@ -345,95 +355,7 @@ public Long getLastDeletedDatasetCount() { return lastNonEmptyHarvest.getDeletedDatasetCount(); } return null; - } - - /* move the fields below to the new HarvestingClientRun class: - private String harvestResult; - - public String getResult() { - return harvestResult; - } - - public void setResult(String harvestResult) { - this.harvestResult = harvestResult; - } - - // "Last Harvest Time" is the last time we *attempted* to harvest - // from this remote resource. - // It wasn't necessarily a successful attempt! - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastHarvestTime; - - public Date getLastHarvestTime() { - return lastHarvestTime; - } - - public void setLastHarvestTime(Date lastHarvestTime) { - this.lastHarvestTime = lastHarvestTime; - } - - // This is the last "successful harvest" - i.e., the last time we - // tried to harvest, and got a response from the remote server. - // We may not have necessarily harvested any useful content though; - // the result may have been a "no content" or "no changes since the last harvest" - // response. - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastSuccessfulHarvestTime; - - public Date getLastSuccessfulHarvestTime() { - return lastSuccessfulHarvestTime; - } - - public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) { - this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime; - } - - // Finally, this is the time stamp from the last "non-empty" harvest. - // I.e. 
the last time we ran a harvest that actually resulted in - // some Datasets created, updated or deleted: - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastNonEmptyHarvestTime; - - public Date getLastNonEmptyHarvestTime() { - return lastNonEmptyHarvestTime; - } - - public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) { - this.lastNonEmptyHarvestTime = lastNonEmptyHarvestTime; - } - - // And these are the Dataset counts from that last "non-empty" harvest: - private Long harvestedDatasetCount; - private Long failedDatasetCount; - private Long deletedDatasetCount; - - public Long getLastHarvestedDatasetCount() { - return harvestedDatasetCount; - } - - public void setHarvestedDatasetCount(Long harvestedDatasetCount) { - this.harvestedDatasetCount = harvestedDatasetCount; - } - - public Long getLastFailedDatasetCount() { - return failedDatasetCount; - } - - public void setFailedDatasetCount(Long failedDatasetCount) { - this.failedDatasetCount = failedDatasetCount; - } - - public Long getLastDeletedDatasetCount() { - return deletedDatasetCount; - } - - public void setDeletedDatasetCount(Long deletedDatasetCount) { - this.deletedDatasetCount = deletedDatasetCount; - } - */ + } private boolean scheduled; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java new file mode 100644 index 00000000000..25c3a048219 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java @@ -0,0 +1,259 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package edu.harvard.iq.dataverse.harvest.client.oai; + +import io.gdcc.xoai.serviceprovider.client.OAIClient; + +import io.gdcc.xoai.serviceprovider.exceptions.OAIRequestException; +import io.gdcc.xoai.serviceprovider.parameters.Parameters; +import java.io.IOException; +import java.io.InputStream; +import static java.net.HttpURLConnection.HTTP_OK; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.charset.StandardCharsets; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.X509Certificate; +import java.time.Duration; +import java.util.List; +import java.util.ListIterator; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.http.Header; + +/** + * Sane default OAI Client implementation using JDK HTTP Client. Can only be used via builder in + * calling code. + * (this is essentially a copy of the final class JdkHttpOaiClient provided by + * gdcc.xoai, with the custom http headers added. proof of concept! + */ +public final class CustomJdkHttpXoaiClient extends OAIClient { + + private static final Logger log = LoggerFactory.getLogger(OAIClient.class.getCanonicalName()); + + // As these vars will be feed via the builder and those provide defaults and null-checks, + // we may assume FOR INTERNAL USE these are not null. + private final String baseUrl; + private final String userAgent; + private final Duration requestTimeout; + private final HttpClient httpClient; + // Custom headers are optional though, ok to be null: + private final List
customHeaders; + + + CustomJdkHttpXoaiClient( + String baseUrl, String userAgent, Duration requestTimeout, List
customHeaders, HttpClient httpClient) { + this.baseUrl = baseUrl; + this.userAgent = userAgent; + this.requestTimeout = requestTimeout; + this.httpClient = httpClient; + this.customHeaders = customHeaders; + } + + @Override + public InputStream execute(Parameters parameters) throws OAIRequestException { + try { + URI requestURI = URI.create(parameters.toUrl(this.baseUrl)); + + HttpRequest.Builder httpRequestBuilder = HttpRequest.newBuilder() + .uri(requestURI) + .GET() + .header("User-Agent", this.userAgent) + .timeout(requestTimeout); + + // add custom headers, if present: + if (customHeaders != null) { + ListIterator
iterator = customHeaders.listIterator(); + while (iterator.hasNext()) { + Header customHeader = iterator.next(); + httpRequestBuilder.header(customHeader.getName(), customHeader.getValue()); + } + } + + HttpRequest request = httpRequestBuilder.build(); + + HttpResponse response = + this.httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + + if (response.statusCode() == HTTP_OK) { + return response.body(); + } else { + // copy body of the response to string and send as exception message + throw new OAIRequestException( + "Query faild with status code " + + response.statusCode() + + ": " + + new String( + response.body().readAllBytes(), StandardCharsets.UTF_8)); + } + } catch (IllegalArgumentException | IOException | InterruptedException ex) { + // Hint by SonarCloud: + // https://sonarcloud.io/organizations/gdcc/rules?open=java%3AS2142&rule_key=java%3AS2142 + Thread.currentThread().interrupt(); + throw new OAIRequestException(ex); + } + } + + /*@Override + JdkHttpBuilder newBuilder() { + return new CustomJdkHttpXoaiClient.JdkHttpBuilder(); + }*/ + + /** + * Build an {@link OAIClient} using the JDK native HTTP client. You may use your own prepared + * {@link HttpClient.Builder} instead of the default one. + * + *

Provides defaults for request timeouts (60s) and user agent. Remember to set the base + * OAI-PMH URL via {@link #withBaseUrl(URL)}. An exception will occur on first request + * otherwise. + */ + public static final class JdkHttpBuilder implements OAIClient.Builder { + private String baseUrl = "Must be set via Builder.withBaseUrl()"; + private String userAgent = "XOAI Service Provider v5"; + private Duration requestTimeout = Duration.ofSeconds(60); + private List

customHeaders = null; + private final HttpClient.Builder httpClientBuilder; + + JdkHttpBuilder() { + this.httpClientBuilder = HttpClient.newBuilder(); + } + + /** + * While the default constructor can be accessed via {@link OAIClient#newBuilder()}, if + * someone provides a {@link HttpClient.Builder} (which might already contain + * configuration), happily work with it. + * + * @param httpClientBuilder Any (preconfigured) Java 11+ HTTP client builder + */ + public JdkHttpBuilder(HttpClient.Builder httpClientBuilder) { + this.httpClientBuilder = httpClientBuilder; + } + + @Override + public JdkHttpBuilder withBaseUrl(URL baseUrl) { + return this.withBaseUrl(baseUrl.toString()); + } + + @Override + public JdkHttpBuilder withBaseUrl(String baseUrl) { + try { + new URL(baseUrl).toURI(); + if (!baseUrl.startsWith("http")) { + throw new IllegalArgumentException("OAI-PMH supports HTTP/S only"); + } + this.baseUrl = baseUrl; + return this; + } catch (MalformedURLException | URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + @Override + public JdkHttpBuilder withConnectTimeout(Duration timeout) { + // validation is done by client builder! 
+ httpClientBuilder.connectTimeout(timeout); + return this; + } + + @Override + public JdkHttpBuilder withRequestTimeout(Duration timeout) { + if (timeout == null || timeout.isNegative()) { + throw new IllegalArgumentException("Timeout must not be null or negative value"); + } + this.requestTimeout = timeout; + return this; + } + + @Override + public JdkHttpBuilder withUserAgent(String userAgent) { + if (userAgent == null || userAgent.isBlank()) { + throw new IllegalArgumentException("User agent must not be null or empty/blank"); + } + this.userAgent = userAgent; + return this; + } + + @Override + public JdkHttpBuilder withFollowRedirects() { + this.httpClientBuilder.followRedirects(HttpClient.Redirect.NORMAL); + return this; + } + + @Override + public JdkHttpBuilder withInsecureSSL() { + // create insecure context (switch of certificate checks) + httpClientBuilder.sslContext(insecureContext()); + + // warn if the hostname verification is still active + // (users must do this themselves - it's a global setting and might pose a security + // risk) + if (!Boolean.getBoolean("jdk.internal.httpclient.disableHostnameVerification")) { + log.warn( + "You must disable JDK HTTP Client Host Name Verification globally via" + + " system property" + + " -Djdk.internal.httpclient.disableHostnameVerification=true for" + + " XOAI Client connections to insecure SSL servers. Don't do this in" + + " a production setup!"); + } + return this; + } + + public JdkHttpBuilder withCustomHeaders(List
customHeaders) { + // This can be null, as these headers are optional + this.customHeaders = customHeaders; + return this; + } + + @Override + public CustomJdkHttpXoaiClient build() { + return new CustomJdkHttpXoaiClient( + this.baseUrl, this.userAgent, this.requestTimeout, this.customHeaders, httpClientBuilder.build()); + } + + private static SSLContext insecureContext() { + TrustManager[] noopTrustManager = + new TrustManager[] { + new X509TrustManager() { + // This is insecure by design, we warn users and they need to do sth. to + // use it. + // Safely ignore the Sonarcloud message. + @SuppressWarnings("java:S4830") + public void checkClientTrusted(X509Certificate[] xcs, String string) { + // we want to accept every certificate - intentionally left blank + } + // This is insecure by design, we warn users and they need to do sth. to + // use it. + // Safely ignore the Sonarcloud message. + @SuppressWarnings("java:S4830") + public void checkServerTrusted(X509Certificate[] xcs, String string) { + // we want to accept every certificate - intentionally left blank + } + + public X509Certificate[] getAcceptedIssuers() { + return new X509Certificate[0]; + } + } + }; + try { + SSLContext sc = SSLContext.getInstance("TLSv1.2"); + sc.init(null, noopTrustManager, null); + return sc; + } catch (KeyManagementException | NoSuchAlgorithmException ex) { + log.error("Could not build insecure SSL context. 
Might cause NPE.", ex); + return null; + } + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index c0a039e2d2b..ae297416ff9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -5,7 +5,6 @@ import io.gdcc.xoai.model.oaipmh.results.MetadataFormat; import io.gdcc.xoai.model.oaipmh.results.Set; import io.gdcc.xoai.serviceprovider.ServiceProvider; -import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import io.gdcc.xoai.serviceprovider.exceptions.BadArgumentException; import io.gdcc.xoai.serviceprovider.exceptions.InvalidOAIResponse; import io.gdcc.xoai.serviceprovider.exceptions.NoSetHierarchyException; @@ -26,12 +25,15 @@ import java.util.Date; import java.util.Iterator; import java.util.List; +import java.util.logging.Logger; +import org.apache.http.message.BasicHeader; /** * * @author Leonid Andreev */ public class OaiHandler implements Serializable { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler"); public OaiHandler() { @@ -65,6 +67,9 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException this.fromDate = harvestingClient.getLastNonEmptyHarvestTime(); + this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders()); + //test: this.customHeaders = makeCustomHeaders("x-api-key: xxx-yyy-zzz\\ny-api-key: zzz-yyy-xxx"); + this.harvestingClient = harvestingClient; } @@ -74,6 +79,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException private String setName; private Date fromDate; private Boolean setListTruncated = false; + private List customHeaders = null; private ServiceProvider serviceProvider; @@ -119,6 +125,14 @@ public boolean isSetListTruncated() { return setListTruncated; } + 
public List getCustomHeaders() { + return this.customHeaders; + } + + public void setCustomHeaders(List customHeaders) { + this.customHeaders = customHeaders; + } + public ServiceProvider getServiceProvider() throws OaiHandlerException { if (serviceProvider == null) { if (baseOaiUrl == null) { @@ -128,8 +142,17 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); - // builds the client with the default parameters and the JDK http client: - context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); + // builds the client based on the default client provided in xoai, + // with the same default parameters and the JDK http client, with + // just the (optional) custom headers added: + // (this is proof-of-concept implementation; there gotta be a prettier way to do this) + //context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); + if (getCustomHeaders() != null) { + for (org.apache.http.Header customHeader : getCustomHeaders()) { + logger.info("will add custom header; name: "+customHeader.getName()+", value: "+customHeader.getValue()); + } + } + context.withOAIClient((new CustomJdkHttpXoaiClient.JdkHttpBuilder()).withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); serviceProvider = new ServiceProvider(context); } @@ -293,4 +316,24 @@ public void runIdentify() { // (we will need it, both for validating the remote server, // and to learn about its extended capabilities) } + + private List makeCustomHeaders(String headersString) { + if (headersString != null) { + List ret = new ArrayList<>(); + String[] parts = headersString.split("\\\\n"); + + for (int i = 0; i < parts.length; i++) { + if (parts[i].indexOf(':') > 0) { + String headerName = parts[i].substring(0, parts[i].indexOf(':')); + String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip(); + ret.add(new 
BasicHeader(headerName, headerValue)); + } + // simply skipping it if malformed; or we could throw an exception - ? + } + if (!ret.isEmpty()) { + return ret; + } + } + return null; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 905479c4e0d..22e2c6c8d78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -908,6 +908,7 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC harvestingClient.setArchiveDescription(obj.getString("archiveDescription", null)); harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null)); harvestingClient.setHarvestingSet(obj.getString("set",null)); + harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); return dataverseAlias; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index dc547f2e52c..1ab596569a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -37,6 +37,7 @@ import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.globus.FileDetailsHolder; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; @@ -666,6 +667,32 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { ; } + public static JsonObjectBuilder json(HarvestingClient harvestingClient) { + if (harvestingClient == null) { + return null; + } + + return jsonObjectBuilder().add("nickName", harvestingClient.getName()). 
+ add("dataverseAlias", harvestingClient.getDataverse().getAlias()). + add("type", harvestingClient.getHarvestType()). + add("style", harvestingClient.getHarvestStyle()). + add("harvestUrl", harvestingClient.getHarvestingUrl()). + add("archiveUrl", harvestingClient.getArchiveUrl()). + add("archiveDescription", harvestingClient.getArchiveDescription()). + add("metadataFormat", harvestingClient.getMetadataPrefix()). + add("set", harvestingClient.getHarvestingSet()). + add("schedule", harvestingClient.isScheduled() ? harvestingClient.getScheduleDescription() : "none"). + add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive"). + add("customHeaders", harvestingClient.getCustomHttpHeaders()). + add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()). + add("lastResult", harvestingClient.getLastResult()). + add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()). + add("lastNonEmpty", harvestingClient.getLastNonEmptyHarvestTime() == null ? null : harvestingClient.getLastNonEmptyHarvestTime().toString()). + add("lastDatasetsHarvested", harvestingClient.getLastHarvestedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastHarvestedDatasetCount().toString()). + add("lastDatasetsDeleted", harvestingClient.getLastDeletedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastDeletedDatasetCount().toString()). + add("lastDatasetsFailed", harvestingClient.getLastFailedDatasetCount()); // == null ? "N/A" : harvestingClient.getLastFailedDatasetCount().toString()); + } + public static String format(Date d) { return (d == null) ? 
null : Util.getDateTimeFormat().format(d); } @@ -702,7 +729,7 @@ public static JsonArrayBuilder getTabularFileTags(DataFile df) { } return tabularTags; } - + private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener { Deque objectStack = new LinkedList<>(); From 019fb749b11abdba75e3d058c9c5d38b07e50bae Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Jan 2023 13:53:40 -0500 Subject: [PATCH 176/322] Added the custom header configuration to the harvesting clients GUI (#9231). --- .../iq/dataverse/HarvestingClientsPage.java | 46 +++++++++++++++++-- src/main/java/propertyFiles/Bundle.properties | 4 ++ src/main/webapp/harvestclients.xhtml | 17 +++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index bc83c15dcd7..4430a7be73a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -79,7 +79,7 @@ public class HarvestingClientsPage implements java.io.Serializable { private Dataverse dataverse; private Long dataverseId = null; private HarvestingClient selectedClient; - private boolean setListTruncated = false; + private boolean setListTruncated = false; //private static final String solrDocIdentifierDataset = "dataset_"; @@ -245,6 +245,7 @@ public void editClient(HarvestingClient harvestingClient) { this.newNickname = harvestingClient.getName(); this.newHarvestingUrl = harvestingClient.getHarvestingUrl(); + this.customHeader = harvestingClient.getCustomHttpHeaders(); this.initialSettingsValidated = false; // TODO: do we want to try and contact the server, again, to make @@ -340,6 +341,7 @@ public void createClient(ActionEvent ae) { getSelectedDestinationDataverse().getHarvestingClientConfigs().add(newHarvestingClient); newHarvestingClient.setHarvestingUrl(newHarvestingUrl); + 
newHarvestingClient.setCustomHttpHeaders(customHeader); if (!StringUtils.isEmpty(newOaiSet)) { newHarvestingClient.setHarvestingSet(newOaiSet); } @@ -426,6 +428,7 @@ public void saveClient(ActionEvent ae) { // nickname is not editable for existing clients: //harvestingClient.setName(newNickname); harvestingClient.setHarvestingUrl(newHarvestingUrl); + harvestingClient.setCustomHttpHeaders(customHeader); harvestingClient.setHarvestingSet(newOaiSet); harvestingClient.setMetadataPrefix(newMetadataFormat); harvestingClient.setHarvestStyle(newHarvestingStyle); @@ -635,6 +638,23 @@ public boolean validateServerUrlOAI() { return false; } + public boolean validateCustomHeader() { + if (!StringUtils.isEmpty(getCustomHeader())) { + // TODO: put this method somewhere else as a static utility + + // check that it's looking like "{header-name}: {header value}" at least + if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getCustomHeader())) { + FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), + new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); + + return false; + } + } + + // this setting is optional + return true; + } + public void validateInitialSettings() { if (isHarvestTypeOAI()) { boolean nicknameValidated = true; @@ -644,9 +664,10 @@ public void validateInitialSettings() { destinationDataverseValidated = validateSelectedDataverse(); } boolean urlValidated = validateServerUrlOAI(); + boolean customHeaderValidated = validateCustomHeader(); - if (nicknameValidated && destinationDataverseValidated && urlValidated) { - // In Create mode we want to run all 3 validation tests; this is why + if (nicknameValidated && destinationDataverseValidated && urlValidated && customHeaderValidated) { + // In Create mode we want to run all 4 validation tests; this is why // we are not doing "if ((validateNickname() && validateServerUrlOAI())" // in the line above. 
-- L.A. 4.4 May 2016. @@ -688,6 +709,7 @@ public void backToStepThree() { UIInput newClientNicknameInputField; UIInput newClientUrlInputField; + UIInput newClientCustomHeaderInputField; UIInput hiddenInputField; /*UISelectOne*/ UIInput metadataFormatMenu; UIInput remoteArchiveStyleMenu; @@ -695,6 +717,7 @@ public void backToStepThree() { private String newNickname = ""; private String newHarvestingUrl = ""; + private String customHeader = null; private boolean initialSettingsValidated = false; private String newOaiSet = ""; private String newMetadataFormat = ""; @@ -718,6 +741,7 @@ public void initNewClient(ActionEvent ae) { //this.selectedClient = new HarvestingClient(); this.newNickname = ""; this.newHarvestingUrl = ""; + this.customHeader = null; this.initialSettingsValidated = false; this.newOaiSet = ""; this.newMetadataFormat = ""; @@ -762,6 +786,14 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } + public String getCustomHeader() { + return customHeader; + } + + public void setCustomHeader(String customHeader) { + this.customHeader = customHeader; + } + public int getHarvestTypeRadio() { return this.harvestTypeRadio; } @@ -871,6 +903,14 @@ public void setNewClientUrlInputField(UIInput newClientInputField) { this.newClientUrlInputField = newClientInputField; } + public UIInput getNewClientCustomHeaderInputField() { + return newClientCustomHeaderInputField; + } + + public void setNewClientCustomHeaderInputField(UIInput newClientInputField) { + this.newClientCustomHeaderInputField = newClientInputField; + } + public UIInput getHiddenInputField() { return hiddenInputField; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 62531d32bb2..e2007338e08 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -538,6 +538,10 @@ harvestclients.newClientDialog.nickname.helptext=Consists of 
letters, digits, un harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty! harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters. harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used. +harvestclients.newClientDialog.customHeader=Custom HTTP Header +harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to OAI requests +harvestclients.newClientDialog.customHeader.watermark=Enter the header as in header-name: header-value +harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value" harvestclients.newClientDialog.type=Server Protocol harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported. harvestclients.newClientDialog.type.OAI=OAI diff --git a/src/main/webapp/harvestclients.xhtml b/src/main/webapp/harvestclients.xhtml index 5c7b3482ed3..a5f271e8e75 100644 --- a/src/main/webapp/harvestclients.xhtml +++ b/src/main/webapp/harvestclients.xhtml @@ -277,6 +277,23 @@
+ + +
+ +
+ + + +

#{bundle['harvestclients.newClientDialog.customHeader.helptext']}

+
+
From 725348d78f2d749242dec78cdd071ef3428b6b69 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 17 Jan 2023 14:51:50 -0500 Subject: [PATCH 177/322] minor doc tweaks #7980 --- doc/release-notes/7980-enhanced-dsd.md | 4 +-- .../source/installation/config.rst | 32 +++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/doc/release-notes/7980-enhanced-dsd.md b/doc/release-notes/7980-enhanced-dsd.md index 6a86a2c4b37..d69f201e565 100644 --- a/doc/release-notes/7980-enhanced-dsd.md +++ b/doc/release-notes/7980-enhanced-dsd.md @@ -1,4 +1,4 @@ -### Default Values for Database Connections fixed +### Default Values for Database Connections Fixed Introduced in Dataverse release 5.3 a regression might have hit you: the announced default values for the database connection never actually worked. @@ -7,4 +7,4 @@ With the update to Payara 5.2022.3 it was possible to introduce working defaults. The documentation has been changed accordingly. Together with this change, you can now enable advanced connection pool -configurations useful for debugging and monitoring. See the docs for details. \ No newline at end of file +configurations useful for debugging and monitoring. Of particular interest may be `sslmode=require`. See the docs for details. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index bfd6c511a79..15924205026 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -263,20 +263,20 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. 
Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. - +.. _database-persistence: Database Persistence -------------------- -The Dataverse software uses a PostgreSQL server and a Solr Search Index to store objects users create. -You can configure basic and advanced settings of the PostgreSQL database connection with the help of +The Dataverse software uses a PostgreSQL database to store objects users create. +You can configure basic and advanced settings for the PostgreSQL database connection with the help of MicroProfile Config API. Basic Database Settings +++++++++++++++++++++++ -1. Any of these settings can be set via system properties (see :ref:`jvm-options`), environment variables or other - MicroProfile Config mechanisms supported by the appserver. +1. Any of these settings can be set via system properties (see :ref:`jvm-options` starting at :ref:`dataverse.db.name`), environment variables or other + MicroProfile Config mechanisms supported by the app server. `See Payara docs for supported sources `_. 2. Remember to protect your secrets. For passwords, use an environment variable (bare minimum), a password alias named the same as the key (OK) or use the "dir config source" of Payara (best). 
@@ -289,7 +289,7 @@ Basic Database Settings asadmin create-password-alias --passwordfile /tmp/p.txt dataverse.db.password rm /tmp/p.txt -3. Environment variables follow the key, replacing any dot, colon, dash, etc into an underscore "_" and all uppercase +3. Environment variables follow the key, replacing any dot, colon, dash, etc. into an underscore "_" and all uppercase letters. Example: ``dataverse.db.host`` -> ``DATAVERSE_DB_HOST`` .. list-table:: @@ -320,7 +320,7 @@ Basic Database Settings - | ``dataverse`` | (installer sets to ``dvndb``) * - dataverse.db.parameters - - Connection parameters, see `Postgres JDBC docs `_ + - Connection parameters, such as ``sslmode=require``. See `Postgres JDBC docs `_ Note: you don't need to provide the initial "?". - *Empty string* @@ -347,17 +347,17 @@ Connection Validation - Description - Default * - dataverse.db.is-connection-validation-required - - ``true``: Validate connections, allow server to reconnect in case of failure + - ``true``: Validate connections, allow server to reconnect in case of failure. - false * - dataverse.db.connection-validation-method - | The method of connection validation: - | ``table|autocommit|meta-data|custom-validation`` + | ``table|autocommit|meta-data|custom-validation``. - *Empty string* * - dataverse.db.validation-table-name - - The name of the table used for validation if the validation method is set to ``table`` + - The name of the table used for validation if the validation method is set to ``table``. - *Empty string* * - dataverse.db.validation-classname - - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation`` + - The name of the custom class used for validation if the ``validation-method`` is set to ``custom-validation``. - *Empty string* * - dataverse.db.validate-atmost-once-period-in-seconds - Specifies the time interval in seconds between successive requests to validate a connection at most once. 
@@ -381,10 +381,10 @@ Connection & Statement Leaks - If enabled, leaked connection will be reclaimed by the pool after connection leak timeout occurs. - ``false`` * - dataverse.db.statement-leak-timeout-in-seconds - - Specifiy timeout when statements should be considered to be "leaked" + - Specifiy timeout when statements should be considered to be "leaked". - ``0`` (disabled) * - dataverse.db.statement-leak-reclaim - - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs + - If enabled, leaked statement will be reclaimed by the pool after statement leak timeout occurs. - ``false`` Logging & Slow Performance @@ -405,7 +405,7 @@ Logging & Slow Performance - SQL queries that exceed this time in seconds will be logged. - ``-1`` (disabled) * - dataverse.db.log-jdbc-calls - - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL + - When set to true, all JDBC calls will be logged allowing tracing of all JDBC interactions including SQL. - ``false`` @@ -1691,6 +1691,8 @@ dataverse.auth.password-reset-timeout-in-minutes Users have 60 minutes to change their passwords by default. You can adjust this value here. +.. _dataverse.db.name: + dataverse.db.name +++++++++++++++++ @@ -1700,6 +1702,8 @@ Defaults to ``dataverse`` (but the installer sets it to ``dvndb``). Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_NAME``. +See also :ref:`database-persistence`. 
+ dataverse.db.user +++++++++++++++++ From 03ae358b5d787a94aafcb7c89f9290e62dbc07e9 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 18 Jan 2023 11:06:37 +0000 Subject: [PATCH 178/322] Added: API security filter with annotation --- .../iq/dataverse/api/ApiConfiguration.java | 5 +--- .../edu/harvard/iq/dataverse/api/Secured.java | 14 ++++++++++ .../iq/dataverse/api/SecurityFilter.java | 27 +++++++++++++++++++ 3 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/Secured.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/SecurityFilter.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java index eead559f15e..719b9c84cee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java @@ -11,9 +11,6 @@ public ApiConfiguration() { packages("edu.harvard.iq.dataverse.api"); packages("edu.harvard.iq.dataverse.mydata"); register(MultiPartFeature.class); + register(SecurityFilter.class); } } -/* -public class ApiConfiguration extends ResourceConfi { -} -*/ \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Secured.java b/src/main/java/edu/harvard/iq/dataverse/api/Secured.java new file mode 100644 index 00000000000..95f9776e036 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/Secured.java @@ -0,0 +1,14 @@ +package edu.harvard.iq.dataverse.api; + +import javax.ws.rs.NameBinding; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.Target; + +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +@NameBinding +@Retention(RUNTIME) +@Target({ElementType.TYPE, ElementType.METHOD}) +public @interface Secured { +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/SecurityFilter.java 
b/src/main/java/edu/harvard/iq/dataverse/api/SecurityFilter.java new file mode 100644 index 00000000000..36ec3d96c5a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/SecurityFilter.java @@ -0,0 +1,27 @@ +package edu.harvard.iq.dataverse.api; + +import javax.annotation.Priority; +import javax.ws.rs.Priorities; +import javax.ws.rs.container.ContainerRequestContext; +import javax.ws.rs.container.ContainerRequestFilter; +import javax.ws.rs.container.ResourceInfo; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.Response; +import javax.ws.rs.ext.Provider; +import java.io.IOException; + +@Secured +@Provider +@Priority(Priorities.AUTHENTICATION) +public class SecurityFilter implements ContainerRequestFilter { + + @Context + private ResourceInfo resourceInfo; + + @Override + public void filter(ContainerRequestContext containerRequestContext) throws IOException { + // Cascade filtering of auth mechanisms goes here + // Forcing unauthorized for this PoC + containerRequestContext.abortWith(Response.status(Response.Status.UNAUTHORIZED).build()); + } +} From 83f3aa0b9cfdf7784185ba47de282aa03e426525 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 18 Jan 2023 13:42:49 +0000 Subject: [PATCH 179/322] Added: ApiKeyAuthMechanism with key validation through filter. 
Pending private url support --- .../iq/dataverse/api/ApiConfiguration.java | 2 + .../api/auth/ApiKeyAuthMechanism.java | 64 +++++++++++++++++++ .../iq/dataverse/api/auth/AuthMechanism.java | 9 +++ .../iq/dataverse/api/{ => auth}/Secured.java | 2 +- .../api/{ => auth}/SecurityFilter.java | 20 +++--- .../api/auth/WrappedAuthErrorResponse.java | 26 ++++++++ 6 files changed, 113 insertions(+), 10 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/auth/AuthMechanism.java rename src/main/java/edu/harvard/iq/dataverse/api/{ => auth}/Secured.java (88%) rename src/main/java/edu/harvard/iq/dataverse/api/{ => auth}/SecurityFilter.java (50%) create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java index 719b9c84cee..715aa270b0d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java @@ -1,6 +1,8 @@ package edu.harvard.iq.dataverse.api; import javax.ws.rs.ApplicationPath; + +import edu.harvard.iq.dataverse.api.auth.SecurityFilter; import org.glassfish.jersey.media.multipart.MultiPartFeature; import org.glassfish.jersey.server.ResourceConfig; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java new file mode 100644 index 00000000000..e4a96eeb6cc --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java @@ -0,0 +1,64 @@ +package edu.harvard.iq.dataverse.api.auth; + +import edu.harvard.iq.dataverse.UserServiceBean; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import 
edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; + +import javax.inject.Inject; +import javax.ws.rs.container.ContainerRequestContext; +import java.util.logging.Logger; + +public class ApiKeyAuthMechanism implements AuthMechanism { + + private static final String DATAVERSE_API_KEY_REQUEST_HEADER_NAME = "X-Dataverse-key"; + private static final String DATAVERSE_API_KEY_REQUEST_PARAM_NAME = "key"; + + @Inject + protected PrivateUrlServiceBean privateUrlSvc; + + @Inject + protected AuthenticationServiceBean authSvc; + + @Inject + protected UserServiceBean userSvc; + + private static final Logger logger = Logger.getLogger(ApiKeyAuthMechanism.class.getName()); + + @Override + public AuthenticatedUser getAuthenticatedUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse { + String apiKey = getRequestApiKey(containerRequestContext); + if (apiKey == null) { + return null; + } + /*PrivateUrlUser privateUrlUser = privateUrlSvc.getPrivateUrlUserFromToken(apiKey); + if (privateUrlUser != null) { + if (privateUrlUser.hasAnonymizedAccess()) { + String pathInfo = containerRequestContext.getUriInfo().getPath(); + String prefix = "/access/datafile/"; + if (!(pathInfo.startsWith(prefix) && !pathInfo.substring(prefix.length()).contains("/"))) { + logger.info("Anonymized access request for " + pathInfo); + throw new AuthException("API Access not allowed with this Key"); + } + } + return privateUrlUser; + }*/ + AuthenticatedUser authUser = authSvc.lookupUser(apiKey); + if (authUser != null) { + authUser = userSvc.updateLastApiUseTime(authUser); + return authUser; + } + throw new WrappedAuthErrorResponse(getBadApiKeyResponseMessage(apiKey)); + } + + private String getRequestApiKey(ContainerRequestContext containerRequestContext) { + String headerParamApiKey = containerRequestContext.getHeaderString(DATAVERSE_API_KEY_REQUEST_HEADER_NAME); + String queryParamApiKey 
= containerRequestContext.getUriInfo().getQueryParameters().getFirst(DATAVERSE_API_KEY_REQUEST_PARAM_NAME); + + return headerParamApiKey != null ? headerParamApiKey : queryParamApiKey; + } + + protected String getBadApiKeyResponseMessage(String apiKey) { + return (apiKey != null) ? "Bad api key" : "Please provide a key query parameter (?" + DATAVERSE_API_KEY_REQUEST_PARAM_NAME + "=XXX) or via the HTTP header " + DATAVERSE_API_KEY_REQUEST_HEADER_NAME; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthMechanism.java new file mode 100644 index 00000000000..c281df3bc62 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthMechanism.java @@ -0,0 +1,9 @@ +package edu.harvard.iq.dataverse.api.auth; + +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; + +import javax.ws.rs.container.ContainerRequestContext; + +interface AuthMechanism { + AuthenticatedUser getAuthenticatedUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse; +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Secured.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/Secured.java similarity index 88% rename from src/main/java/edu/harvard/iq/dataverse/api/Secured.java rename to src/main/java/edu/harvard/iq/dataverse/api/auth/Secured.java index 95f9776e036..d9cd42c148b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Secured.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/Secured.java @@ -1,4 +1,4 @@ -package edu.harvard.iq.dataverse.api; +package edu.harvard.iq.dataverse.api.auth; import javax.ws.rs.NameBinding; import java.lang.annotation.ElementType; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/SecurityFilter.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java similarity index 50% rename from src/main/java/edu/harvard/iq/dataverse/api/SecurityFilter.java rename to 
src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java index 36ec3d96c5a..6baa2552262 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/SecurityFilter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java @@ -1,12 +1,12 @@ -package edu.harvard.iq.dataverse.api; +package edu.harvard.iq.dataverse.api.auth; + +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import javax.annotation.Priority; +import javax.inject.Inject; import javax.ws.rs.Priorities; import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.container.ContainerRequestFilter; -import javax.ws.rs.container.ResourceInfo; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.Response; import javax.ws.rs.ext.Provider; import java.io.IOException; @@ -15,13 +15,15 @@ @Priority(Priorities.AUTHENTICATION) public class SecurityFilter implements ContainerRequestFilter { - @Context - private ResourceInfo resourceInfo; + @Inject + private ApiKeyAuthMechanism apiKeyAuthMechanism; @Override public void filter(ContainerRequestContext containerRequestContext) throws IOException { - // Cascade filtering of auth mechanisms goes here - // Forcing unauthorized for this PoC - containerRequestContext.abortWith(Response.status(Response.Status.UNAUTHORIZED).build()); + try { + AuthenticatedUser authenticatedUser = apiKeyAuthMechanism.getAuthenticatedUserFromRequest(containerRequestContext); + } catch (WrappedAuthErrorResponse e) { + containerRequestContext.abortWith(e.getResponse()); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java new file mode 100644 index 00000000000..2aaa84c5b94 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java @@ -0,0 +1,26 @@ +package edu.harvard.iq.dataverse.api.auth; + +import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; + +import 
javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +// TODO: Find common place for this? +import static edu.harvard.iq.dataverse.api.AbstractApiBean.STATUS_ERROR; + +public class WrappedAuthErrorResponse extends Exception { + + private final Response response; + + public WrappedAuthErrorResponse(String message) { + this.response = Response.status(Response.Status.UNAUTHORIZED) + .entity(NullSafeJsonBuilder.jsonObjectBuilder() + .add("status", STATUS_ERROR) + .add("message", message).build() + ).type(MediaType.APPLICATION_JSON_TYPE).build(); + } + + public Response getResponse() { + return response; + } +} From 1f3b91724ed9387a8cd4830da06e2d4477bcd3a1 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 18 Jan 2023 15:31:06 +0000 Subject: [PATCH 180/322] Added: Private URL user support within ApiKeyAuthMechanism --- .../api/auth/ApiKeyAuthMechanism.java | 33 +++++++++++-------- .../iq/dataverse/api/auth/AuthMechanism.java | 4 +-- .../iq/dataverse/api/auth/SecurityFilter.java | 4 +-- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java index e4a96eeb6cc..497f23e44c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java @@ -3,6 +3,8 @@ import edu.harvard.iq.dataverse.UserServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import javax.inject.Inject; @@ -13,6 +15,8 @@ public class ApiKeyAuthMechanism implements AuthMechanism { private static final String DATAVERSE_API_KEY_REQUEST_HEADER_NAME = 
"X-Dataverse-key"; private static final String DATAVERSE_API_KEY_REQUEST_PARAM_NAME = "key"; + private static final String ACCESS_DATAFILE_PATH_PREFIX = "/access/datafile/"; + private static final String RESPONSE_MESSAGE_BAD_API_KEY = "Bad API key"; @Inject protected PrivateUrlServiceBean privateUrlSvc; @@ -26,29 +30,22 @@ public class ApiKeyAuthMechanism implements AuthMechanism { private static final Logger logger = Logger.getLogger(ApiKeyAuthMechanism.class.getName()); @Override - public AuthenticatedUser getAuthenticatedUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse { + public User findUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse { String apiKey = getRequestApiKey(containerRequestContext); if (apiKey == null) { return null; } - /*PrivateUrlUser privateUrlUser = privateUrlSvc.getPrivateUrlUserFromToken(apiKey); + PrivateUrlUser privateUrlUser = privateUrlSvc.getPrivateUrlUserFromToken(apiKey); if (privateUrlUser != null) { - if (privateUrlUser.hasAnonymizedAccess()) { - String pathInfo = containerRequestContext.getUriInfo().getPath(); - String prefix = "/access/datafile/"; - if (!(pathInfo.startsWith(prefix) && !pathInfo.substring(prefix.length()).contains("/"))) { - logger.info("Anonymized access request for " + pathInfo); - throw new AuthException("API Access not allowed with this Key"); - } - } + checkAnonymizedAccessToRequestPath(containerRequestContext.getUriInfo().getPath(), privateUrlUser); return privateUrlUser; - }*/ + } AuthenticatedUser authUser = authSvc.lookupUser(apiKey); if (authUser != null) { authUser = userSvc.updateLastApiUseTime(authUser); return authUser; } - throw new WrappedAuthErrorResponse(getBadApiKeyResponseMessage(apiKey)); + throw new WrappedAuthErrorResponse(RESPONSE_MESSAGE_BAD_API_KEY); } private String getRequestApiKey(ContainerRequestContext containerRequestContext) { @@ -58,7 +55,15 @@ private String 
getRequestApiKey(ContainerRequestContext containerRequestContext) return headerParamApiKey != null ? headerParamApiKey : queryParamApiKey; } - protected String getBadApiKeyResponseMessage(String apiKey) { - return (apiKey != null) ? "Bad api key" : "Please provide a key query parameter (?" + DATAVERSE_API_KEY_REQUEST_PARAM_NAME + "=XXX) or via the HTTP header " + DATAVERSE_API_KEY_REQUEST_HEADER_NAME; + private void checkAnonymizedAccessToRequestPath(String requestPath, PrivateUrlUser privateUrlUser) throws WrappedAuthErrorResponse { + if (!privateUrlUser.hasAnonymizedAccess()) { + return; + } + // For privateUrlUsers restricted to anonymized access, all api calls are off-limits except for those used in the UI + // to download the file or image thumbs + if (!(requestPath.startsWith(ACCESS_DATAFILE_PATH_PREFIX) && !requestPath.substring(ACCESS_DATAFILE_PATH_PREFIX.length()).contains("/"))) { + logger.info("Anonymized access request for " + requestPath); + throw new WrappedAuthErrorResponse(RESPONSE_MESSAGE_BAD_API_KEY); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthMechanism.java index c281df3bc62..108b12ac13f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthMechanism.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthMechanism.java @@ -1,9 +1,9 @@ package edu.harvard.iq.dataverse.api.auth; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import javax.ws.rs.container.ContainerRequestContext; interface AuthMechanism { - AuthenticatedUser getAuthenticatedUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse; + User findUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java 
b/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java index 6baa2552262..ace389b1da5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java @@ -1,6 +1,6 @@ package edu.harvard.iq.dataverse.api.auth; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import javax.annotation.Priority; import javax.inject.Inject; @@ -21,7 +21,7 @@ public class SecurityFilter implements ContainerRequestFilter { @Override public void filter(ContainerRequestContext containerRequestContext) throws IOException { try { - AuthenticatedUser authenticatedUser = apiKeyAuthMechanism.getAuthenticatedUserFromRequest(containerRequestContext); + User user = apiKeyAuthMechanism.findUserFromRequest(containerRequestContext); } catch (WrappedAuthErrorResponse e) { containerRequestContext.abortWith(e.getResponse()); } From d191cba41ca3665a3fd55e1f0c944a998b56a515 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 18 Jan 2023 15:43:59 -0500 Subject: [PATCH 181/322] #8724 fix dv linking to self if parent is linked --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 5 +++++ src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 09cd8a72f0c..f9a3cbf5633 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1662,6 +1662,11 @@ private List retrieveDVOPaths(DvObject dvo) { logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); } List dataversePaths = getDataversePathsFromSegments(dataverseSegments); + if 
(dataversePaths.size() > 0) { + // removing the dataverse's own id from the paths + // fixes bug where if my parent dv was linked my dv was shown as linked to myself + dataversePaths.remove(dataversePaths.size() - 1); + } /* add linking paths */ diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java index 9ac2d2cb7e5..45efda9e230 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java @@ -174,10 +174,14 @@ public void testDeepLinks() { * Remove this early return when you are ready to work on * https://github.com/IQSS/dataverse/issues/7430 about strange linking * behavior. - */ - if (true) { + * + * + * + * if (true) { return; } + */ + Response createLevel2a = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-level2a", null, apiToken, level1a); createLevel2a.prettyPrint(); From 230298902fbb7296c9623a355e66e72302f83174 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 19 Jan 2023 10:00:33 -0500 Subject: [PATCH 182/322] rename sql scripts #9153 "Use a version like '4.11.0.1' in the example above where the previously released version was 4.11" -- dev guide That is, these scripts should have been 5.12.1.whatever since the last release was 5.12.1. Fixing. (They were 5.13.whatever.) 
--- ...-sorting_licenses.sql => V5.12.1.1__8671-sorting_licenses.sql} | 0 ...ls-for-tools.sql => V5.12.1.2__7715-signed-urls-for-tools.sql} | 0 ...imates.sql => V5.12.1.3__8840-improve-guestbook-estimates.sql} | 0 ...-extract-metadata.sql => V5.12.1.4__9153-extract-metadata.sql} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.1__8671-sorting_licenses.sql => V5.12.1.1__8671-sorting_licenses.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.2__7715-signed-urls-for-tools.sql => V5.12.1.2__7715-signed-urls-for-tools.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.3__8840-improve-guestbook-estimates.sql => V5.12.1.3__8840-improve-guestbook-estimates.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.3__9153-extract-metadata.sql => V5.12.1.4__9153-extract-metadata.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql diff --git a/src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql b/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql rename to src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql rename to src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql diff --git 
a/src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql
similarity index 100%
rename from src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql
rename to src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql

From b4bb357062222b72ebacd48e45c721adc06ee82c Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Thu, 19 Jan 2023 10:13:42 -0500
Subject: [PATCH 183/322] #8724 add release note for re-index collections

---
 .../8724-display-child-datasets-of-linked-dv.md  | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 doc/release-notes/8724-display-child-datasets-of-linked-dv.md

diff --git a/doc/release-notes/8724-display-child-datasets-of-linked-dv.md b/doc/release-notes/8724-display-child-datasets-of-linked-dv.md
new file mode 100644
index 00000000000..5b1b9c8ae20
--- /dev/null
+++ b/doc/release-notes/8724-display-child-datasets-of-linked-dv.md
@@ -0,0 +1,14 @@
+Datasets that are part of linked dataverse collections will now be displayed in
+their linking dataverse collections. In order to fix the display of collections
+that have already been linked you must re-index the linked collections. This
+query will provide a list of commands to re-index the affected collections:
+
+select 'curl http://localhost:8080/api/admin/index/dataverses/'
+|| tmp.dvid from (select distinct dataverse_id as dvid
+from dataverselinkingdataverse) as tmp
+
+The result of the query will be a list of re-index commands such as:
+
+curl http://localhost:8080/api/admin/index/dataverses/633
+
+where '633' is the id of the linked collection.
From f4e1dc9a4730da1207d3993d0f9b33ddf7635c38 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 19 Jan 2023 14:10:33 -0500 Subject: [PATCH 184/322] #8724 remove comments --- .../java/edu/harvard/iq/dataverse/api/LinkIT.java | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java index 45efda9e230..76e9b7d6bc8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java @@ -170,19 +170,6 @@ public void testDeepLinks() { .body("data.total_count", equalTo(1)) .body("data.items[0].name", equalTo(level1a)); - /** - * Remove this early return when you are ready to work on - * https://github.com/IQSS/dataverse/issues/7430 about strange linking - * behavior. - * - * - * - * if (true) { - return; - } - */ - - Response createLevel2a = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-level2a", null, apiToken, level1a); createLevel2a.prettyPrint(); String level2a = UtilIT.getAliasFromResponse(createLevel2a); From ad7f9e3b91c681b34f521364bee6756122c6d304 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 20 Jan 2023 11:46:21 +0000 Subject: [PATCH 185/322] Added: ApiKeyAuthMechanismTest with initial PrivateUrlUser (not anonymized) case --- .../api/auth/ApiKeyAuthMechanism.java | 4 +- .../api/auth/ApiKeyAuthMechanismTest.java | 37 +++++ .../api/auth/ContainerRequestFake.java | 148 ++++++++++++++++++ .../iq/dataverse/api/auth/UriInfoFake.java | 113 +++++++++++++ 4 files changed, 300 insertions(+), 2 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java 
b/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java index 497f23e44c8..29b11e7c99f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java @@ -13,8 +13,8 @@ public class ApiKeyAuthMechanism implements AuthMechanism { - private static final String DATAVERSE_API_KEY_REQUEST_HEADER_NAME = "X-Dataverse-key"; - private static final String DATAVERSE_API_KEY_REQUEST_PARAM_NAME = "key"; + public static final String DATAVERSE_API_KEY_REQUEST_HEADER_NAME = "X-Dataverse-key"; + public static final String DATAVERSE_API_KEY_REQUEST_PARAM_NAME = "key"; private static final String ACCESS_DATAFILE_PATH_PREFIX = "/access/datafile/"; private static final String RESPONSE_MESSAGE_BAD_API_KEY = "Bad API key"; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java new file mode 100644 index 00000000000..01c5223cd45 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java @@ -0,0 +1,37 @@ +package edu.harvard.iq.dataverse.api.auth; + +import edu.harvard.iq.dataverse.UserServiceBean; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import org.junit.Test; +import org.mockito.Mockito; + +import javax.ws.rs.container.ContainerRequestContext; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class ApiKeyAuthMechanismTest { + + private final PrivateUrlUser testPrivateUrlUser = new PrivateUrlUser(1L); + + private static final String TEST_API_KEY = "test-api-key"; + + @Test + public void testFindUserFromRequestPrivateUrlUserNotAnonymized() throws WrappedAuthErrorResponse { + 
ApiKeyAuthMechanism sut = new ApiKeyAuthMechanism(); + + sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); + sut.userSvc = Mockito.mock(UserServiceBean.class); + + PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); + Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testPrivateUrlUser); + sut.privateUrlSvc = privateUrlServiceStub; + + ContainerRequestContext testContainerRequest = new ContainerRequestFake(TEST_API_KEY); + User actual = sut.findUserFromRequest(testContainerRequest); + + assertEquals(testPrivateUrlUser, actual); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java new file mode 100644 index 00000000000..f03d3cdebe1 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java @@ -0,0 +1,148 @@ +package edu.harvard.iq.dataverse.api.auth; + +import javax.ws.rs.container.ContainerRequestContext; +import javax.ws.rs.core.*; +import java.io.InputStream; +import java.net.URI; +import java.util.*; + +import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.DATAVERSE_API_KEY_REQUEST_HEADER_NAME; + +public class ContainerRequestFake implements ContainerRequestContext { + + private final String apiKey; + private final UriInfo uriInfo; + + public ContainerRequestFake(String apiKey) { + this.apiKey = apiKey; + this.uriInfo = new UriInfoFake(apiKey); + } + + @Override + public Object getProperty(String s) { + return null; + } + + @Override + public Collection getPropertyNames() { + return null; + } + + @Override + public void setProperty(String s, Object o) { + + } + + @Override + public void removeProperty(String s) { + + } + + @Override + public UriInfo getUriInfo() { + return uriInfo; + } + + @Override + public void setRequestUri(URI uri) { + + } + + @Override + public void setRequestUri(URI uri, URI uri1) { + + } + + 
@Override + public Request getRequest() { + return null; + } + + @Override + public String getMethod() { + return null; + } + + @Override + public void setMethod(String s) { + + } + + @Override + public MultivaluedMap getHeaders() { + return null; + } + + @Override + public String getHeaderString(String s) { + if (s.equals(DATAVERSE_API_KEY_REQUEST_HEADER_NAME)) { + return this.apiKey; + } + return null; + } + + @Override + public Date getDate() { + return null; + } + + @Override + public Locale getLanguage() { + return null; + } + + @Override + public int getLength() { + return 0; + } + + @Override + public MediaType getMediaType() { + return null; + } + + @Override + public List getAcceptableMediaTypes() { + return null; + } + + @Override + public List getAcceptableLanguages() { + return null; + } + + @Override + public Map getCookies() { + return null; + } + + @Override + public boolean hasEntity() { + return false; + } + + @Override + public InputStream getEntityStream() { + return null; + } + + @Override + public void setEntityStream(InputStream inputStream) { + + } + + @Override + public SecurityContext getSecurityContext() { + return null; + } + + @Override + public void setSecurityContext(SecurityContext securityContext) { + + } + + @Override + public void abortWith(Response response) { + + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java new file mode 100644 index 00000000000..f67f98f3b7a --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java @@ -0,0 +1,113 @@ +package edu.harvard.iq.dataverse.api.auth; + +import javax.ws.rs.core.*; +import java.net.URI; +import java.util.List; + +import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.DATAVERSE_API_KEY_REQUEST_PARAM_NAME; + +public class UriInfoFake implements UriInfo { + + private final String apiKey; + + public UriInfoFake(String apiKey) { + this.apiKey = apiKey; 
+ } + + @Override + public String getPath() { + return null; + } + + @Override + public String getPath(boolean b) { + return null; + } + + @Override + public List getPathSegments() { + return null; + } + + @Override + public List getPathSegments(boolean b) { + return null; + } + + @Override + public URI getRequestUri() { + return null; + } + + @Override + public UriBuilder getRequestUriBuilder() { + return null; + } + + @Override + public URI getAbsolutePath() { + return null; + } + + @Override + public UriBuilder getAbsolutePathBuilder() { + return null; + } + + @Override + public URI getBaseUri() { + return null; + } + + @Override + public UriBuilder getBaseUriBuilder() { + return null; + } + + @Override + public MultivaluedMap getPathParameters() { + return null; + } + + @Override + public MultivaluedMap getPathParameters(boolean b) { + return null; + } + + @Override + public MultivaluedMap getQueryParameters() { + MultivaluedMap queryParameters = new MultivaluedHashMap<>(); + queryParameters.add(DATAVERSE_API_KEY_REQUEST_PARAM_NAME, apiKey); + return queryParameters; + } + + @Override + public MultivaluedMap getQueryParameters(boolean b) { + return null; + } + + @Override + public List getMatchedURIs() { + return null; + } + + @Override + public List getMatchedURIs(boolean b) { + return null; + } + + @Override + public List getMatchedResources() { + return null; + } + + @Override + public URI resolve(URI uri) { + return null; + } + + @Override + public URI relativize(URI uri) { + return null; + } +} From f8bfd26bea29c4c90cce2b2c1216664714d6288b Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 20 Jan 2023 15:32:55 +0000 Subject: [PATCH 186/322] Added: rest of PrivateUrlUser-related test cases to ApiKeyAuthMechanismTest --- .../api/auth/ApiKeyAuthMechanism.java | 4 +- .../api/auth/WrappedAuthErrorResponse.java | 6 +++ .../api/auth/ApiKeyAuthMechanismTest.java | 47 ++++++++++++++++--- .../api/auth/ContainerRequestFake.java | 4 +- 
.../iq/dataverse/api/auth/UriInfoFake.java | 6 ++- 5 files changed, 54 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java index 29b11e7c99f..9ae0a90035c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanism.java @@ -15,8 +15,8 @@ public class ApiKeyAuthMechanism implements AuthMechanism { public static final String DATAVERSE_API_KEY_REQUEST_HEADER_NAME = "X-Dataverse-key"; public static final String DATAVERSE_API_KEY_REQUEST_PARAM_NAME = "key"; - private static final String ACCESS_DATAFILE_PATH_PREFIX = "/access/datafile/"; - private static final String RESPONSE_MESSAGE_BAD_API_KEY = "Bad API key"; + public static final String RESPONSE_MESSAGE_BAD_API_KEY = "Bad API key"; + public static final String ACCESS_DATAFILE_PATH_PREFIX = "/access/datafile/"; @Inject protected PrivateUrlServiceBean privateUrlSvc; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java index 2aaa84c5b94..74359d08975 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java @@ -10,9 +10,11 @@ public class WrappedAuthErrorResponse extends Exception { + private final String message; private final Response response; public WrappedAuthErrorResponse(String message) { + this.message = message; this.response = Response.status(Response.Status.UNAUTHORIZED) .entity(NullSafeJsonBuilder.jsonObjectBuilder() .add("status", STATUS_ERROR) @@ -20,6 +22,10 @@ public WrappedAuthErrorResponse(String message) { ).type(MediaType.APPLICATION_JSON_TYPE).build(); } + public String getMessage() { + return this.message; + } + public Response 
getResponse() { return response; } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java index 01c5223cd45..3eca295fb32 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java @@ -5,33 +5,66 @@ import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; -import org.junit.Test; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.mockito.Mockito; import javax.ws.rs.container.ContainerRequestContext; +import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.ACCESS_DATAFILE_PATH_PREFIX; +import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.RESPONSE_MESSAGE_BAD_API_KEY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; public class ApiKeyAuthMechanismTest { - private final PrivateUrlUser testPrivateUrlUser = new PrivateUrlUser(1L); - private static final String TEST_API_KEY = "test-api-key"; + private static final String TEST_PATH = "/test/path/"; - @Test - public void testFindUserFromRequestPrivateUrlUserNotAnonymized() throws WrappedAuthErrorResponse { - ApiKeyAuthMechanism sut = new ApiKeyAuthMechanism(); + private ApiKeyAuthMechanism sut; + + private final PrivateUrlUser testAnonymizedPrivateUrlUser = new PrivateUrlUser(1L, true); + @BeforeEach + public void setUp() { + sut = new ApiKeyAuthMechanism(); sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); sut.userSvc = Mockito.mock(UserServiceBean.class); + } + @Test + public void testFindUserFromRequest_NotAnonymizedPrivateUrlUser() throws WrappedAuthErrorResponse { PrivateUrlServiceBean privateUrlServiceStub = 
Mockito.mock(PrivateUrlServiceBean.class); + PrivateUrlUser testPrivateUrlUser = new PrivateUrlUser(1L); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testPrivateUrlUser); sut.privateUrlSvc = privateUrlServiceStub; - ContainerRequestContext testContainerRequest = new ContainerRequestFake(TEST_API_KEY); + ContainerRequestContext testContainerRequest = new ContainerRequestFake(TEST_API_KEY, TEST_PATH); User actual = sut.findUserFromRequest(testContainerRequest); assertEquals(testPrivateUrlUser, actual); } + + @Test + public void testFindUserFromRequest_AnonymizedPrivateUrlUserAccessingDatafile() throws WrappedAuthErrorResponse { + PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); + Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testAnonymizedPrivateUrlUser); + sut.privateUrlSvc = privateUrlServiceStub; + + ContainerRequestContext testContainerRequest = new ContainerRequestFake(TEST_API_KEY, ACCESS_DATAFILE_PATH_PREFIX); + User actual = sut.findUserFromRequest(testContainerRequest); + + assertEquals(testAnonymizedPrivateUrlUser, actual); + } + + @Test + public void testFindUserFromRequest_AnonymizedPrivateUrlUserNotAccessingDatafile() { + PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); + Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testAnonymizedPrivateUrlUser); + sut.privateUrlSvc = privateUrlServiceStub; + + ContainerRequestContext testContainerRequest = new ContainerRequestFake(TEST_API_KEY, TEST_PATH); + WrappedAuthErrorResponse wrappedAuthErrorResponse = assertThrows(WrappedAuthErrorResponse.class, () -> sut.findUserFromRequest(testContainerRequest)); + assertEquals(RESPONSE_MESSAGE_BAD_API_KEY, wrappedAuthErrorResponse.getMessage()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java 
b/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java index f03d3cdebe1..ea849214fa2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java @@ -13,9 +13,9 @@ public class ContainerRequestFake implements ContainerRequestContext { private final String apiKey; private final UriInfo uriInfo; - public ContainerRequestFake(String apiKey) { + public ContainerRequestFake(String apiKey, String path) { this.apiKey = apiKey; - this.uriInfo = new UriInfoFake(apiKey); + this.uriInfo = new UriInfoFake(apiKey, path); } @Override diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java index f67f98f3b7a..79b93caef3d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java @@ -9,14 +9,16 @@ public class UriInfoFake implements UriInfo { private final String apiKey; + private final String path; - public UriInfoFake(String apiKey) { + public UriInfoFake(String apiKey, String path) { this.apiKey = apiKey; + this.path = path; } @Override public String getPath() { - return null; + return path; } @Override From d87024159dedb8fffc31143a6d8f69c2e2c622d4 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 20 Jan 2023 15:56:17 +0000 Subject: [PATCH 187/322] Added: rest of test cases for ApiKeyAuthMechanismTest (empty Api Key, authenticated & unauthenticated user) --- .../api/auth/ApiKeyAuthMechanismTest.java | 75 +++++++++++++++++-- ...ake.java => ContainerRequestTestFake.java} | 6 +- ...{UriInfoFake.java => UriInfoTestFake.java} | 4 +- 3 files changed, 72 insertions(+), 13 deletions(-) rename src/test/java/edu/harvard/iq/dataverse/api/auth/{ContainerRequestFake.java => ContainerRequestTestFake.java} (92%) rename src/test/java/edu/harvard/iq/dataverse/api/auth/{UriInfoFake.java => 
UriInfoTestFake.java} (95%) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java index 3eca295fb32..bef8ff030bf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.UserServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; @@ -13,8 +14,7 @@ import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.ACCESS_DATAFILE_PATH_PREFIX; import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.RESPONSE_MESSAGE_BAD_API_KEY; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.*; public class ApiKeyAuthMechanismTest { @@ -28,43 +28,102 @@ public class ApiKeyAuthMechanismTest { @BeforeEach public void setUp() { sut = new ApiKeyAuthMechanism(); + } + + @Test + public void testFindUserFromRequest_ApiKeyNotProvided() throws WrappedAuthErrorResponse { + sut.privateUrlSvc = Mockito.mock(PrivateUrlServiceBean.class); sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); sut.userSvc = Mockito.mock(UserServiceBean.class); + + ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(null, TEST_PATH); + User actual = sut.findUserFromRequest(testContainerRequest); + + assertNull(actual); } @Test - public void testFindUserFromRequest_NotAnonymizedPrivateUrlUser() throws WrappedAuthErrorResponse { + public void 
testFindUserFromRequest_NotAnonymizedPrivateUrlUserAuthenticated() throws WrappedAuthErrorResponse { PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); PrivateUrlUser testPrivateUrlUser = new PrivateUrlUser(1L); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testPrivateUrlUser); sut.privateUrlSvc = privateUrlServiceStub; - ContainerRequestContext testContainerRequest = new ContainerRequestFake(TEST_API_KEY, TEST_PATH); + sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); + sut.userSvc = Mockito.mock(UserServiceBean.class); + + ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, TEST_PATH); User actual = sut.findUserFromRequest(testContainerRequest); assertEquals(testPrivateUrlUser, actual); } @Test - public void testFindUserFromRequest_AnonymizedPrivateUrlUserAccessingDatafile() throws WrappedAuthErrorResponse { + public void testFindUserFromRequest_AnonymizedPrivateUrlUserAuthenticated_AccessingAccessDatafilePath() throws WrappedAuthErrorResponse { PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testAnonymizedPrivateUrlUser); sut.privateUrlSvc = privateUrlServiceStub; - ContainerRequestContext testContainerRequest = new ContainerRequestFake(TEST_API_KEY, ACCESS_DATAFILE_PATH_PREFIX); + sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); + sut.userSvc = Mockito.mock(UserServiceBean.class); + + ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, ACCESS_DATAFILE_PATH_PREFIX); User actual = sut.findUserFromRequest(testContainerRequest); assertEquals(testAnonymizedPrivateUrlUser, actual); } @Test - public void testFindUserFromRequest_AnonymizedPrivateUrlUserNotAccessingDatafile() { + public void 
testFindUserFromRequest_AnonymizedPrivateUrlUserAuthenticated_NotAccessingAccessDatafilePath() { PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testAnonymizedPrivateUrlUser); sut.privateUrlSvc = privateUrlServiceStub; - ContainerRequestContext testContainerRequest = new ContainerRequestFake(TEST_API_KEY, TEST_PATH); + sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); + sut.userSvc = Mockito.mock(UserServiceBean.class); + + ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, TEST_PATH); + WrappedAuthErrorResponse wrappedAuthErrorResponse = assertThrows(WrappedAuthErrorResponse.class, () -> sut.findUserFromRequest(testContainerRequest)); + + assertEquals(RESPONSE_MESSAGE_BAD_API_KEY, wrappedAuthErrorResponse.getMessage()); + } + + @Test + public void testFindUserFromRequest_AuthenticatedUser() throws WrappedAuthErrorResponse { + PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); + Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(null); + sut.privateUrlSvc = privateUrlServiceStub; + + AuthenticationServiceBean authenticationServiceBeanStub = Mockito.mock(AuthenticationServiceBean.class); + AuthenticatedUser testAuthenticatedUser = new AuthenticatedUser(); + Mockito.when(authenticationServiceBeanStub.lookupUser(TEST_API_KEY)).thenReturn(testAuthenticatedUser); + sut.authSvc = authenticationServiceBeanStub; + + UserServiceBean userServiceBeanStub = Mockito.mock(UserServiceBean.class); + Mockito.when(userServiceBeanStub.updateLastApiUseTime(testAuthenticatedUser)).thenReturn(testAuthenticatedUser); + sut.userSvc = userServiceBeanStub; + + ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, TEST_PATH); + User actual = sut.findUserFromRequest(testContainerRequest); + + 
assertEquals(testAuthenticatedUser, actual); + } + + @Test + public void testFindUserFromRequest_CanNotAuthenticateUserWithAnyMethod() { + PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); + Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(null); + sut.privateUrlSvc = privateUrlServiceStub; + + AuthenticationServiceBean authenticationServiceBeanStub = Mockito.mock(AuthenticationServiceBean.class); + Mockito.when(authenticationServiceBeanStub.lookupUser(TEST_API_KEY)).thenReturn(null); + sut.authSvc = authenticationServiceBeanStub; + + sut.userSvc = Mockito.mock(UserServiceBean.class); + + ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, TEST_PATH); WrappedAuthErrorResponse wrappedAuthErrorResponse = assertThrows(WrappedAuthErrorResponse.class, () -> sut.findUserFromRequest(testContainerRequest)); + assertEquals(RESPONSE_MESSAGE_BAD_API_KEY, wrappedAuthErrorResponse.getMessage()); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestTestFake.java similarity index 92% rename from src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java rename to src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestTestFake.java index ea849214fa2..f5509408d12 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestTestFake.java @@ -8,14 +8,14 @@ import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.DATAVERSE_API_KEY_REQUEST_HEADER_NAME; -public class ContainerRequestFake implements ContainerRequestContext { +public class ContainerRequestTestFake implements ContainerRequestContext { private final String apiKey; private final UriInfo uriInfo; - public ContainerRequestFake(String apiKey, String path) { + public 
ContainerRequestTestFake(String apiKey, String path) { this.apiKey = apiKey; - this.uriInfo = new UriInfoFake(apiKey, path); + this.uriInfo = new UriInfoTestFake(apiKey, path); } @Override diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoTestFake.java similarity index 95% rename from src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java rename to src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoTestFake.java index 79b93caef3d..440aca812a8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoTestFake.java @@ -6,12 +6,12 @@ import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.DATAVERSE_API_KEY_REQUEST_PARAM_NAME; -public class UriInfoFake implements UriInfo { +public class UriInfoTestFake implements UriInfo { private final String apiKey; private final String path; - public UriInfoFake(String apiKey, String path) { + public UriInfoTestFake(String apiKey, String path) { this.apiKey = apiKey; this.path = path; } From 54d9c0a6abce2accf056cf9a1ddfd13b67a0fbb9 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 20 Jan 2023 16:11:59 +0000 Subject: [PATCH 188/322] Refactor: ApiKeyAuthMechanismTest cases naming structure --- .../iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java index bef8ff030bf..cf7d59032d2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java @@ -43,7 +43,7 @@ public void testFindUserFromRequest_ApiKeyNotProvided() throws WrappedAuthErrorR } @Test - public void 
testFindUserFromRequest_NotAnonymizedPrivateUrlUserAuthenticated() throws WrappedAuthErrorResponse { + public void testFindUserFromRequest_ApiKeyProvided_NotAnonymizedPrivateUrlUserAuthenticated() throws WrappedAuthErrorResponse { PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); PrivateUrlUser testPrivateUrlUser = new PrivateUrlUser(1L); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testPrivateUrlUser); @@ -59,7 +59,7 @@ public void testFindUserFromRequest_NotAnonymizedPrivateUrlUserAuthenticated() t } @Test - public void testFindUserFromRequest_AnonymizedPrivateUrlUserAuthenticated_AccessingAccessDatafilePath() throws WrappedAuthErrorResponse { + public void testFindUserFromRequest_ApiKeyProvided_AnonymizedPrivateUrlUserAuthenticated_AccessingAccessDatafilePath() throws WrappedAuthErrorResponse { PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testAnonymizedPrivateUrlUser); sut.privateUrlSvc = privateUrlServiceStub; @@ -74,7 +74,7 @@ public void testFindUserFromRequest_AnonymizedPrivateUrlUserAuthenticated_Access } @Test - public void testFindUserFromRequest_AnonymizedPrivateUrlUserAuthenticated_NotAccessingAccessDatafilePath() { + public void testFindUserFromRequest_ApiKeyProvided_AnonymizedPrivateUrlUserAuthenticated_NotAccessingAccessDatafilePath() { PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(testAnonymizedPrivateUrlUser); sut.privateUrlSvc = privateUrlServiceStub; @@ -89,7 +89,7 @@ public void testFindUserFromRequest_AnonymizedPrivateUrlUserAuthenticated_NotAcc } @Test - public void testFindUserFromRequest_AuthenticatedUser() throws WrappedAuthErrorResponse { + public void 
testFindUserFromRequest_ApiKeyProvided_AuthenticatedUser() throws WrappedAuthErrorResponse { PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(null); sut.privateUrlSvc = privateUrlServiceStub; @@ -110,7 +110,7 @@ public void testFindUserFromRequest_AuthenticatedUser() throws WrappedAuthErrorR } @Test - public void testFindUserFromRequest_CanNotAuthenticateUserWithAnyMethod() { + public void testFindUserFromRequest_ApiKeyProvided_CanNotAuthenticateUserWithAnyMethod() { PrivateUrlServiceBean privateUrlServiceStub = Mockito.mock(PrivateUrlServiceBean.class); Mockito.when(privateUrlServiceStub.getPrivateUrlUserFromToken(TEST_API_KEY)).thenReturn(null); sut.privateUrlSvc = privateUrlServiceStub; From cc8521c1298e3e3bf5bca87b16db4d027f5a0f01 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 20 Jan 2023 17:36:49 +0000 Subject: [PATCH 189/322] Added: composite of AuthMechanism implementations --- .../api/auth/CompoundAuthMechanism.java | 38 +++++++++++++++ .../iq/dataverse/api/auth/SecurityFilter.java | 4 +- .../api/auth/ApiKeyAuthMechanismTest.java | 12 ++--- .../auth/ApiKeyContainerRequestTestFake.java | 29 +++++++++++ ...stFake.java => ApiKeyUriInfoTestFake.java} | 4 +- .../api/auth/CompoundAuthMechanismTest.java | 48 +++++++++++++++++++ .../api/auth/ContainerRequestTestFake.java | 15 +----- 7 files changed, 127 insertions(+), 23 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanism.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyContainerRequestTestFake.java rename src/test/java/edu/harvard/iq/dataverse/api/auth/{UriInfoTestFake.java => ApiKeyUriInfoTestFake.java} (94%) create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanismTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanism.java 
b/src/main/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanism.java new file mode 100644 index 00000000000..761bcef3654 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanism.java @@ -0,0 +1,38 @@ +package edu.harvard.iq.dataverse.api.auth; + +import edu.harvard.iq.dataverse.authorization.users.GuestUser; +import edu.harvard.iq.dataverse.authorization.users.User; + +import javax.ws.rs.container.ContainerRequestContext; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class CompoundAuthMechanism implements AuthMechanism { + + protected List authMechanisms = new ArrayList<>(); + + public CompoundAuthMechanism(AuthMechanism... authMechanisms) { + add(authMechanisms); + } + + public void add(AuthMechanism... authMechanisms) { + this.authMechanisms.addAll(Arrays.asList(authMechanisms)); + } + + @Override + public User findUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse { + User user = null; + for (AuthMechanism authMechanism : authMechanisms) { + User userFromRequest = authMechanism.findUserFromRequest(containerRequestContext); + if (userFromRequest != null) { + user = userFromRequest; + break; + } + } + if (user == null) { + return GuestUser.get(); + } + return user; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java index ace389b1da5..018b50a788b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java @@ -20,8 +20,10 @@ public class SecurityFilter implements ContainerRequestFilter { @Override public void filter(ContainerRequestContext containerRequestContext) throws IOException { + CompoundAuthMechanism compoundAuthMechanism = new CompoundAuthMechanism(apiKeyAuthMechanism); try { - User user = 
apiKeyAuthMechanism.findUserFromRequest(containerRequestContext); + User user = compoundAuthMechanism.findUserFromRequest(containerRequestContext); + containerRequestContext.setProperty("user", user); } catch (WrappedAuthErrorResponse e) { containerRequestContext.abortWith(e.getResponse()); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java index cf7d59032d2..984787a8d94 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyAuthMechanismTest.java @@ -36,7 +36,7 @@ public void testFindUserFromRequest_ApiKeyNotProvided() throws WrappedAuthErrorR sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); sut.userSvc = Mockito.mock(UserServiceBean.class); - ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(null, TEST_PATH); + ContainerRequestContext testContainerRequest = new ApiKeyContainerRequestTestFake(null, TEST_PATH); User actual = sut.findUserFromRequest(testContainerRequest); assertNull(actual); @@ -52,7 +52,7 @@ public void testFindUserFromRequest_ApiKeyProvided_NotAnonymizedPrivateUrlUserAu sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); sut.userSvc = Mockito.mock(UserServiceBean.class); - ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, TEST_PATH); + ContainerRequestContext testContainerRequest = new ApiKeyContainerRequestTestFake(TEST_API_KEY, TEST_PATH); User actual = sut.findUserFromRequest(testContainerRequest); assertEquals(testPrivateUrlUser, actual); @@ -67,7 +67,7 @@ public void testFindUserFromRequest_ApiKeyProvided_AnonymizedPrivateUrlUserAuthe sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); sut.userSvc = Mockito.mock(UserServiceBean.class); - ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, 
ACCESS_DATAFILE_PATH_PREFIX); + ContainerRequestContext testContainerRequest = new ApiKeyContainerRequestTestFake(TEST_API_KEY, ACCESS_DATAFILE_PATH_PREFIX); User actual = sut.findUserFromRequest(testContainerRequest); assertEquals(testAnonymizedPrivateUrlUser, actual); @@ -82,7 +82,7 @@ public void testFindUserFromRequest_ApiKeyProvided_AnonymizedPrivateUrlUserAuthe sut.authSvc = Mockito.mock(AuthenticationServiceBean.class); sut.userSvc = Mockito.mock(UserServiceBean.class); - ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, TEST_PATH); + ContainerRequestContext testContainerRequest = new ApiKeyContainerRequestTestFake(TEST_API_KEY, TEST_PATH); WrappedAuthErrorResponse wrappedAuthErrorResponse = assertThrows(WrappedAuthErrorResponse.class, () -> sut.findUserFromRequest(testContainerRequest)); assertEquals(RESPONSE_MESSAGE_BAD_API_KEY, wrappedAuthErrorResponse.getMessage()); @@ -103,7 +103,7 @@ public void testFindUserFromRequest_ApiKeyProvided_AuthenticatedUser() throws Wr Mockito.when(userServiceBeanStub.updateLastApiUseTime(testAuthenticatedUser)).thenReturn(testAuthenticatedUser); sut.userSvc = userServiceBeanStub; - ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, TEST_PATH); + ContainerRequestContext testContainerRequest = new ApiKeyContainerRequestTestFake(TEST_API_KEY, TEST_PATH); User actual = sut.findUserFromRequest(testContainerRequest); assertEquals(testAuthenticatedUser, actual); @@ -121,7 +121,7 @@ public void testFindUserFromRequest_ApiKeyProvided_CanNotAuthenticateUserWithAny sut.userSvc = Mockito.mock(UserServiceBean.class); - ContainerRequestContext testContainerRequest = new ContainerRequestTestFake(TEST_API_KEY, TEST_PATH); + ContainerRequestContext testContainerRequest = new ApiKeyContainerRequestTestFake(TEST_API_KEY, TEST_PATH); WrappedAuthErrorResponse wrappedAuthErrorResponse = assertThrows(WrappedAuthErrorResponse.class, () -> 
sut.findUserFromRequest(testContainerRequest)); assertEquals(RESPONSE_MESSAGE_BAD_API_KEY, wrappedAuthErrorResponse.getMessage()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyContainerRequestTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyContainerRequestTestFake.java new file mode 100644 index 00000000000..3a4f2c4c4c1 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyContainerRequestTestFake.java @@ -0,0 +1,29 @@ +package edu.harvard.iq.dataverse.api.auth; + +import javax.ws.rs.core.UriInfo; + +import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.DATAVERSE_API_KEY_REQUEST_HEADER_NAME; + +public class ApiKeyContainerRequestTestFake extends ContainerRequestTestFake { + + private final String apiKey; + private final UriInfo uriInfo; + + public ApiKeyContainerRequestTestFake(String apiKey, String path) { + this.apiKey = apiKey; + this.uriInfo = new ApiKeyUriInfoTestFake(apiKey, path); + } + + @Override + public UriInfo getUriInfo() { + return uriInfo; + } + + @Override + public String getHeaderString(String s) { + if (s.equals(DATAVERSE_API_KEY_REQUEST_HEADER_NAME)) { + return this.apiKey; + } + return null; + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyUriInfoTestFake.java similarity index 94% rename from src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoTestFake.java rename to src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyUriInfoTestFake.java index 440aca812a8..0329c178e3b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/UriInfoTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ApiKeyUriInfoTestFake.java @@ -6,12 +6,12 @@ import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.DATAVERSE_API_KEY_REQUEST_PARAM_NAME; -public class UriInfoTestFake implements UriInfo { +public class ApiKeyUriInfoTestFake implements UriInfo { private final 
String apiKey; private final String path; - public UriInfoTestFake(String apiKey, String path) { + public ApiKeyUriInfoTestFake(String apiKey, String path) { this.apiKey = apiKey; this.path = path; } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanismTest.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanismTest.java new file mode 100644 index 00000000000..8a342a44dba --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanismTest.java @@ -0,0 +1,48 @@ +package edu.harvard.iq.dataverse.api.auth; + +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import javax.ws.rs.container.ContainerRequestContext; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; + +public class CompoundAuthMechanismTest { + + @Test + public void testFindUserFromRequest_CanNotAuthenticateUserWithAnyMechanism() throws WrappedAuthErrorResponse { + AuthMechanism authMechanismStub1 = Mockito.mock(AuthMechanism.class); + Mockito.when(authMechanismStub1.findUserFromRequest(any(ContainerRequestContext.class))).thenReturn(null); + + AuthMechanism authMechanismStub2 = Mockito.mock(AuthMechanism.class); + Mockito.when(authMechanismStub2.findUserFromRequest(any(ContainerRequestContext.class))).thenReturn(null); + + CompoundAuthMechanism sut = new CompoundAuthMechanism(authMechanismStub1, authMechanismStub2); + + User actual = sut.findUserFromRequest(new ContainerRequestTestFake()); + + assertThat(actual, equalTo(GuestUser.get())); + } + + @Test + public void testFindUserFromRequest_UserAuthenticated() throws WrappedAuthErrorResponse { + AuthMechanism 
authMechanismStub1 = Mockito.mock(AuthMechanism.class); + AuthenticatedUser testAuthenticatedUser = new AuthenticatedUser(); + Mockito.when(authMechanismStub1.findUserFromRequest(any(ContainerRequestContext.class))).thenReturn(testAuthenticatedUser); + + AuthMechanism authMechanismStub2 = Mockito.mock(AuthMechanism.class); + Mockito.when(authMechanismStub2.findUserFromRequest(any(ContainerRequestContext.class))).thenReturn(null); + + CompoundAuthMechanism sut = new CompoundAuthMechanism(authMechanismStub1, authMechanismStub2); + + User actual = sut.findUserFromRequest(new ContainerRequestTestFake()); + + assertEquals(actual, testAuthenticatedUser); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestTestFake.java b/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestTestFake.java index f5509408d12..311cb9b71b2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestTestFake.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/auth/ContainerRequestTestFake.java @@ -6,18 +6,8 @@ import java.net.URI; import java.util.*; -import static edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism.DATAVERSE_API_KEY_REQUEST_HEADER_NAME; - public class ContainerRequestTestFake implements ContainerRequestContext { - private final String apiKey; - private final UriInfo uriInfo; - - public ContainerRequestTestFake(String apiKey, String path) { - this.apiKey = apiKey; - this.uriInfo = new UriInfoTestFake(apiKey, path); - } - @Override public Object getProperty(String s) { return null; @@ -40,7 +30,7 @@ public void removeProperty(String s) { @Override public UriInfo getUriInfo() { - return uriInfo; + return null; } @Override @@ -75,9 +65,6 @@ public MultivaluedMap getHeaders() { @Override public String getHeaderString(String s) { - if (s.equals(DATAVERSE_API_KEY_REQUEST_HEADER_NAME)) { - return this.apiKey; - } return null; } From d328371399fe06e31bf6a2a3007e0f3785a3fb4f Mon Sep 17 00:00:00 2001 From: Stephen 
Kraffmiller Date: Fri, 20 Jan 2023 15:38:01 -0500 Subject: [PATCH 190/322] #8339 add export API Token to docs --- doc/sphinx-guides/source/api/native-api.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 7a99795c335..0a8de08c56d 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2061,7 +2061,7 @@ Files ----- Get JSON Representation of a File -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: Files can be accessed using persistent identifiers. This is done by passing the constant ``:persistentId`` where the numeric id of the file is expected, and then passing the actual persistent id as a query parameter with the name ``persistentId``. @@ -2071,6 +2071,7 @@ Example: Getting the file whose DOI is *10.5072/FK2/J8SJZB*: export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER @@ -2078,7 +2079,7 @@ The fully expanded example above (without environment variables) looks like this .. 
code-block:: bash - curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB You may get its draft version if you pass an api token with view draft permissions: @@ -2086,6 +2087,7 @@ You may get its draft version if you pass an api token with view draft permissio export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx curl -H "X-Dataverse-key:$API_TOKEN" http://$SERVER/api/files/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER @@ -2093,7 +2095,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/files/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB |CORS| Show the file whose id is passed: From aab0f2ab4ef87fdd46f70e0c5de1870ccccd55b0 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 23 Jan 2023 10:27:03 +0100 Subject: [PATCH 191/322] simplified for loop: loop directly on DataFiles of dataset, not over each version separately --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 2c0d066b3fc..f0a33ceeb5a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2538,13 +2538,11 @@ public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dr List deleted = new ArrayList<>(); 
Set files = new HashSet(); try { - for (DatasetVersion dv : dataset.getVersions()) { - for (FileMetadata f : dv.getFileMetadatas()) { - String storageIdentifier = f.getDataFile().getStorageIdentifier(); - String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); - String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName - files.add(locationParts[locationParts.length-1]); - } + for (DataFile dataFile: dataset.getFiles()) { + String storageIdentifier = dataFile.getStorageIdentifier(); + String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); + String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName + files.add(locationParts[locationParts.length-1]); } StorageIO datasetIO = DataAccess.getStorageIO(dataset); Predicate filter = f -> { From 60cebc6c657c90a0c39d9ff89d542c4b72739e53 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 23 Jan 2023 10:17:18 +0000 Subject: [PATCH 192/322] Refactor: new ApiConstants for common API constant values --- .../edu/harvard/iq/dataverse/api/ApiConstants.java | 14 ++++++++++++++ .../dataverse/api/auth/CompoundAuthMechanism.java | 2 +- .../iq/dataverse/api/auth/SecurityFilter.java | 4 +++- .../api/auth/WrappedAuthErrorResponse.java | 4 ++-- 4 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java new file mode 100644 index 00000000000..5e8ce5fb071 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java @@ -0,0 +1,14 @@ +package edu.harvard.iq.dataverse.api; + +public final class ApiConstants { + + private ApiConstants() { + // Restricting instantiation + } + + // Statuses + public static final String STATUS_ERROR = "ERROR"; + + // Authentication + public static 
final String CONTAINER_REQUEST_CONTEXT_USER = "user"; +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanism.java index 761bcef3654..d826d3e6774 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanism.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/CompoundAuthMechanism.java @@ -31,7 +31,7 @@ public User findUserFromRequest(ContainerRequestContext containerRequestContext) } } if (user == null) { - return GuestUser.get(); + user = GuestUser.get(); } return user; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java index 018b50a788b..701591c4b7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java @@ -10,6 +10,8 @@ import javax.ws.rs.ext.Provider; import java.io.IOException; +import static edu.harvard.iq.dataverse.api.ApiConstants.CONTAINER_REQUEST_CONTEXT_USER; + @Secured @Provider @Priority(Priorities.AUTHENTICATION) @@ -23,7 +25,7 @@ public void filter(ContainerRequestContext containerRequestContext) throws IOExc CompoundAuthMechanism compoundAuthMechanism = new CompoundAuthMechanism(apiKeyAuthMechanism); try { User user = compoundAuthMechanism.findUserFromRequest(containerRequestContext); - containerRequestContext.setProperty("user", user); + containerRequestContext.setProperty(CONTAINER_REQUEST_CONTEXT_USER, user); } catch (WrappedAuthErrorResponse e) { containerRequestContext.abortWith(e.getResponse()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java index 74359d08975..1b495806ad4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/auth/WrappedAuthErrorResponse.java @@ -5,8 +5,8 @@ import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; -// TODO: Find common place for this? -import static edu.harvard.iq.dataverse.api.AbstractApiBean.STATUS_ERROR; +import static edu.harvard.iq.dataverse.api.ApiConstants.STATUS_ERROR; + public class WrappedAuthErrorResponse extends Exception { From 9d037e84f09e8984f2c7e1804c89e318cb435a14 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 23 Jan 2023 10:24:42 +0000 Subject: [PATCH 193/322] Refactor: Auth-components renamed --- .../java/edu/harvard/iq/dataverse/api/ApiConfiguration.java | 4 ++-- .../api/auth/{SecurityFilter.java => AuthFilter.java} | 4 ++-- .../iq/dataverse/api/auth/{Secured.java => AuthRequired.java} | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename src/main/java/edu/harvard/iq/dataverse/api/auth/{SecurityFilter.java => AuthFilter.java} (93%) rename src/main/java/edu/harvard/iq/dataverse/api/auth/{Secured.java => AuthRequired.java} (91%) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java index 715aa270b0d..f9ba088a4e9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConfiguration.java @@ -2,7 +2,7 @@ import javax.ws.rs.ApplicationPath; -import edu.harvard.iq.dataverse.api.auth.SecurityFilter; +import edu.harvard.iq.dataverse.api.auth.AuthFilter; import org.glassfish.jersey.media.multipart.MultiPartFeature; import org.glassfish.jersey.server.ResourceConfig; @@ -13,6 +13,6 @@ public ApiConfiguration() { packages("edu.harvard.iq.dataverse.api"); packages("edu.harvard.iq.dataverse.mydata"); register(MultiPartFeature.class); - register(SecurityFilter.class); + register(AuthFilter.class); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java 
b/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthFilter.java similarity index 93% rename from src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java rename to src/main/java/edu/harvard/iq/dataverse/api/auth/AuthFilter.java index 701591c4b7d..ab0205806cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/SecurityFilter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthFilter.java @@ -12,10 +12,10 @@ import static edu.harvard.iq.dataverse.api.ApiConstants.CONTAINER_REQUEST_CONTEXT_USER; -@Secured +@AuthRequired @Provider @Priority(Priorities.AUTHENTICATION) -public class SecurityFilter implements ContainerRequestFilter { +public class AuthFilter implements ContainerRequestFilter { @Inject private ApiKeyAuthMechanism apiKeyAuthMechanism; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/Secured.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthRequired.java similarity index 91% rename from src/main/java/edu/harvard/iq/dataverse/api/auth/Secured.java rename to src/main/java/edu/harvard/iq/dataverse/api/auth/AuthRequired.java index d9cd42c148b..0d05bd82587 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/Secured.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/AuthRequired.java @@ -10,5 +10,5 @@ @NameBinding @Retention(RUNTIME) @Target({ElementType.TYPE, ElementType.METHOD}) -public @interface Secured { +public @interface AuthRequired { } From d4d69b70ede07d5ab04deb5004157dd1522ed413 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 23 Jan 2023 10:59:33 +0000 Subject: [PATCH 194/322] Refactor: deleteDataset endpoint uses AuthFilter instead of findUserOrDie --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index c3d262a20db..372e3323690 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; +import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; @@ -93,6 +94,8 @@ import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.search.IndexServiceBean; + +import static edu.harvard.iq.dataverse.api.ApiConstants.CONTAINER_REQUEST_CONTEXT_USER; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -140,6 +143,7 @@ import javax.ws.rs.PathParam; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; +import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.core.*; import javax.ws.rs.core.Response.Status; import static javax.ws.rs.core.Response.Status.BAD_REQUEST; @@ -289,8 +293,9 @@ public Response exportDataset(@QueryParam("persistentId") String persistentId, @ } @DELETE + @AuthRequired @Path("{id}") - public Response deleteDataset( @PathParam("id") String id) { + public Response deleteDataset(@Context ContainerRequestContext crc, @PathParam("id") String id) { // Internally, "DeleteDatasetCommand" simply redirects to "DeleteDatasetVersionCommand" // (and there's a comment that says "TODO: remove this command") // do we need an exposed API call for it? 
@@ -304,7 +309,7 @@ public Response deleteDataset( @PathParam("id") String id) { return response( req -> { Dataset doomed = findDatasetOrDie(id); DatasetVersion doomedVersion = doomed.getLatestVersion(); - User u = findUserOrDie(); + User u = (User) crc.getProperty(CONTAINER_REQUEST_CONTEXT_USER); boolean destroy = false; if (doomed.getVersions().size() == 1) { From a5b27c2e2fa78aa9cb5d37742d131cf81765964c Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 23 Jan 2023 11:14:13 +0000 Subject: [PATCH 195/322] Refactor: new AbstractApiBean method for retrieving user from ContainerRequestContext --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 372e3323690..6170acae447 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -95,7 +95,6 @@ import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.search.IndexServiceBean; -import static edu.harvard.iq.dataverse.api.ApiConstants.CONTAINER_REQUEST_CONTEXT_USER; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -309,7 +308,7 @@ public Response deleteDataset(@Context ContainerRequestContext crc, @PathParam(" return response( req -> { Dataset doomed = findDatasetOrDie(id); DatasetVersion doomedVersion = doomed.getLatestVersion(); - User u = (User) crc.getProperty(CONTAINER_REQUEST_CONTEXT_USER); + User u = getRequestUser(crc); boolean destroy = false; if (doomed.getVersions().size() == 1) { @@ -343,13 +342,14 @@ public Response deleteDataset(@Context ContainerRequestContext crc, @PathParam(" } @DELETE + @AuthRequired @Path("{id}/destroy") - public 
Response destroyDataset(@PathParam("id") String id) { + public Response destroyDataset(@Context ContainerRequestContext crc, @PathParam("id") String id) { return response(req -> { // first check if dataset is released, and if so, if user is a superuser Dataset doomed = findDatasetOrDie(id); - User u = findUserOrDie(); + User u = getRequestUser(crc); if (doomed.isReleased() && (!(u instanceof AuthenticatedUser) || !u.isSuperuser())) { throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "Destroy can only be called by superusers.")); From 4b8e7d45dc436a59f303b7b02c75f3839d15c179 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 23 Jan 2023 11:17:29 +0000 Subject: [PATCH 196/322] Refactor: new AbstractApiBean method for retrieving user from ContainerRequestContext (2) --- .../java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index e919ecf786d..dfb946eb2d0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -77,11 +77,14 @@ import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.core.Context; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import javax.ws.rs.core.Response.ResponseBuilder; import javax.ws.rs.core.Response.Status; + +import static edu.harvard.iq.dataverse.api.ApiConstants.CONTAINER_REQUEST_CONTEXT_USER; import static org.apache.commons.lang3.StringUtils.isNumeric; /** @@ -329,6 +332,10 @@ protected String getRequestWorkflowInvocationID() { return headerParamWFKey!=null ? 
headerParamWFKey : queryParamWFKey; } + protected User getRequestUser(ContainerRequestContext crc) { + return (User) crc.getProperty(CONTAINER_REQUEST_CONTEXT_USER); + } + /* ========= *\ * Finders * \* ========= */ From 360b73819df4028a9fbcd1bc914ab90720f334da Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Mon, 23 Jan 2023 12:35:49 +0100 Subject: [PATCH 197/322] clean up files made safer --- .../harvard/iq/dataverse/api/Datasets.java | 49 ++++++++++------ .../iq/dataverse/dataaccess/FileAccessIO.java | 5 +- .../dataverse/dataaccess/InputStreamIO.java | 2 +- .../dataaccess/RemoteOverlayAccessIO.java | 4 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 7 ++- .../iq/dataverse/dataaccess/StorageIO.java | 2 +- .../dataverse/dataaccess/SwiftAccessIO.java | 5 +- .../iq/dataverse/api/DatasetsTest.java | 58 +++++++++++++++++++ 8 files changed, 105 insertions(+), 27 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index f0a33ceeb5a..43b0c6f9529 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -121,6 +121,7 @@ import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Pattern; import java.util.stream.Collectors; import javax.ejb.EJB; @@ -157,6 +158,7 @@ public class Datasets extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Datasets.class.getCanonicalName()); + private static final Pattern dataFilePattern = Pattern.compile("^[0-9a-f]{11}-[0-9a-f]{12}\\.?.*"); @Inject DataverseSession session; @@ -2535,34 +2537,45 @@ public Response cleanStorage(@PathParam("id") String idSupplied, @QueryParam("dr return error(Response.Status.INTERNAL_SERVER_ERROR, "Access denied!"); } - List deleted = new ArrayList<>(); - Set files = new 
HashSet(); + boolean doDryRun = dryrun != null && dryrun.booleanValue(); + + // check if no legacy files are present + Set datasetFilenames = getDatasetFilenames(dataset); + if (datasetFilenames.stream().anyMatch(x -> !dataFilePattern.matcher(x).matches())) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "Dataset contains files not matching the naming pattern!"); + } + + Predicate filter = getToDeleteFilesFilter(datasetFilenames); + List deleted; try { - for (DataFile dataFile: dataset.getFiles()) { - String storageIdentifier = dataFile.getStorageIdentifier(); - String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); - String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName - files.add(locationParts[locationParts.length-1]); - } StorageIO datasetIO = DataAccess.getStorageIO(dataset); - Predicate filter = f -> { - return !f.startsWith("export_") || files.stream().noneMatch(x -> f.startsWith(x)); - }; - - if (dryrun != null && dryrun.booleanValue()) { - deleted.addAll(files.stream().filter(filter).collect(Collectors.toList())); - } else { - deleted.addAll(datasetIO.cleanUp(filter)); - } + deleted = datasetIO.cleanUp(filter, doDryRun); } catch (IOException ex) { logger.log(Level.SEVERE, null, ex); return error(Response.Status.INTERNAL_SERVER_ERROR, "IOException! Serious Error!
See administrator!"); } - return ok("Found: " + files.stream().collect(Collectors.joining(", ")) + "\n" + "Deleted: " + deleted.stream().collect(Collectors.joining(", "))); + return ok("Found: " + datasetFilenames.stream().collect(Collectors.joining(", ")) + "\n" + "Deleted: " + deleted.stream().collect(Collectors.joining(", "))); } + private static Set getDatasetFilenames(Dataset dataset) { + Set files = new HashSet<>(); + for (DataFile dataFile: dataset.getFiles()) { + String storageIdentifier = dataFile.getStorageIdentifier(); + String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3); + String[] locationParts = location.split(":", 3);//separate bucket, swift container, etc. from fileName + files.add(locationParts[locationParts.length-1]); + } + return files; + } + + public static Predicate getToDeleteFilesFilter(Set datasetFilenames) { + return f -> { + return dataFilePattern.matcher(f).matches() && datasetFilenames.stream().noneMatch(x -> f.startsWith(x)); + }; + } + private void msg(String m) { //System.out.println(m); logger.fine(m); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index cc72a9cfb02..8ee3f0cf53c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -726,8 +726,11 @@ private void deleteFile(String fileName) throws IOException { } @Override - public List cleanUp(Predicate filter) throws IOException { + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } for (String f : toDelete) { this.deleteFile(f); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index 
2a867bddcac..be6f9df0254 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -161,7 +161,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { } @Override - public List cleanUp(Predicate filter) throws IOException { + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 22373fdfee0..66c6a4cc2ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -633,7 +633,7 @@ public static String getBaseStoreIdFor(String driverId) { } @Override - public List cleanUp(Predicate filter) throws IOException { - return baseStore.cleanUp(filter); + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 8dc93361375..f396b07d788 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1309,8 +1309,6 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - - private List listAllFiles() throws IOException { if (!this.canWrite()) { open(); @@ -1372,8 +1370,11 @@ private void deleteFile(String fileName) throws IOException { } @Override - public List cleanUp(Predicate filter) throws IOException { + public List cleanUp(Predicate filter, boolean dryRun) throws IOException 
{ List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } for (String f : toDelete) { this.deleteFile(f); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 54e457ffab6..bfd5c5f0d8f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -623,6 +623,6 @@ protected static boolean usesStandardNamePattern(String identifier) { return m.find(); } - public abstract List cleanUp(Predicate filter) throws IOException; + public abstract List cleanUp(Predicate filter, boolean dryRun) throws IOException; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 8857b054108..6c84009de3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -944,8 +944,11 @@ private void deleteFile(String fileName) throws IOException { } @Override - public List cleanUp(Predicate filter) throws IOException { + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { List toDelete = this.listAllFiles().stream().filter(filter).collect(Collectors.toList()); + if (dryRun) { + return toDelete; + } for (String f : toDelete) { this.deleteFile(f); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java new file mode 100644 index 00000000000..fded590d9db --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsTest.java @@ -0,0 +1,58 @@ +package edu.harvard.iq.dataverse.api; + +import org.junit.Test; +import java.util.HashSet; +import java.util.Set; +import java.util.function.Predicate; +import 
java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class DatasetsTest { + + /** + * Test cleanup filter + */ + @Test + public void testCleanup() { + Set datasetFiles = new HashSet<>() { + { + add("1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e"); + add("1837fda1b80-46a899909269"); + } + }; + Set filesOnDrive = new HashSet<>() { + { + add("1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e"); + add("1837fda1b80-46a899909269"); + add("prefix_1837fda0b6c-90779481d439"); + add("1837fda0e17-4b0926f6d44e_suffix"); + add("1837fda1b80-extra-46a899909269"); + add("1837fda0e17-4b0926f6d44e.aux"); + add("1837fda1994-5f74d57e6e47"); + add("1837fda17ce-d7b9987fc6e9"); + add("18383198c49-aeda08ccffff"); + add("prefix_1837fda1994-5f74d57e6e47"); + add("1837fda17ce-d7b9987fc6e9_suffix"); + add("18383198c49-extra-aeda08ccffff"); + add("some_other_file"); + add("1837fda17ce-d7b9987fc6e9.aux"); + add("18383198c49.aeda08ccffff"); + add("1837fda17ce-d7b9987fc6xy"); + } + }; + + Predicate toDeleteFilesFilter = Datasets.getToDeleteFilesFilter(datasetFiles); + Set deleted = filesOnDrive.stream().filter(toDeleteFilesFilter).collect(Collectors.toSet()); + + assertEquals(5, deleted.size()); + assertTrue(deleted.contains("1837fda1994-5f74d57e6e47")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9")); + assertTrue(deleted.contains("18383198c49-aeda08ccffff")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9_suffix")); + assertTrue(deleted.contains("1837fda17ce-d7b9987fc6e9.aux")); + } +} From cf57f1427a43c89c6729be92b207a14111327307 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 23 Jan 2023 11:42:14 +0000 Subject: [PATCH 198/322] Refactor: all Datasets API findUserOrDie calls replaced by AuthFilter --- .../harvard/iq/dataverse/api/Datasets.java | 213 +++++++++--------- 1 file changed, 102 insertions(+), 111 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 6170acae447..ffe9b2bb698 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -478,16 +478,17 @@ public Response getVersionFiles( @PathParam("id") String datasetId, @PathParam(" } @GET + @AuthRequired @Path("{id}/dirindex") @Produces("text/html") - public Response getFileAccessFolderView(@PathParam("id") String datasetId, @QueryParam("version") String versionId, @QueryParam("folder") String folderName, @QueryParam("original") Boolean originals, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { + public Response getFileAccessFolderView(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @QueryParam("version") String versionId, @QueryParam("folder") String folderName, @QueryParam("original") Boolean originals, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { folderName = folderName == null ? "" : folderName; versionId = versionId == null ? ":latest-published" : versionId; DatasetVersion version; try { - DataverseRequest req = createDataverseRequest(findUserOrDie()); + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); version = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); } catch (WrappedResponse wr) { return wr.getResponse(); @@ -603,16 +604,17 @@ public Response updateDatasetPIDMetadataAll() { } @PUT + @AuthRequired @Path("{id}/versions/{versionId}") @Consumes(MediaType.APPLICATION_JSON) - public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId ){ + public Response updateDraftVersion(@Context ContainerRequestContext crc, String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId){ if ( ! 
":draft".equals(versionId) ) { return error( Response.Status.BAD_REQUEST, "Only the :draft version can be updated"); } try ( StringReader rdr = new StringReader(jsonBody) ) { - DataverseRequest req = createDataverseRequest(findUserOrDie()); + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); DatasetVersion incomingVersion = jsonParser().parseDatasetVersion(json); @@ -668,11 +670,12 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, } @GET + @AuthRequired @Path("{id}/versions/{versionId}/metadata") @Produces("application/ld+json, application/json-ld") - public Response getVersionJsonLDMetadata(@PathParam("id") String id, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @PathParam("id") String id, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { try { - DataverseRequest req = createDataverseRequest(findUserOrDie()); + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(id), uriInfo, headers); OREMap ore = new OREMap(dsv, settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false)); @@ -696,13 +699,14 @@ public Response getVersionJsonLDMetadata(@PathParam("id") String id, @Context Ur } @PUT + @AuthRequired @Path("{id}/metadata") @Consumes("application/ld+json, application/json-ld") - public Response updateVersionMetadata(String jsonLDBody, @PathParam("id") String id, @DefaultValue("false") @QueryParam("replace") boolean replaceTerms) { + public Response updateVersionMetadata(@Context ContainerRequestContext crc, String jsonLDBody, @PathParam("id") String id, @DefaultValue("false") @QueryParam("replace") boolean replaceTerms) { try { 
Dataset ds = findDatasetOrDie(id); - DataverseRequest req = createDataverseRequest(findUserOrDie()); + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); DatasetVersion dsv = ds.getOrCreateEditVersion(); boolean updateDraft = ds.getLatestVersion().isDraft(); dsv = JSONLDUtil.updateDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, datasetFieldSvc, !replaceTerms, false, licenseSvc); @@ -730,12 +734,13 @@ public Response updateVersionMetadata(String jsonLDBody, @PathParam("id") String } @PUT + @AuthRequired @Path("{id}/metadata/delete") @Consumes("application/ld+json, application/json-ld") - public Response deleteMetadata(String jsonLDBody, @PathParam("id") String id) { + public Response deleteMetadata(@Context ContainerRequestContext crc, String jsonLDBody, @PathParam("id") String id) { try { Dataset ds = findDatasetOrDie(id); - DataverseRequest req = createDataverseRequest(findUserOrDie()); + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); DatasetVersion dsv = ds.getOrCreateEditVersion(); boolean updateDraft = ds.getLatestVersion().isDraft(); dsv = JSONLDUtil.deleteDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, licenseSvc); @@ -761,10 +766,11 @@ public Response deleteMetadata(String jsonLDBody, @PathParam("id") String id) { } @PUT + @AuthRequired @Path("{id}/deleteMetadata") - public Response deleteVersionMetadata(String jsonBody, @PathParam("id") String id) throws WrappedResponse { + public Response deleteVersionMetadata(@Context ContainerRequestContext crc, String jsonBody, @PathParam("id") String id) throws WrappedResponse { - DataverseRequest req = createDataverseRequest(findUserOrDie()); + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); return processDatasetFieldDataDelete(jsonBody, id, req); } @@ -916,17 +922,13 @@ private String getCompoundDisplayValue (DatasetFieldCompoundValue dscv){ } @PUT + @AuthRequired @Path("{id}/editMetadata") - public Response 
editVersionMetadata(String jsonBody, @PathParam("id") String id, @QueryParam("replace") Boolean replace) { + public Response editVersionMetadata(@Context ContainerRequestContext crc, String jsonBody, @PathParam("id") String id, @QueryParam("replace") Boolean replace) { Boolean replaceData = replace != null; DataverseRequest req = null; - try { - req = createDataverseRequest(findUserOrDie()); - } catch (WrappedResponse ex) { - logger.log(Level.SEVERE, "Edit metdata error: " + ex.getMessage(), ex); - return ex.getResponse(); - } + req = createDataverseRequest(getRequestUser(crc)); return processDatasetUpdate(jsonBody, id, req, replaceData); } @@ -1307,10 +1309,11 @@ public Response publishMigratedDataset(String jsonldBody, @PathParam("id") Strin } @POST + @AuthRequired @Path("{id}/move/{targetDataverseAlias}") - public Response moveDataset(@PathParam("id") String id, @PathParam("targetDataverseAlias") String targetDataverseAlias, @QueryParam("forceMove") Boolean force) { + public Response moveDataset(@Context ContainerRequestContext crc, @PathParam("id") String id, @PathParam("targetDataverseAlias") String targetDataverseAlias, @QueryParam("forceMove") Boolean force) { try { - User u = findUserOrDie(); + User u = getRequestUser(crc); Dataset ds = findDatasetOrDie(id); Dataverse target = dataverseService.findByAlias(targetDataverseAlias); if (target == null) { @@ -1598,10 +1601,11 @@ public Response removeFileEmbargo(@PathParam("id") String id, String jsonBody){ @PUT + @AuthRequired @Path("{linkedDatasetId}/link/{linkingDataverseAlias}") - public Response linkDataset(@PathParam("linkedDatasetId") String linkedDatasetId, @PathParam("linkingDataverseAlias") String linkingDataverseAlias) { + public Response linkDataset(@Context ContainerRequestContext crc, @PathParam("linkedDatasetId") String linkedDatasetId, @PathParam("linkingDataverseAlias") String linkingDataverseAlias) { try { - User u = findUserOrDie(); + User u = getRequestUser(crc); Dataset linked = 
findDatasetOrDie(linkedDatasetId); Dataverse linking = findDataverseOrDie(linkingDataverseAlias); if (linked == null){ @@ -1642,10 +1646,11 @@ public Response getCustomTermsTab(@PathParam("id") String id, @PathParam("versio @GET + @AuthRequired @Path("{id}/links") - public Response getLinks(@PathParam("id") String idSupplied ) { + public Response getLinks(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied ) { try { - User u = findUserOrDie(); + User u = getRequestUser(crc); if (!u.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Not a superuser"); } @@ -1672,8 +1677,9 @@ public Response getLinks(@PathParam("id") String idSupplied ) { * @param apiKey */ @POST + @AuthRequired @Path("{identifier}/assignments") - public Response createAssignment(RoleAssignmentDTO ra, @PathParam("identifier") String id, @QueryParam("key") String apiKey) { + public Response createAssignment(@Context ContainerRequestContext crc, RoleAssignmentDTO ra, @PathParam("identifier") String id, @QueryParam("key") String apiKey) { try { Dataset dataset = findDatasetOrDie(id); @@ -1701,7 +1707,7 @@ public Response createAssignment(RoleAssignmentDTO ra, @PathParam("identifier") String privateUrlToken = null; return ok( - json(execCommand(new AssignRoleCommand(assignee, theRole, dataset, createDataverseRequest(findUserOrDie()), privateUrlToken)))); + json(execCommand(new AssignRoleCommand(assignee, theRole, dataset, createDataverseRequest(getRequestUser(crc)), privateUrlToken)))); } catch (WrappedResponse ex) { List args = Arrays.asList(ex.getMessage()); logger.log(Level.WARNING, BundleUtil.getStringFromBundle("datasets.api.grant.role.cant.create.assignment.error", args)); @@ -1711,13 +1717,14 @@ public Response createAssignment(RoleAssignmentDTO ra, @PathParam("identifier") } @DELETE + @AuthRequired @Path("{identifier}/assignments/{id}") - public Response deleteAssignment(@PathParam("id") long assignmentId, @PathParam("identifier") String dsId) { + public Response 
deleteAssignment(@Context ContainerRequestContext crc, @PathParam("id") long assignmentId, @PathParam("identifier") String dsId) { RoleAssignment ra = em.find(RoleAssignment.class, assignmentId); if (ra != null) { try { findDatasetOrDie(dsId); - execCommand(new RevokeRoleCommand(ra, createDataverseRequest(findUserOrDie()))); + execCommand(new RevokeRoleCommand(ra, createDataverseRequest(getRequestUser(crc)))); List args = Arrays.asList(ra.getRole().getName(), ra.getAssigneeIdentifier(), ra.getDefinitionPoint().accept(DvObject.NamePrinter)); return ok(BundleUtil.getStringFromBundle("datasets.api.revoke.role.success", args)); } catch (WrappedResponse ex) { @@ -1775,16 +1782,13 @@ public Response deletePrivateUrl(@PathParam("id") String idSupplied) { } @GET + @AuthRequired @Path("{id}/thumbnail/candidates") - public Response getDatasetThumbnailCandidates(@PathParam("id") String idSupplied) { + public Response getDatasetThumbnailCandidates(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) { try { Dataset dataset = findDatasetOrDie(idSupplied); boolean canUpdateThumbnail = false; - try { - canUpdateThumbnail = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetThumbnailCommand.class); - } catch (WrappedResponse ex) { - logger.info("Exception thrown while trying to figure out permissions while getting thumbnail for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); - } + canUpdateThumbnail = permissionSvc.requestOn(createDataverseRequest(getRequestUser(crc)), dataset).canIssue(UpdateDatasetThumbnailCommand.class); if (!canUpdateThumbnail) { return error(Response.Status.FORBIDDEN, "You are not permitted to list dataset thumbnail candidates."); } @@ -1827,10 +1831,11 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) { // TODO: Rather than only supporting looking up files by their database IDs (dataFileIdSupplied), consider supporting persistent identifiers. 
@POST + @AuthRequired @Path("{id}/thumbnail/{dataFileId}") - public Response setDataFileAsThumbnail(@PathParam("id") String idSupplied, @PathParam("dataFileId") long dataFileIdSupplied) { + public Response setDataFileAsThumbnail(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied, @PathParam("dataFileId") long dataFileIdSupplied) { try { - DatasetThumbnail datasetThumbnail = execCommand(new UpdateDatasetThumbnailCommand(createDataverseRequest(findUserOrDie()), findDatasetOrDie(idSupplied), UpdateDatasetThumbnailCommand.UserIntent.setDatasetFileAsThumbnail, dataFileIdSupplied, null)); + DatasetThumbnail datasetThumbnail = execCommand(new UpdateDatasetThumbnailCommand(createDataverseRequest(getRequestUser(crc)), findDatasetOrDie(idSupplied), UpdateDatasetThumbnailCommand.UserIntent.setDatasetFileAsThumbnail, dataFileIdSupplied, null)); return ok("Thumbnail set to " + datasetThumbnail.getBase64image()); } catch (WrappedResponse wr) { return wr.getResponse(); @@ -1838,12 +1843,12 @@ public Response setDataFileAsThumbnail(@PathParam("id") String idSupplied, @Path } @POST + @AuthRequired @Path("{id}/thumbnail") @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response uploadDatasetLogo(@PathParam("id") String idSupplied, @FormDataParam("file") InputStream inputStream - ) { + public Response uploadDatasetLogo(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied, @FormDataParam("file") InputStream inputStream) { try { - DatasetThumbnail datasetThumbnail = execCommand(new UpdateDatasetThumbnailCommand(createDataverseRequest(findUserOrDie()), findDatasetOrDie(idSupplied), UpdateDatasetThumbnailCommand.UserIntent.setNonDatasetFileAsThumbnail, null, inputStream)); + DatasetThumbnail datasetThumbnail = execCommand(new UpdateDatasetThumbnailCommand(createDataverseRequest(getRequestUser(crc)), findDatasetOrDie(idSupplied), UpdateDatasetThumbnailCommand.UserIntent.setNonDatasetFileAsThumbnail, null, inputStream)); return ok("Thumbnail is 
now " + datasetThumbnail.getBase64image()); } catch (WrappedResponse wr) { return wr.getResponse(); @@ -1851,10 +1856,11 @@ public Response uploadDatasetLogo(@PathParam("id") String idSupplied, @FormDataP } @DELETE + @AuthRequired @Path("{id}/thumbnail") - public Response removeDatasetLogo(@PathParam("id") String idSupplied) { + public Response removeDatasetLogo(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) { try { - DatasetThumbnail datasetThumbnail = execCommand(new UpdateDatasetThumbnailCommand(createDataverseRequest(findUserOrDie()), findDatasetOrDie(idSupplied), UpdateDatasetThumbnailCommand.UserIntent.removeThumbnail, null, null)); + execCommand(new UpdateDatasetThumbnailCommand(createDataverseRequest(getRequestUser(crc)), findDatasetOrDie(idSupplied), UpdateDatasetThumbnailCommand.UserIntent.removeThumbnail, null, null)); return ok("Dataset thumbnail removed."); } catch (WrappedResponse wr) { return wr.getResponse(); @@ -1902,8 +1908,9 @@ public Response getRsync(@PathParam("identifier") String id) { * -MAD 4.9.1 */ @POST + @AuthRequired @Path("{identifier}/dataCaptureModule/checksumValidation") - public Response receiveChecksumValidationResults(@PathParam("identifier") String id, JsonObject jsonFromDcm) { + public Response receiveChecksumValidationResults(@Context ContainerRequestContext crc, @PathParam("identifier") String id, JsonObject jsonFromDcm) { logger.log(Level.FINE, "jsonFromDcm: {0}", jsonFromDcm); AuthenticatedUser authenticatedUser = null; try { @@ -1930,7 +1937,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String ImportMode importMode = ImportMode.MERGE; try { - JsonObject jsonFromImportJobKickoff = execCommand(new ImportFromFileSystemCommand(createDataverseRequest(findUserOrDie()), dataset, uploadFolder, new Long(totalSize), importMode)); + JsonObject jsonFromImportJobKickoff = execCommand(new ImportFromFileSystemCommand(createDataverseRequest(getRequestUser(crc)), dataset, 
uploadFolder, new Long(totalSize), importMode)); long jobId = jsonFromImportJobKickoff.getInt("executionId"); String message = jsonFromImportJobKickoff.getString("message"); JsonObjectBuilder job = Json.createObjectBuilder(); @@ -2009,10 +2016,11 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String @POST + @AuthRequired @Path("{id}/submitForReview") - public Response submitForReview(@PathParam("id") String idSupplied) { + public Response submitForReview(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) { try { - Dataset updatedDataset = execCommand(new SubmitDatasetForReviewCommand(createDataverseRequest(findUserOrDie()), findDatasetOrDie(idSupplied))); + Dataset updatedDataset = execCommand(new SubmitDatasetForReviewCommand(createDataverseRequest(getRequestUser(crc)), findDatasetOrDie(idSupplied))); JsonObjectBuilder result = Json.createObjectBuilder(); boolean inReview = updatedDataset.isLockedFor(DatasetLock.Reason.InReview); @@ -2055,12 +2063,13 @@ public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBo } @GET + @AuthRequired @Path("{id}/curationStatus") - public Response getCurationStatus(@PathParam("id") String idSupplied) { + public Response getCurationStatus(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) { try { Dataset ds = findDatasetOrDie(idSupplied); DatasetVersion dsv = ds.getLatestVersion(); - if (dsv.isDraft() && permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), ds).has(Permission.PublishDataset)) { + if (dsv.isDraft() && permissionSvc.requestOn(createDataverseRequest(getRequestUser(crc)), ds).has(Permission.PublishDataset)) { return response(req -> ok(dsv.getExternalStatusLabel()==null ? 
"":dsv.getExternalStatusLabel())); } else { return error(Response.Status.FORBIDDEN, "You are not permitted to view the curation status of this dataset."); @@ -2071,13 +2080,14 @@ public Response getCurationStatus(@PathParam("id") String idSupplied) { } @PUT + @AuthRequired @Path("{id}/curationStatus") - public Response setCurationStatus(@PathParam("id") String idSupplied, @QueryParam("label") String label) { + public Response setCurationStatus(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied, @QueryParam("label") String label) { Dataset ds = null; User u = null; try { ds = findDatasetOrDie(idSupplied); - u = findUserOrDie(); + u = getRequestUser(crc); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -2091,13 +2101,14 @@ public Response setCurationStatus(@PathParam("id") String idSupplied, @QueryPara } @DELETE + @AuthRequired @Path("{id}/curationStatus") - public Response deleteCurationStatus(@PathParam("id") String idSupplied) { + public Response deleteCurationStatus(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) { Dataset ds = null; User u = null; try { ds = findDatasetOrDie(idSupplied); - u = findUserOrDie(); + u = getRequestUser(crc); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -2111,19 +2122,15 @@ public Response deleteCurationStatus(@PathParam("id") String idSupplied) { } @GET + @AuthRequired @Path("{id}/uploadsid") @Deprecated - public Response getUploadUrl(@PathParam("id") String idSupplied) { + public Response getUploadUrl(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) { try { Dataset dataset = findDatasetOrDie(idSupplied); boolean canUpdateDataset = false; - try { - canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); - } catch (WrappedResponse ex) { - logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + 
dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(getRequestUser(crc)), dataset).canIssue(UpdateDatasetVersionCommand.class); if (!canUpdateDataset) { return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); } @@ -2151,21 +2158,15 @@ public Response getUploadUrl(@PathParam("id") String idSupplied) { } @GET + @AuthRequired @Path("{id}/uploadurls") - public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { + public Response getMPUploadUrls(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { try { Dataset dataset = findDatasetOrDie(idSupplied); boolean canUpdateDataset = false; - try { - canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions while getting upload URLs for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(getRequestUser(crc)), dataset) + .canIssue(UpdateDatasetVersionCommand.class); if (!canUpdateDataset) { return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); } @@ -2333,9 +2334,11 @@ public Response completeMPUpload(String partETagBody, @QueryParam("globalid") St * @return */ @POST + @AuthRequired @Path("{id}/add") @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response addFileToDataset(@PathParam("id") String idSupplied, + public Response addFileToDataset(@Context ContainerRequestContext crc, + @PathParam("id") String idSupplied, @FormDataParam("jsonData") String jsonData, @FormDataParam("file") InputStream fileInputStream, @FormDataParam("file") FormDataContentDisposition 
contentDispositionHeader, @@ -2347,18 +2350,11 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } // ------------------------------------- - // (1) Get the user from the API key + // (1) Get the user from the ContainerRequestContext // ------------------------------------- User authUser; - try { - authUser = findUserOrDie(); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, - BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } - - + authUser = getRequestUser(crc); + // ------------------------------------- // (2) Get the Dataset Id // @@ -3132,15 +3128,16 @@ public Response getAllowedCurationLabels(@PathParam("identifier") String dvIdtf, } @GET + @AuthRequired @Path("{identifier}/timestamps") @Produces(MediaType.APPLICATION_JSON) - public Response getTimestamps(@PathParam("identifier") String id) { + public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam("identifier") String id) { Dataset dataset = null; DateTimeFormatter formatter = DateTimeFormatter.ISO_LOCAL_DATE_TIME; try { dataset = findDatasetOrDie(id); - User u = findUserOrDie(); + User u = getRequestUser(crc); Set perms = new HashSet(); perms.add(Permission.ViewUnpublishedDataset); boolean canSeeDraft = permissionSvc.hasPermissionsFor(u, dataset, perms); @@ -3285,9 +3282,10 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, } @POST + @AuthRequired @Path("{id}/deleteglobusRule") @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response deleteglobusRule(@PathParam("id") String datasetId,@FormDataParam("jsonData") String jsonData + public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,@FormDataParam("jsonData") String jsonData ) throws IOException, ExecutionException, InterruptedException { @@ -3299,15 +3297,10 @@ public Response deleteglobusRule(@PathParam("id") String datasetId,@FormDataPara } // ------------------------------------- - // (1) 
Get the user from the API key + // (1) Get the user from the ContainerRequestContext // ------------------------------------- User authUser; - try { - authUser = findUserOrDie(); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } + authUser = getRequestUser(crc); // ------------------------------------- // (2) Get the Dataset Id @@ -3336,9 +3329,11 @@ public Response deleteglobusRule(@PathParam("id") String datasetId,@FormDataPara * @return */ @POST + @AuthRequired @Path("{id}/addFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response addFilesToDataset(@PathParam("id") String idSupplied, + public Response addFilesToDataset(@Context ContainerRequestContext crc, + @PathParam("id") String idSupplied, @FormDataParam("jsonData") String jsonData) { if (!systemConfig.isHTTPUpload()) { @@ -3346,15 +3341,10 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, } // ------------------------------------- - // (1) Get the user from the API key + // (1) Get the user from the ContainerRequestContext // ------------------------------------- User authUser; - try { - authUser = findUserOrDie(); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } + authUser = getRequestUser(crc); // ------------------------------------- // (2) Get the Dataset Id @@ -3407,25 +3397,22 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, * @return */ @POST + @AuthRequired @Path("{id}/replaceFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response replaceFilesInDataset(@PathParam("id") String idSupplied, - @FormDataParam("jsonData") String jsonData) { + public Response replaceFilesInDataset(@Context ContainerRequestContext crc, + @PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { if (!systemConfig.isHTTPUpload()) { return 
error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); } // ------------------------------------- - // (1) Get the user from the API key + // (1) Get the user from the ContainerRequestContext // ------------------------------------- User authUser; - try { - authUser = findUserOrDie(); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } + authUser = getRequestUser(crc); // ------------------------------------- // (2) Get the Dataset Id @@ -3664,11 +3651,15 @@ private boolean isSingleVersionArchiving() { // This supports the cases where a tool is accessing a restricted resource (e.g. // for a draft dataset), or public case. @GET + @AuthRequired @Path("{id}/versions/{version}/toolparams/{tid}") - public Response getExternalToolDVParams(@PathParam("tid") long externalToolId, - @PathParam("id") String datasetId, @PathParam("version") String version, @QueryParam(value = "locale") String locale) { + public Response getExternalToolDVParams(@Context ContainerRequestContext crc, + @PathParam("tid") long externalToolId, + @PathParam("id") String datasetId, + @PathParam("version") String version, + @QueryParam(value = "locale") String locale) { try { - DataverseRequest req = createDataverseRequest(findUserOrDie()); + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); DatasetVersion target = getDatasetVersionOrDie(req, version, findDatasetOrDie(datasetId), null, null); if (target == null) { return error(BAD_REQUEST, "DatasetVersion not found."); @@ -3682,7 +3673,7 @@ public Response getExternalToolDVParams(@PathParam("tid") long externalToolId, return error(BAD_REQUEST, "External tool does not have dataset scope."); } ApiToken apiToken = null; - User u = findUserOrDie(); + User u = getRequestUser(crc); if (u instanceof AuthenticatedUser) { apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u); } From 
17653b3f35d6db3245e7059c000293c19f20307a Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 23 Jan 2023 11:51:46 +0000 Subject: [PATCH 199/322] Fixed: missing AuthFilter setup for Datasets getVersionJsonLDMetadata endpoint call --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ffe9b2bb698..e66548b9f81 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -692,10 +692,11 @@ public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @ } @GET + @AuthRequired @Path("{id}/metadata") @Produces("application/ld+json, application/json-ld") - public Response getVersionJsonLDMetadata(@PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return getVersionJsonLDMetadata(id, ":draft", uriInfo, headers); + public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return getVersionJsonLDMetadata(crc, id, ":draft", uriInfo, headers); } @PUT From 7749b01995dd37895a0ca01162322268562aab84 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 10:36:06 -0500 Subject: [PATCH 200/322] The remaining, mostly finalized changes for the "custom header" feature for OAI harvesting (#9231) --- .../source/admin/harvestclients.rst | 2 + doc/sphinx-guides/source/api/native-api.rst | 4 +- modules/dataverse-parent/pom.xml | 7 +- .../iq/dataverse/HarvestingClientsPage.java | 13 +- .../harvest/client/FastGetRecord.java | 124 +++++---- .../harvest/client/HarvesterServiceBean.java | 12 +- .../client/oai/CustomJdkHttpXoaiClient.java | 259 ------------------ .../harvest/client/oai/OaiHandler.java | 41 +-- src/main/java/propertyFiles/Bundle.properties | 
4 +- src/main/webapp/harvestclients.xhtml | 2 +- 10 files changed, 112 insertions(+), 356 deletions(-) delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index e94a6aa1730..37204003026 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,6 +21,8 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. 
It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. + How to Stop a Harvesting Run in Progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 589b947f15e..609f1487177 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3296,7 +3296,8 @@ The following optional fields are supported: - archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." - set: The OAI set on the remote server. If not supplied, will default to none, i.e., "harvest everything". - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). - +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. + Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. 
An example JSON file would look like this:: @@ -3308,6 +3309,7 @@ An example JSON file would look like this:: "archiveUrl": "https://zenodo.org", "archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.", "metadataFormat": "oai_dc", + "customHeaders": "x-oai-api-key: xxxyyyzzz", "set": "user-lmops" } diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 3911e9d5bbb..600741dc972 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -164,7 +164,8 @@ 4.4.14 - 5.0.0-RC2 + + 5.0.0-SNAPSHOT 1.15.0 @@ -324,7 +325,7 @@ Local repository for hosting jars not available from network repositories. file://${project.basedir}/local_lib - oss-sonatype oss-sonatype @@ -335,7 +336,7 @@ true - --> + diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index 4430a7be73a..5be7578f7f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteHarvestingClientCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; @@ -24,7 +23,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Locale; import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; @@ -557,6 +555,9 @@ public boolean validateServerUrlOAI() { if 
(!StringUtils.isEmpty(getNewHarvestingUrl())) { OaiHandler oaiHandler = new OaiHandler(getNewHarvestingUrl()); + if (getNewCustomHeader() != null) { + oaiHandler.setCustomHeaders(oaiHandler.makeCustomHeaders(getNewCustomHeader())); + } boolean success = true; String message = null; @@ -639,11 +640,11 @@ public boolean validateServerUrlOAI() { } public boolean validateCustomHeader() { - if (!StringUtils.isEmpty(getCustomHeader())) { + if (!StringUtils.isEmpty(getNewCustomHeader())) { // TODO: put this method somewhere else as a static utility // check that it's looking like "{header-name}: {header value}" at least - if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getCustomHeader())) { + if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getNewCustomHeader())) { FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); @@ -786,11 +787,11 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } - public String getCustomHeader() { + public String getNewCustomHeader() { return customHeader; } - public void setCustomHeader(String customHeader) { + public void setNewCustomHeader(String customHeader) { this.customHeader = customHeader; } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java index c5e3a93e2df..402d0d8ef91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java @@ -19,8 +19,8 @@ */ package edu.harvard.iq.dataverse.harvest.client; +import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler; import java.io.IOException; -import java.io.FileNotFoundException; import java.io.InputStream; import java.io.StringReader; @@ -31,9 +31,14 
@@ import java.io.FileOutputStream; import java.io.PrintWriter; -import java.net.HttpURLConnection; +import static java.net.HttpURLConnection.HTTP_OK; import java.net.MalformedURLException; -import java.net.URL; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Map; +import java.util.Optional; import java.util.zip.GZIPInputStream; import java.util.zip.InflaterInputStream; @@ -84,17 +89,18 @@ public class FastGetRecord { /** * Client-side GetRecord verb constructor * - * @param baseURL the baseURL of the server to be queried + * @param oaiHandler the configured OaiHandler running this harvest + * @param identifier Record identifier + * @param httpClient jdk HttpClient used to make http requests * @exception MalformedURLException the baseURL is bad * @exception SAXException the xml response is bad * @exception IOException an I/O error occurred + * @exception TransformerException if it fails to parse the service portion of the record */ - public FastGetRecord(String baseURL, String identifier, String metadataPrefix) - throws IOException, ParserConfigurationException, SAXException, + public FastGetRecord(OaiHandler oaiHandler, String identifier, HttpClient httpClient) throws IOException, ParserConfigurationException, SAXException, TransformerException { - harvestRecord (baseURL, identifier, metadataPrefix); - + harvestRecord (oaiHandler.getBaseOaiUrl(), identifier, oaiHandler.getMetadataPrefix(), oaiHandler.getCustomHeaders(), httpClient); } private String errorMessage = null; @@ -117,57 +123,63 @@ public boolean isDeleted () { } - public void harvestRecord(String baseURL, String identifier, String metadataPrefix) throws IOException, - ParserConfigurationException, SAXException, TransformerException { + public void harvestRecord(String baseURL, String identifier, String metadataPrefix, Map customHeaders, HttpClient httpClient) throws IOException, + ParserConfigurationException, 
SAXException, TransformerException{ xmlInputFactory = javax.xml.stream.XMLInputFactory.newInstance(); - String requestURL = getRequestURL(baseURL, identifier, metadataPrefix); + InputStream in; + + // This was one other place where the Harvester code was still using + // the obsolete java.net.HttpURLConnection that didn't get replaced with + // the new java.net.http.HttpClient during the first pass of the XOAI + // rewrite. (L.A.) - InputStream in = null; - URL url = new URL(requestURL); - HttpURLConnection con = null; - int responseCode = 0; - - con = (HttpURLConnection) url.openConnection(); - con.setRequestProperty("User-Agent", "Dataverse Harvesting Client v5"); - con.setRequestProperty("Accept-Encoding", - "compress, gzip, identify"); - try { - responseCode = con.getResponseCode(); - //logger.debug("responseCode=" + responseCode); - } catch (FileNotFoundException e) { - //logger.info(requestURL, e); - responseCode = HttpURLConnection.HTTP_UNAVAILABLE; - } - - // TODO: -- L.A. - // - // support for cookies; - // support for limited retry attempts -- ? - // implement reading of the stream as filterinputstream -- ? - // -- that could make it a little faster still. -- L.A. 
- - - - if (responseCode == 200) { - - String contentEncoding = con.getHeaderField("Content-Encoding"); - //logger.debug("contentEncoding=" + contentEncoding); - - // support for the standard compress/gzip/deflate compression - // schemes: - if ("compress".equals(contentEncoding)) { - ZipInputStream zis = new ZipInputStream(con.getInputStream()); - zis.getNextEntry(); - in = zis; - } else if ("gzip".equals(contentEncoding)) { - in = new GZIPInputStream(con.getInputStream()); - } else if ("deflate".equals(contentEncoding)) { - in = new InflaterInputStream(con.getInputStream()); - } else { - in = con.getInputStream(); + if (httpClient == null) { + throw new IOException("Null Http Client, cannot make a GetRecord call to obtain the metadata."); + } + + HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() + .uri(URI.create(requestURL)) + .GET() + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") + .header("Accept-Encoding", "compress, gzip"); + + if (customHeaders != null) { + for (String headerName : customHeaders.keySet()) { + requestBuilder.header(headerName, customHeaders.get(headerName)); + } + } + + HttpRequest request = requestBuilder.build(); + HttpResponse response; + + try { + response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Failed to connect to the remote dataverse server to obtain GetRecord metadata"); + } + + int responseCode = response.statusCode(); + + if (responseCode == HTTP_OK) { + InputStream inputStream = response.body(); + Optional contentEncoding = response.headers().firstValue("Content-Encoding"); + + // support for the standard gzip encoding: + in = inputStream; + if (contentEncoding.isPresent()) { + if (contentEncoding.get().equals("compress")) { + ZipInputStream zis = new ZipInputStream(inputStream); + zis.getNextEntry(); + in = zis; + } else if (contentEncoding.get().equals("gzip")) { + in = 
new GZIPInputStream(inputStream); + } else if (contentEncoding.get().equals("deflate")) { + in = new InflaterInputStream(inputStream); + } } // We are going to read the OAI header and SAX-parse it for the @@ -185,9 +197,7 @@ public void harvestRecord(String baseURL, String identifier, String metadataPref FileOutputStream tempFileStream = null; PrintWriter metadataOut = null; - savedMetadataFile = File.createTempFile("meta", ".tmp"); - - + savedMetadataFile = File.createTempFile("meta", ".tmp"); int mopen = 0; int mclose = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 0e9ffb20653..40bd45ecb30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -228,11 +228,9 @@ private void harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harv throw new IOException(errorMessage); } - if (DATAVERSE_PROPRIETARY_METADATA_FORMAT.equals(oaiHandler.getMetadataPrefix())) { - // If we are harvesting native Dataverse json, we'll also need this - // jdk http client to make direct calls to the remote Dataverse API: - httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); - } + // We will use this jdk http client to make direct calls to the remote + // OAI (or remote Dataverse API) to obtain the metadata records + httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); try { for (Iterator
idIter = oaiHandler.runListIdentifiers(); idIter.hasNext();) { @@ -295,7 +293,7 @@ private Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, P tempFile = retrieveProprietaryDataverseMetadata(httpClient, metadataApiUrl); } else { - FastGetRecord record = oaiHandler.runGetRecord(identifier); + FastGetRecord record = oaiHandler.runGetRecord(identifier, httpClient); errMessage = record.getErrorMessage(); deleted = record.isDeleted(); tempFile = record.getMetadataFile(); @@ -360,7 +358,7 @@ File retrieveProprietaryDataverseMetadata (HttpClient client, String remoteApiUr HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(remoteApiUrl)) .GET() - .header("User-Agent", "Dataverse Harvesting Client v5") + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") .build(); HttpResponse response; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java deleted file mode 100644 index 25c3a048219..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java +++ /dev/null @@ -1,259 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ -package edu.harvard.iq.dataverse.harvest.client.oai; - -import io.gdcc.xoai.serviceprovider.client.OAIClient; - -import io.gdcc.xoai.serviceprovider.exceptions.OAIRequestException; -import io.gdcc.xoai.serviceprovider.parameters.Parameters; -import java.io.IOException; -import java.io.InputStream; -import static java.net.HttpURLConnection.HTTP_OK; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.nio.charset.StandardCharsets; -import java.security.KeyManagementException; -import java.security.NoSuchAlgorithmException; -import java.security.cert.X509Certificate; -import java.time.Duration; -import java.util.List; -import java.util.ListIterator; -import javax.net.ssl.SSLContext; -import javax.net.ssl.TrustManager; -import javax.net.ssl.X509TrustManager; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.http.Header; - -/** - * Sane default OAI Client implementation using JDK HTTP Client. Can only be used via builder in - * calling code. - * (this is essentially a copy of the final class JdkHttpOaiClient provided by - * gdcc.xoai, with the custom http headers added. proof of concept! - */ -public final class CustomJdkHttpXoaiClient extends OAIClient { - - private static final Logger log = LoggerFactory.getLogger(OAIClient.class.getCanonicalName()); - - // As these vars will be feed via the builder and those provide defaults and null-checks, - // we may assume FOR INTERNAL USE these are not null. - private final String baseUrl; - private final String userAgent; - private final Duration requestTimeout; - private final HttpClient httpClient; - // Custom headers are optional though, ok to be null: - private final List
customHeaders; - - - CustomJdkHttpXoaiClient( - String baseUrl, String userAgent, Duration requestTimeout, List
customHeaders, HttpClient httpClient) { - this.baseUrl = baseUrl; - this.userAgent = userAgent; - this.requestTimeout = requestTimeout; - this.httpClient = httpClient; - this.customHeaders = customHeaders; - } - - @Override - public InputStream execute(Parameters parameters) throws OAIRequestException { - try { - URI requestURI = URI.create(parameters.toUrl(this.baseUrl)); - - HttpRequest.Builder httpRequestBuilder = HttpRequest.newBuilder() - .uri(requestURI) - .GET() - .header("User-Agent", this.userAgent) - .timeout(requestTimeout); - - // add custom headers, if present: - if (customHeaders != null) { - ListIterator
iterator = customHeaders.listIterator(); - while (iterator.hasNext()) { - Header customHeader = iterator.next(); - httpRequestBuilder.header(customHeader.getName(), customHeader.getValue()); - } - } - - HttpRequest request = httpRequestBuilder.build(); - - HttpResponse response = - this.httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); - - if (response.statusCode() == HTTP_OK) { - return response.body(); - } else { - // copy body of the response to string and send as exception message - throw new OAIRequestException( - "Query faild with status code " - + response.statusCode() - + ": " - + new String( - response.body().readAllBytes(), StandardCharsets.UTF_8)); - } - } catch (IllegalArgumentException | IOException | InterruptedException ex) { - // Hint by SonarCloud: - // https://sonarcloud.io/organizations/gdcc/rules?open=java%3AS2142&rule_key=java%3AS2142 - Thread.currentThread().interrupt(); - throw new OAIRequestException(ex); - } - } - - /*@Override - JdkHttpBuilder newBuilder() { - return new CustomJdkHttpXoaiClient.JdkHttpBuilder(); - }*/ - - /** - * Build an {@link OAIClient} using the JDK native HTTP client. You may use your own prepared - * {@link HttpClient.Builder} instead of the default one. - * - *

Provides defaults for request timeouts (60s) and user agent. Remember to set the base - * OAI-PMH URL via {@link #withBaseUrl(URL)}. An exception will occur on first request - * otherwise. - */ - public static final class JdkHttpBuilder implements OAIClient.Builder { - private String baseUrl = "Must be set via Builder.withBaseUrl()"; - private String userAgent = "XOAI Service Provider v5"; - private Duration requestTimeout = Duration.ofSeconds(60); - private List

customHeaders = null; - private final HttpClient.Builder httpClientBuilder; - - JdkHttpBuilder() { - this.httpClientBuilder = HttpClient.newBuilder(); - } - - /** - * While the default constructor can be accessed via {@link OAIClient#newBuilder()}, if - * someone provides a {@link HttpClient.Builder} (which might already contain - * configuration), happily work with it. - * - * @param httpClientBuilder Any (preconfigured) Java 11+ HTTP client builder - */ - public JdkHttpBuilder(HttpClient.Builder httpClientBuilder) { - this.httpClientBuilder = httpClientBuilder; - } - - @Override - public JdkHttpBuilder withBaseUrl(URL baseUrl) { - return this.withBaseUrl(baseUrl.toString()); - } - - @Override - public JdkHttpBuilder withBaseUrl(String baseUrl) { - try { - new URL(baseUrl).toURI(); - if (!baseUrl.startsWith("http")) { - throw new IllegalArgumentException("OAI-PMH supports HTTP/S only"); - } - this.baseUrl = baseUrl; - return this; - } catch (MalformedURLException | URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } - - @Override - public JdkHttpBuilder withConnectTimeout(Duration timeout) { - // validation is done by client builder! 
- httpClientBuilder.connectTimeout(timeout); - return this; - } - - @Override - public JdkHttpBuilder withRequestTimeout(Duration timeout) { - if (timeout == null || timeout.isNegative()) { - throw new IllegalArgumentException("Timeout must not be null or negative value"); - } - this.requestTimeout = timeout; - return this; - } - - @Override - public JdkHttpBuilder withUserAgent(String userAgent) { - if (userAgent == null || userAgent.isBlank()) { - throw new IllegalArgumentException("User agent must not be null or empty/blank"); - } - this.userAgent = userAgent; - return this; - } - - @Override - public JdkHttpBuilder withFollowRedirects() { - this.httpClientBuilder.followRedirects(HttpClient.Redirect.NORMAL); - return this; - } - - @Override - public JdkHttpBuilder withInsecureSSL() { - // create insecure context (switch of certificate checks) - httpClientBuilder.sslContext(insecureContext()); - - // warn if the hostname verification is still active - // (users must do this themselves - it's a global setting and might pose a security - // risk) - if (!Boolean.getBoolean("jdk.internal.httpclient.disableHostnameVerification")) { - log.warn( - "You must disable JDK HTTP Client Host Name Verification globally via" - + " system property" - + " -Djdk.internal.httpclient.disableHostnameVerification=true for" - + " XOAI Client connections to insecure SSL servers. Don't do this in" - + " a production setup!"); - } - return this; - } - - public JdkHttpBuilder withCustomHeaders(List
customHeaders) { - // This can be null, as these headers are optional - this.customHeaders = customHeaders; - return this; - } - - @Override - public CustomJdkHttpXoaiClient build() { - return new CustomJdkHttpXoaiClient( - this.baseUrl, this.userAgent, this.requestTimeout, this.customHeaders, httpClientBuilder.build()); - } - - private static SSLContext insecureContext() { - TrustManager[] noopTrustManager = - new TrustManager[] { - new X509TrustManager() { - // This is insecure by design, we warn users and they need to do sth. to - // use it. - // Safely ignore the Sonarcloud message. - @SuppressWarnings("java:S4830") - public void checkClientTrusted(X509Certificate[] xcs, String string) { - // we want to accept every certificate - intentionally left blank - } - // This is insecure by design, we warn users and they need to do sth. to - // use it. - // Safely ignore the Sonarcloud message. - @SuppressWarnings("java:S4830") - public void checkServerTrusted(X509Certificate[] xcs, String string) { - // we want to accept every certificate - intentionally left blank - } - - public X509Certificate[] getAcceptedIssuers() { - return new X509Certificate[0]; - } - } - }; - try { - SSLContext sc = SSLContext.getInstance("TLSv1.2"); - sc.init(null, noopTrustManager, null); - return sc; - } catch (KeyManagementException | NoSuchAlgorithmException ex) { - log.error("Could not build insecure SSL context. 
Might cause NPE.", ex); - return null; - } - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index ae297416ff9..d9fa9b27c5a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -14,8 +14,10 @@ import edu.harvard.iq.dataverse.harvest.client.FastGetRecord; import static edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.DATAVERSE_PROPRIETARY_METADATA_API; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import java.io.IOException; import java.io.Serializable; +import java.net.http.HttpClient; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.lang3.StringUtils; @@ -23,10 +25,11 @@ import javax.xml.transform.TransformerException; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.logging.Logger; -import org.apache.http.message.BasicHeader; /** * @@ -68,7 +71,6 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException this.fromDate = harvestingClient.getLastNonEmptyHarvestTime(); this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders()); - //test: this.customHeaders = makeCustomHeaders("x-api-key: xxx-yyy-zzz\\ny-api-key: zzz-yyy-xxx"); this.harvestingClient = harvestingClient; } @@ -79,7 +81,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException private String setName; private Date fromDate; private Boolean setListTruncated = false; - private List customHeaders = null; + private Map customHeaders = null; private ServiceProvider serviceProvider; @@ -125,11 +127,11 @@ public boolean isSetListTruncated() { return 
setListTruncated; } - public List getCustomHeaders() { + public Map getCustomHeaders() { return this.customHeaders; } - public void setCustomHeaders(List customHeaders) { + public void setCustomHeaders(Map customHeaders) { this.customHeaders = customHeaders; } @@ -142,17 +144,12 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); - // builds the client based on the default client provided in xoai, - // with the same default parameters and the JDK http client, with - // just the (optional) custom headers added: - // (this is proof-of-concept implementation; there gotta be a prettier way to do this) - //context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); if (getCustomHeaders() != null) { - for (org.apache.http.Header customHeader : getCustomHeaders()) { - logger.info("will add custom header; name: "+customHeader.getName()+", value: "+customHeader.getValue()); + for (String headerName : getCustomHeaders().keySet()) { + logger.info("will add custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); } } - context.withOAIClient((new CustomJdkHttpXoaiClient.JdkHttpBuilder()).withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); + context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); serviceProvider = new ServiceProvider(context); } @@ -258,7 +255,7 @@ public Iterator
runListIdentifiers() throws OaiHandlerException { } - public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException { + public FastGetRecord runGetRecord(String identifier, HttpClient httpClient) throws OaiHandlerException { if (StringUtils.isEmpty(this.baseOaiUrl)) { throw new OaiHandlerException("Attempted to execute GetRecord without server URL specified."); } @@ -267,7 +264,7 @@ public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException } try { - return new FastGetRecord(this.baseOaiUrl, identifier, this.metadataPrefix); + return new FastGetRecord(this, identifier, httpClient); } catch (ParserConfigurationException pce) { throw new OaiHandlerException("ParserConfigurationException executing GetRecord: "+pce.getMessage()); } catch (SAXException se) { @@ -317,20 +314,24 @@ public void runIdentify() { // and to learn about its extended capabilities) } - private List makeCustomHeaders(String headersString) { + public Map makeCustomHeaders(String headersString) { if (headersString != null) { - List ret = new ArrayList<>(); String[] parts = headersString.split("\\\\n"); - + HashMap ret = new HashMap<>(); + logger.info("found "+parts.length+" parts"); + int count = 0; for (int i = 0; i < parts.length; i++) { if (parts[i].indexOf(':') > 0) { String headerName = parts[i].substring(0, parts[i].indexOf(':')); String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip(); - ret.add(new BasicHeader(headerName, headerValue)); + + ret.put(headerName, headerValue); + count++; } // simply skipping it if malformed; or we could throw an exception - ? 
} - if (!ret.isEmpty()) { + if (ret.size() > 0) { + logger.info("returning the array with "+ret.size()+" name/value pairs"); return ret; } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index e2007338e08..51d9b73085b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -539,8 +539,8 @@ harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters. harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used. harvestclients.newClientDialog.customHeader=Custom HTTP Header -harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to OAI requests -harvestclients.newClientDialog.customHeader.watermark=Enter the header as in header-name: header-value +harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to requests, if required by this OAI server. +harvestclients.newClientDialog.customHeader.watermark=Enter an http header, as in header-name: header-value harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value" harvestclients.newClientDialog.type=Server Protocol harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported. diff --git a/src/main/webapp/harvestclients.xhtml b/src/main/webapp/harvestclients.xhtml index a5f271e8e75..3c09ed4ecb0 100644 --- a/src/main/webapp/harvestclients.xhtml +++ b/src/main/webapp/harvestclients.xhtml @@ -287,7 +287,7 @@
From 7888fcde8b78154a77e2d49375b815777b3a6d5d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 10:41:10 -0500 Subject: [PATCH 201/322] backslashes in the sphinx sources (#9231) --- doc/sphinx-guides/source/admin/harvestclients.rst | 2 +- doc/sphinx-guides/source/api/native-api.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index 37204003026..02783e4b97a 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,7 +21,7 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. -Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. 
Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. How to Stop a Harvesting Run in Progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 609f1487177..2782f4d1d08 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3296,7 +3296,7 @@ The following optional fields are supported: - archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." - set: The OAI set on the remote server. 
If not supplied, will default to none, i.e., "harvest everything". - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). -- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. 
From 3f43b199eb6fd9a0658317de16d3a9b1159b8266 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 15:06:34 -0500 Subject: [PATCH 202/322] a 3 line fix for the broken "earliest date" (#9309) --- .../harvest/server/OAIRecordServiceBean.java | 13 +++++++++++++ .../harvest/server/web/servlet/OAIServlet.java | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java index 6cdc4e5c277..3cbfe313504 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java @@ -32,6 +32,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; +import javax.persistence.Query; import javax.persistence.TypedQuery; import javax.persistence.TemporalType; @@ -375,4 +376,16 @@ public List findDeletedOaiRecordsBySetName(String setName) { } } + public Instant getEarliestDate() { + String queryString = "SELECT min(r.lastUpdateTime) FROM OAIRecord r"; + TypedQuery query = em.createQuery(queryString, Date.class); + Date retDate = query.getSingleResult(); + if (retDate != null) { + return retDate.toInstant(); + } + + // if there are no records yet, return the default "now" + return new Date().toInstant(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 5d0580708a9..f966b30311b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -207,7 +207,8 @@ private RepositoryConfiguration createRepositoryConfiguration() { .withDeleteMethod(DeletedRecord.TRANSIENT) 
.withMaxListIdentifiers(maxListIdentifiers) .withMaxListRecords(maxListRecords) - .withMaxListSets(maxListSets); + .withMaxListSets(maxListSets) + .withEarliestDate(recordService.getEarliestDate()); return repositoryConfiguration; } From 6873ad90f9760c3de35178a761568127b16004b8 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 23 Jan 2023 16:08:03 -0500 Subject: [PATCH 203/322] #8339 allow drafts; fix bundle --- .../edu/harvard/iq/dataverse/api/Files.java | 44 +++++++++++++++---- src/main/java/propertyFiles/Bundle.properties | 1 + 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index ccd8f67fa0c..9a43932dba6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -450,10 +450,22 @@ public Response updateFileMetadata(@FormDataParam("jsonData") String jsonData, .build(); } + @GET + @Path("{id}/draft") + public Response getFileDataDraft(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { + return getFileDataResponse(fileIdOrPersistentId, uriInfo, headers, response, true); + } + @GET @Path("{id}") public Response getFileData(@PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { + return getFileDataResponse(fileIdOrPersistentId, uriInfo, headers, response, false); + } + + private Response getFileDataResponse(String fileIdOrPersistentId, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response, boolean draft ){ + DataverseRequest req; + try { req = createDataverseRequest(findUserOrDie()); } catch (Exception e) { @@ -465,22 +477,37 @@ public Response getFileData(@PathParam("id") String fileIdOrPersistentId, 
@Conte } catch (Exception e) { return error(BAD_REQUEST, "Error attempting get the requested data file."); } + FileMetadata fm; - //first get latest published - //if not available get draft if permissible - try { - - fm = df.getLatestPublishedFileMetadata(); - } catch (UnsupportedOperationException e) { + if (draft) { try { fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); } catch (WrappedResponse w) { return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); } if (null == fm) { - return error(BAD_REQUEST, "No draft availabile for this dataset"); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); } + } else { + //first get latest published + //if not available get draft if permissible + + try { + + fm = df.getLatestPublishedFileMetadata(); + + } catch (UnsupportedOperationException e) { + try { + fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); + } catch (WrappedResponse w) { + return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); + } + if (null == fm) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); + } + } + } if (fm.getDatasetVersion().isReleased()) { @@ -523,7 +550,7 @@ public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @P return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset." 
); } if(null == fm) { - return error(BAD_REQUEST, "No draft availabile for this dataset"); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); } } else { fm = df.getLatestPublishedFileMetadata(); @@ -539,6 +566,7 @@ public Response getFileMetadata(@PathParam("id") String fileIdOrPersistentId, @P .type(MediaType.TEXT_PLAIN) //Our plain text string is already json .build(); } + @GET @Path("{id}/metadata/draft") public Response getFileMetadataDraft(@PathParam("id") String fileIdOrPersistentId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, Boolean getDraft) throws WrappedResponse, Exception { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 4166ab78a39..f55a0636126 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2552,6 +2552,7 @@ admin.api.deleteUser.success=Authenticated User {0} deleted. #Files.java files.api.metadata.update.duplicateFile=Filename already exists at {0} +files.api.no.draft=No draft available for this file #Datasets.java datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset. From cb4765d042b64023bda4acf8bc47a149655682da Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 16:27:07 -0500 Subject: [PATCH 204/322] Checked in something earlier that is prone to null pointers, due to a change in behavior in the latest gdcc.xoai - that I knew, but had forgotten about over the weekend. 
(#9231) --- .../iq/dataverse/harvest/client/oai/OaiHandler.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index d9fa9b27c5a..bb3dc06972c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -144,12 +144,15 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); + + JdkHttpOaiClient.Builder xoaiClientBuilder = JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()); if (getCustomHeaders() != null) { for (String headerName : getCustomHeaders().keySet()) { - logger.info("will add custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); - } + logger.fine("adding custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); + } + xoaiClientBuilder = xoaiClientBuilder.withCustomHeaders(getCustomHeaders()); } - context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); + context.withOAIClient(xoaiClientBuilder.build()); serviceProvider = new ServiceProvider(context); } From 87c31d7a9a88c432b6ed71a424089aa76346bb78 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 23 Jan 2023 17:42:42 -0500 Subject: [PATCH 205/322] #8724 fix paths for new dataset --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index f9a3cbf5633..e73cce8acbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1662,7 +1662,7 @@ private List retrieveDVOPaths(DvObject dvo) { logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); } List dataversePaths = getDataversePathsFromSegments(dataverseSegments); - if (dataversePaths.size() > 0) { + if (dataversePaths.size() > 0 && dvo.isInstanceofDataverse()) { // removing the dataverse's own id from the paths // fixes bug where if my parent dv was linked my dv was shown as linked to myself dataversePaths.remove(dataversePaths.size() - 1); From dc9a2972c8a7b62b9d3f5ac2dbeddbba91a60ec0 Mon Sep 17 00:00:00 2001 From: Anthony Reyes Date: Mon, 23 Jan 2023 21:02:46 -0800 Subject: [PATCH 206/322] Added suggestions from #9265 I added changes suggested by @qqmyers from #9265 as well as a small change to prevent a horizontal scrollbar from appearing. --- src/main/java/propertyFiles/Bundle.properties | 2 +- src/main/webapp/dataset.xhtml | 13 +++-- src/main/webapp/dataverse.xhtml | 56 +++++-------------- .../resources/js/dv_rebind_bootstrap_ui.js | 4 +- 4 files changed, 24 insertions(+), 51 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index f7b46c308f5..e8238e79267 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -73,7 +73,7 @@ delete=Delete copyClipboard=Copy to Clipboard truncateMoreBtn=Read full {0} [+] truncateMoreTip=Click to read the full {0}. -truncateLessBtn=Collapse {0} [+] +truncateLessBtn=Collapse {0} [-] truncateLessTip=Click to collapse the {0}. yes=Yes no=No diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 1bb862721a5..4d5e0850083 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -1789,6 +1789,7 @@ +