diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst
index e94a6aa1730..02783e4b97a 100644
--- a/doc/sphinx-guides/source/admin/harvestclients.rst
+++ b/doc/sphinx-guides/source/admin/harvestclients.rst
@@ -21,6 +21,8 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da
The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered.
+Note that as of 5.13, a new entry, "Custom HTTP Header", has been added to Step 1 of the Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is, however, on the very first screen (Step 1) in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in Step 2 of creating or editing a client. Multiple headers can be supplied separated by ``\\n`` - actual "backslash" and "n" characters, not a single "new line" character.
+
How to Stop a Harvesting Run in Progress
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index b4cce8a1ed1..f6fbb28fe31 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -3410,7 +3410,8 @@ The following optional fields are supported:
- archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data."
- set: The OAI set on the remote server. If not supplied, will default to none, i.e., "harvest everything".
- style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation).
-
+- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by ``\\n`` - actual "backslash" and "n" characters, not a single "new line" character.
+
Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API.
An example JSON file would look like this::
@@ -3422,6 +3423,7 @@ An example JSON file would look like this::
"archiveUrl": "https://zenodo.org",
"archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.",
"metadataFormat": "oai_dc",
+ "customHeaders": "x-oai-api-key: xxxyyyzzz",
"set": "user-lmops"
}
diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index 3911e9d5bbb..600741dc972 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -164,7 +164,8 @@
4.4.14
- 5.0.0-RC2
+
+ 5.0.0-SNAPSHOT1.15.0
@@ -324,7 +325,7 @@
Local repository for hosting jars not available from network repositories.file://${project.basedir}/local_lib
-
oss-sonatypeoss-sonatype
@@ -335,7 +336,7 @@
true
- -->
+
diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java
index bc83c15dcd7..5be7578f7f8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java
@@ -9,7 +9,6 @@
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.DeleteHarvestingClientCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand;
import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean;
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
@@ -24,7 +23,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import java.util.Locale;
import java.util.Collections;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -79,7 +77,7 @@ public class HarvestingClientsPage implements java.io.Serializable {
private Dataverse dataverse;
private Long dataverseId = null;
private HarvestingClient selectedClient;
- private boolean setListTruncated = false;
+ private boolean setListTruncated = false;
//private static final String solrDocIdentifierDataset = "dataset_";
@@ -245,6 +243,7 @@ public void editClient(HarvestingClient harvestingClient) {
this.newNickname = harvestingClient.getName();
this.newHarvestingUrl = harvestingClient.getHarvestingUrl();
+ this.customHeader = harvestingClient.getCustomHttpHeaders();
this.initialSettingsValidated = false;
// TODO: do we want to try and contact the server, again, to make
@@ -340,6 +339,7 @@ public void createClient(ActionEvent ae) {
getSelectedDestinationDataverse().getHarvestingClientConfigs().add(newHarvestingClient);
newHarvestingClient.setHarvestingUrl(newHarvestingUrl);
+ newHarvestingClient.setCustomHttpHeaders(customHeader);
if (!StringUtils.isEmpty(newOaiSet)) {
newHarvestingClient.setHarvestingSet(newOaiSet);
}
@@ -426,6 +426,7 @@ public void saveClient(ActionEvent ae) {
// nickname is not editable for existing clients:
//harvestingClient.setName(newNickname);
harvestingClient.setHarvestingUrl(newHarvestingUrl);
+ harvestingClient.setCustomHttpHeaders(customHeader);
harvestingClient.setHarvestingSet(newOaiSet);
harvestingClient.setMetadataPrefix(newMetadataFormat);
harvestingClient.setHarvestStyle(newHarvestingStyle);
@@ -554,6 +555,9 @@ public boolean validateServerUrlOAI() {
if (!StringUtils.isEmpty(getNewHarvestingUrl())) {
OaiHandler oaiHandler = new OaiHandler(getNewHarvestingUrl());
+ if (getNewCustomHeader() != null) {
+ oaiHandler.setCustomHeaders(oaiHandler.makeCustomHeaders(getNewCustomHeader()));
+ }
boolean success = true;
String message = null;
@@ -635,6 +639,23 @@ public boolean validateServerUrlOAI() {
return false;
}
+ public boolean validateCustomHeader() {
+ if (!StringUtils.isEmpty(getNewCustomHeader())) {
+ // TODO: put this method somewhere else as a static utility
+
+ // check that it's looking like "{header-name}: {header value}" at least
+ if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getNewCustomHeader())) {
+ FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(),
+ new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid")));
+
+ return false;
+ }
+ }
+
+ // this setting is optional
+ return true;
+ }
+
public void validateInitialSettings() {
if (isHarvestTypeOAI()) {
boolean nicknameValidated = true;
@@ -644,9 +665,10 @@ public void validateInitialSettings() {
destinationDataverseValidated = validateSelectedDataverse();
}
boolean urlValidated = validateServerUrlOAI();
+ boolean customHeaderValidated = validateCustomHeader();
- if (nicknameValidated && destinationDataverseValidated && urlValidated) {
- // In Create mode we want to run all 3 validation tests; this is why
+ if (nicknameValidated && destinationDataverseValidated && urlValidated && customHeaderValidated) {
+ // In Create mode we want to run all 4 validation tests; this is why
// we are not doing "if ((validateNickname() && validateServerUrlOAI())"
// in the line above. -- L.A. 4.4 May 2016.
@@ -688,6 +710,7 @@ public void backToStepThree() {
UIInput newClientNicknameInputField;
UIInput newClientUrlInputField;
+ UIInput newClientCustomHeaderInputField;
UIInput hiddenInputField;
/*UISelectOne*/ UIInput metadataFormatMenu;
UIInput remoteArchiveStyleMenu;
@@ -695,6 +718,7 @@ public void backToStepThree() {
private String newNickname = "";
private String newHarvestingUrl = "";
+ private String customHeader = null;
private boolean initialSettingsValidated = false;
private String newOaiSet = "";
private String newMetadataFormat = "";
@@ -718,6 +742,7 @@ public void initNewClient(ActionEvent ae) {
//this.selectedClient = new HarvestingClient();
this.newNickname = "";
this.newHarvestingUrl = "";
+ this.customHeader = null;
this.initialSettingsValidated = false;
this.newOaiSet = "";
this.newMetadataFormat = "";
@@ -762,6 +787,14 @@ public void setNewHarvestingUrl(String newHarvestingUrl) {
this.newHarvestingUrl = newHarvestingUrl;
}
+ public String getNewCustomHeader() {
+ return customHeader;
+ }
+
+ public void setNewCustomHeader(String customHeader) {
+ this.customHeader = customHeader;
+ }
+
public int getHarvestTypeRadio() {
return this.harvestTypeRadio;
}
@@ -871,6 +904,14 @@ public void setNewClientUrlInputField(UIInput newClientInputField) {
this.newClientUrlInputField = newClientInputField;
}
+ public UIInput getNewClientCustomHeaderInputField() {
+ return newClientCustomHeaderInputField;
+ }
+
+ public void setNewClientCustomHeaderInputField(UIInput newClientInputField) {
+ this.newClientCustomHeaderInputField = newClientInputField;
+ }
+
public UIInput getHiddenInputField() {
return hiddenInputField;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java
index b75cb687c62..9aea3adab8b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java
@@ -15,6 +15,7 @@
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.json.JsonParseException;
+import edu.harvard.iq.dataverse.util.json.JsonPrinter;
import javax.json.JsonObjectBuilder;
import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
import java.io.IOException;
@@ -88,7 +89,7 @@ public Response harvestingClients(@QueryParam("key") String apiKey) throws IOExc
}
if (retrievedHarvestingClient != null) {
- hcArr.add(harvestingConfigAsJson(retrievedHarvestingClient));
+ hcArr.add(JsonPrinter.json(retrievedHarvestingClient));
}
}
@@ -136,7 +137,7 @@ public Response harvestingClient(@PathParam("nickName") String nickName, @QueryP
}
try {
- return ok(harvestingConfigAsJson(retrievedHarvestingClient));
+ return ok(JsonPrinter.json(retrievedHarvestingClient));
} catch (Exception ex) {
logger.warning("Unknown exception caught while trying to format harvesting client config as json: "+ex.getMessage());
return error( Response.Status.BAD_REQUEST,
@@ -216,7 +217,7 @@ public Response createHarvestingClient(String jsonBody, @PathParam("nickName") S
DataverseRequest req = createDataverseRequest(findUserOrDie());
harvestingClient = execCommand(new CreateHarvestingClientCommand(req, harvestingClient));
- return created( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient));
+ return created( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient));
} catch (JsonParseException ex) {
return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() );
@@ -268,6 +269,8 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S
}
// Go through the supported editable fields and update the client accordingly:
+ // TODO: We may want to reevaluate whether we really want/need *all*
+ // of these fields to be editable.
if (newHarvestingClient.getHarvestingUrl() != null) {
harvestingClient.setHarvestingUrl(newHarvestingClient.getHarvestingUrl());
@@ -287,10 +290,13 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S
if (newHarvestingClient.getHarvestStyle() != null) {
harvestingClient.setHarvestStyle(newHarvestingClient.getHarvestStyle());
}
+ if (newHarvestingClient.getCustomHttpHeaders() != null) {
+ harvestingClient.setCustomHttpHeaders(newHarvestingClient.getCustomHttpHeaders());
+ }
// TODO: Make schedule configurable via this API too.
harvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient));
- return ok( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient));
+ return ok( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); // harvestingConfigAsJson(harvestingClient));
} catch (JsonParseException ex) {
return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() );
@@ -390,32 +396,4 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname,
}
return this.accepted();
}
-
- /* Auxiliary, helper methods: */
-
- public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) {
- if (harvestingConfig == null) {
- return null;
- }
-
-
- return jsonObjectBuilder().add("nickName", harvestingConfig.getName()).
- add("dataverseAlias", harvestingConfig.getDataverse().getAlias()).
- add("type", harvestingConfig.getHarvestType()).
- add("style", harvestingConfig.getHarvestStyle()).
- add("harvestUrl", harvestingConfig.getHarvestingUrl()).
- add("archiveUrl", harvestingConfig.getArchiveUrl()).
- add("archiveDescription",harvestingConfig.getArchiveDescription()).
- add("metadataFormat", harvestingConfig.getMetadataPrefix()).
- add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()).
- add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none").
- add("status", harvestingConfig.isHarvestingNow() ? "inProgress" : "inActive").
- add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? "N/A" : harvestingConfig.getLastHarvestTime().toString()).
- add("lastResult", harvestingConfig.getLastResult()).
- add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()).
- add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? "N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()).
- add("lastDatasetsHarvested", harvestingConfig.getLastHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getLastHarvestedDatasetCount().toString()).
- add("lastDatasetsDeleted", harvestingConfig.getLastDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getLastDeletedDatasetCount().toString()).
- add("lastDatasetsFailed", harvestingConfig.getLastFailedDatasetCount() == null ? "N/A" : harvestingConfig.getLastFailedDatasetCount().toString());
- }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java
index c5e3a93e2df..402d0d8ef91 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java
@@ -19,8 +19,8 @@
*/
package edu.harvard.iq.dataverse.harvest.client;
+import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler;
import java.io.IOException;
-import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.StringReader;
@@ -31,9 +31,14 @@
import java.io.FileOutputStream;
import java.io.PrintWriter;
-import java.net.HttpURLConnection;
+import static java.net.HttpURLConnection.HTTP_OK;
import java.net.MalformedURLException;
-import java.net.URL;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.util.Map;
+import java.util.Optional;
import java.util.zip.GZIPInputStream;
import java.util.zip.InflaterInputStream;
@@ -84,17 +89,18 @@ public class FastGetRecord {
/**
* Client-side GetRecord verb constructor
*
- * @param baseURL the baseURL of the server to be queried
+ * @param oaiHandler the configured OaiHandler running this harvest
+ * @param identifier Record identifier
+ * @param httpClient jdk HttpClient used to make http requests
* @exception MalformedURLException the baseURL is bad
* @exception SAXException the xml response is bad
* @exception IOException an I/O error occurred
+ * @exception TransformerException if it fails to parse the service portion of the record
*/
- public FastGetRecord(String baseURL, String identifier, String metadataPrefix)
- throws IOException, ParserConfigurationException, SAXException,
+ public FastGetRecord(OaiHandler oaiHandler, String identifier, HttpClient httpClient) throws IOException, ParserConfigurationException, SAXException,
TransformerException {
- harvestRecord (baseURL, identifier, metadataPrefix);
-
+ harvestRecord (oaiHandler.getBaseOaiUrl(), identifier, oaiHandler.getMetadataPrefix(), oaiHandler.getCustomHeaders(), httpClient);
}
private String errorMessage = null;
@@ -117,57 +123,63 @@ public boolean isDeleted () {
}
- public void harvestRecord(String baseURL, String identifier, String metadataPrefix) throws IOException,
- ParserConfigurationException, SAXException, TransformerException {
+ public void harvestRecord(String baseURL, String identifier, String metadataPrefix, Map customHeaders, HttpClient httpClient) throws IOException,
+ ParserConfigurationException, SAXException, TransformerException{
xmlInputFactory = javax.xml.stream.XMLInputFactory.newInstance();
-
String requestURL = getRequestURL(baseURL, identifier, metadataPrefix);
+ InputStream in;
+
+ // This was one other place where the Harvester code was still using
+ // the obsolete java.net.HttpURLConnection that didn't get replaced with
+ // the new java.net.http.HttpClient during the first pass of the XOAI
+ // rewrite. (L.A.)
- InputStream in = null;
- URL url = new URL(requestURL);
- HttpURLConnection con = null;
- int responseCode = 0;
-
- con = (HttpURLConnection) url.openConnection();
- con.setRequestProperty("User-Agent", "Dataverse Harvesting Client v5");
- con.setRequestProperty("Accept-Encoding",
- "compress, gzip, identify");
- try {
- responseCode = con.getResponseCode();
- //logger.debug("responseCode=" + responseCode);
- } catch (FileNotFoundException e) {
- //logger.info(requestURL, e);
- responseCode = HttpURLConnection.HTTP_UNAVAILABLE;
- }
-
- // TODO: -- L.A.
- //
- // support for cookies;
- // support for limited retry attempts -- ?
- // implement reading of the stream as filterinputstream -- ?
- // -- that could make it a little faster still. -- L.A.
-
-
-
- if (responseCode == 200) {
-
- String contentEncoding = con.getHeaderField("Content-Encoding");
- //logger.debug("contentEncoding=" + contentEncoding);
-
- // support for the standard compress/gzip/deflate compression
- // schemes:
- if ("compress".equals(contentEncoding)) {
- ZipInputStream zis = new ZipInputStream(con.getInputStream());
- zis.getNextEntry();
- in = zis;
- } else if ("gzip".equals(contentEncoding)) {
- in = new GZIPInputStream(con.getInputStream());
- } else if ("deflate".equals(contentEncoding)) {
- in = new InflaterInputStream(con.getInputStream());
- } else {
- in = con.getInputStream();
+ if (httpClient == null) {
+ throw new IOException("Null Http Client, cannot make a GetRecord call to obtain the metadata.");
+ }
+
+ HttpRequest.Builder requestBuilder = HttpRequest.newBuilder()
+ .uri(URI.create(requestURL))
+ .GET()
+ .header("User-Agent", "XOAI Service Provider v5 (Dataverse)")
+ .header("Accept-Encoding", "compress, gzip");
+
+ if (customHeaders != null) {
+ for (String headerName : customHeaders.keySet()) {
+ requestBuilder.header(headerName, customHeaders.get(headerName));
+ }
+ }
+
+ HttpRequest request = requestBuilder.build();
+ HttpResponse response;
+
+ try {
+ response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream());
+ } catch (InterruptedException ex) {
+ Thread.currentThread().interrupt();
+ throw new IOException("Failed to connect to the remote dataverse server to obtain GetRecord metadata");
+ }
+
+ int responseCode = response.statusCode();
+
+ if (responseCode == HTTP_OK) {
+ InputStream inputStream = response.body();
+ Optional contentEncoding = response.headers().firstValue("Content-Encoding");
+
+ // support for the standard gzip encoding:
+ in = inputStream;
+ if (contentEncoding.isPresent()) {
+ if (contentEncoding.get().equals("compress")) {
+ ZipInputStream zis = new ZipInputStream(inputStream);
+ zis.getNextEntry();
+ in = zis;
+ } else if (contentEncoding.get().equals("gzip")) {
+ in = new GZIPInputStream(inputStream);
+ } else if (contentEncoding.get().equals("deflate")) {
+ in = new InflaterInputStream(inputStream);
+ }
}
// We are going to read the OAI header and SAX-parse it for the
@@ -185,9 +197,7 @@ public void harvestRecord(String baseURL, String identifier, String metadataPref
FileOutputStream tempFileStream = null;
PrintWriter metadataOut = null;
- savedMetadataFile = File.createTempFile("meta", ".tmp");
-
-
+ savedMetadataFile = File.createTempFile("meta", ".tmp");
int mopen = 0;
int mclose = 0;
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
index 0e9ffb20653..40bd45ecb30 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
@@ -228,11 +228,9 @@ private void harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harv
throw new IOException(errorMessage);
}
- if (DATAVERSE_PROPRIETARY_METADATA_FORMAT.equals(oaiHandler.getMetadataPrefix())) {
- // If we are harvesting native Dataverse json, we'll also need this
- // jdk http client to make direct calls to the remote Dataverse API:
- httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build();
- }
+ // We will use this jdk http client to make direct calls to the remote
+ // OAI (or remote Dataverse API) to obtain the metadata records
+ httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build();
try {
for (Iterator idIter = oaiHandler.runListIdentifiers(); idIter.hasNext();) {
@@ -295,7 +293,7 @@ private Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, P
tempFile = retrieveProprietaryDataverseMetadata(httpClient, metadataApiUrl);
} else {
- FastGetRecord record = oaiHandler.runGetRecord(identifier);
+ FastGetRecord record = oaiHandler.runGetRecord(identifier, httpClient);
errMessage = record.getErrorMessage();
deleted = record.isDeleted();
tempFile = record.getMetadataFile();
@@ -360,7 +358,7 @@ File retrieveProprietaryDataverseMetadata (HttpClient client, String remoteApiUr
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(remoteApiUrl))
.GET()
- .header("User-Agent", "Dataverse Harvesting Client v5")
+ .header("User-Agent", "XOAI Service Provider v5 (Dataverse)")
.build();
HttpResponse response;
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java
index aeb010fad6d..d27ddc41b7f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java
@@ -234,6 +234,16 @@ public void setMetadataPrefix(String metadataPrefix) {
this.metadataPrefix = metadataPrefix;
}
+ private String customHttpHeaders;
+
+ public String getCustomHttpHeaders() {
+ return customHttpHeaders;
+ }
+
+ public void setCustomHttpHeaders(String customHttpHeaders) {
+ this.customHttpHeaders = customHttpHeaders;
+ }
+
// TODO: do we need "orphanRemoval=true"? -- L.A. 4.4
// TODO: should it be @OrderBy("startTime")? -- L.A. 4.4
@OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST})
@@ -345,95 +355,7 @@ public Long getLastDeletedDatasetCount() {
return lastNonEmptyHarvest.getDeletedDatasetCount();
}
return null;
- }
-
- /* move the fields below to the new HarvestingClientRun class:
- private String harvestResult;
-
- public String getResult() {
- return harvestResult;
- }
-
- public void setResult(String harvestResult) {
- this.harvestResult = harvestResult;
- }
-
- // "Last Harvest Time" is the last time we *attempted* to harvest
- // from this remote resource.
- // It wasn't necessarily a successful attempt!
-
- @Temporal(value = TemporalType.TIMESTAMP)
- private Date lastHarvestTime;
-
- public Date getLastHarvestTime() {
- return lastHarvestTime;
- }
-
- public void setLastHarvestTime(Date lastHarvestTime) {
- this.lastHarvestTime = lastHarvestTime;
- }
-
- // This is the last "successful harvest" - i.e., the last time we
- // tried to harvest, and got a response from the remote server.
- // We may not have necessarily harvested any useful content though;
- // the result may have been a "no content" or "no changes since the last harvest"
- // response.
-
- @Temporal(value = TemporalType.TIMESTAMP)
- private Date lastSuccessfulHarvestTime;
-
- public Date getLastSuccessfulHarvestTime() {
- return lastSuccessfulHarvestTime;
- }
-
- public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) {
- this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime;
- }
-
- // Finally, this is the time stamp from the last "non-empty" harvest.
- // I.e. the last time we ran a harvest that actually resulted in
- // some Datasets created, updated or deleted:
-
- @Temporal(value = TemporalType.TIMESTAMP)
- private Date lastNonEmptyHarvestTime;
-
- public Date getLastNonEmptyHarvestTime() {
- return lastNonEmptyHarvestTime;
- }
-
- public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) {
- this.lastNonEmptyHarvestTime = lastNonEmptyHarvestTime;
- }
-
- // And these are the Dataset counts from that last "non-empty" harvest:
- private Long harvestedDatasetCount;
- private Long failedDatasetCount;
- private Long deletedDatasetCount;
-
- public Long getLastHarvestedDatasetCount() {
- return harvestedDatasetCount;
- }
-
- public void setHarvestedDatasetCount(Long harvestedDatasetCount) {
- this.harvestedDatasetCount = harvestedDatasetCount;
- }
-
- public Long getLastFailedDatasetCount() {
- return failedDatasetCount;
- }
-
- public void setFailedDatasetCount(Long failedDatasetCount) {
- this.failedDatasetCount = failedDatasetCount;
- }
-
- public Long getLastDeletedDatasetCount() {
- return deletedDatasetCount;
- }
-
- public void setDeletedDatasetCount(Long deletedDatasetCount) {
- this.deletedDatasetCount = deletedDatasetCount;
- }
- */
+ }
private boolean scheduled;
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java
index c0a039e2d2b..bb3dc06972c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java
@@ -5,7 +5,6 @@
import io.gdcc.xoai.model.oaipmh.results.MetadataFormat;
import io.gdcc.xoai.model.oaipmh.results.Set;
import io.gdcc.xoai.serviceprovider.ServiceProvider;
-import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient;
import io.gdcc.xoai.serviceprovider.exceptions.BadArgumentException;
import io.gdcc.xoai.serviceprovider.exceptions.InvalidOAIResponse;
import io.gdcc.xoai.serviceprovider.exceptions.NoSetHierarchyException;
@@ -15,8 +14,10 @@
import edu.harvard.iq.dataverse.harvest.client.FastGetRecord;
import static edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.DATAVERSE_PROPRIETARY_METADATA_API;
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
+import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient;
import java.io.IOException;
import java.io.Serializable;
+import java.net.http.HttpClient;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.lang3.StringUtils;
@@ -24,14 +25,18 @@
import javax.xml.transform.TransformerException;
import java.util.ArrayList;
import java.util.Date;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
/**
*
* @author Leonid Andreev
*/
public class OaiHandler implements Serializable {
+ private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler");
public OaiHandler() {
@@ -65,6 +70,8 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException
this.fromDate = harvestingClient.getLastNonEmptyHarvestTime();
+ this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders());
+
this.harvestingClient = harvestingClient;
}
@@ -74,6 +81,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException
private String setName;
private Date fromDate;
private Boolean setListTruncated = false;
+ private Map customHeaders = null;
private ServiceProvider serviceProvider;
@@ -119,6 +127,14 @@ public boolean isSetListTruncated() {
return setListTruncated;
}
+ public Map getCustomHeaders() {
+ return this.customHeaders;
+ }
+
+ public void setCustomHeaders(Map customHeaders) {
+ this.customHeaders = customHeaders;
+ }
+
public ServiceProvider getServiceProvider() throws OaiHandlerException {
if (serviceProvider == null) {
if (baseOaiUrl == null) {
@@ -128,8 +144,15 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException {
context.withBaseUrl(baseOaiUrl);
context.withGranularity(Granularity.Second);
- // builds the client with the default parameters and the JDK http client:
- context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build());
+
+ JdkHttpOaiClient.Builder xoaiClientBuilder = JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl());
+ if (getCustomHeaders() != null) {
+ for (String headerName : getCustomHeaders().keySet()) {
+ logger.fine("adding custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName));
+ }
+ xoaiClientBuilder = xoaiClientBuilder.withCustomHeaders(getCustomHeaders());
+ }
+ context.withOAIClient(xoaiClientBuilder.build());
serviceProvider = new ServiceProvider(context);
}
@@ -235,7 +258,7 @@ public Iterator runListIdentifiers() throws OaiHandlerException {
}
- public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException {
+ public FastGetRecord runGetRecord(String identifier, HttpClient httpClient) throws OaiHandlerException {
if (StringUtils.isEmpty(this.baseOaiUrl)) {
throw new OaiHandlerException("Attempted to execute GetRecord without server URL specified.");
}
@@ -244,7 +267,7 @@ public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException
}
try {
- return new FastGetRecord(this.baseOaiUrl, identifier, this.metadataPrefix);
+ return new FastGetRecord(this, identifier, httpClient);
} catch (ParserConfigurationException pce) {
throw new OaiHandlerException("ParserConfigurationException executing GetRecord: "+pce.getMessage());
} catch (SAXException se) {
@@ -293,4 +316,28 @@ public void runIdentify() {
// (we will need it, both for validating the remote server,
// and to learn about its extended capabilities)
}
+
+ public Map makeCustomHeaders(String headersString) {
+ if (headersString != null) {
+ String[] parts = headersString.split("\\\\n");
+ HashMap ret = new HashMap<>();
+ logger.info("found "+parts.length+" parts");
+ int count = 0;
+ for (int i = 0; i < parts.length; i++) {
+ if (parts[i].indexOf(':') > 0) {
+ String headerName = parts[i].substring(0, parts[i].indexOf(':'));
+ String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip();
+
+ ret.put(headerName, headerValue);
+ count++;
+ }
+ // Malformed entries (no ':' separator, or one starting with ':') are silently skipped rather than rejected; consider throwing an exception instead.
+ }
+ if (ret.size() > 0) {
+ logger.info("returning the array with "+ret.size()+" name/value pairs");
+ return ret;
+ }
+ }
+ return null;
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java
index 5d0580708a9..d6ca85d17aa 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java
@@ -31,10 +31,13 @@
import edu.harvard.iq.dataverse.util.MailUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import io.gdcc.xoai.exceptions.OAIException;
+import io.gdcc.xoai.model.oaipmh.Granularity;
+import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
+import java.time.Instant;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.inject.Inject;
@@ -127,10 +130,9 @@ public void init(ServletConfig config) throws ServletException {
repositoryConfiguration = createRepositoryConfiguration();
- xoaiRepository = new Repository()
+ xoaiRepository = new Repository(repositoryConfiguration)
.withSetRepository(setRepository)
- .withItemRepository(itemRepository)
- .withConfiguration(repositoryConfiguration);
+ .withItemRepository(itemRepository);
dataProvider = new DataProvider(getXoaiContext(), getXoaiRepository());
}
@@ -193,23 +195,29 @@ private RepositoryConfiguration createRepositoryConfiguration() {
}
// The admin email address associated with this installation:
// (Note: if the setting does not exist, we are going to assume that they
- // have a reason not to want to advertise their email address, so no
- // email will be shown in the output of Identify.
+ // have a reason not to want to advertise their email address.)
InternetAddress systemEmailAddress = MailUtil.parseSystemAddress(settingsService.getValueForKey(SettingsServiceBean.Key.SystemEmail));
-
- RepositoryConfiguration repositoryConfiguration = RepositoryConfiguration.defaults()
- .withEnableMetadataAttributes(true)
- .withRepositoryName(repositoryName)
- .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai")
+ String systemEmailLabel = systemEmailAddress != null ? systemEmailAddress.getAddress() : "donotreply@localhost";
+
+ RepositoryConfiguration configuration = new RepositoryConfiguration.RepositoryConfigurationBuilder()
+ .withAdminEmail(systemEmailLabel)
.withCompression("gzip")
.withCompression("deflate")
- .withAdminEmail(systemEmailAddress != null ? systemEmailAddress.getAddress() : null)
- .withDeleteMethod(DeletedRecord.TRANSIENT)
+ .withGranularity(Granularity.Second)
+ .withResumptionTokenFormat(new SimpleResumptionTokenFormat().withGranularity(Granularity.Second))
+ .withRepositoryName(repositoryName)
+ .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai")
+ .withEarliestDate(Instant.EPOCH) // this is NOT something we really want to be doing, but this will be corrected once PR9316 is merged
.withMaxListIdentifiers(maxListIdentifiers)
+ .withMaxListSets(maxListSets)
.withMaxListRecords(maxListRecords)
- .withMaxListSets(maxListSets);
+ .withDeleteMethod(DeletedRecord.TRANSIENT)
+ .withEnableMetadataAttributes(true)
+ .withRequireFromAfterEarliest(false)
+ .build();
+
- return repositoryConfiguration;
+ return configuration;
}
/**
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java
index 905479c4e0d..22e2c6c8d78 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java
@@ -908,6 +908,7 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC
harvestingClient.setArchiveDescription(obj.getString("archiveDescription", null));
harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null));
harvestingClient.setHarvestingSet(obj.getString("set",null));
+ harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null));
return dataverseAlias;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
index a3e6f1412e8..9f5401f77d1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
@@ -37,6 +37,7 @@
import edu.harvard.iq.dataverse.dataset.DatasetUtil;
import edu.harvard.iq.dataverse.license.License;
import edu.harvard.iq.dataverse.globus.FileDetailsHolder;
+import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.DatasetFieldWalker;
@@ -668,6 +669,32 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) {
;
}
+ public static JsonObjectBuilder json(HarvestingClient harvestingClient) {
+ if (harvestingClient == null) {
+ return null;
+ }
+
+ return jsonObjectBuilder().add("nickName", harvestingClient.getName()).
+ add("dataverseAlias", harvestingClient.getDataverse().getAlias()).
+ add("type", harvestingClient.getHarvestType()).
+ add("style", harvestingClient.getHarvestStyle()).
+ add("harvestUrl", harvestingClient.getHarvestingUrl()).
+ add("archiveUrl", harvestingClient.getArchiveUrl()).
+ add("archiveDescription", harvestingClient.getArchiveDescription()).
+ add("metadataFormat", harvestingClient.getMetadataPrefix()).
+ add("set", harvestingClient.getHarvestingSet()).
+ add("schedule", harvestingClient.isScheduled() ? harvestingClient.getScheduleDescription() : "none").
+ add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive").
+ add("customHeaders", harvestingClient.getCustomHttpHeaders()).
+ add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()).
+ add("lastResult", harvestingClient.getLastResult()).
+ add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()).
+ add("lastNonEmpty", harvestingClient.getLastNonEmptyHarvestTime() == null ? null : harvestingClient.getLastNonEmptyHarvestTime().toString()).
+ add("lastDatasetsHarvested", harvestingClient.getLastHarvestedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastHarvestedDatasetCount().toString()).
+ add("lastDatasetsDeleted", harvestingClient.getLastDeletedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastDeletedDatasetCount().toString()).
+ add("lastDatasetsFailed", harvestingClient.getLastFailedDatasetCount()); // == null ? "N/A" : harvestingClient.getLastFailedDatasetCount().toString());
+ }
+
public static String format(Date d) {
return (d == null) ? null : Util.getDateTimeFormat().format(d);
}
@@ -704,7 +731,7 @@ public static JsonArrayBuilder getTabularFileTags(DataFile df) {
}
return tabularTags;
}
-
+
private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener {
Deque objectStack = new LinkedList<>();
diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index f55a0636126..b6fd6eb366b 100644
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -538,6 +538,10 @@ harvestclients.newClientDialog.nickname.helptext=Consists of letters, digits, un
harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty!
harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters.
harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used.
+harvestclients.newClientDialog.customHeader=Custom HTTP Header
+harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to requests, if required by this OAI server.
+harvestclients.newClientDialog.customHeader.watermark=Enter an HTTP header, as in header-name: header-value
+harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value"
harvestclients.newClientDialog.type=Server Protocol
harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported.
harvestclients.newClientDialog.type.OAI=OAI
diff --git a/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql b/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql
new file mode 100644
index 00000000000..fe6d717b2a3
--- /dev/null
+++ b/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql
@@ -0,0 +1 @@
+ALTER TABLE harvestingclient ADD COLUMN IF NOT EXISTS customhttpheaders TEXT;
diff --git a/src/main/webapp/harvestclients.xhtml b/src/main/webapp/harvestclients.xhtml
index 5c7b3482ed3..3c09ed4ecb0 100644
--- a/src/main/webapp/harvestclients.xhtml
+++ b/src/main/webapp/harvestclients.xhtml
@@ -277,6 +277,23 @@
+
+
+