From 2efd8a4e12967916032e315eb69fdecbd51a738b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 13 Jan 2023 16:14:25 -0500 Subject: [PATCH 1/9] A proof-of-concept quick implementation of "custom headers in OAI calls", #9231 --- .../iq/dataverse/api/HarvestingClients.java | 42 +-- .../harvest/client/HarvestingClient.java | 100 +------ .../client/oai/CustomJdkHttpXoaiClient.java | 259 ++++++++++++++++++ .../harvest/client/oai/OaiHandler.java | 49 +++- .../iq/dataverse/util/json/JsonParser.java | 1 + .../iq/dataverse/util/json/JsonPrinter.java | 29 +- 6 files changed, 355 insertions(+), 125 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java index b75cb687c62..9aea3adab8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java @@ -15,6 +15,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import javax.json.JsonObjectBuilder; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import java.io.IOException; @@ -88,7 +89,7 @@ public Response harvestingClients(@QueryParam("key") String apiKey) throws IOExc } if (retrievedHarvestingClient != null) { - hcArr.add(harvestingConfigAsJson(retrievedHarvestingClient)); + hcArr.add(JsonPrinter.json(retrievedHarvestingClient)); } } @@ -136,7 +137,7 @@ public Response harvestingClient(@PathParam("nickName") String nickName, @QueryP } try { - return ok(harvestingConfigAsJson(retrievedHarvestingClient)); + return ok(JsonPrinter.json(retrievedHarvestingClient)); } catch (Exception ex) { logger.warning("Unknown exception 
caught while trying to format harvesting client config as json: "+ex.getMessage()); return error( Response.Status.BAD_REQUEST, @@ -216,7 +217,7 @@ public Response createHarvestingClient(String jsonBody, @PathParam("nickName") S DataverseRequest req = createDataverseRequest(findUserOrDie()); harvestingClient = execCommand(new CreateHarvestingClientCommand(req, harvestingClient)); - return created( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient)); + return created( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -268,6 +269,8 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S } // Go through the supported editable fields and update the client accordingly: + // TODO: We may want to reevaluate whether we really want/need *all* + // of these fields to be editable. if (newHarvestingClient.getHarvestingUrl() != null) { harvestingClient.setHarvestingUrl(newHarvestingClient.getHarvestingUrl()); @@ -287,10 +290,13 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S if (newHarvestingClient.getHarvestStyle() != null) { harvestingClient.setHarvestStyle(newHarvestingClient.getHarvestStyle()); } + if (newHarvestingClient.getCustomHttpHeaders() != null) { + harvestingClient.setCustomHttpHeaders(newHarvestingClient.getCustomHttpHeaders()); + } // TODO: Make schedule configurable via this API too. 
harvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient)); - return ok( "/harvest/clients/" + nickName, harvestingConfigAsJson(harvestingClient)); + return ok( "/harvest/clients/" + nickName, JsonPrinter.json(harvestingClient)); // harvestingConfigAsJson(harvestingClient)); } catch (JsonParseException ex) { return error( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); @@ -390,32 +396,4 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } return this.accepted(); } - - /* Auxiliary, helper methods: */ - - public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { - if (harvestingConfig == null) { - return null; - } - - - return jsonObjectBuilder().add("nickName", harvestingConfig.getName()). - add("dataverseAlias", harvestingConfig.getDataverse().getAlias()). - add("type", harvestingConfig.getHarvestType()). - add("style", harvestingConfig.getHarvestStyle()). - add("harvestUrl", harvestingConfig.getHarvestingUrl()). - add("archiveUrl", harvestingConfig.getArchiveUrl()). - add("archiveDescription",harvestingConfig.getArchiveDescription()). - add("metadataFormat", harvestingConfig.getMetadataPrefix()). - add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). - add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none"). - add("status", harvestingConfig.isHarvestingNow() ? "inProgress" : "inActive"). - add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? "N/A" : harvestingConfig.getLastHarvestTime().toString()). - add("lastResult", harvestingConfig.getLastResult()). - add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()). - add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? 
"N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()). - add("lastDatasetsHarvested", harvestingConfig.getLastHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getLastHarvestedDatasetCount().toString()). - add("lastDatasetsDeleted", harvestingConfig.getLastDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getLastDeletedDatasetCount().toString()). - add("lastDatasetsFailed", harvestingConfig.getLastFailedDatasetCount() == null ? "N/A" : harvestingConfig.getLastFailedDatasetCount().toString()); - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index aeb010fad6d..d27ddc41b7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -234,6 +234,16 @@ public void setMetadataPrefix(String metadataPrefix) { this.metadataPrefix = metadataPrefix; } + private String customHttpHeaders; + + public String getCustomHttpHeaders() { + return customHttpHeaders; + } + + public void setCustomHttpHeaders(String customHttpHeaders) { + this.customHttpHeaders = customHttpHeaders; + } + // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 // TODO: should it be @OrderBy("startTime")? -- L.A. 4.4 @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @@ -345,95 +355,7 @@ public Long getLastDeletedDatasetCount() { return lastNonEmptyHarvest.getDeletedDatasetCount(); } return null; - } - - /* move the fields below to the new HarvestingClientRun class: - private String harvestResult; - - public String getResult() { - return harvestResult; - } - - public void setResult(String harvestResult) { - this.harvestResult = harvestResult; - } - - // "Last Harvest Time" is the last time we *attempted* to harvest - // from this remote resource. 
- // It wasn't necessarily a successful attempt! - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastHarvestTime; - - public Date getLastHarvestTime() { - return lastHarvestTime; - } - - public void setLastHarvestTime(Date lastHarvestTime) { - this.lastHarvestTime = lastHarvestTime; - } - - // This is the last "successful harvest" - i.e., the last time we - // tried to harvest, and got a response from the remote server. - // We may not have necessarily harvested any useful content though; - // the result may have been a "no content" or "no changes since the last harvest" - // response. - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastSuccessfulHarvestTime; - - public Date getLastSuccessfulHarvestTime() { - return lastSuccessfulHarvestTime; - } - - public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) { - this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime; - } - - // Finally, this is the time stamp from the last "non-empty" harvest. - // I.e. 
the last time we ran a harvest that actually resulted in - // some Datasets created, updated or deleted: - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastNonEmptyHarvestTime; - - public Date getLastNonEmptyHarvestTime() { - return lastNonEmptyHarvestTime; - } - - public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) { - this.lastNonEmptyHarvestTime = lastNonEmptyHarvestTime; - } - - // And these are the Dataset counts from that last "non-empty" harvest: - private Long harvestedDatasetCount; - private Long failedDatasetCount; - private Long deletedDatasetCount; - - public Long getLastHarvestedDatasetCount() { - return harvestedDatasetCount; - } - - public void setHarvestedDatasetCount(Long harvestedDatasetCount) { - this.harvestedDatasetCount = harvestedDatasetCount; - } - - public Long getLastFailedDatasetCount() { - return failedDatasetCount; - } - - public void setFailedDatasetCount(Long failedDatasetCount) { - this.failedDatasetCount = failedDatasetCount; - } - - public Long getLastDeletedDatasetCount() { - return deletedDatasetCount; - } - - public void setDeletedDatasetCount(Long deletedDatasetCount) { - this.deletedDatasetCount = deletedDatasetCount; - } - */ + } private boolean scheduled; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java new file mode 100644 index 00000000000..25c3a048219 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java @@ -0,0 +1,259 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package edu.harvard.iq.dataverse.harvest.client.oai; + +import io.gdcc.xoai.serviceprovider.client.OAIClient; + +import io.gdcc.xoai.serviceprovider.exceptions.OAIRequestException; +import io.gdcc.xoai.serviceprovider.parameters.Parameters; +import java.io.IOException; +import java.io.InputStream; +import static java.net.HttpURLConnection.HTTP_OK; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.charset.StandardCharsets; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; +import java.security.cert.X509Certificate; +import java.time.Duration; +import java.util.List; +import java.util.ListIterator; +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.http.Header; + +/** + * Sane default OAI Client implementation using JDK HTTP Client. Can only be used via builder in + * calling code. + * (this is essentially a copy of the final class JdkHttpOaiClient provided by + * gdcc.xoai, with the custom http headers added. proof of concept! + */ +public final class CustomJdkHttpXoaiClient extends OAIClient { + + private static final Logger log = LoggerFactory.getLogger(OAIClient.class.getCanonicalName()); + + // As these vars will be feed via the builder and those provide defaults and null-checks, + // we may assume FOR INTERNAL USE these are not null. + private final String baseUrl; + private final String userAgent; + private final Duration requestTimeout; + private final HttpClient httpClient; + // Custom headers are optional though, ok to be null: + private final List
customHeaders; + + + CustomJdkHttpXoaiClient( + String baseUrl, String userAgent, Duration requestTimeout, List
customHeaders, HttpClient httpClient) { + this.baseUrl = baseUrl; + this.userAgent = userAgent; + this.requestTimeout = requestTimeout; + this.httpClient = httpClient; + this.customHeaders = customHeaders; + } + + @Override + public InputStream execute(Parameters parameters) throws OAIRequestException { + try { + URI requestURI = URI.create(parameters.toUrl(this.baseUrl)); + + HttpRequest.Builder httpRequestBuilder = HttpRequest.newBuilder() + .uri(requestURI) + .GET() + .header("User-Agent", this.userAgent) + .timeout(requestTimeout); + + // add custom headers, if present: + if (customHeaders != null) { + ListIterator
iterator = customHeaders.listIterator(); + while (iterator.hasNext()) { + Header customHeader = iterator.next(); + httpRequestBuilder.header(customHeader.getName(), customHeader.getValue()); + } + } + + HttpRequest request = httpRequestBuilder.build(); + + HttpResponse response = + this.httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + + if (response.statusCode() == HTTP_OK) { + return response.body(); + } else { + // copy body of the response to string and send as exception message + throw new OAIRequestException( + "Query faild with status code " + + response.statusCode() + + ": " + + new String( + response.body().readAllBytes(), StandardCharsets.UTF_8)); + } + } catch (IllegalArgumentException | IOException | InterruptedException ex) { + // Hint by SonarCloud: + // https://sonarcloud.io/organizations/gdcc/rules?open=java%3AS2142&rule_key=java%3AS2142 + Thread.currentThread().interrupt(); + throw new OAIRequestException(ex); + } + } + + /*@Override + JdkHttpBuilder newBuilder() { + return new CustomJdkHttpXoaiClient.JdkHttpBuilder(); + }*/ + + /** + * Build an {@link OAIClient} using the JDK native HTTP client. You may use your own prepared + * {@link HttpClient.Builder} instead of the default one. + * + *

Provides defaults for request timeouts (60s) and user agent. Remember to set the base + * OAI-PMH URL via {@link #withBaseUrl(URL)}. An exception will occur on first request + * otherwise. + */ + public static final class JdkHttpBuilder implements OAIClient.Builder { + private String baseUrl = "Must be set via Builder.withBaseUrl()"; + private String userAgent = "XOAI Service Provider v5"; + private Duration requestTimeout = Duration.ofSeconds(60); + private List

customHeaders = null; + private final HttpClient.Builder httpClientBuilder; + + JdkHttpBuilder() { + this.httpClientBuilder = HttpClient.newBuilder(); + } + + /** + * While the default constructor can be accessed via {@link OAIClient#newBuilder()}, if + * someone provides a {@link HttpClient.Builder} (which might already contain + * configuration), happily work with it. + * + * @param httpClientBuilder Any (preconfigured) Java 11+ HTTP client builder + */ + public JdkHttpBuilder(HttpClient.Builder httpClientBuilder) { + this.httpClientBuilder = httpClientBuilder; + } + + @Override + public JdkHttpBuilder withBaseUrl(URL baseUrl) { + return this.withBaseUrl(baseUrl.toString()); + } + + @Override + public JdkHttpBuilder withBaseUrl(String baseUrl) { + try { + new URL(baseUrl).toURI(); + if (!baseUrl.startsWith("http")) { + throw new IllegalArgumentException("OAI-PMH supports HTTP/S only"); + } + this.baseUrl = baseUrl; + return this; + } catch (MalformedURLException | URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + @Override + public JdkHttpBuilder withConnectTimeout(Duration timeout) { + // validation is done by client builder! 
+ httpClientBuilder.connectTimeout(timeout); + return this; + } + + @Override + public JdkHttpBuilder withRequestTimeout(Duration timeout) { + if (timeout == null || timeout.isNegative()) { + throw new IllegalArgumentException("Timeout must not be null or negative value"); + } + this.requestTimeout = timeout; + return this; + } + + @Override + public JdkHttpBuilder withUserAgent(String userAgent) { + if (userAgent == null || userAgent.isBlank()) { + throw new IllegalArgumentException("User agent must not be null or empty/blank"); + } + this.userAgent = userAgent; + return this; + } + + @Override + public JdkHttpBuilder withFollowRedirects() { + this.httpClientBuilder.followRedirects(HttpClient.Redirect.NORMAL); + return this; + } + + @Override + public JdkHttpBuilder withInsecureSSL() { + // create insecure context (switch of certificate checks) + httpClientBuilder.sslContext(insecureContext()); + + // warn if the hostname verification is still active + // (users must do this themselves - it's a global setting and might pose a security + // risk) + if (!Boolean.getBoolean("jdk.internal.httpclient.disableHostnameVerification")) { + log.warn( + "You must disable JDK HTTP Client Host Name Verification globally via" + + " system property" + + " -Djdk.internal.httpclient.disableHostnameVerification=true for" + + " XOAI Client connections to insecure SSL servers. Don't do this in" + + " a production setup!"); + } + return this; + } + + public JdkHttpBuilder withCustomHeaders(List
customHeaders) { + // This can be null, as these headers are optional + this.customHeaders = customHeaders; + return this; + } + + @Override + public CustomJdkHttpXoaiClient build() { + return new CustomJdkHttpXoaiClient( + this.baseUrl, this.userAgent, this.requestTimeout, this.customHeaders, httpClientBuilder.build()); + } + + private static SSLContext insecureContext() { + TrustManager[] noopTrustManager = + new TrustManager[] { + new X509TrustManager() { + // This is insecure by design, we warn users and they need to do sth. to + // use it. + // Safely ignore the Sonarcloud message. + @SuppressWarnings("java:S4830") + public void checkClientTrusted(X509Certificate[] xcs, String string) { + // we want to accept every certificate - intentionally left blank + } + // This is insecure by design, we warn users and they need to do sth. to + // use it. + // Safely ignore the Sonarcloud message. + @SuppressWarnings("java:S4830") + public void checkServerTrusted(X509Certificate[] xcs, String string) { + // we want to accept every certificate - intentionally left blank + } + + public X509Certificate[] getAcceptedIssuers() { + return new X509Certificate[0]; + } + } + }; + try { + SSLContext sc = SSLContext.getInstance("TLSv1.2"); + sc.init(null, noopTrustManager, null); + return sc; + } catch (KeyManagementException | NoSuchAlgorithmException ex) { + log.error("Could not build insecure SSL context. 
Might cause NPE.", ex); + return null; + } + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index c0a039e2d2b..ae297416ff9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -5,7 +5,6 @@ import io.gdcc.xoai.model.oaipmh.results.MetadataFormat; import io.gdcc.xoai.model.oaipmh.results.Set; import io.gdcc.xoai.serviceprovider.ServiceProvider; -import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import io.gdcc.xoai.serviceprovider.exceptions.BadArgumentException; import io.gdcc.xoai.serviceprovider.exceptions.InvalidOAIResponse; import io.gdcc.xoai.serviceprovider.exceptions.NoSetHierarchyException; @@ -26,12 +25,15 @@ import java.util.Date; import java.util.Iterator; import java.util.List; +import java.util.logging.Logger; +import org.apache.http.message.BasicHeader; /** * * @author Leonid Andreev */ public class OaiHandler implements Serializable { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler"); public OaiHandler() { @@ -65,6 +67,9 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException this.fromDate = harvestingClient.getLastNonEmptyHarvestTime(); + this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders()); + //test: this.customHeaders = makeCustomHeaders("x-api-key: xxx-yyy-zzz\\ny-api-key: zzz-yyy-xxx"); + this.harvestingClient = harvestingClient; } @@ -74,6 +79,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException private String setName; private Date fromDate; private Boolean setListTruncated = false; + private List customHeaders = null; private ServiceProvider serviceProvider; @@ -119,6 +125,14 @@ public boolean isSetListTruncated() { return setListTruncated; } + 
public List getCustomHeaders() { + return this.customHeaders; + } + + public void setCustomHeaders(List customHeaders) { + this.customHeaders = customHeaders; + } + public ServiceProvider getServiceProvider() throws OaiHandlerException { if (serviceProvider == null) { if (baseOaiUrl == null) { @@ -128,8 +142,17 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); - // builds the client with the default parameters and the JDK http client: - context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); + // builds the client based on the default client provided in xoai, + // with the same default parameters and the JDK http client, with + // just the (optional) custom headers added: + // (this is proof-of-concept implementation; there gotta be a prettier way to do this) + //context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); + if (getCustomHeaders() != null) { + for (org.apache.http.Header customHeader : getCustomHeaders()) { + logger.info("will add custom header; name: "+customHeader.getName()+", value: "+customHeader.getValue()); + } + } + context.withOAIClient((new CustomJdkHttpXoaiClient.JdkHttpBuilder()).withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); serviceProvider = new ServiceProvider(context); } @@ -293,4 +316,24 @@ public void runIdentify() { // (we will need it, both for validating the remote server, // and to learn about its extended capabilities) } + + private List makeCustomHeaders(String headersString) { + if (headersString != null) { + List ret = new ArrayList<>(); + String[] parts = headersString.split("\\\\n"); + + for (int i = 0; i < parts.length; i++) { + if (parts[i].indexOf(':') > 0) { + String headerName = parts[i].substring(0, parts[i].indexOf(':')); + String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip(); + ret.add(new 
BasicHeader(headerName, headerValue)); + } + // simply skipping it if malformed; or we could throw an exception - ? + } + if (!ret.isEmpty()) { + return ret; + } + } + return null; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 905479c4e0d..22e2c6c8d78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -908,6 +908,7 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC harvestingClient.setArchiveDescription(obj.getString("archiveDescription", null)); harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null)); harvestingClient.setHarvestingSet(obj.getString("set",null)); + harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); return dataverseAlias; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index dc547f2e52c..1ab596569a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -37,6 +37,7 @@ import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.globus.FileDetailsHolder; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; @@ -666,6 +667,32 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { ; } + public static JsonObjectBuilder json(HarvestingClient harvestingClient) { + if (harvestingClient == null) { + return null; + } + + return jsonObjectBuilder().add("nickName", harvestingClient.getName()). 
+ add("dataverseAlias", harvestingClient.getDataverse().getAlias()). + add("type", harvestingClient.getHarvestType()). + add("style", harvestingClient.getHarvestStyle()). + add("harvestUrl", harvestingClient.getHarvestingUrl()). + add("archiveUrl", harvestingClient.getArchiveUrl()). + add("archiveDescription", harvestingClient.getArchiveDescription()). + add("metadataFormat", harvestingClient.getMetadataPrefix()). + add("set", harvestingClient.getHarvestingSet()). + add("schedule", harvestingClient.isScheduled() ? harvestingClient.getScheduleDescription() : "none"). + add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive"). + add("customHeaders", harvestingClient.getCustomHttpHeaders()). + add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()). + add("lastResult", harvestingClient.getLastResult()). + add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()). + add("lastNonEmpty", harvestingClient.getLastNonEmptyHarvestTime() == null ? null : harvestingClient.getLastNonEmptyHarvestTime().toString()). + add("lastDatasetsHarvested", harvestingClient.getLastHarvestedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastHarvestedDatasetCount().toString()). + add("lastDatasetsDeleted", harvestingClient.getLastDeletedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastDeletedDatasetCount().toString()). + add("lastDatasetsFailed", harvestingClient.getLastFailedDatasetCount()); // == null ? "N/A" : harvestingClient.getLastFailedDatasetCount().toString()); + } + public static String format(Date d) { return (d == null) ? 
null : Util.getDateTimeFormat().format(d); } @@ -702,7 +729,7 @@ public static JsonArrayBuilder getTabularFileTags(DataFile df) { } return tabularTags; } - + private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener { Deque objectStack = new LinkedList<>(); From 019fb749b11abdba75e3d058c9c5d38b07e50bae Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Jan 2023 13:53:40 -0500 Subject: [PATCH 2/9] Added the custom header configuration to the harvesting clients GUI (#9231). --- .../iq/dataverse/HarvestingClientsPage.java | 46 +++++++++++++++++-- src/main/java/propertyFiles/Bundle.properties | 4 ++ src/main/webapp/harvestclients.xhtml | 17 +++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index bc83c15dcd7..4430a7be73a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -79,7 +79,7 @@ public class HarvestingClientsPage implements java.io.Serializable { private Dataverse dataverse; private Long dataverseId = null; private HarvestingClient selectedClient; - private boolean setListTruncated = false; + private boolean setListTruncated = false; //private static final String solrDocIdentifierDataset = "dataset_"; @@ -245,6 +245,7 @@ public void editClient(HarvestingClient harvestingClient) { this.newNickname = harvestingClient.getName(); this.newHarvestingUrl = harvestingClient.getHarvestingUrl(); + this.customHeader = harvestingClient.getCustomHttpHeaders(); this.initialSettingsValidated = false; // TODO: do we want to try and contact the server, again, to make @@ -340,6 +341,7 @@ public void createClient(ActionEvent ae) { getSelectedDestinationDataverse().getHarvestingClientConfigs().add(newHarvestingClient); newHarvestingClient.setHarvestingUrl(newHarvestingUrl); + 
newHarvestingClient.setCustomHttpHeaders(customHeader); if (!StringUtils.isEmpty(newOaiSet)) { newHarvestingClient.setHarvestingSet(newOaiSet); } @@ -426,6 +428,7 @@ public void saveClient(ActionEvent ae) { // nickname is not editable for existing clients: //harvestingClient.setName(newNickname); harvestingClient.setHarvestingUrl(newHarvestingUrl); + harvestingClient.setCustomHttpHeaders(customHeader); harvestingClient.setHarvestingSet(newOaiSet); harvestingClient.setMetadataPrefix(newMetadataFormat); harvestingClient.setHarvestStyle(newHarvestingStyle); @@ -635,6 +638,23 @@ public boolean validateServerUrlOAI() { return false; } + public boolean validateCustomHeader() { + if (!StringUtils.isEmpty(getCustomHeader())) { + // TODO: put this method somewhere else as a static utility + + // check that it's looking like "{header-name}: {header value}" at least + if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getCustomHeader())) { + FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), + new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); + + return false; + } + } + + // this setting is optional + return true; + } + public void validateInitialSettings() { if (isHarvestTypeOAI()) { boolean nicknameValidated = true; @@ -644,9 +664,10 @@ public void validateInitialSettings() { destinationDataverseValidated = validateSelectedDataverse(); } boolean urlValidated = validateServerUrlOAI(); + boolean customHeaderValidated = validateCustomHeader(); - if (nicknameValidated && destinationDataverseValidated && urlValidated) { - // In Create mode we want to run all 3 validation tests; this is why + if (nicknameValidated && destinationDataverseValidated && urlValidated && customHeaderValidated) { + // In Create mode we want to run all 4 validation tests; this is why // we are not doing "if ((validateNickname() && validateServerUrlOAI())" // in the line above. 
-- L.A. 4.4 May 2016. @@ -688,6 +709,7 @@ public void backToStepThree() { UIInput newClientNicknameInputField; UIInput newClientUrlInputField; + UIInput newClientCustomHeaderInputField; UIInput hiddenInputField; /*UISelectOne*/ UIInput metadataFormatMenu; UIInput remoteArchiveStyleMenu; @@ -695,6 +717,7 @@ public void backToStepThree() { private String newNickname = ""; private String newHarvestingUrl = ""; + private String customHeader = null; private boolean initialSettingsValidated = false; private String newOaiSet = ""; private String newMetadataFormat = ""; @@ -718,6 +741,7 @@ public void initNewClient(ActionEvent ae) { //this.selectedClient = new HarvestingClient(); this.newNickname = ""; this.newHarvestingUrl = ""; + this.customHeader = null; this.initialSettingsValidated = false; this.newOaiSet = ""; this.newMetadataFormat = ""; @@ -762,6 +786,14 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } + public String getCustomHeader() { + return customHeader; + } + + public void setCustomHeader(String customHeader) { + this.customHeader = customHeader; + } + public int getHarvestTypeRadio() { return this.harvestTypeRadio; } @@ -871,6 +903,14 @@ public void setNewClientUrlInputField(UIInput newClientInputField) { this.newClientUrlInputField = newClientInputField; } + public UIInput getNewClientCustomHeaderInputField() { + return newClientCustomHeaderInputField; + } + + public void setNewClientCustomHeaderInputField(UIInput newClientInputField) { + this.newClientCustomHeaderInputField = newClientInputField; + } + public UIInput getHiddenInputField() { return hiddenInputField; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 62531d32bb2..e2007338e08 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -538,6 +538,10 @@ harvestclients.newClientDialog.nickname.helptext=Consists of 
letters, digits, un harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty! harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters. harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used. +harvestclients.newClientDialog.customHeader=Custom HTTP Header +harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to OAI requests +harvestclients.newClientDialog.customHeader.watermark=Enter the header as in header-name: header-value +harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value" harvestclients.newClientDialog.type=Server Protocol harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported. harvestclients.newClientDialog.type.OAI=OAI diff --git a/src/main/webapp/harvestclients.xhtml b/src/main/webapp/harvestclients.xhtml index 5c7b3482ed3..a5f271e8e75 100644 --- a/src/main/webapp/harvestclients.xhtml +++ b/src/main/webapp/harvestclients.xhtml @@ -277,6 +277,23 @@ + + +
+ +
+ + + +

#{bundle['harvestclients.newClientDialog.customHeader.helptext']}

+
+
From 7749b01995dd37895a0ca01162322268562aab84 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 10:36:06 -0500 Subject: [PATCH 3/9] The remaining, mostly finalized changes for the "custom header" feature for OAI harvesting (#9231) --- .../source/admin/harvestclients.rst | 2 + doc/sphinx-guides/source/api/native-api.rst | 4 +- modules/dataverse-parent/pom.xml | 7 +- .../iq/dataverse/HarvestingClientsPage.java | 13 +- .../harvest/client/FastGetRecord.java | 124 +++++---- .../harvest/client/HarvesterServiceBean.java | 12 +- .../client/oai/CustomJdkHttpXoaiClient.java | 259 ------------------ .../harvest/client/oai/OaiHandler.java | 41 +-- src/main/java/propertyFiles/Bundle.properties | 4 +- src/main/webapp/harvestclients.xhtml | 2 +- 10 files changed, 112 insertions(+), 356 deletions(-) delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index e94a6aa1730..37204003026 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,6 +21,8 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. 
If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. + How to Stop a Harvesting Run in Progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 589b947f15e..609f1487177 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3296,7 +3296,8 @@ The following optional fields are supported: - archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." - set: The OAI set on the remote server. If not supplied, will default to none, i.e., "harvest everything". - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). 
- +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. + Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. An example JSON file would look like this:: @@ -3308,6 +3309,7 @@ An example JSON file would look like this:: "archiveUrl": "https://zenodo.org", "archiveDescription": "Moissonné depuis la collection LMOPS de l'entrepôt Zenodo. En cliquant sur ce jeu de données, vous serez redirigé vers Zenodo.", "metadataFormat": "oai_dc", + "customHeaders": "x-oai-api-key: xxxyyyzzz", "set": "user-lmops" } diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 3911e9d5bbb..600741dc972 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -164,7 +164,8 @@ 4.4.14 - 5.0.0-RC2 + + 5.0.0-SNAPSHOT 1.15.0 @@ -324,7 +325,7 @@ Local repository for hosting jars not available from network repositories. 
file://${project.basedir}/local_lib - oss-sonatype oss-sonatype @@ -335,7 +336,7 @@ true - --> + diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index 4430a7be73a..5be7578f7f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteHarvestingClientCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; @@ -24,7 +23,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Locale; import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; @@ -557,6 +555,9 @@ public boolean validateServerUrlOAI() { if (!StringUtils.isEmpty(getNewHarvestingUrl())) { OaiHandler oaiHandler = new OaiHandler(getNewHarvestingUrl()); + if (getNewCustomHeader() != null) { + oaiHandler.setCustomHeaders(oaiHandler.makeCustomHeaders(getNewCustomHeader())); + } boolean success = true; String message = null; @@ -639,11 +640,11 @@ public boolean validateServerUrlOAI() { } public boolean validateCustomHeader() { - if (!StringUtils.isEmpty(getCustomHeader())) { + if (!StringUtils.isEmpty(getNewCustomHeader())) { // TODO: put this method somewhere else as a static utility // check that it's looking like "{header-name}: {header value}" at least - if (!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getCustomHeader())) { + if 
(!Pattern.matches("^[a-zA-Z0-9\\_\\-]+:.*",getNewCustomHeader())) { FacesContext.getCurrentInstance().addMessage(getNewClientCustomHeaderInputField().getClientId(), new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.customHeader.invalid"))); @@ -786,11 +787,11 @@ public void setNewHarvestingUrl(String newHarvestingUrl) { this.newHarvestingUrl = newHarvestingUrl; } - public String getCustomHeader() { + public String getNewCustomHeader() { return customHeader; } - public void setCustomHeader(String customHeader) { + public void setNewCustomHeader(String customHeader) { this.customHeader = customHeader; } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java index c5e3a93e2df..402d0d8ef91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java @@ -19,8 +19,8 @@ */ package edu.harvard.iq.dataverse.harvest.client; +import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler; import java.io.IOException; -import java.io.FileNotFoundException; import java.io.InputStream; import java.io.StringReader; @@ -31,9 +31,14 @@ import java.io.FileOutputStream; import java.io.PrintWriter; -import java.net.HttpURLConnection; +import static java.net.HttpURLConnection.HTTP_OK; import java.net.MalformedURLException; -import java.net.URL; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Map; +import java.util.Optional; import java.util.zip.GZIPInputStream; import java.util.zip.InflaterInputStream; @@ -84,17 +89,18 @@ public class FastGetRecord { /** * Client-side GetRecord verb constructor * - * @param baseURL the baseURL of the server to be queried + * @param oaiHandler the configured OaiHande running this harvest + * 
@param identifier Record identifier + * @param httpClient jdk HttpClient used to make http requests * @exception MalformedURLException the baseURL is bad * @exception SAXException the xml response is bad * @exception IOException an I/O error occurred + * @exception TransformerException if it fails to parse the service portion of the record */ - public FastGetRecord(String baseURL, String identifier, String metadataPrefix) - throws IOException, ParserConfigurationException, SAXException, + public FastGetRecord(OaiHandler oaiHandler, String identifier, HttpClient httpClient) throws IOException, ParserConfigurationException, SAXException, TransformerException { - harvestRecord (baseURL, identifier, metadataPrefix); - + harvestRecord (oaiHandler.getBaseOaiUrl(), identifier, oaiHandler.getMetadataPrefix(), oaiHandler.getCustomHeaders(), httpClient); } private String errorMessage = null; @@ -117,57 +123,63 @@ public boolean isDeleted () { } - public void harvestRecord(String baseURL, String identifier, String metadataPrefix) throws IOException, - ParserConfigurationException, SAXException, TransformerException { + public void harvestRecord(String baseURL, String identifier, String metadataPrefix, Map customHeaders, HttpClient httpClient) throws IOException, + ParserConfigurationException, SAXException, TransformerException{ xmlInputFactory = javax.xml.stream.XMLInputFactory.newInstance(); - String requestURL = getRequestURL(baseURL, identifier, metadataPrefix); + InputStream in; + + // This was one other place where the Harvester code was still using + // the obsolete java.net.ttpUrlConnection that didn't get replaced with + // the new java.net.http.HttpClient during the first pas of the XOAI + // rewrite. (L.A.) 
- InputStream in = null; - URL url = new URL(requestURL); - HttpURLConnection con = null; - int responseCode = 0; - - con = (HttpURLConnection) url.openConnection(); - con.setRequestProperty("User-Agent", "Dataverse Harvesting Client v5"); - con.setRequestProperty("Accept-Encoding", - "compress, gzip, identify"); - try { - responseCode = con.getResponseCode(); - //logger.debug("responseCode=" + responseCode); - } catch (FileNotFoundException e) { - //logger.info(requestURL, e); - responseCode = HttpURLConnection.HTTP_UNAVAILABLE; - } - - // TODO: -- L.A. - // - // support for cookies; - // support for limited retry attempts -- ? - // implement reading of the stream as filterinputstream -- ? - // -- that could make it a little faster still. -- L.A. - - - - if (responseCode == 200) { - - String contentEncoding = con.getHeaderField("Content-Encoding"); - //logger.debug("contentEncoding=" + contentEncoding); - - // support for the standard compress/gzip/deflate compression - // schemes: - if ("compress".equals(contentEncoding)) { - ZipInputStream zis = new ZipInputStream(con.getInputStream()); - zis.getNextEntry(); - in = zis; - } else if ("gzip".equals(contentEncoding)) { - in = new GZIPInputStream(con.getInputStream()); - } else if ("deflate".equals(contentEncoding)) { - in = new InflaterInputStream(con.getInputStream()); - } else { - in = con.getInputStream(); + if (httpClient == null) { + throw new IOException("Null Http Client, cannot make a GetRecord call to obtain the metadata."); + } + + HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() + .uri(URI.create(requestURL)) + .GET() + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") + .header("Accept-Encoding", "compress, gzip"); + + if (customHeaders != null) { + for (String headerName : customHeaders.keySet()) { + requestBuilder.header(headerName, customHeaders.get(headerName)); + } + } + + HttpRequest request = requestBuilder.build(); + HttpResponse response; + + try { + response = 
httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IOException("Failed to connect to the remote dataverse server to obtain GetRecord metadata"); + } + + int responseCode = response.statusCode(); + + if (responseCode == HTTP_OK) { + InputStream inputStream = response.body(); + Optional contentEncoding = response.headers().firstValue("Content-Encoding"); + + // support for the standard gzip encoding: + in = inputStream; + if (contentEncoding.isPresent()) { + if (contentEncoding.get().equals("compress")) { + ZipInputStream zis = new ZipInputStream(inputStream); + zis.getNextEntry(); + in = zis; + } else if (contentEncoding.get().equals("gzip")) { + in = new GZIPInputStream(inputStream); + } else if (contentEncoding.get().equals("deflate")) { + in = new InflaterInputStream(inputStream); + } } // We are going to read the OAI header and SAX-parse it for the @@ -185,9 +197,7 @@ public void harvestRecord(String baseURL, String identifier, String metadataPref FileOutputStream tempFileStream = null; PrintWriter metadataOut = null; - savedMetadataFile = File.createTempFile("meta", ".tmp"); - - + savedMetadataFile = File.createTempFile("meta", ".tmp"); int mopen = 0; int mclose = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 0e9ffb20653..40bd45ecb30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -228,11 +228,9 @@ private void harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harv throw new IOException(errorMessage); } - if (DATAVERSE_PROPRIETARY_METADATA_FORMAT.equals(oaiHandler.getMetadataPrefix())) { - // If we are harvesting native Dataverse json, we'll also need this - // jdk http client 
to make direct calls to the remote Dataverse API: - httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); - } + // We will use this jdk http client to make direct calls to the remote + // OAI (or remote Dataverse API) to obtain the metadata records + httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.ALWAYS).build(); try { for (Iterator
idIter = oaiHandler.runListIdentifiers(); idIter.hasNext();) { @@ -295,7 +293,7 @@ private Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, P tempFile = retrieveProprietaryDataverseMetadata(httpClient, metadataApiUrl); } else { - FastGetRecord record = oaiHandler.runGetRecord(identifier); + FastGetRecord record = oaiHandler.runGetRecord(identifier, httpClient); errMessage = record.getErrorMessage(); deleted = record.isDeleted(); tempFile = record.getMetadataFile(); @@ -360,7 +358,7 @@ File retrieveProprietaryDataverseMetadata (HttpClient client, String remoteApiUr HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(remoteApiUrl)) .GET() - .header("User-Agent", "Dataverse Harvesting Client v5") + .header("User-Agent", "XOAI Service Provider v5 (Dataverse)") .build(); HttpResponse response; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java deleted file mode 100644 index 25c3a048219..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/CustomJdkHttpXoaiClient.java +++ /dev/null @@ -1,259 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ -package edu.harvard.iq.dataverse.harvest.client.oai; - -import io.gdcc.xoai.serviceprovider.client.OAIClient; - -import io.gdcc.xoai.serviceprovider.exceptions.OAIRequestException; -import io.gdcc.xoai.serviceprovider.parameters.Parameters; -import java.io.IOException; -import java.io.InputStream; -import static java.net.HttpURLConnection.HTTP_OK; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.nio.charset.StandardCharsets; -import java.security.KeyManagementException; -import java.security.NoSuchAlgorithmException; -import java.security.cert.X509Certificate; -import java.time.Duration; -import java.util.List; -import java.util.ListIterator; -import javax.net.ssl.SSLContext; -import javax.net.ssl.TrustManager; -import javax.net.ssl.X509TrustManager; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.http.Header; - -/** - * Sane default OAI Client implementation using JDK HTTP Client. Can only be used via builder in - * calling code. - * (this is essentially a copy of the final class JdkHttpOaiClient provided by - * gdcc.xoai, with the custom http headers added. proof of concept! - */ -public final class CustomJdkHttpXoaiClient extends OAIClient { - - private static final Logger log = LoggerFactory.getLogger(OAIClient.class.getCanonicalName()); - - // As these vars will be feed via the builder and those provide defaults and null-checks, - // we may assume FOR INTERNAL USE these are not null. - private final String baseUrl; - private final String userAgent; - private final Duration requestTimeout; - private final HttpClient httpClient; - // Custom headers are optional though, ok to be null: - private final List
customHeaders; - - - CustomJdkHttpXoaiClient( - String baseUrl, String userAgent, Duration requestTimeout, List
customHeaders, HttpClient httpClient) { - this.baseUrl = baseUrl; - this.userAgent = userAgent; - this.requestTimeout = requestTimeout; - this.httpClient = httpClient; - this.customHeaders = customHeaders; - } - - @Override - public InputStream execute(Parameters parameters) throws OAIRequestException { - try { - URI requestURI = URI.create(parameters.toUrl(this.baseUrl)); - - HttpRequest.Builder httpRequestBuilder = HttpRequest.newBuilder() - .uri(requestURI) - .GET() - .header("User-Agent", this.userAgent) - .timeout(requestTimeout); - - // add custom headers, if present: - if (customHeaders != null) { - ListIterator
iterator = customHeaders.listIterator(); - while (iterator.hasNext()) { - Header customHeader = iterator.next(); - httpRequestBuilder.header(customHeader.getName(), customHeader.getValue()); - } - } - - HttpRequest request = httpRequestBuilder.build(); - - HttpResponse response = - this.httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); - - if (response.statusCode() == HTTP_OK) { - return response.body(); - } else { - // copy body of the response to string and send as exception message - throw new OAIRequestException( - "Query faild with status code " - + response.statusCode() - + ": " - + new String( - response.body().readAllBytes(), StandardCharsets.UTF_8)); - } - } catch (IllegalArgumentException | IOException | InterruptedException ex) { - // Hint by SonarCloud: - // https://sonarcloud.io/organizations/gdcc/rules?open=java%3AS2142&rule_key=java%3AS2142 - Thread.currentThread().interrupt(); - throw new OAIRequestException(ex); - } - } - - /*@Override - JdkHttpBuilder newBuilder() { - return new CustomJdkHttpXoaiClient.JdkHttpBuilder(); - }*/ - - /** - * Build an {@link OAIClient} using the JDK native HTTP client. You may use your own prepared - * {@link HttpClient.Builder} instead of the default one. - * - *

Provides defaults for request timeouts (60s) and user agent. Remember to set the base - * OAI-PMH URL via {@link #withBaseUrl(URL)}. An exception will occur on first request - * otherwise. - */ - public static final class JdkHttpBuilder implements OAIClient.Builder { - private String baseUrl = "Must be set via Builder.withBaseUrl()"; - private String userAgent = "XOAI Service Provider v5"; - private Duration requestTimeout = Duration.ofSeconds(60); - private List

customHeaders = null; - private final HttpClient.Builder httpClientBuilder; - - JdkHttpBuilder() { - this.httpClientBuilder = HttpClient.newBuilder(); - } - - /** - * While the default constructor can be accessed via {@link OAIClient#newBuilder()}, if - * someone provides a {@link HttpClient.Builder} (which might already contain - * configuration), happily work with it. - * - * @param httpClientBuilder Any (preconfigured) Java 11+ HTTP client builder - */ - public JdkHttpBuilder(HttpClient.Builder httpClientBuilder) { - this.httpClientBuilder = httpClientBuilder; - } - - @Override - public JdkHttpBuilder withBaseUrl(URL baseUrl) { - return this.withBaseUrl(baseUrl.toString()); - } - - @Override - public JdkHttpBuilder withBaseUrl(String baseUrl) { - try { - new URL(baseUrl).toURI(); - if (!baseUrl.startsWith("http")) { - throw new IllegalArgumentException("OAI-PMH supports HTTP/S only"); - } - this.baseUrl = baseUrl; - return this; - } catch (MalformedURLException | URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } - - @Override - public JdkHttpBuilder withConnectTimeout(Duration timeout) { - // validation is done by client builder! 
- httpClientBuilder.connectTimeout(timeout); - return this; - } - - @Override - public JdkHttpBuilder withRequestTimeout(Duration timeout) { - if (timeout == null || timeout.isNegative()) { - throw new IllegalArgumentException("Timeout must not be null or negative value"); - } - this.requestTimeout = timeout; - return this; - } - - @Override - public JdkHttpBuilder withUserAgent(String userAgent) { - if (userAgent == null || userAgent.isBlank()) { - throw new IllegalArgumentException("User agent must not be null or empty/blank"); - } - this.userAgent = userAgent; - return this; - } - - @Override - public JdkHttpBuilder withFollowRedirects() { - this.httpClientBuilder.followRedirects(HttpClient.Redirect.NORMAL); - return this; - } - - @Override - public JdkHttpBuilder withInsecureSSL() { - // create insecure context (switch of certificate checks) - httpClientBuilder.sslContext(insecureContext()); - - // warn if the hostname verification is still active - // (users must do this themselves - it's a global setting and might pose a security - // risk) - if (!Boolean.getBoolean("jdk.internal.httpclient.disableHostnameVerification")) { - log.warn( - "You must disable JDK HTTP Client Host Name Verification globally via" - + " system property" - + " -Djdk.internal.httpclient.disableHostnameVerification=true for" - + " XOAI Client connections to insecure SSL servers. Don't do this in" - + " a production setup!"); - } - return this; - } - - public JdkHttpBuilder withCustomHeaders(List
customHeaders) { - // This can be null, as these headers are optional - this.customHeaders = customHeaders; - return this; - } - - @Override - public CustomJdkHttpXoaiClient build() { - return new CustomJdkHttpXoaiClient( - this.baseUrl, this.userAgent, this.requestTimeout, this.customHeaders, httpClientBuilder.build()); - } - - private static SSLContext insecureContext() { - TrustManager[] noopTrustManager = - new TrustManager[] { - new X509TrustManager() { - // This is insecure by design, we warn users and they need to do sth. to - // use it. - // Safely ignore the Sonarcloud message. - @SuppressWarnings("java:S4830") - public void checkClientTrusted(X509Certificate[] xcs, String string) { - // we want to accept every certificate - intentionally left blank - } - // This is insecure by design, we warn users and they need to do sth. to - // use it. - // Safely ignore the Sonarcloud message. - @SuppressWarnings("java:S4830") - public void checkServerTrusted(X509Certificate[] xcs, String string) { - // we want to accept every certificate - intentionally left blank - } - - public X509Certificate[] getAcceptedIssuers() { - return new X509Certificate[0]; - } - } - }; - try { - SSLContext sc = SSLContext.getInstance("TLSv1.2"); - sc.init(null, noopTrustManager, null); - return sc; - } catch (KeyManagementException | NoSuchAlgorithmException ex) { - log.error("Could not build insecure SSL context. 
Might cause NPE.", ex); - return null; - } - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index ae297416ff9..d9fa9b27c5a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -14,8 +14,10 @@ import edu.harvard.iq.dataverse.harvest.client.FastGetRecord; import static edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.DATAVERSE_PROPRIETARY_METADATA_API; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import io.gdcc.xoai.serviceprovider.client.JdkHttpOaiClient; import java.io.IOException; import java.io.Serializable; +import java.net.http.HttpClient; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.lang3.StringUtils; @@ -23,10 +25,11 @@ import javax.xml.transform.TransformerException; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.logging.Logger; -import org.apache.http.message.BasicHeader; /** * @@ -68,7 +71,6 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException this.fromDate = harvestingClient.getLastNonEmptyHarvestTime(); this.customHeaders = makeCustomHeaders(harvestingClient.getCustomHttpHeaders()); - //test: this.customHeaders = makeCustomHeaders("x-api-key: xxx-yyy-zzz\\ny-api-key: zzz-yyy-xxx"); this.harvestingClient = harvestingClient; } @@ -79,7 +81,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException private String setName; private Date fromDate; private Boolean setListTruncated = false; - private List customHeaders = null; + private Map customHeaders = null; private ServiceProvider serviceProvider; @@ -125,11 +127,11 @@ public boolean isSetListTruncated() { return 
setListTruncated; } - public List getCustomHeaders() { + public Map getCustomHeaders() { return this.customHeaders; } - public void setCustomHeaders(List customHeaders) { + public void setCustomHeaders(Map customHeaders) { this.customHeaders = customHeaders; } @@ -142,17 +144,12 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); - // builds the client based on the default client provided in xoai, - // with the same default parameters and the JDK http client, with - // just the (optional) custom headers added: - // (this is proof-of-concept implementation; there gotta be a prettier way to do this) - //context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(baseOaiUrl).build()); if (getCustomHeaders() != null) { - for (org.apache.http.Header customHeader : getCustomHeaders()) { - logger.info("will add custom header; name: "+customHeader.getName()+", value: "+customHeader.getValue()); + for (String headerName : getCustomHeaders().keySet()) { + logger.info("will add custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); } } - context.withOAIClient((new CustomJdkHttpXoaiClient.JdkHttpBuilder()).withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); + context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); serviceProvider = new ServiceProvider(context); } @@ -258,7 +255,7 @@ public Iterator
runListIdentifiers() throws OaiHandlerException { } - public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException { + public FastGetRecord runGetRecord(String identifier, HttpClient httpClient) throws OaiHandlerException { if (StringUtils.isEmpty(this.baseOaiUrl)) { throw new OaiHandlerException("Attempted to execute GetRecord without server URL specified."); } @@ -267,7 +264,7 @@ public FastGetRecord runGetRecord(String identifier) throws OaiHandlerException } try { - return new FastGetRecord(this.baseOaiUrl, identifier, this.metadataPrefix); + return new FastGetRecord(this, identifier, httpClient); } catch (ParserConfigurationException pce) { throw new OaiHandlerException("ParserConfigurationException executing GetRecord: "+pce.getMessage()); } catch (SAXException se) { @@ -317,20 +314,24 @@ public void runIdentify() { // and to learn about its extended capabilities) } - private List makeCustomHeaders(String headersString) { + public Map makeCustomHeaders(String headersString) { if (headersString != null) { - List ret = new ArrayList<>(); String[] parts = headersString.split("\\\\n"); - + HashMap ret = new HashMap<>(); + logger.info("found "+parts.length+" parts"); + int count = 0; for (int i = 0; i < parts.length; i++) { if (parts[i].indexOf(':') > 0) { String headerName = parts[i].substring(0, parts[i].indexOf(':')); String headerValue = parts[i].substring(parts[i].indexOf(':')+1).strip(); - ret.add(new BasicHeader(headerName, headerValue)); + + ret.put(headerName, headerValue); + count++; } // simply skipping it if malformed; or we could throw an exception - ? 
} - if (!ret.isEmpty()) { + if (ret.size() > 0) { + logger.info("returning the array with "+ret.size()+" name/value pairs"); return ret; } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index e2007338e08..51d9b73085b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -539,8 +539,8 @@ harvestclients.newClientDialog.nickname.required=Client nickname cannot be empty harvestclients.newClientDialog.nickname.invalid=Client nickname can contain only letters, digits, underscores (_) and dashes (-); and must be at most 30 characters. harvestclients.newClientDialog.nickname.alreadyused=This nickname is already used. harvestclients.newClientDialog.customHeader=Custom HTTP Header -harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to OAI requests -harvestclients.newClientDialog.customHeader.watermark=Enter the header as in header-name: header-value +harvestclients.newClientDialog.customHeader.helptext=(Optional) Custom HTTP header to add to requests, if required by this OAI server. +harvestclients.newClientDialog.customHeader.watermark=Enter an http header, as in header-name: header-value harvestclients.newClientDialog.customHeader.invalid=Client header name can only contain letters, digits, underscores (_) and dashes (-); the entire header string must be in the form of "header-name: header-value" harvestclients.newClientDialog.type=Server Protocol harvestclients.newClientDialog.type.helptext=Only the OAI server protocol is currently supported. diff --git a/src/main/webapp/harvestclients.xhtml b/src/main/webapp/harvestclients.xhtml index a5f271e8e75..3c09ed4ecb0 100644 --- a/src/main/webapp/harvestclients.xhtml +++ b/src/main/webapp/harvestclients.xhtml @@ -287,7 +287,7 @@
From 7888fcde8b78154a77e2d49375b815777b3a6d5d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 10:41:10 -0500 Subject: [PATCH 4/9] backslashes in the sphinx sources (#9231) --- doc/sphinx-guides/source/admin/harvestclients.rst | 2 +- doc/sphinx-guides/source/api/native-api.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/harvestclients.rst b/doc/sphinx-guides/source/admin/harvestclients.rst index 37204003026..02783e4b97a 100644 --- a/doc/sphinx-guides/source/admin/harvestclients.rst +++ b/doc/sphinx-guides/source/admin/harvestclients.rst @@ -21,7 +21,7 @@ Clients are managed on the "Harvesting Clients" page accessible via the :doc:`da The process of creating a new, or editing an existing client, is largely self-explanatory. It is split into logical steps, in a way that allows the user to go back and correct the entries made earlier. The process is interactive and guidance text is provided. For example, the user is required to enter the URL of the remote OAI server. When they click *Next*, the application will try to establish a connection to the server in order to verify that it is working, and to obtain the information about the sets of metadata records and the metadata formats it supports. The choices offered to the user on the next page will be based on this extra information. If the application fails to establish a connection to the remote archive at the address specified, or if an invalid response is received, the user is given an opportunity to check and correct the URL they entered. -Note that as of 5.13, a new entry "Custom HTTP Header" has been added to the Step 1. of Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. 
Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is however on the very first, Step 1. screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in the Step 2. of creating or editing a client. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. +Note that as of 5.13, a new entry "Custom HTTP Header" has been added to Step 1 of the Create or Edit form. This optional field can be used to configure this client with a specific HTTP header to be added to every OAI request. This is to accommodate a (rare) use case where the remote server may require a special token of some kind in order to offer some content not available to other clients. Most OAI servers offer the same publicly-available content to all clients, so few admins will have a use for this feature. It is, however, placed on the very first (Step 1) screen in case the OAI server requires this token even for the "ListSets" and "ListMetadataFormats" requests, which need to be sent in Step 2 of creating or editing a client. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. How to Stop a Harvesting Run in Progress ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 609f1487177..2782f4d1d08 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -3296,7 +3296,7 @@ The following optional fields are supported: - archiveDescription: What the name suggests. If not supplied, will default to "This Dataset is harvested from our partners. Clicking the link will take you directly to the archival source of the data." - set: The OAI set on the remote server.
If not supplied, will default to none, i.e., "harvest everything". - style: Defaults to "default" - a generic OAI archive. (Make sure to use "dataverse" when configuring harvesting from another Dataverse installation). -- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\n` - actual "backslash" and "n" characters, not a single "new line" character. +- customHeaders: This can be used to configure this client with a specific HTTP header that will be added to every OAI request. This is to accommodate a use case where the remote server requires this header to supply some form of a token in order to offer some content not available to other clients. See the example below. Multiple headers can be supplied separated by `\\n` - actual "backslash" and "n" characters, not a single "new line" character. Generally, the API will accept the output of the GET version of the API for an existing client as valid input, but some fields will be ignored. For example, as of writing this there is no way to configure a harvesting schedule via this API. From cb4765d042b64023bda4acf8bc47a149655682da Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 23 Jan 2023 16:27:07 -0500 Subject: [PATCH 5/9] Checked in something earlier that is prone to null pointers, due to a change in behavior in the latest gdcc.xoai - that I knew, but had forgotten about over the weekend. 
(#9231) --- .../iq/dataverse/harvest/client/oai/OaiHandler.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index d9fa9b27c5a..bb3dc06972c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -144,12 +144,15 @@ public ServiceProvider getServiceProvider() throws OaiHandlerException { context.withBaseUrl(baseOaiUrl); context.withGranularity(Granularity.Second); + + JdkHttpOaiClient.Builder xoaiClientBuilder = JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()); if (getCustomHeaders() != null) { for (String headerName : getCustomHeaders().keySet()) { - logger.info("will add custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); - } + logger.fine("adding custom header; name: "+headerName+", value: "+getCustomHeaders().get(headerName)); + } + xoaiClientBuilder = xoaiClientBuilder.withCustomHeaders(getCustomHeaders()); } - context.withOAIClient(JdkHttpOaiClient.newBuilder().withBaseUrl(getBaseOaiUrl()).withCustomHeaders(getCustomHeaders()).build()); + context.withOAIClient(xoaiClientBuilder.build()); serviceProvider = new ServiceProvider(context); } From a20fd764eca235422a4863275a76ffeb2b7093e3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 26 Jan 2023 11:33:33 -0500 Subject: [PATCH 6/9] new repositoryconfiguration framework in xoai (#9231) --- .../server/web/servlet/OAIServlet.java | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 5d0580708a9..611fe7e9ba8 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -31,10 +31,15 @@ import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import io.gdcc.xoai.exceptions.OAIException; +import io.gdcc.xoai.model.oaipmh.Granularity; +import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; import org.apache.commons.lang3.StringUtils; import java.io.IOException; +import java.time.Instant; +import java.util.Arrays; +import java.util.Date; import java.util.logging.Logger; import javax.ejb.EJB; import javax.inject.Inject; @@ -127,10 +132,9 @@ public void init(ServletConfig config) throws ServletException { repositoryConfiguration = createRepositoryConfiguration(); - xoaiRepository = new Repository() + xoaiRepository = new Repository(repositoryConfiguration) .withSetRepository(setRepository) - .withItemRepository(itemRepository) - .withConfiguration(repositoryConfiguration); + .withItemRepository(itemRepository); dataProvider = new DataProvider(getXoaiContext(), getXoaiRepository()); } @@ -196,8 +200,25 @@ private RepositoryConfiguration createRepositoryConfiguration() { // have a reason not to want to advertise their email address, so no // email will be shown in the output of Identify. 
InternetAddress systemEmailAddress = MailUtil.parseSystemAddress(settingsService.getValueForKey(SettingsServiceBean.Key.SystemEmail)); - - RepositoryConfiguration repositoryConfiguration = RepositoryConfiguration.defaults() + + RepositoryConfiguration repositoryConfiguration = new RepositoryConfiguration.RepositoryConfigurationBuilder().withAdminEmail(systemEmailAddress.getAddress()) + //.withDescription(null) + .withCompression("gzip") + .withCompression("deflate") + .withGranularity(Granularity.Second) + .withResumptionTokenFormat(new SimpleResumptionTokenFormat().withGranularity(Granularity.Second)) + .withRepositoryName(repositoryName) + .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") + .withEarliestDate(Instant.EPOCH) + .withMaxListIdentifiers(maxListIdentifiers) + .withMaxListSets(maxListSets) + .withMaxListRecords(maxListRecords) + .withDeleteMethod(DeletedRecord.TRANSIENT) + .withEnableMetadataAttributes(true) + .withRequireFromAfterEarliest(false) + .build(); + + /*RepositoryConfiguration repositoryConfiguration = new RepositoryConfiguration.defaults() .withEnableMetadataAttributes(true) .withRepositoryName(repositoryName) .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") @@ -207,7 +228,7 @@ private RepositoryConfiguration createRepositoryConfiguration() { .withDeleteMethod(DeletedRecord.TRANSIENT) .withMaxListIdentifiers(maxListIdentifiers) .withMaxListRecords(maxListRecords) - .withMaxListSets(maxListSets); + .withMaxListSets(maxListSets);*/ return repositoryConfiguration; } From e30d8146d9412c0e2a360e5996d1be43cf1df9cf Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 26 Jan 2023 11:54:11 -0500 Subject: [PATCH 7/9] cleaning up the oai servlet a little bit (#9231) --- .../harvest/server/web/servlet/OAIServlet.java | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java 
b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 611fe7e9ba8..5621487ef4e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -38,8 +38,6 @@ import java.io.IOException; import java.time.Instant; -import java.util.Arrays; -import java.util.Date; import java.util.logging.Logger; import javax.ejb.EJB; import javax.inject.Inject; @@ -209,7 +207,7 @@ private RepositoryConfiguration createRepositoryConfiguration() { .withResumptionTokenFormat(new SimpleResumptionTokenFormat().withGranularity(Granularity.Second)) .withRepositoryName(repositoryName) .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") - .withEarliestDate(Instant.EPOCH) + .withEarliestDate(Instant.EPOCH) // this is NOT something we really want to be doing, but this will be corrected once PR9316 is merged .withMaxListIdentifiers(maxListIdentifiers) .withMaxListSets(maxListSets) .withMaxListRecords(maxListRecords) @@ -218,17 +216,6 @@ private RepositoryConfiguration createRepositoryConfiguration() { .withRequireFromAfterEarliest(false) .build(); - /*RepositoryConfiguration repositoryConfiguration = new RepositoryConfiguration.defaults() - .withEnableMetadataAttributes(true) - .withRepositoryName(repositoryName) - .withBaseUrl(systemConfig.getDataverseSiteUrl()+"/oai") - .withCompression("gzip") - .withCompression("deflate") - .withAdminEmail(systemEmailAddress != null ? systemEmailAddress.getAddress() : null) - .withDeleteMethod(DeletedRecord.TRANSIENT) - .withMaxListIdentifiers(maxListIdentifiers) - .withMaxListRecords(maxListRecords) - .withMaxListSets(maxListSets);*/ return repositoryConfiguration; } From cff1d1c23302ae9fb55246304b8daf40eb97c039 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 26 Jan 2023 13:04:32 -0500 Subject: [PATCH 8/9] advertised email address cleanup, per updated xoai. 
(#9231) --- .../harvest/server/web/servlet/OAIServlet.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 5621487ef4e..d6ca85d17aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -195,12 +195,12 @@ private RepositoryConfiguration createRepositoryConfiguration() { } // The admin email address associated with this installation: // (Note: if the setting does not exist, we are going to assume that they - // have a reason not to want to advertise their email address, so no - // email will be shown in the output of Identify. + // have a reason not to want to advertise their email address; a placeholder address is used instead.) InternetAddress systemEmailAddress = MailUtil.parseSystemAddress(settingsService.getValueForKey(SettingsServiceBean.Key.SystemEmail)); + String systemEmailLabel = systemEmailAddress != null ? systemEmailAddress.getAddress() : "donotreply@localhost"; - RepositoryConfiguration repositoryConfiguration = new RepositoryConfiguration.RepositoryConfigurationBuilder().withAdminEmail(systemEmailAddress.getAddress()) - //.withDescription(null) + RepositoryConfiguration configuration = new RepositoryConfiguration.RepositoryConfigurationBuilder() + .withAdminEmail(systemEmailLabel) .withCompression("gzip") .withCompression("deflate") .withGranularity(Granularity.Second) @@ -217,7 +217,7 @@ private RepositoryConfiguration createRepositoryConfiguration() { .build(); - return repositoryConfiguration; + return configuration; } /** From a669aa9ca8140e0bf5ced04f38d4ed8c4aceea26 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 26 Jan 2023 14:04:14 -0500 Subject: [PATCH 9/9] the renamed flyway script, doh.
#9231 --- .../db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql diff --git a/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql b/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql new file mode 100644 index 00000000000..fe6d717b2a3 --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.5__9231_custom_headers_oai_requests.sql @@ -0,0 +1 @@ +ALTER TABLE harvestingclient ADD COLUMN IF NOT EXISTS customhttpheaders TEXT;