Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/dataverse-parent/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@
<apache.httpcomponents.core.version>4.4.14</apache.httpcomponents.core.version>

<!-- NEW gdcc XOAI library implementation -->
<gdcc.xoai.version>5.0.0-RC1</gdcc.xoai.version>
<gdcc.xoai.version>5.0.0-RC2</gdcc.xoai.version>

<!-- Testing dependencies -->
<testcontainers.version>1.15.0</testcontainers.version>
Expand Down
31 changes: 2 additions & 29 deletions src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java
Original file line number Diff line number Diff line change
Expand Up @@ -373,13 +373,13 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname,
}

if (authenticatedUser == null || !authenticatedUser.isSuperuser()) {
return error(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs");
return error(Response.Status.FORBIDDEN, "Only admin users can run harvesting jobs");
}

HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname);

if (harvestingClient == null) {
return error(Response.Status.NOT_FOUND, "No such dataverse: "+clientNickname);
return error(Response.Status.NOT_FOUND, "No such client: "+clientNickname);
}

DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser);
Expand All @@ -391,35 +391,8 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname,
return this.accepted();
}

// This GET shows the status of the harvesting run in progress for this
// client, if present:
// @GET
// @Path("{nickName}/run")
// TODO:

// This DELETE kills the harvesting run in progress for this client,
// if present:
// @DELETE
// @Path("{nickName}/run")
// TODO:





/* Auxiliary, helper methods: */

/*
@Deprecated
public static JsonArrayBuilder harvestingConfigsAsJsonArray(List<Dataverse> harvestingDataverses) {
JsonArrayBuilder hdArr = Json.createArrayBuilder();

for (Dataverse hd : harvestingDataverses) {
hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig()));
}
return hdArr;
}*/

public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) {
if (harvestingConfig == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public void harvestRecord(String baseURL, String identifier, String metadataPref
int responseCode = 0;

con = (HttpURLConnection) url.openConnection();
con.setRequestProperty("User-Agent", "DataverseHarvester/3.0");
con.setRequestProperty("User-Agent", "Dataverse Harvesting Client v5");
con.setRequestProperty("Accept-Encoding",
"compress, gzip, identify");
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ File retrieveProprietaryDataverseMetadata (HttpClient client, String remoteApiUr
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create(remoteApiUrl))
.GET()
.header("User-Agent", "DataverseHarvester/6.0")
.header("User-Agent", "Dataverse Harvesting Client v5")
.build();

HttpResponse<InputStream> response;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public DataverseXoaiItemRepository (OAIRecordServiceBean recordService, DatasetS
}

@Override
public ItemIdentifier getItem(String identifier) throws IdDoesNotExistException {
public ItemIdentifier getItemIdentifier(String identifier) throws IdDoesNotExistException {
// This method is called when ListMetadataFormats request specifies
// the identifier, requesting the formats available for this specific record.
// In our case, under the current implementation, we need to simply look
Expand Down
200 changes: 174 additions & 26 deletions src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java
Original file line number Diff line number Diff line change
@@ -1,58 +1,94 @@
package edu.harvard.iq.dataverse.api;

import java.util.logging.Logger;
import java.util.logging.Level;
import com.jayway.restassured.RestAssured;
import static com.jayway.restassured.RestAssured.given;
import com.jayway.restassured.path.json.JsonPath;
import org.junit.Test;
import com.jayway.restassured.response.Response;
import static javax.ws.rs.core.Response.Status.CREATED;
import static javax.ws.rs.core.Response.Status.UNAUTHORIZED;
import static javax.ws.rs.core.Response.Status.ACCEPTED;
import static javax.ws.rs.core.Response.Status.OK;
import static org.hamcrest.CoreMatchers.equalTo;
import static junit.framework.Assert.assertEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;

/**
* extremely minimal (for now) API tests for creating OAI clients.
* This class tests Harvesting Client functionality.
* Note that these methods test BOTH the proprietary Dataverse rest API for
* creating and managing harvesting clients, AND the underlying OAI-PMH
* harvesting functionality itself. I.e., we will use the Dataverse
* /api/harvest/clients/ api to run an actual harvest of a control set and
* then validate the resulting harvested content.
*/
public class HarvestingClientsIT {

private static final Logger logger = Logger.getLogger(HarvestingClientsIT.class.getCanonicalName());

private static final String harvestClientsApi = "/api/harvest/clients/";
private static final String harvestCollection = "root";
private static final String harvestUrl = "https://demo.dataverse.org/oai";
private static final String archiveUrl = "https://demo.dataverse.org";
private static final String harvestMetadataFormat = "oai_dc";
private static final String archiveDescription = "RestAssured harvesting client test";
private static final String HARVEST_CLIENTS_API = "/api/harvest/clients/";
private static final String ROOT_COLLECTION = "root";
private static final String HARVEST_URL = "https://demo.dataverse.org/oai";
private static final String ARCHIVE_URL = "https://demo.dataverse.org";
private static final String HARVEST_METADATA_FORMAT = "oai_dc";
private static final String ARCHIVE_DESCRIPTION = "RestAssured harvesting client test";
private static final String CONTROL_OAI_SET = "controlTestSet";
private static final int DATASETS_IN_CONTROL_SET = 7;
private static String normalUserAPIKey;
private static String adminUserAPIKey;
private static String harvestCollectionAlias;

/**
 * One-time fixture for every test in this class: points RestAssured at the
 * server under test, then prepares the shared accounts and the collection
 * that harvested content is imported into.
 */
@BeforeClass
public static void setUpClass() {
    RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();

    // The accounts (one admin, one non-admin) have to exist first, because
    // the collection below is created with the admin API key.
    setupUsers();

    // Target collection for the remote content harvested by the tests.
    setupCollection();
}

private void setupUsers() {
/**
 * Creates the two accounts shared by the tests in this class and stores
 * their API tokens in {@code normalUserAPIKey} and {@code adminUserAPIKey}.
 * The second account is passed to {@code UtilIT.makeSuperUser} before its
 * token is recorded as the admin key.
 */
private static void setupUsers() {
    // Regular (non-admin) account.
    Response normalUserResponse = UtilIT.createRandomUser();
    normalUserAPIKey = UtilIT.getApiTokenFromResponse(normalUserResponse);

    // Second account, promoted to superuser.
    Response adminUserResponse = UtilIT.createRandomUser();
    String adminUsername = UtilIT.getUsernameFromResponse(adminUserResponse);
    // The response of the promotion call was never inspected, so it is no
    // longer kept in an unused local.
    UtilIT.makeSuperUser(adminUsername);
    adminUserAPIKey = UtilIT.getApiTokenFromResponse(adminUserResponse);
}

private static void setupCollection() {
Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey);
createDataverseResponse.prettyPrint();
assertEquals(CREATED.getStatusCode(), createDataverseResponse.getStatusCode());

harvestCollectionAlias = UtilIT.getAliasFromResponse(createDataverseResponse);

private String normalUserAPIKey;
private String adminUserAPIKey;
// publish dataverse:
Response publishDataverse = UtilIT.publishDataverseViaNativeApi(harvestCollectionAlias, adminUserAPIKey);
assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode());
}

@Test
public void testCreateEditDeleteClient() {
setupUsers();
// This method focuses on testing the native Dataverse harvesting client
// API.

String nickName = UtilIT.getRandomString(6);


String clientApiPath = String.format(harvestClientsApi+"%s", nickName);
String clientApiPath = String.format(HARVEST_CLIENTS_API+"%s", nickName);
String clientJson = String.format("{\"dataverseAlias\":\"%s\","
+ "\"type\":\"oai\","
+ "\"harvestUrl\":\"%s\","
+ "\"archiveUrl\":\"%s\","
+ "\"metadataFormat\":\"%s\"}",
harvestCollection, harvestUrl, archiveUrl, harvestMetadataFormat);
ROOT_COLLECTION, HARVEST_URL, ARCHIVE_URL, HARVEST_METADATA_FORMAT);


// Try to create a client as normal user, should fail:
Expand All @@ -61,7 +97,7 @@ public void testCreateEditDeleteClient() {
.header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey)
.body(clientJson)
.post(clientApiPath);
assertEquals(401, rCreate.getStatusCode());
assertEquals(UNAUTHORIZED.getStatusCode(), rCreate.getStatusCode());


// Try to create the same as admin user, should succeed:
Expand All @@ -70,17 +106,17 @@ public void testCreateEditDeleteClient() {
.header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
.body(clientJson)
.post(clientApiPath);
assertEquals(201, rCreate.getStatusCode());
assertEquals(CREATED.getStatusCode(), rCreate.getStatusCode());

// Try to update the client we have just created:

String updateJson = String.format("{\"archiveDescription\":\"%s\"}", archiveDescription);
String updateJson = String.format("{\"archiveDescription\":\"%s\"}", ARCHIVE_DESCRIPTION);

Response rUpdate = given()
.header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
.body(updateJson)
.put(clientApiPath);
assertEquals(200, rUpdate.getStatusCode());
assertEquals(OK.getStatusCode(), rUpdate.getStatusCode());

// Now let's retrieve the client we've just created and edited:

Expand All @@ -89,34 +125,146 @@ public void testCreateEditDeleteClient() {

logger.info("getClient.getStatusCode(): " + getClientResponse.getStatusCode());
logger.info("getClient printresponse: " + getClientResponse.prettyPrint());
assertEquals(200, getClientResponse.getStatusCode());
assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode());

// ... and validate the values:

getClientResponse.then().assertThat()
.body("status", equalTo(AbstractApiBean.STATUS_OK))
.body("data.type", equalTo("oai"))
.body("data.nickName", equalTo(nickName))
.body("data.archiveDescription", equalTo(archiveDescription))
.body("data.dataverseAlias", equalTo(harvestCollection))
.body("data.harvestUrl", equalTo(harvestUrl))
.body("data.archiveUrl", equalTo(archiveUrl))
.body("data.metadataFormat", equalTo(harvestMetadataFormat));
.body("data.archiveDescription", equalTo(ARCHIVE_DESCRIPTION))
.body("data.dataverseAlias", equalTo(ROOT_COLLECTION))
.body("data.harvestUrl", equalTo(HARVEST_URL))
.body("data.archiveUrl", equalTo(ARCHIVE_URL))
.body("data.metadataFormat", equalTo(HARVEST_METADATA_FORMAT));

// Try to delete the client as normal user should fail:

Response rDelete = given()
.header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey)
.delete(clientApiPath);
logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode());
assertEquals(401, rDelete.getStatusCode());
assertEquals(UNAUTHORIZED.getStatusCode(), rDelete.getStatusCode());

// Try to delete as admin user should work:

rDelete = given()
.header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
.delete(clientApiPath);
logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode());
assertEquals(200, rDelete.getStatusCode());
assertEquals(OK.getStatusCode(), rDelete.getStatusCode());
}

/**
 * Creates a harvesting client for the control OAI set, starts a harvest
 * through the native API, polls the client once per second until the run is
 * no longer reported as in progress (at most 20 polls), validates the
 * reported result and finally deletes the client.
 *
 * @throws InterruptedException if the thread is interrupted while sleeping
 *         between status polls
 */
@Test
public void testHarvestingClientRun() throws InterruptedException {
    // Setup: create the client via the native API. Since that API is tested
    // somewhat extensively in the previous method, only the expected HTTP
    // status code is confirmed here.

    String nickName = UtilIT.getRandomString(6);

    String clientApiPath = String.format(HARVEST_CLIENTS_API+"%s", nickName);
    String clientJson = String.format("{\"dataverseAlias\":\"%s\","
            + "\"type\":\"oai\","
            + "\"harvestUrl\":\"%s\","
            + "\"archiveUrl\":\"%s\","
            + "\"set\":\"%s\","
            + "\"metadataFormat\":\"%s\"}",
            harvestCollectionAlias, HARVEST_URL, ARCHIVE_URL, CONTROL_OAI_SET, HARVEST_METADATA_FORMAT);

    Response createResponse = given()
            .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
            .body(clientJson)
            .post(clientApiPath);
    assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode());

    // API TEST 1. Run the harvest using the configuration (client) we have
    // just created

    String runHarvestApiPath = String.format(HARVEST_CLIENTS_API+"%s/run", nickName);

    // TODO? - verify that a non-admin user cannot perform this operation (401)

    Response runResponse = given()
            .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
            .post(runHarvestApiPath);
    assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode());

    // API TEST 2. As indicated by the ACCEPTED status code above, harvesting
    // is an asynchronous operation that will be performed in the background.
    // Verify that this "in progress" status is properly reported while it's
    // running, and that it completes in some reasonable amount of time.

    int i = 0;
    int maxWait = 20; // a very conservative interval; this harvest has no business taking this long
    do {
        // Give it an initial 1 sec. delay, to make sure the client state
        // has been updated in the database, which can take some appreciable
        // amount of time on a heavily-loaded server running a full suite of
        // tests:
        Thread.sleep(1000L);
        // keep checking the status of the client with the GET api:
        Response getClientResponse = given()
                .get(clientApiPath);

        assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode());
        JsonPath responseJsonPath = getClientResponse.body().jsonPath();
        assertNotNull("Invalid JSON in GET client response", responseJsonPath);
        assertEquals(AbstractApiBean.STATUS_OK, responseJsonPath.getString("status"));

        String clientStatus = responseJsonPath.getString("data.status");
        assertNotNull(clientStatus);

        if ("inProgress".equals(clientStatus) || "IN PROGRESS".equals(responseJsonPath.getString("data.lastResult"))) {
            // still running: count this poll and sleep for another second
            i++;
        } else {
            logger.info("getClientResponse.prettyPrint: "
                    + getClientResponse.prettyPrint());
            // Check the values in the response:
            // a) Confirm that the harvest has completed:
            assertEquals("Unexpected client status: "+clientStatus, "inActive", clientStatus);

            // b) Confirm that it has actually succeeded:
            assertEquals("Last harvest not reported a success (took "+i+" seconds)", "SUCCESS", responseJsonPath.getString("data.lastResult"));
            String harvestTimeStamp = responseJsonPath.getString("data.lastHarvest");
            assertNotNull(harvestTimeStamp);

            // c) Confirm that the other timestamps match:
            assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastSuccessful"));
            assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastNonEmpty"));

            // d) Confirm that the correct number of datasets have been harvested:
            assertEquals(DATASETS_IN_CONTROL_SET, responseJsonPath.getInt("data.lastDatasetsHarvested"));

            // ok, it looks like the harvest has completed successfully.
            break;
        }
    } while (i < maxWait);

    // Reported through the class logger, like the rest of this class,
    // rather than written straight to stdout.
    logger.info("Waited " + i + " seconds for the harvest to complete.");

    // Fail, with an explanatory message, if it hasn't completed in maxWait seconds
    assertTrue("Harvest still in progress after " + maxWait + " seconds", i < maxWait);

    // TODO(?) use the native Dataverses/Datasets apis to verify that the expected
    // datasets have been harvested. This may or may not be necessary, seeing
    // how we have already confirmed the number of successfully harvested
    // datasets from the control set; somewhat hard to imagine a practical
    // situation where that would not be enough (?).

    // Cleanup: delete the client

    Response deleteResponse = given()
            .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
            .delete(clientApiPath);
    logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode());
    assertEquals(OK.getStatusCode(), deleteResponse.getStatusCode());

}
}
Loading