Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/release-notes/8525-ingest-optional-skip.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Tabular ingest can now be skipped when adding a file via the native API, by setting `"tabIngest":"false"` in the `jsonData` parameter.
5 changes: 3 additions & 2 deletions doc/sphinx-guides/source/api/native-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,7 @@ When adding a file to a dataset, you can optionally specify the following:
- A description of the file.
- The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset.
- Whether or not the file is restricted.
- Whether or not the file should go through :doc:`tabular ingest </user/tabulardataingest/index>`. Set the ``tabIngest`` parameter to ``false`` to skip tabular ingest; if the parameter is not specified, it defaults to ``true``.

Note that when a Dataverse instance is configured to use S3 storage with direct upload enabled, there is API support to send a file directly to S3. This is more complex and is described in the :doc:`/developers/s3-direct-upload-api` guide.

Expand All @@ -1315,13 +1316,13 @@ In the curl example below, all of the above are specified but they are optional.
export SERVER_URL=https://demo.dataverse.org
export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB

curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false"}' "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID"
curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "tabIngest":"false"}' "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID"

The fully expanded example above (without environment variables) looks like this:

.. code-block:: bash

curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -F file=@data.tsv -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false"}' "https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB"
curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -F file=@data.tsv -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "tabIngest":"false"}' "https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB"

You should expect a 201 ("CREATED") response and JSON indicating the database id that has been assigned to your newly uploaded file.

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -3591,7 +3591,7 @@ public String save() {
// have been created in the dataset.
dataset = datasetService.find(dataset.getId());

List<DataFile> filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getEditVersion(), newFiles, null);
List<DataFile> filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getEditVersion(), newFiles, null, true);
newFiles.clear();

// and another update command:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1021,7 +1021,7 @@ public String save() {
}

// Try to save the NEW files permanently:
List<DataFile> filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null);
List<DataFile> filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null, true);

// reset the working list of fileMetadatas, as to only include the ones
// that have been added to the version successfully:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au
throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Unable to add file(s) to dataset: " + violation.getMessage() + " The invalid value was \"" + violation.getInvalidValue() + "\".");
} else {

ingestService.saveAndAddFilesToDataset(editVersion, dataFiles, null);
ingestService.saveAndAddFilesToDataset(editVersion, dataFiles, null, true);

}
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -501,9 +501,11 @@ private boolean runAddReplaceFile(Dataset owner,
if (!phase1Success){
return false;
}


return runAddReplacePhase2();
boolean tabIngest = true;
if (optionalFileParams != null) {
tabIngest = optionalFileParams.getTabIngest();
}
return runAddReplacePhase2(tabIngest);

}

Expand Down Expand Up @@ -653,7 +655,7 @@ private boolean runAddReplacePhase1(Dataset owner,


public boolean runReplaceFromUI_Phase2(){
return runAddReplacePhase2();
return runAddReplacePhase2(true);
}


Expand Down Expand Up @@ -744,7 +746,7 @@ public boolean updateLabelDescriptionRestrictedFromUI(String label, String descr
*
* @return
*/
private boolean runAddReplacePhase2(){
private boolean runAddReplacePhase2(boolean tabIngest){

if (this.hasError()){
return false; // possible to have errors already...
Expand All @@ -756,7 +758,7 @@ private boolean runAddReplacePhase2(){
}

msgt("step_060_addFilesViaIngestService");
if (!this.step_060_addFilesViaIngestService()){
if (!this.step_060_addFilesViaIngestService(tabIngest)){
return false;

}
Expand Down Expand Up @@ -1570,7 +1572,7 @@ private boolean step_055_loadOptionalFileParams(OptionalFileParams optionalFileP
return true;
}

private boolean step_060_addFilesViaIngestService(){
private boolean step_060_addFilesViaIngestService(boolean tabIngest){

if (this.hasError()){
return false;
Expand All @@ -1583,7 +1585,7 @@ private boolean step_060_addFilesViaIngestService(){
}

int nFiles = finalFileList.size();
finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace);
finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace, tabIngest);

if (nFiles != finalFileList.size()) {
if (nFiles == 1) {
Expand Down Expand Up @@ -2244,4 +2246,4 @@ public String getFileName()
1) Recovery from adding same file and duplicate being found
- draft ok
- published version - nope
*/
*/
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ public class OptionalFileParams {

private boolean restrict = false;
public static final String RESTRICT_ATTR_NAME = "restrict";

private boolean tabIngest = true;
public static final String TAB_INGEST_ATTR_NAME = "tabIngest";

private String storageIdentifier;
public static final String STORAGE_IDENTIFIER_ATTR_NAME = "storageIdentifier";
Expand Down Expand Up @@ -173,7 +176,15 @@ public void setRestriction(boolean restrict){
public boolean getRestriction(){
return this.restrict;
}


/**
 * Sets whether the file should go through tabular ingest.
 *
 * @param tabIngest {@code true} to allow tabular ingest,
 *                  {@code false} to skip it
 */
public void setTabIngest(boolean tabIngest) {
this.tabIngest = tabIngest;
}

/**
 * Returns whether the file should go through tabular ingest.
 * The flag is {@code true} unless it has been changed, either through
 * {@link #setTabIngest(boolean)} or by a "tabIngest" attribute in the
 * JSON parameters.
 *
 * @return {@code true} if tabular ingest is allowed, {@code false} if it
 *         should be skipped
 */
public boolean getTabIngest() {
return this.tabIngest;
}

public boolean hasCategories(){
if ((categories == null)||(this.categories.isEmpty())){
return false;
Expand Down Expand Up @@ -347,6 +358,14 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{

this.restrict = Boolean.valueOf(jsonObj.get(RESTRICT_ATTR_NAME).getAsString());
}

// -------------------------------
// get tabIngest as boolean; any value other than "true" (ignoring case)
// is read as false, i.e. tabular ingest is skipped
// -------------------------------
if ((jsonObj.has(TAB_INGEST_ATTR_NAME)) && (!jsonObj.get(TAB_INGEST_ATTR_NAME).isJsonNull())){

this.tabIngest = Boolean.valueOf(jsonObj.get(TAB_INGEST_ATTR_NAME).getAsString());
}

// -------------------------------
// get storage identifier as string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,11 @@ public class IngestServiceBean {
// attached to the Dataset via some cascade path (for example, via
// DataFileCategory objects, if any were already assigned to the files).
// It must be called before we attempt to permanently save the files in
// the database by calling the Save command on the dataset and/or version.
public List<DataFile> saveAndAddFilesToDataset(DatasetVersion version, List<DataFile> newFiles, DataFile fileToReplace) {
// the database by calling the Save command on the dataset and/or version.
public List<DataFile> saveAndAddFilesToDataset(DatasetVersion version,
List<DataFile> newFiles,
DataFile fileToReplace,
boolean tabIngest) {
List<DataFile> ret = new ArrayList<>();

if (newFiles != null && newFiles.size() > 0) {
Expand Down Expand Up @@ -327,7 +330,7 @@ public List<DataFile> saveAndAddFilesToDataset(DatasetVersion version, List<Data
String fileName = fileMetadata.getLabel();

boolean metadataExtracted = false;
if (FileUtil.canIngestAsTabular(dataFile)) {
if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) {
/*
* Note that we don't try to ingest the file right away - instead we mark it as
* "scheduled for ingest", then at the end of the save process it will be queued
Expand Down
2 changes: 2 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -2583,5 +2583,7 @@ public void testFilesUnchangedAfterDatasetMetadataUpdate() throws IOException {
.body("data.latestVersion.files[0].directoryLabel", equalTo("code"));

}



}
60 changes: 57 additions & 3 deletions src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import com.jayway.restassured.RestAssured;
import com.jayway.restassured.response.Response;
import java.util.logging.Logger;
import org.junit.BeforeClass;

import org.junit.Test;
import org.junit.BeforeClass;
import com.jayway.restassured.path.json.JsonPath;
import com.jayway.restassured.path.xml.XmlPath;
import static edu.harvard.iq.dataverse.api.AccessIT.apiToken;
Expand Down Expand Up @@ -39,11 +40,10 @@
import static org.hamcrest.CoreMatchers.startsWith;
import static org.hamcrest.CoreMatchers.nullValue;
import org.hamcrest.Matchers;
import org.junit.AfterClass;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import org.junit.Ignore;

public class FilesIT {

Expand Down Expand Up @@ -1784,4 +1784,58 @@ public void testRange() throws IOException {

}

@Test
public void testAddFileToDatasetSkipTabIngest() throws IOException, InterruptedException {

Response createUser = UtilIT.createRandomUser();
assertEquals(200, createUser.getStatusCode());
String username = UtilIT.getUsernameFromResponse(createUser);
String apiToken = UtilIT.getApiTokenFromResponse(createUser);

Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
assertEquals(201, createDataverseResponse.getStatusCode());
String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);

Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
assertEquals(201, createDatasetResponse.getStatusCode());
Integer datasetIdInt = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");

String pathToFile = "src/test/resources/sav/dct.sav";
String jsonAsString = "{\"description\":\"My description.\",\"directoryLabel\":\"data/subdir1\",\"categories\":[\"Data\"], \"restrict\":\"false\", \"tabIngest\":\"false\"}";
Response r = UtilIT.uploadFileViaNative(datasetIdInt.toString(), pathToFile, jsonAsString, apiToken);
logger.info(r.prettyPrint());
assertEquals(200, r.getStatusCode());

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry to keep bugging you but, since this hasn't been merged yet, could you please insert another sleepForLock statement here? (Otherwise, if the ingest somehow ends up happening, despite the tabIngest:false above, we will likely fail to detect that - because the file name will be checked before it gets updated!)
It should be the same entry as in line 1820:

assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetIdInt, "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));

Thanks. (I should've thought about this sooner of course).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added sleep for lock.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, thanks.

assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetIdInt, "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));

Long dataFileId = JsonPath.from(r.body().asString()).getLong("data.files[0].dataFile.id");
Response fileMeta = UtilIT.getDataFileMetadataDraft(dataFileId, apiToken);
String label = JsonPath.from(fileMeta.body().asString()).getString("label");
assertEquals("dct.sav", label);

pathToFile = "src/test/resources/sav/frequency-test.sav";
jsonAsString = "{\"description\":\"My description.\",\"directoryLabel\":\"data/subdir1\",\"categories\":[\"Data\"], \"restrict\":\"false\" }";
Response rTabIngest = UtilIT.uploadFileViaNative(datasetIdInt.toString(), pathToFile, jsonAsString, apiToken);
logger.info(rTabIngest.prettyPrint());
assertEquals(200, rTabIngest.getStatusCode());

assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetIdInt, "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));

Long ingDataFileId = JsonPath.from(rTabIngest.body().asString()).getLong("data.files[0].dataFile.id");
Response ingFileMeta = UtilIT.getDataFileMetadataDraft(ingDataFileId, apiToken);
String ingLabel = JsonPath.from(ingFileMeta.body().asString()).getString("label");
assertEquals("frequency-test.tab", ingLabel);

//cleanup
Response destroyDatasetResponse = UtilIT.destroyDataset(datasetIdInt, apiToken);
assertEquals(200, destroyDatasetResponse.getStatusCode());

Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken);
assertEquals(200, deleteDataverseResponse.getStatusCode());

Response deleteUserResponse = UtilIT.deleteUser(username);
assertEquals(200, deleteUserResponse.getStatusCode());

}

}