diff --git a/doc/release-notes/9153-extract-metadata.md b/doc/release-notes/9153-extract-metadata.md new file mode 100644 index 00000000000..be21c5ed739 --- /dev/null +++ b/doc/release-notes/9153-extract-metadata.md @@ -0,0 +1,3 @@ +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML (XML) format and save it as an auxiliary file. + +An "extractNcml" API endpoint has been added, especially for installations with existing NetCDF and HDF5 files. After upgrading, they can iterate through these files and try to extract an NcML file. diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index fd1f0f27bc5..16623a6aeec 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -1,5 +1,5 @@ Tool Type Scope Description Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse. Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide `_. -File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, and ZipFiles - allowing them to be viewed without downloading the file. 
The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers +File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreadsheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions. 
diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json new file mode 100644 index 00000000000..b188520dabb --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/auxFileTool.json @@ -0,0 +1,26 @@ +{ + "displayName": "AuxFileViewer", + "description": "Show an auxiliary file from a dataset file.", + "toolName": "auxPreviewer", + "scope": "file", + "types": [ + "preview" + ], + "toolUrl": "https://example.com/AuxFileViewer.html", + "toolParameters": { + "queryParameters": [ + { + "fileid": "{fileId}" + } + ] + }, + "requirements": { + "auxFilesExist": [ + { + "formatTag": "myFormatTag", + "formatVersion": "0.1" + } + ] + }, + "contentType": "application/foobar" +} diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst index 4f6c9a8015c..eec9944338f 100644 --- a/doc/sphinx-guides/source/api/external-tools.rst +++ b/doc/sphinx-guides/source/api/external-tools.rst @@ -53,15 +53,21 @@ External tools must be expressed in an external tool manifest file, a specific J Examples of Manifests +++++++++++++++++++++ -Let's look at two examples of external tool manifests (one at the file level and one at the dataset level) before we dive into how they work. +Let's look at a few examples of external tool manifests (both at the file level and at the dataset level) before we dive into how they work. + +.. 
_tools-for-files: External Tools for Files ^^^^^^^^^^^^^^^^^^^^^^^^ -:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level both an "explore" tool and a "preview" tool that operates on tabular files: +:download:`fabulousFileTool.json <../_static/installation/files/root/external-tools/fabulousFileTool.json>` is a file level tool (both an "explore" tool and a "preview" tool) that operates on tabular files: .. literalinclude:: ../_static/installation/files/root/external-tools/fabulousFileTool.json +:download:`auxFileTool.json <../_static/installation/files/root/external-tools/auxFileTool.json>` is a file level preview tool that operates on auxiliary files associated with a data file (note the "requirements" section): + +.. literalinclude:: ../_static/installation/files/root/external-tools/auxFileTool.json + External Tools for Datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -113,6 +119,10 @@ Terminology allowedApiCalls httpMethod Which HTTP method the specified callback uses such as ``GET`` or ``POST``. allowedApiCalls timeOut For non-public datasets and datafiles, how many minutes the signed URLs given to the tool should be valid for. Must be an integer. + + requirements **Resources your tool needs to function.** For now, the only requirement you can specify is that one or more auxiliary files exist (see auxFilesExist in the :ref:`tools-for-files` example). Currently, requirements only apply to preview tools. If the requirements are not met, the preview tool is not shown. + + auxFilesExist **An array containing formatTag and formatVersion pairs** for each auxiliary file that your tool needs to download to function properly. For example, a required aux file could have a ``formatTag`` of "NcML" and a ``formatVersion`` of "0.1". See also :doc:`/developers/aux-file-support`. 
toolName A **name** of an external tool that is used to differentiate between external tools and also used in bundle.properties for localization in the Dataverse installation web interface. For example, the toolName for Data Explorer is ``explorer``. For the Data Curation Tool the toolName is ``dct``. This is an optional parameter in the manifest JSON file. =========================== ========== diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index c919f4687e2..f662eaa7a61 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2257,6 +2257,47 @@ Currently the following methods are used to detect file types: - The file extension (e.g. ".ipybn") is used, defined in a file called ``MimeTypeDetectionByFileExtension.properties``. - The file name (e.g. "Dockerfile") is used, defined in a file called ``MimeTypeDetectionByFileName.properties``. +.. _extractNcml: + +Extract NcML +~~~~~~~~~~~~ + +As explained in the :ref:`netcdf-and-hdf5` section of the User Guide, when those file types are uploaded, an attempt is made to extract an NcML file from them and store it as an auxiliary file. + +This happens automatically but superusers can also manually trigger this NcML extraction process with the API endpoint below. + +Note that "true" will be returned if an NcML file was created. "false" will be returned if there was an error or if the NcML file already exists (check server.log for details). + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=24 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/$ID/extractNcml" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/24/extractNcml" + +A curl example using a PID: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.5072/FK2/AAA000 + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/files/:persistentId/extractNcml?persistentId=$PERSISTENT_ID" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/extractNcml?persistentId=doi:10.5072/FK2/AAA000" + Replacing Files ~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 8043e7ffbb7..55b1e70dd33 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -184,11 +184,15 @@ File Handling Certain file types in the Dataverse installation are supported by additional functionality, which can include downloading in different formats, previews, file-level metadata preservation, file-level data citation; and exploration through data visualization and analysis. See the sections below for information about special functionality for specific file types. +.. _file-previews: + File Previews ------------- Dataverse installations can add previewers for common file types uploaded by their research communities. The previews appear on the file page. If a preview tool for a specific file type is available, the preview will be created and will display automatically, after terms have been agreed to or a guestbook entry has been made, if necessary. File previews are not available for restricted files unless they are being accessed using a Private URL. See also :ref:`privateurl`. 
+Installation of previewers is explained in the :doc:`/admin/external-tools` section of the Admin Guide. + Tabular Data Files ------------------ @@ -306,6 +310,15 @@ Astronomy (FITS) Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. +.. _netcdf-and-hdf5: + +NetCDF and HDF5 +--------------- + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) A previewer for these NcML files is available (see :ref:`file-previews`). + +.. _NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html + Compressed Files ---------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java index a7a89934f47..344032ef5e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFile.java @@ -55,7 +55,10 @@ public class AuxiliaryFile implements Serializable { private String formatTag; private String formatVersion; - + + /** + * The application/entity that created the auxiliary file. 
+ */ private String origin; private boolean isPublic; diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 76c91382868..05f3e209632 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -70,9 +70,13 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { * @param type how to group the files such as "DP" for "Differentially * @param mediaType user supplied content type (MIME type) * Private Statistics". - * @return success boolean - returns whether the save was successful + * @param save boolean - true to save immediately, false to let the cascade + * do persist to the database. + * @return an AuxiliaryFile with an id when save=true (assuming no + * exceptions) or an AuxiliaryFile without an id that will be persisted + * later through the cascade. */ - public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType, boolean save) { StorageIO storageIO = null; AuxiliaryFile auxFile = new AuxiliaryFile(); @@ -114,7 +118,14 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile auxFile.setType(type); auxFile.setDataFile(dataFile); auxFile.setFileSize(storageIO.getAuxObjectSize(auxExtension)); - auxFile = save(auxFile); + if (save) { + auxFile = save(auxFile); + } else { + if (dataFile.getAuxiliaryFiles() == null) { + dataFile.setAuxiliaryFiles(new ArrayList<>()); + } + dataFile.getAuxiliaryFiles().add(auxFile); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " 
+ ioex.getMessage()); throw new InternalServerErrorException(); @@ -129,7 +140,11 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } return auxFile; } - + + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { + return processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, true); + } + public AuxiliaryFile lookupAuxiliaryFile(DataFile dataFile, String formatTag, String formatVersion) { Query query = em.createNamedQuery("AuxiliaryFile.lookupAuxiliaryFile"); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 95b30617547..429a0d7a4e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5494,7 +5494,7 @@ public List getCachedToolsForDataFile(Long fileId, ExternalTool.Ty return cachedTools; } DataFile dataFile = datafileService.find(fileId); - cachedTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile); + cachedTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); cachedToolsByFileId.put(fileId, cachedTools); //add to map so we don't have to do the lifting again return cachedTools; } diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 85eb79d2ddc..228db0a7584 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -39,6 +39,7 @@ import edu.harvard.iq.dataverse.util.JsfHelper; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.IOException; import 
java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -57,6 +58,9 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonValue; import javax.validation.ConstraintViolation; import org.primefaces.PrimeFaces; @@ -125,6 +129,8 @@ public class FilePage implements java.io.Serializable { ExternalToolServiceBean externalToolService; @EJB PrivateUrlServiceBean privateUrlService; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; @Inject DataverseRequestServiceBean dvRequestService; @@ -285,8 +291,15 @@ public void setDatasetVersionId(Long datasetVersionId) { this.datasetVersionId = datasetVersionId; } + // findPreviewTools would be a better name private List sortExternalTools(){ - List retList = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + List retList = new ArrayList<>(); + List previewTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.PREVIEW, file.getContentType()); + for (ExternalTool previewTool : previewTools) { + if (externalToolService.meetsRequirements(previewTool, file)) { + retList.add(previewTool); + } + } Collections.sort(retList, CompareExternalToolName); return retList; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 2e5cef06f27..6aeaaf42d0d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -619,6 +619,27 @@ public Response redetectDatafile(@PathParam("id") String id, @QueryParam("dryRun } } + @Path("{id}/extractNcml") + @POST + public Response extractNcml(@PathParam("id") String id) { + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + // We can always make a command in the future if there's a need + // for non-superusers to call 
this API. + return error(Response.Status.FORBIDDEN, "This API call can be used by superusers only"); + } + DataFile dataFileIn = findDataFileOrDie(id); + java.nio.file.Path tempLocationPath = null; + boolean successOrFail = ingestService.extractMetadataNcml(dataFileIn, tempLocationPath); + NullSafeJsonBuilder result = NullSafeJsonBuilder.jsonObjectBuilder() + .add("result", successOrFail); + return ok(result); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + /** * Attempting to run metadata export, for all the formats for which we have * metadata Exporters. diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java index b532fbd4154..42caa95b9f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java @@ -63,7 +63,9 @@ public Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null); JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); - tools.add(toolToJson); + if (externalToolService.meetsRequirements(tool, dataFile)) { + tools.add(toolToJson); + } } return ok(tools); } catch (WrappedResponse wr) { diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index 1789b7a90c3..0a238eb5198 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -39,6 +39,7 @@ public class ExternalTool implements Serializable { public static final String CONTENT_TYPE = "contentType"; public static final String TOOL_NAME = "toolName"; public static final 
String ALLOWED_API_CALLS = "allowedApiCalls"; + public static final String REQUIREMENTS = "requirements"; @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @@ -103,6 +104,15 @@ public class ExternalTool implements Serializable { @Column(nullable = true, columnDefinition = "TEXT") private String allowedApiCalls; + /** + * When non-null, the tool has indicated that it has certain requirements + * that must be met before it should be shown to the user. This + * functionality was added for tools that operate on aux files rather than + * data files so "auxFilesExist" is one of the possible values. + */ + @Column(nullable = true, columnDefinition = "TEXT") + private String requirements; + /** * This default constructor is only here to prevent this error at * deployment: @@ -118,10 +128,10 @@ public ExternalTool() { } public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType) { - this(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, null); + this(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, null, null); } - public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls) { + public ExternalTool(String displayName, String toolName, String description, List externalToolTypes, Scope scope, String toolUrl, String toolParameters, String contentType, String allowedApiCalls, String requirements) { this.displayName = displayName; this.toolName = toolName; this.description = description; @@ -131,6 +141,7 @@ public ExternalTool(String displayName, String toolName, String description, Lis this.toolParameters = toolParameters; this.contentType = contentType; this.allowedApiCalls = allowedApiCalls; + this.requirements = requirements; 
} public enum Type { @@ -326,5 +337,12 @@ public void setAllowedApiCalls(String allowedApiCalls) { this.allowedApiCalls = allowedApiCalls; } + public String getRequirements() { + return requirements; + } + + public void setRequirements(String requirements) { + this.requirements = requirements; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index a65ad2427ba..f38cd7301ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse.externaltools; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -30,6 +32,8 @@ import static edu.harvard.iq.dataverse.externaltools.ExternalTool.*; import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.ejb.EJB; +import javax.json.JsonValue; @Stateless @Named @@ -40,6 +44,9 @@ public class ExternalToolServiceBean { @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; + @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + public List findAll() { TypedQuery typedQuery = em.createQuery("SELECT OBJECT(o) FROM ExternalTool AS o ORDER BY o.id", ExternalTool.class); return typedQuery.getResultList(); @@ -133,13 +140,13 @@ public ExternalTool save(ExternalTool externalTool) { * file supports The list of tools is passed in so it doesn't hit the * database each time */ - public static List findExternalToolsByFile(List allExternalTools, DataFile file) { + public List findExternalToolsByFile(List allExternalTools, DataFile file) { List externalTools = new ArrayList<>(); //Map tabular 
data to it's mimetype (the isTabularData() check assures that this code works the same as before, but it may need to change if tabular data is split into subtypes with differing mimetypes) final String contentType = file.isTabularData() ? DataFileServiceBean.MIME_TYPE_TSV_ALT : file.getContentType(); allExternalTools.forEach((externalTool) -> { - //Match tool and file type - if (contentType.equals(externalTool.getContentType())) { + //Match tool and file type, then check requirements + if (contentType.equals(externalTool.getContentType()) && meetsRequirements(externalTool, file)) { externalTools.add(externalTool); } }); @@ -147,6 +154,31 @@ public static List findExternalToolsByFile(List allE return externalTools; } + public boolean meetsRequirements(ExternalTool externalTool, DataFile dataFile) { + String requirements = externalTool.getRequirements(); + if (requirements == null) { + logger.fine("Data file id" + dataFile.getId() + ": no requirements for tool id " + externalTool.getId()); + return true; + } + boolean meetsRequirements = true; + JsonObject requirementsObj = JsonUtil.getJsonObject(requirements); + JsonArray auxFilesExist = requirementsObj.getJsonArray("auxFilesExist"); + for (JsonValue jsonValue : auxFilesExist) { + String formatTag = jsonValue.asJsonObject().getString("formatTag"); + String formatVersion = jsonValue.asJsonObject().getString("formatVersion"); + AuxiliaryFile auxFile = auxiliaryFileService.lookupAuxiliaryFile(dataFile, formatTag, formatVersion); + if (auxFile == null) { + logger.fine("Data file id" + dataFile.getId() + ": cannot find required aux file. formatTag=" + formatTag + ". formatVersion=" + formatVersion); + meetsRequirements = false; + break; + } else { + logger.fine("Data file id" + dataFile.getId() + ": found required aux file. formatTag=" + formatTag + ". 
formatVersion=" + formatVersion); + meetsRequirements = true; + } + } + return meetsRequirements; + } + public static ExternalTool parseAddExternalToolManifest(String manifest) { if (manifest == null || manifest.isEmpty()) { @@ -170,6 +202,7 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { JsonObject toolParametersObj = jsonObject.getJsonObject(TOOL_PARAMETERS); JsonArray queryParams = toolParametersObj.getJsonArray("queryParameters"); JsonArray allowedApiCallsArray = jsonObject.getJsonArray(ALLOWED_API_CALLS); + JsonObject requirementsObj = jsonObject.getJsonObject(REQUIREMENTS); boolean allRequiredReservedWordsFound = false; if (scope.equals(Scope.FILE)) { @@ -227,8 +260,12 @@ public static ExternalTool parseAddExternalToolManifest(String manifest) { if(allowedApiCallsArray !=null) { allowedApiCalls = allowedApiCallsArray.toString(); } + String requirements = null; + if (requirementsObj != null) { + requirements = requirementsObj.toString(); + } - return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, allowedApiCalls); + return new ExternalTool(displayName, toolName, description, externalToolTypes, scope, toolUrl, toolParameters, contentType, allowedApiCalls, requirements); } private static String getRequiredTopLevelField(JsonObject jsonObject, String key) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b03bae618a4..9c6acd964c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -20,6 +20,8 @@ package edu.harvard.iq.dataverse.ingest; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import 
edu.harvard.iq.dataverse.datavariable.VariableCategory; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; @@ -72,6 +74,7 @@ //import edu.harvard.iq.dvn.unf.*; import org.dataverse.unf.*; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -81,6 +84,7 @@ import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; @@ -113,6 +117,9 @@ import javax.jms.QueueSession; import javax.jms.Message; import javax.faces.application.FacesMessage; +import javax.ws.rs.core.MediaType; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * @@ -134,6 +141,8 @@ public class IngestServiceBean { @EJB DataFileServiceBean fileService; @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + @EJB SystemConfig systemConfig; @Resource(lookup = "java:app/jms/queue/ingest") @@ -232,6 +241,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, savedSuccess = true; logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); + // TODO: reformat this file to remove the many tabs added in cc08330 + extractMetadataNcml(dataFile, tempLocationPath); + } catch (IOException ioex) { logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); } finally { @@ -343,6 +355,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, try { // FITS is the only type supported for metadata // extraction, as of now. -- L.A. 4.0 + // Note that extractMetadataNcml() is used for NetCDF/HDF5. 
dataFile.setContentType("application/fits"); metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); } catch (IOException mex) { @@ -565,7 +578,6 @@ public int compare(DataFile d1, DataFile d2) { return sb.toString(); } - public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { /* logger.info("Skipping summary statistics and UNF."); @@ -1206,7 +1218,104 @@ public boolean extractMetadata(String tempFileLocation, DataFile dataFile, Datas return ingestSuccessful; } - + /** + * @param dataFile The DataFile from which to attempt NcML extraction + * (NetCDF or HDF5 format) + * @param tempLocationPath Null if the file is already saved to permanent + * storage. Otherwise, the path to the temp location of the files, as during + * initial upload. + * @return True if the Ncml files was created. False on any error or if the + * NcML file already exists. + */ + public boolean extractMetadataNcml(DataFile dataFile, Path tempLocationPath) { + boolean ncmlFileCreated = false; + logger.fine("extractMetadataNcml: dataFileIn: " + dataFile + ". tempLocationPath: " + tempLocationPath); + InputStream inputStream = null; + String dataFileLocation = null; + if (tempLocationPath != null) { + // This file was just uploaded and hasn't been saved to S3 or local storage. + dataFileLocation = tempLocationPath.toString(); + } else { + // This file is already on S3 or local storage. + File tempFile = null; + File localFile; + StorageIO storageIO; + try { + storageIO = dataFile.getStorageIO(); + storageIO.open(); + if (storageIO.isLocalFile()) { + localFile = storageIO.getFileSystemPath().toFile(); + dataFileLocation = localFile.getAbsolutePath(); + logger.fine("extractMetadataNcml: file is local. 
Path: " + dataFileLocation); + } else { + // Need to create a temporary local file: + tempFile = File.createTempFile("tempFileExtractMetadataNcml", ".tmp"); + try ( ReadableByteChannel targetFileChannel = (ReadableByteChannel) storageIO.getReadChannel(); FileChannel tempFileChannel = new FileOutputStream(tempFile).getChannel();) { + tempFileChannel.transferFrom(targetFileChannel, 0, storageIO.getSize()); + } + dataFileLocation = tempFile.getAbsolutePath(); + logger.fine("extractMetadataNcml: file is on S3. Downloaded and saved to temp path: " + dataFileLocation); + } + } catch (IOException ex) { + logger.info("While attempting to extract NcML, could not use storageIO for data file id " + dataFile.getId() + ". Exception: " + ex); + } + } + if (dataFileLocation != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(dataFileLocation)) { + logger.fine("trying to open " + dataFileLocation); + if (netcdfFile != null) { + // For now, empty string. What should we pass as a URL to toNcml()? The filename (including the path) most commonly at https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_cookbook.html + // With an empty string the XML will show 'location="file:"'. + String ncml = netcdfFile.toNcml(""); + inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); + } else { + logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + " (null returned)."); + } + } catch (IOException ex) { + logger.info("NetcdfFiles.open() could not open file id " + dataFile.getId() + ". Exception caught: " + ex); + } + } else { + logger.info("dataFileLocation is null for file id " + dataFile.getId() + ". Can't extract NcML."); + } + if (inputStream != null) { + // If you change NcML, you must also change the previewer. + String formatTag = "NcML"; + // 0.1 is arbitrary. It's our first attempt to put out NcML so we're giving it a low number. + // If you bump the number here, be sure the bump the number in the previewer as well. 
+ // We could use 2.2 here since that's the current version of NcML. + String formatVersion = "0.1"; + String origin = "netcdf-java"; + boolean isPublic = true; + // See also file.auxfiles.types.NcML in Bundle.properties. Used to group aux files in UI. + String type = "NcML"; + // XML because NcML doesn't have its own MIME/content type at https://www.iana.org/assignments/media-types/media-types.xhtml + MediaType mediaType = new MediaType("text", "xml"); + try { + // Let the cascade do the save if the file isn't yet on permanent storage. + boolean callSave = false; + if (tempLocationPath == null) { + callSave = true; + // Check for an existing NcML file + logger.fine("Checking for existing NcML aux file for file id " + dataFile.getId()); + AuxiliaryFile existingAuxiliaryFile = auxiliaryFileService.lookupAuxiliaryFile(dataFile, formatTag, formatVersion); + if (existingAuxiliaryFile != null) { + logger.fine("Aux file already exists for NetCDF/HDF5 file for file id " + dataFile.getId()); + return false; + } + } + AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType, callSave); + logger.fine("Aux file extracted from NetCDF/HDF5 file saved to storage (but not to the database yet) from file id " + dataFile.getId()); + ncmlFileCreated = true; + } catch (Exception ex) { + logger.info("exception throw calling processAuxiliaryFile: " + ex); + } + } else { + logger.info("extractMetadataNcml: input stream is null! 
dataFileLocation was " + dataFileLocation); + } + + return ncmlFileCreated; + } + private void processDatasetMetadata(FileMetadataIngest fileMetadataIngest, DatasetVersion editVersion) throws IOException { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 62531d32bb2..4166ab78a39 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2011,6 +2011,7 @@ file.remotelyStored=This file is stored remotely - click for more info file.auxfiles.download.header=Download Auxiliary Files # These types correspond to the AuxiliaryFile.Type enum. file.auxfiles.types.DP=Differentially Private Statistics +file.auxfiles.types.NcML=XML from NetCDF/HDF5 (NcML) # Add more types here file.auxfiles.unspecifiedTypes=Other Auxiliary Files diff --git a/src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql diff --git a/src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql b/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql rename to src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql rename to src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql diff --git a/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql 
b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql new file mode 100644 index 00000000000..48230d21032 --- /dev/null +++ b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql @@ -0,0 +1 @@ +ALTER TABLE externaltool ADD COLUMN IF NOT EXISTS requirements TEXT; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 5508a6c57dc..cdebeddb7bc 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -3,8 +3,11 @@ import com.jayway.restassured.RestAssured; import com.jayway.restassured.path.json.JsonPath; import com.jayway.restassured.response.Response; +import java.io.File; import java.io.IOException; import java.io.StringReader; +import java.nio.file.Path; +import java.nio.file.Paths; import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; @@ -442,4 +445,122 @@ public void createToolSpreadsheetViewer() { .statusCode(OK.getStatusCode()); } + @Test + public void testFileLevelToolWithAuxFileReq() throws IOException { + + // Delete all external tools before testing. 
+ Response getTools = UtilIT.getExternalTools(); + getTools.prettyPrint(); + getTools.then().assertThat() + .statusCode(OK.getStatusCode()); + String body = getTools.getBody().asString(); + JsonReader bodyObject = Json.createReader(new StringReader(body)); + JsonArray tools = bodyObject.readObject().getJsonArray("data"); + for (int i = 0; i < tools.size(); i++) { + JsonObject tool = tools.getJsonObject(i); + int id = tool.getInt("id"); + Response deleteExternalTool = UtilIT.deleteExternalTool(id); + deleteExternalTool.prettyPrint(); + } + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + + // Not really an HDF5 file. Just random bytes. But the file extension makes it detected as HDF5. 
+ Path pathToFalseHdf5 = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "false.hdf5"); + byte[] bytes = {1, 2, 3, 4, 5}; + java.nio.file.Files.write(pathToFalseHdf5, bytes); + + Response uploadFalseHdf5 = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFalseHdf5.toString(), apiToken); + uploadFalseHdf5.prettyPrint(); + uploadFalseHdf5.then().assertThat() + .statusCode(OK.getStatusCode()); + + Integer falseHdf5 = JsonPath.from(uploadFalseHdf5.getBody().asString()).getInt("data.files[0].dataFile.id"); + + String pathToTrueHdf5 = "src/test/resources/hdf/hdf5/vlen_string_dset"; + Response uploadTrueHdf5 = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTrueHdf5, apiToken); + uploadTrueHdf5.prettyPrint(); + uploadTrueHdf5.then().assertThat() + .statusCode(OK.getStatusCode()); + + Integer trueHdf5 = JsonPath.from(uploadTrueHdf5.getBody().asString()).getInt("data.files[0].dataFile.id"); + + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add("displayName", "HDF5 Tool"); + job.add("description", "Operates on HDF5 files"); + job.add("types", Json.createArrayBuilder().add("preview")); + job.add("scope", "file"); + job.add("contentType", "application/x-hdf5"); + job.add("toolUrl", "/dataexplore/dataverse-previewers/previewers/v1.3/TextPreview.html"); + job.add("toolParameters", Json.createObjectBuilder() + .add("queryParameters", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("fileid", "{fileId}") + .build()) + .add(Json.createObjectBuilder() + .add("siteUrl", "{siteUrl}") + .build()) + .add(Json.createObjectBuilder() + .add("key", "{apiToken}") + .build()) + .build()) + .build()); + job.add("requirements", Json.createObjectBuilder() + .add("auxFilesExist", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("formatTag", "NcML") + .add("formatVersion", "0.1") + ) + ) + ); + Response addExternalTool = UtilIT.addExternalTool(job.build()); + addExternalTool.prettyPrint(); + 
addExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.displayName", CoreMatchers.equalTo("HDF5 Tool")); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + Response getTool = UtilIT.getExternalTool(toolId); + getTool.prettyPrint(); + getTool.then().assertThat() + .body("data.scope", CoreMatchers.equalTo("file")) + .statusCode(OK.getStatusCode()); + + // No tools for false HDF5 file. Aux file couldn't be extracted. Doesn't meet requirements. + Response getToolsForFalseHdf5 = UtilIT.getExternalToolsForFile(falseHdf5.toString(), "preview", apiToken); + getToolsForFalseHdf5.prettyPrint(); + getToolsForFalseHdf5.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data", Matchers.hasSize(0)); + + // The tool shows for a true HDF5 file. The NcML aux file is available. Requirements met. + Response getToolsForTrueHdf5 = UtilIT.getExternalToolsForFile(trueHdf5.toString(), "preview", apiToken); + getToolsForTrueHdf5.prettyPrint(); + getToolsForTrueHdf5.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].displayName", CoreMatchers.equalTo("HDF5 Tool")) + .body("data[0].scope", CoreMatchers.equalTo("file")) + .body("data[0].contentType", CoreMatchers.equalTo("application/x-hdf5")); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java new file mode 100644 index 00000000000..9716e7aca13 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java @@ -0,0 +1,182 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.response.Response; +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.FORBIDDEN; +import static 
javax.ws.rs.core.Response.Status.NOT_FOUND; +import static javax.ws.rs.core.Response.Status.OK; +import org.hamcrest.CoreMatchers; +import static org.hamcrest.CoreMatchers.equalTo; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NetcdfIT { + + @BeforeClass + public static void setUp() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testNmclFromNetcdf() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "src/test/resources/netcdf/madis-raob"; + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); + String tag = "NcML"; + String version = "0.1"; + + Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + //downloadNcml.prettyPrint(); // long output + downloadNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .contentType("text/xml; 
name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + + Response deleteNcml = UtilIT.deleteAuxFile(fileId, tag, version, apiToken); + deleteNcml.prettyPrint(); + deleteNcml.then().assertThat().statusCode(OK.getStatusCode()); + + Response downloadNcmlShouldFail = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldFail.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); + + Response extractNcml = UtilIT.extractNcml(fileId, apiToken); + extractNcml.prettyPrint(); + extractNcml.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response downloadNcmlShouldWork = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldWork.then().assertThat() + .statusCode(OK.getStatusCode()); + + } + + @Test + public void testNmclFromNetcdfErrorChecking() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createUserRandom = UtilIT.createRandomUser(); + createUserRandom.then().assertThat().statusCode(OK.getStatusCode()); + String apiTokenRandom = UtilIT.getApiTokenFromResponse(createUserRandom); + + String apiTokenNull = null; + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = 
UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "src/test/resources/netcdf/madis-raob"; + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); + String tag = "NcML"; + String version = "0.1"; + + Response downloadNcmlFail = UtilIT.downloadAuxFile(fileId, tag, version, apiTokenNull); + downloadNcmlFail.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()); + + Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + + Response deleteNcml = UtilIT.deleteAuxFile(fileId, tag, version, apiToken); + deleteNcml.prettyPrint(); + deleteNcml.then().assertThat().statusCode(OK.getStatusCode()); + + Response downloadNcmlShouldFail = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldFail.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + Response extractNcmlFailRandomUser = UtilIT.extractNcml(fileId, apiTokenRandom); + extractNcmlFailRandomUser.prettyPrint(); + extractNcmlFailRandomUser.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()); + + UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); + + Response extractNcml = UtilIT.extractNcml(fileId, apiToken); + extractNcml.prettyPrint(); + extractNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(true)); + + Response downloadNcmlShouldWork = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + downloadNcmlShouldWork.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response extractNcmlFailExistsAlready = UtilIT.extractNcml(fileId, apiToken); + 
extractNcmlFailExistsAlready.prettyPrint(); + extractNcmlFailExistsAlready.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(false)); + + Path pathToTxt = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "file.txt"); + String contentOfTxt = "Just a text file. Don't expect NcML out!"; + java.nio.file.Files.write(pathToTxt, contentOfTxt.getBytes()); + + Response uploadFileTxt = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTxt.toString(), apiToken); + uploadFileTxt.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("file.txt")); + + long fileIdTxt = JsonPath.from(uploadFileTxt.body().asString()).getLong("data.files[0].dataFile.id"); + + Response extractNcmlFailText = UtilIT.extractNcml(fileIdTxt, apiToken); + extractNcmlFailText.prettyPrint(); + extractNcmlFailText.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.result", CoreMatchers.equalTo(false)); + + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 12ccaf2caff..36dce2978fa 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -743,10 +743,11 @@ static Response uploadAuxFile(Long fileId, String pathToFile, String formatTag, } static Response downloadAuxFile(Long fileId, String formatTag, String formatVersion, String apiToken) { - Response response = given() - .header(API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/access/datafile/" + fileId + "/auxiliary/" + formatTag + "/" + formatVersion); - return response; + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/access/datafile/" + fileId + "/auxiliary/" + formatTag + "/" + formatVersion); } static Response 
listAuxFilesByOrigin(Long fileId, String origin, String apiToken) { @@ -1170,7 +1171,14 @@ public static Response uningestFile(Long fileId, String apiToken) { .post("/api/files/" + fileId + "/uningest/?key=" + apiToken); return uningestFileResponse; } - + + public static Response extractNcml(Long fileId, String apiToken) { + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .post("/api/files/" + fileId + "/extractNcml"); + return response; + } + //I don't understand why this blows up when I remove the key public static Response getDataFileMetadata(Long fileId, String apiToken) { Response fileResponse = given() diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 74e10d67352..3885c9b358c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java @@ -19,7 +19,10 @@ public class ExternalToolServiceBeanTest { + private final ExternalToolServiceBean externalToolService; + public ExternalToolServiceBeanTest() { + this.externalToolService = new ExternalToolServiceBean(); } @Test @@ -49,7 +52,7 @@ public void testfindAll() { ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); List externalTools = new ArrayList<>(); externalTools.add(externalTool); - List availableExternalTools = ExternalToolServiceBean.findExternalToolsByFile(externalTools, dataFile); + List availableExternalTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); assertEquals(availableExternalTools.size(), 1); } @@ -544,4 +547,47 @@ protected static ExternalTool getAllowedApiCallsTool() { return ExternalToolServiceBean.parseAddExternalToolManifest(tool); } + + @Test + public void testParseAddFileToolRequireAuxFile() { + 
JsonObjectBuilder job = Json.createObjectBuilder(); + job.add("displayName", "AwesomeTool"); + job.add("toolName", "explorer"); + job.add("description", "This tool is awesome."); + job.add("types", Json.createArrayBuilder().add("explore")); + job.add("scope", "file"); + job.add("hasPreviewMode", "false"); + job.add("toolUrl", "http://awesometool.com"); + job.add("toolParameters", Json.createObjectBuilder() + .add("queryParameters", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("filePid", "{filePid}") + .build()) + .add(Json.createObjectBuilder() + .add("key", "{apiToken}") + .build()) + .add(Json.createObjectBuilder() + .add("fileMetadataId", "{fileMetadataId}") + .build()) + .add(Json.createObjectBuilder() + .add("dvLocale", "{localeCode}") + .build()) + .build()) + .build()); + job.add("requirements", Json.createObjectBuilder() + .add("auxFilesExist", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("formatTag", "NcML") + .add("formatVersion", "0.1") + ) + ) + ); + job.add(ExternalTool.CONTENT_TYPE, DataFileServiceBean.MIME_TYPE_TSV_ALT); + String tool = job.build().toString(); + ExternalTool externalTool = ExternalToolServiceBean.parseAddExternalToolManifest(tool); + assertEquals("AwesomeTool", externalTool.getDisplayName()); + assertEquals("explorer", externalTool.getToolName()); + assertEquals("{\"auxFilesExist\":[{\"formatTag\":\"NcML\",\"formatVersion\":\"0.1\"}]}", externalTool.getRequirements()); + } + } diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 6e6668d45af..1e9110be2de 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ 
-DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT