diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index 80d4ceaabe6..9ed535539db 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -419,19 +419,25 @@
                 <id>unidata-all</id>
                 <name>Unidata All</name>
                 <url>https://artifacts.unidata.ucar.edu/repository/unidata-all/</url>
+                <snapshots>
+                    <enabled>false</enabled>
+                </snapshots>
-
+
+        -->
diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java
index 4197d978e79..282add42c58 100644
--- a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java
+++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java
@@ -28,8 +28,10 @@ public interface ExportDataProvider {
      * formatting it, since there can be a very large number of
      * files in a dataset.
      */
-    JsonObject getDatasetJson();
+    JsonObject getDatasetJson(ExportDataContext... context);
+
     /**
      *
      * @return - dataset metadata in the JSON-LD based OAI_ORE format used in
@@ -73,6 +75,7 @@ public interface ExportDataProvider {
      * @throws ExportException
      */
     JsonArray getTabularDataDetails(ExportDataContext... context) throws ExportException;
+
     /**
     *
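Note: the ExportDataContext class itself is new in dataverse-spi 2.1.0 and is not included in this diff. Judging purely from the call sites throughout the changes below (ExportDataContext.context().withDatasetMetadataOnly(), .withOffset(), .withLength(), and the is/get accessors used by InternalExportDataProvider), it is presumably a small builder-style value object along these lines. This is a sketch inferred from usage, not the actual SPI source; in particular, withPublicFilesOnly() is an assumed counterpart of the isPublicFilesOnly() accessor:

    package io.gdcc.spi.export;

    // Hypothetical reconstruction of the dataverse-spi 2.1.0 ExportDataContext,
    // inferred from its usage in this diff; the real class may differ.
    public class ExportDataContext {
        private boolean datasetMetadataOnly = false;
        private boolean publicFilesOnly = false;
        private Integer offset = null;
        private Integer length = null;

        // Static factory used at the call sites: ExportDataContext.context()...
        public static ExportDataContext context() {
            return new ExportDataContext();
        }

        public ExportDataContext withDatasetMetadataOnly() {
            this.datasetMetadataOnly = true;
            return this;
        }

        public ExportDataContext withPublicFilesOnly() { // assumed, see lead-in
            this.publicFilesOnly = true;
            return this;
        }

        public ExportDataContext withOffset(int offset) {
            this.offset = offset;
            return this;
        }

        public ExportDataContext withLength(int length) {
            this.length = length;
            return this;
        }

        public boolean isDatasetMetadataOnly() { return datasetMetadataOnly; }
        public boolean isPublicFilesOnly() { return publicFilesOnly; }
        public Integer getOffset() { return offset; }
        public Integer getLength() { return length; }
    }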
diff --git a/pom.xml b/pom.xml
index ceb5ea28d84..cf5030cb137 100644
--- a/pom.xml
+++ b/pom.xml
@@ -680,7 +680,7 @@
         <dependency>
             <groupId>io.gdcc</groupId>
             <artifactId>dataverse-spi</artifactId>
-            <version>2.0.0</version>
+            <version>2.1.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>javax.cache</groupId>
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
index cca9be7ce9d..bc4047fd835 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -761,7 +761,7 @@ public void exportAllDatasets(boolean forceReExport) {
                     || dataset.getLastExportTime().before(publicationDate)))) {
                 countAll++;
                 try {
-                    recordService.exportAllFormatsInNewTransaction(dataset);
+                    recordService.exportFormatsInNewTransaction(dataset, null);
                     exportLogger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString());
                     countSuccess++;
                 } catch (Exception ex) {
@@ -785,13 +785,18 @@
     @Asynchronous
     public void reExportDatasetAsync(Dataset dataset) {
-        exportDataset(dataset, true);
+        exportDataset(dataset, true, null);
+    }
+
+    @Asynchronous
+    public void reExportDatasetAsync(Dataset dataset, List<String> formatNames) {
+        exportDataset(dataset, true, formatNames);
     }

-    public void exportDataset(Dataset dataset, boolean forceReExport) {
+    private void exportDataset(Dataset dataset, boolean forceReExport, List<String> formatNames) {
         if (dataset != null) {
             // Note that the logic for handling a dataset is similar to what is implemented in exportAllDatasets,
-            // but when only one dataset is exported we do not log in a separate export logging file
+            // but when only one dataset is exported we do not use a dedicated log file
             if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) {

                 // can't trust dataset.getPublicationDate(), no.
@@ -800,7 +805,7 @@ public void exportDataset(Dataset dataset, boolean forceReExport) {
                     && (dataset.getLastExportTime() == null
                     || dataset.getLastExportTime().before(publicationDate)))) {
                 try {
-                    recordService.exportAllFormatsInNewTransaction(dataset);
+                    recordService.exportFormatsInNewTransaction(dataset, formatNames);
                     logger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString());
                 } catch (Exception ex) {
                     logger.log(Level.INFO, "Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + ex.getMessage(), ex);
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
index 27c91e8b312..4687d72bdbc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
@@ -180,6 +180,55 @@ public List<FileMetadata> getFileMetadatas(DatasetVersion datasetVersion, Integer
         }
         return typedQuery.getResultList();
     }
+
+    /**
+     * Similar to the above, but dedicated to retrieving the FileMetadatas of only
+     * the tabular data files in the specified DatasetVersion. Used in the metadata
+     * export subsystem.
+     *
+     * @param datasetVersion the DatasetVersion to access
+     * @param limit for pagination, can be null
+     * @param offset for pagination, can be null
+     * @param publicFilesOnly skip restricted, embargoed etc. files
+     * @return a FileMetadata list from the specified DatasetVersion
+     */
+    public List<FileMetadata> getTabularDataFileMetadatas(DatasetVersion datasetVersion, Integer limit, Integer offset, boolean publicFilesOnly) {
+        CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder();
+        CriteriaQuery<FileMetadata> criteriaQuery = criteriaBuilder.createQuery(FileMetadata.class);
+
+        Root<FileMetadata> fileMetadataRoot = criteriaQuery.from(FileMetadata.class);
+        Predicate basePredicate = criteriaBuilder.equal(fileMetadataRoot.get("datasetVersion").get("id"), datasetVersion.getId());
+
+        Root<DataTable> dataTableRoot = criteriaQuery.from(DataTable.class);
+        Predicate tabularPredicate = criteriaBuilder.equal(dataTableRoot.get("dataFile"), fileMetadataRoot.get("dataFile"));
+
+        Predicate combinedPredicate;
+
+        if (publicFilesOnly) {
+            combinedPredicate = criteriaBuilder.and(basePredicate,
+                    tabularPredicate,
+                    createSearchCriteriaAccessStatusPredicate(FileSearchCriteria.FileAccessStatus.Public,
+                            criteriaBuilder,
+                            fileMetadataRoot));
+        } else {
+            combinedPredicate = criteriaBuilder.and(basePredicate, tabularPredicate);
+        }
+
+        criteriaQuery
+                .select(fileMetadataRoot)
+                .where(combinedPredicate)
+                .orderBy(criteriaBuilder.asc(fileMetadataRoot.get("label")));
+
+        TypedQuery<FileMetadata> typedQuery = em.createQuery(criteriaQuery);
+        if (limit != null) {
+            typedQuery.setMaxResults(limit);
+        }
+        if (offset != null) {
+            typedQuery.setFirstResult(offset);
+        }
+
+        return typedQuery.getResultList();
+    }

     /**
      * Returns the total download size of all files for a particular DatasetVersion
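Since limit and offset are plain JPA pagination parameters here, a caller can drain the tabular files of a version in fixed-size pages. A minimal sketch, assuming an injected DatasetVersionFilesServiceBean and a caller-chosen batch size of 50:

    // Illustrative only: page through tabular FileMetadatas 50 at a time.
    int batchSize = 50;
    int offset = 0;
    List<FileMetadata> batch;
    do {
        batch = datasetVersionFilesService.getTabularDataFileMetadatas(
                datasetVersion, batchSize, offset, false);
        for (FileMetadata fmd : batch) {
            // process one tabular file's metadata...
        }
        offset += batch.size();
    } while (batch.size() == batchSize);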
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
index 0a1b19985a4..880bd91f0b0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
@@ -1064,7 +1064,7 @@ public Response getFileDataTables(@Context ContainerRequestContext crc, @PathParam
         if (!dataFile.isTabularData()) {
             return badRequest(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"));
         }
-        return ok(jsonDT(dataFile.getDataTables()));
+        return ok(jsonDT(dataFile.getDataTables(), true));
     }

     @POST
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
index bd937878286..351409b39ba 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
@@ -19,6 +19,9 @@
 import edu.harvard.iq.dataverse.harvest.server.OAISetServiceBean;
 import edu.harvard.iq.dataverse.harvest.server.OAISet;

+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;

 /**
  *
@@ -64,10 +67,14 @@ public Response reExportAll() {

     @GET
     @Path("{id}/reExportDataset")
-    public Response indexDatasetByPersistentId(@PathParam("id") String id) {
+    public Response indexDatasetByPersistentId(@PathParam("id") String id, @QueryParam("formats") String formats) {
         try {
             Dataset dataset = findDatasetOrDie(id);
-            datasetService.reExportDatasetAsync(dataset);
+            List<String> formatNames = null;
+            if (formats != null) {
+                formatNames = new ArrayList<>(Arrays.asList(formats.split(",")));
+            }
+            datasetService.reExportDatasetAsync(dataset, formatNames);
             return ok("export started");
         } catch (WrappedResponse wr) {
             return wr.getResponse();
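With the new formats query parameter, a selective re-export of just some cached formats can be requested per dataset. A hypothetical client call, assuming the standard admin/metadata mount point of this class in Dataverse and a local installation (the host and the dataset id 42 are placeholders):

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class ReExportDemo {
        public static void main(String[] args) throws Exception {
            // Hypothetical host and dataset id; the path assumes the stock
            // "admin/metadata" mapping of this resource class.
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:8080/api/admin/metadata/42/reExportDataset?formats=ddi,oai_dc"))
                    .GET()
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            // Expected body, per the endpoint above: an OK envelope with "export started"
            System.out.println(response.body());
        }
    }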
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/dto/DataFileDTO.java b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataFileDTO.java
index 318bad4f3a3..f4537b0b7ad 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/dto/DataFileDTO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataFileDTO.java
@@ -20,6 +20,7 @@ public class DataFileDTO {
     private String md5;
     private String description;
     private String pidURL;
+    private List<String> tabularTags;

     public String getPidURL() {
         return pidURL;
@@ -119,5 +120,11 @@ public void setDescription(String description) {
         this.description = description;
     }

+    public List<String> getTabularTags() {
+        return tabularTags;
+    }
+
+    public void setTabularTags(List<String> tabularTags) {
+        this.tabularTags = tabularTags;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DCTermsExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DCTermsExporter.java
index f82c0d9ad3d..ad5010fdf50 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DCTermsExporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DCTermsExporter.java
@@ -8,6 +8,7 @@
 import io.gdcc.spi.export.Exporter;
 import io.gdcc.spi.export.XMLExporter;
 import edu.harvard.iq.dataverse.util.BundleUtil;
+import io.gdcc.spi.export.ExportDataContext;
 import java.io.OutputStream;
 import java.util.Locale;
 import java.util.Optional;
@@ -38,7 +39,7 @@ public String getDisplayName(Locale locale) {
     @Override
     public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
         try {
-            DublinCoreExportUtil.datasetJson2dublincore(dataProvider.getDatasetJson(), outputStream, DublinCoreExportUtil.DC_FLAVOR_DCTERMS);
+            DublinCoreExportUtil.datasetJson2dublincore(dataProvider.getDatasetJson(ExportDataContext.context().withDatasetMetadataOnly()), outputStream, DublinCoreExportUtil.DC_FLAVOR_DCTERMS);
         } catch (XMLStreamException xse) {
             throw new ExportException("Caught XMLStreamException performing DCTERMS export", xse);
         }
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java
index 0130c18b22b..e03363a155e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java
@@ -48,7 +48,7 @@ public String getDisplayName(Locale locale) {
     @Override
     public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
         try {
-            DdiExportUtil.datasetJson2ddi(dataProvider.getDatasetJson(), dataProvider.getDatasetFileDetails(),
+            DdiExportUtil.datasetJson2ddi(dataProvider.getDatasetJson(), dataProvider,
                     outputStream);
         } catch (XMLStreamException xse) {
             throw new ExportException("Caught XMLStreamException performing DDI export", xse);
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java
index 0fa32dd4bfa..db3c28deb78 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java
@@ -5,6 +5,7 @@
 import edu.harvard.iq.dataverse.export.dublincore.DublinCoreExportUtil;
 import io.gdcc.spi.export.ExportDataProvider;
 import io.gdcc.spi.export.ExportException;
+import io.gdcc.spi.export.ExportDataContext;
 import io.gdcc.spi.export.Exporter;
 import io.gdcc.spi.export.XMLExporter;
 import edu.harvard.iq.dataverse.util.BundleUtil;
@@ -38,7 +39,7 @@ public String getDisplayName(Locale locale) {
     @Override
     public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
         try {
-            DublinCoreExportUtil.datasetJson2dublincore(dataProvider.getDatasetJson(), outputStream,
+            DublinCoreExportUtil.datasetJson2dublincore(dataProvider.getDatasetJson(ExportDataContext.context().withDatasetMetadataOnly()), outputStream,
                     DublinCoreExportUtil.DC_FLAVOR_OAI);
         } catch (XMLStreamException xse) {
             throw new ExportException("Caught XMLStreamException performing DC export", xse);
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java b/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java
index e7bcf17d44b..5d5863798aa 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java
@@ -64,7 +64,7 @@ public class ExportService {

     private Map<String, Exporter> exporterMap = new HashMap<>();
     private static final Logger logger = Logger.getLogger(ExportService.class.getCanonicalName());
-    
+
     private ExportService() {
         /*
          * Step 1 - find the EXPORTERS dir and add all jar files there to a class loader
@@ -319,16 +319,72 @@ public void exportAllFormats(Dataset dataset) throws ExportException {
         }
     }
+
+    public void exportFormats(Dataset dataset, List<String> formatNames) throws ExportException {
+        try {
+            if (formatNames == null) {
+                clearAllCachedFormats(dataset);
+            } else {
+                clearCachedFormats(dataset, formatNames);
+            }
+        } catch (IOException ex) {
+            Logger.getLogger(ExportService.class.getName()).log(Level.SEVERE, null, ex);
+        }

-    public void clearAllCachedFormats(Dataset dataset) throws IOException {
         try {
+            DatasetVersion releasedVersion = dataset.getReleasedVersion();
+            if (releasedVersion == null) {
+                throw new ExportException("No released version for dataset " + dataset.getGlobalId().toString());
+            }
+            InternalExportDataProvider dataProvider = new InternalExportDataProvider(releasedVersion);
             for (Exporter e : exporterMap.values()) {
                 String formatName = e.getFormatName();
-                clearCachedExport(dataset, formatName);
+                if (formatNames == null || formatNames.contains(formatName)) {
+                    if (e.getPrerequisiteFormatName().isPresent()) {
+                        String prereqFormatName = e.getPrerequisiteFormatName().get();
+                        try (InputStream preReqStream = getExport(dataset.getReleasedVersion(), prereqFormatName)) {
+                            dataProvider.setPrerequisiteInputStream(preReqStream);
+                            cacheExport(dataset, dataProvider, formatName, e);
+                            dataProvider.setPrerequisiteInputStream(null);
+                        } catch (IOException ioe) {
+                            throw new ExportException("Could not get prerequisite " + e.getPrerequisiteFormatName() + " to create " + formatName + " export for dataset " + dataset.getId(), ioe);
+                        }
+                    } else {
+                        cacheExport(dataset, dataProvider, formatName, e);
+                    }
+                }
             }
+            // Finally, if we have been able to successfully export in all the requested
+            // formats, we'll increment the "last exported" time stamp:
+            dataset.setLastExportTime(new Timestamp(new Date().getTime()));
+
+        } catch (ServiceConfigurationError serviceError) {
+            throw new ExportException("Service configuration error during export. " + serviceError.getMessage());
+        } catch (RuntimeException e) {
+            logger.log(Level.FINE, e.getMessage(), e);
+            throw new ExportException(
+                    "Unknown runtime exception exporting metadata. " + (e.getMessage() == null ? "" : e.getMessage()));
+        }
+    }

-        dataset.setLastExportTime(null);
+    public void clearAllCachedFormats(Dataset dataset) throws IOException {
+        List<String> formatNames = new ArrayList<>();
+
+        for (Exporter e : exporterMap.values()) {
+            formatNames.add(e.getFormatName());
+        }
+        clearCachedFormats(dataset, formatNames);
+        dataset.setLastExportTime(null);
+    }
+
+    public void clearCachedFormats(Dataset dataset, List<String> formatNames) throws IOException {
+        try {
+            for (String formatName : formatNames) {
+                clearCachedExport(dataset, formatName);
+            }
         } catch (IOException ex) {
             // not fatal
         }
@@ -379,7 +435,7 @@ public void exportFormat(Dataset dataset, String formatName) throws ExportException {
         }
     }
-    
+
     public Exporter getExporter(String formatName) throws ExportException {
         Exporter e = exporterMap.get(formatName);
         if (e != null) {
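For callers inside the application, the new entry point is exportFormats(dataset, formatNames), with a null list preserving the old export-everything behavior (this is exactly how the OAIRecordServiceBean change further down uses it). A sketch, assuming an already-released Dataset; the "ddi" and "oai_dc" format names are standard Dataverse exporter names:

    import edu.harvard.iq.dataverse.Dataset;
    import edu.harvard.iq.dataverse.export.ExportService;
    import io.gdcc.spi.export.ExportException;
    import java.util.List;

    public class SelectiveExportDemo {
        // Illustrative only: re-export just the DDI and Dublin Core formats.
        static void reExportSelected(Dataset dataset) {
            ExportService exportService = ExportService.getInstance();
            try {
                exportService.exportFormats(dataset, List.of("ddi", "oai_dc"));
            } catch (ExportException ee) {
                System.err.println("Selective export failed: " + ee.getMessage());
            }
            // Passing null instead of a list re-exports every registered format:
            // exportService.exportFormats(dataset, null);
        }
    }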
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java
index f0d77eb8b52..c0dc92b0f7e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java
@@ -3,6 +3,7 @@
 import java.io.InputStream;
 import java.util.Optional;

+import jakarta.enterprise.inject.spi.CDI;
 import jakarta.json.Json;
 import jakarta.json.JsonArray;
 import jakarta.json.JsonArrayBuilder;
@@ -11,12 +12,18 @@
 import edu.harvard.iq.dataverse.DataCitation;
 import edu.harvard.iq.dataverse.DataFile;
 import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.DatasetVersionFilesServiceBean;
 import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.FileSearchCriteria;
 import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService;
 import io.gdcc.spi.export.ExportDataProvider;
 import edu.harvard.iq.dataverse.util.bagit.OREMap;
 import edu.harvard.iq.dataverse.util.json.JsonPrinter;
 import edu.harvard.iq.dataverse.util.json.JsonUtil;
+import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_INGESTED_FILE;
+import io.gdcc.spi.export.ExportException;
+import io.gdcc.spi.export.ExportDataContext;
+import java.util.List;

 /**
  * Provides all data necessary to create an export
 *
 */
 public class InternalExportDataProvider implements ExportDataProvider {

-    private DatasetVersion dv;
+    private final DatasetVersion dv;
     private JsonObject jsonRepresentation = null;
+    private JsonObject jsonRepresentationNoFiles = null;
     private JsonObject schemaDotOrgRepresentation = null;
     private JsonObject oreRepresentation = null;
+    private JsonArray fileAndDataDetails = null;
     private InputStream is = null;

     InternalExportDataProvider(DatasetVersion dv) {
@@ -40,16 +49,28 @@ public class InternalExportDataProvider implements ExportDataProvider {
     }

     @Override
-    public JsonObject getDatasetJson() {
+    public JsonObject getDatasetJson(ExportDataContext... context) {
+        if (isOnlyDatasetLevelMetadataRequested(context)) {
+            // If we already have the "full" Json representation (with files)
+            // generated, should we return it (potentially moving MUCH more json
+            // than the client needs), or spend extra cycles generating the short
+            // form from scratch? - I'm choosing to go with the latter.
+            if (jsonRepresentationNoFiles == null) {
+                final JsonObjectBuilder datasetAsJsonBuilder = JsonPrinter.datasetAsJsonForDTO(dv, false);
+                jsonRepresentationNoFiles = datasetAsJsonBuilder.build();
+            }
+            return jsonRepresentationNoFiles;
+        }
+
         if (jsonRepresentation == null) {
-            final JsonObjectBuilder datasetAsJsonBuilder = JsonPrinter.jsonAsDatasetDto(dv);
+            final JsonObjectBuilder datasetAsJsonBuilder = JsonPrinter.datasetAsJsonForDTO(dv);
             jsonRepresentation = datasetAsJsonBuilder.build();
         }
         return jsonRepresentation;
     }
-    
+
     @Override
-    public JsonObject getDatasetSchemaDotOrg() {
+    public JsonObject getDatasetSchemaDotOrg(ExportDataContext... context) {
         if (schemaDotOrgRepresentation == null) {
             String jsonLdAsString = dv.getJsonLd();
             schemaDotOrgRepresentation = JsonUtil.getJsonObject(jsonLdAsString);
@@ -58,7 +79,7 @@ public JsonObject getDatasetSchemaDotOrg() {
     }

     @Override
-    public JsonObject getDatasetORE() {
+    public JsonObject getDatasetORE(ExportDataContext... context) {
         if (oreRepresentation == null) {
             oreRepresentation = new OREMap(dv).getOREMap();
         }
@@ -66,27 +87,128 @@ public JsonObject getDatasetORE() {
     }

     @Override
-    public String getDataCiteXml() {
+    public String getDataCiteXml(ExportDataContext... context) {
         return DOIDataCiteRegisterService.getMetadataFromDvObject(
                 dv.getDataset().getGlobalId().asString(), new DataCitation(dv).getDataCiteMetadata(), dv.getDataset());
     }

     @Override
-    public JsonArray getDatasetFileDetails() {
+    public JsonArray getDatasetFileDetails(ExportDataContext... context) {
+        if (fileAndDataDetails == null) {
+            JsonArrayBuilder jab = Json.createArrayBuilder();
+            for (FileMetadata fileMetadata : dv.getFileMetadatas()) {
+                DataFile dataFile = fileMetadata.getDataFile();
+                jab.add(JsonPrinter.json(dataFile, fileMetadata, true, false, true));
+            }
+            fileAndDataDetails = jab.build();
+        }
+        return fileAndDataDetails;
+    }
+
+    /**
+     * This new (as of dataverse-spi 2.1.0) method will attempt to retrieve
+     * the requested tabular metadata more efficiently, by calling the
+     * DatasetVersionFilesServiceBean method directly, which, among other things,
+     * allows retrieving this information in batches. If for whatever reason
+     * that fails - if, for example, the EJB is not available in this context -
+     * we will throw an ExportException, giving the exporter a chance to try and
+     * retrieve this information using the traditional all-at-once method via
+     * getDatasetFileDetails().
+     */
+    @Override
+    public JsonArray getTabularDataDetails(ExportDataContext... context) throws ExportException {
         JsonArrayBuilder jab = Json.createArrayBuilder();
-        for (FileMetadata fileMetadata : dv.getFileMetadatas()) {
+
+        List<FileMetadata> fileMetadatas;
+        DatasetVersionFilesServiceBean datasetVersionFilesService = null;
+        try {
+            datasetVersionFilesService = CDI.current().select(DatasetVersionFilesServiceBean.class).get();
+        } catch (IllegalArgumentException | IllegalStateException ie) {
+            throw new ExportException("EJB DatasetVersionFilesService is not available; " + ie.getMessage());
+        }
+
+        if (datasetVersionFilesService == null) {
+            throw new ExportException("EJB DatasetVersionFilesService is not available");
+        }
+
+        fileMetadatas = datasetVersionFilesService.getTabularDataFileMetadatas(dv,
+                getLength(context),
+                getOffset(context),
+                isOnlyPublicMetadataRequested(context));
+
+        for (FileMetadata fileMetadata : fileMetadatas) {
             DataFile dataFile = fileMetadata.getDataFile();
-            jab.add(JsonPrinter.json(dataFile, fileMetadata, true));
+            jab.add(JsonPrinter.jsonDatafileWithDatatableForExport(dataFile, fileMetadata));
         }
         return jab.build();
     }

     @Override
-    public Optional<InputStream> getPrerequisiteInputStream() {
+    public Optional<InputStream> getPrerequisiteInputStream(ExportDataContext... context) {
         return Optional.ofNullable(is);
     }
-    
+
     public void setPrerequisiteInputStream(InputStream prereqStream) {
         this.is = prereqStream;
     }
+
+    /**
+     * Only one context object is supported.
+     * @param contexts optional export context (only the first one, if any, is used)
+     * @return true if only the dataset-level metadata was requested
+     */
+    private boolean isOnlyDatasetLevelMetadataRequested(ExportDataContext... contexts) {
+        for (ExportDataContext context : contexts) {
+            return context.isDatasetMetadataOnly();
+        }
+
+        // By default, if no context is supplied, we pack both the Dataset-
+        // and the File-level metadata into that Json
+        return false;
+    }
+
+    /**
+     * Only one context object is supported.
+     * @param contexts optional export context (only the first one, if any, is used)
+     * @return true if metadata was requested for public files only
+     */
+    private boolean isOnlyPublicMetadataRequested(ExportDataContext... contexts) {
+        for (ExportDataContext context : contexts) {
+            return context.isPublicFilesOnly();
+        }
+
+        // By default, if no context is supplied, we return the metadata for all
+        // files - embargoed, restricted, etc.:
+        return false;
+    }
+
+    /**
+     * Only one context object is supported.
+     * @param contexts optional export context (only the first one, if any, is used)
+     * @return the requested pagination offset, or null
+     */
+    private Integer getOffset(ExportDataContext... contexts) {
+        for (ExportDataContext context : contexts) {
+            return context.getOffset();
+        }
+        return null;
+    }
+
+    /**
+     * Only one context object is supported.
+     * @param contexts optional export context (only the first one, if any, is used)
+     * @return the requested pagination length, or null
+     */
+    private Integer getLength(ExportDataContext... contexts) {
+        for (ExportDataContext context : contexts) {
+            return context.getLength();
+        }
+        return null;
+    }
+}
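Seen from an exporter plugin, the varargs context keeps the SPI backward compatible while enabling paging. A sketch of a plugin-side loop that drains the tabular details page by page; the batch size is the plugin's own choice, and the loop relies only on methods visible in this diff:

    import io.gdcc.spi.export.ExportDataContext;
    import io.gdcc.spi.export.ExportDataProvider;
    import io.gdcc.spi.export.ExportException;
    import jakarta.json.JsonArray;
    import jakarta.json.JsonObject;

    public class BatchedTabularDemo {
        private static final int BATCH_SIZE = 50; // the plugin's choice

        // Illustrative exporter-side loop fetching tabular details in pages.
        static void processTabularDetails(ExportDataProvider dataProvider) throws ExportException {
            int offset = 0;
            while (true) {
                JsonArray page = dataProvider.getTabularDataDetails(
                        ExportDataContext.context().withOffset(offset).withLength(BATCH_SIZE));
                for (int i = 0; i < page.size(); i++) {
                    JsonObject fileJson = page.getJsonObject(i);
                    // consume one datafile-worth of tabular metadata here...
                }
                if (page.size() < BATCH_SIZE) {
                    break; // last (possibly short) page
                }
                offset += BATCH_SIZE;
            }
        }
    }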
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
index f5cc86bf8ee..96a5fac2790 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
@@ -6,6 +6,7 @@
 import edu.harvard.iq.dataverse.DatasetFieldConstant;
 import edu.harvard.iq.dataverse.DvObjectContainer;
 import edu.harvard.iq.dataverse.GlobalId;
+import edu.harvard.iq.dataverse.api.dto.DataTableDTO;
 import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO;
 import edu.harvard.iq.dataverse.api.dto.DatasetDTO;
 import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO;
@@ -31,6 +32,9 @@
 import edu.harvard.iq.dataverse.util.xml.XmlPrinter;
 import edu.harvard.iq.dataverse.util.xml.XmlUtil;
 import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil;
+import io.gdcc.spi.export.ExportDataContext;
+import io.gdcc.spi.export.ExportDataProvider;
+import io.gdcc.spi.export.ExportException;

 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@@ -81,6 +85,8 @@ public class DdiExportUtil {
     public static final String NOTE_TYPE_CONTENTTYPE = "DATAVERSE:CONTENTTYPE";
     public static final String NOTE_SUBJECT_CONTENTTYPE = "Content/MIME Type";
     public static final String CITATION_BLOCK_NAME = "citation";
+    public static final int DATATABLES_BATCH_SIZE = 50;
+    public static final int DATAVARIABLES_BATCH_SIZE = 10000; // todo: review

     //Some tests don't send real PIDs that can be parsed
     //Use constant empty PID in these cases
@@ -125,7 +131,13 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr
             xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage());
         }
         createStdyDscr(xmlw, datasetDto);
-        createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles());
+        if (datasetDto.getDatasetVersion().getFiles() != null) {
+            // We create the "otherMat" sections with skipTabularFiles = false, because
+            // this is the short version of the DDI, where all the files, whether
+            // ingested or not, are encoded as otherMats:
+            createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles(), false);
+        }
         xmlw.writeEndElement(); // codeBook
         xmlw.flush();
     } finally {
@@ -142,11 +154,11 @@

     // "full" ddi, with the "<fileDscr>" and "<dataDscr>/<var>" sections:
-    public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDetails, OutputStream outputStream) throws XMLStreamException {
+    public static void datasetJson2ddi(JsonObject datasetDtoAsJson, ExportDataProvider dataProvider, OutputStream outputStream) throws XMLStreamException {
         logger.fine(JsonUtil.prettyPrint(datasetDtoAsJson.toString()));
         Gson gson = new Gson();
         DatasetDTO datasetDto = gson.fromJson(datasetDtoAsJson.toString(), DatasetDTO.class);
-        
+
         XMLStreamWriter xmlw = null;
         try {
             xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
@@ -160,9 +172,38 @@
                 xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage());
             }
             createStdyDscr(xmlw, datasetDto);
-            createFileDscr(xmlw, fileDetails);
-            createDataDscr(xmlw, fileDetails);
-            createOtherMatsFromFileMetadatas(xmlw, fileDetails);
+
+            // If there are no files in this dataset, we can stop here
+            if (datasetDto.getDatasetVersion().getFiles() != null) {
+
+                // The Files and Data section, for the rich metadata describing
+                // the "ingested" tabular data files.
+                // Note that as of 6.8, we are generating the fileDscr from the DTOs
+                // supplied by ExportDataProvider.getDatasetJson()
+                List<Long> varQuantityMap = createFileDscrs(xmlw, datasetDto.getDatasetVersion().getFiles());
+
+                if (varQuantityMap != null && !varQuantityMap.isEmpty()) {
+                    // Now that we know that there are 1 or more ingested tabular files
+                    // in the dataset, we can try and produce the dataDscr section.
+                    // A dataset with a large number of ingested files may contain more
+                    // of such metadata than is practical or desirable to pass around
+                    // as a single chunk of json. As of dataverse-spi 2.1.0, a more
+                    // efficient method is provided for retrieving this information in
+                    // chunks of length-offset datatables-worth at a time.
+                    if (isVarQuantityLimitExceeded(varQuantityMap)) {
+                        createDataDscrInBatches(xmlw, varQuantityMap, dataProvider);
+                    } else {
+                        createDataDscr(xmlw, dataProvider.getDatasetFileDetails());
+                    }
+                }
+                // otherMats section:
+                // Note that we are asking createOtherMats() to skip tabular files,
+                // since we have already created the fileDscr and dataDscr sections
+                // for those.
+                createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles(), true);
+            }
             xmlw.writeEndElement(); // codeBook
             xmlw.flush();
         } finally {
@@ -177,6 +218,18 @@
         }
     }

+    private static boolean isVarQuantityLimitExceeded(List<Long> varQuantityMap) {
+        if (varQuantityMap != null) {
+            long varQuantityCount = 0;
+            for (long varQuantity : varQuantityMap) {
+                varQuantityCount += varQuantity;
+                if (varQuantityCount > DATAVARIABLES_BATCH_SIZE) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }

     /**
      * @todo This is just a stub, copied from DDIExportServiceBean. It should
      * produce valid DDI based on
@@ -1427,7 +1480,7 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da
     // see if there's more information that we could encode in this otherMat.
     // contentType? Unfs and such? (in the "short" DDI that is being used for
     // harvesting *all* files are encoded as otherMats; even tabular ones.
-    private static void createOtherMats(XMLStreamWriter xmlw, List<FileDTO> fileDtos) throws XMLStreamException {
+    private static void createOtherMats(XMLStreamWriter xmlw, List<FileDTO> fileDtos, boolean skipTabularFiles) throws XMLStreamException {
         // The preferred URL for this dataverse, for cooking up the file access API links:
         String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic();

@@ -1435,7 +1488,7 @@
             // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat,
             // tabular ones - in fileDscr sections. (fileDscr sections have special fields for numbers of variables
             // and observations, etc.)
-            if (fileDTo.getDataFile().getDataTables() == null || fileDTo.getDataFile().getDataTables().isEmpty()) {
+            if (!(skipTabularFiles && isTabularData(fileDTo))) {
                 xmlw.writeStartElement("otherMat");
                 XmlWriterUtil.writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId());
                 String pidURL = fileDTo.getDataFile().getPidURL();
@@ -1473,6 +1526,13 @@
     // tell if this file is in fact tabular data - so that we know if it needs an
     // otherMat, or a fileDscr section.
     // -- L.A. 4.5
+    // [update:] Since the comment above was written, the method below was changed
+    // to operate on a JsonArray, as provided by the FileDetails method in the
+    // ExportDataProvider. However, as of 6.8, this method is no longer used at
+    // all. This is because the DTOs supplied by ExportDataProvider.getDatasetJson()
+    // DO in fact contain enough information to generate the otherMat sections
+    // properly, whether this is a short or a full version of the DDI. I am, however,
+    // leaving this method here for reference.
     private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonArray fileDetails) throws XMLStreamException {
         // The preferred URL for this dataverse, for cooking up the file access API links:

@@ -1483,7 +1543,7 @@
             // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat,
             // tabular ones - in fileDscr sections. (fileDscr sections have special fields for numbers of variables
             // and observations, etc.)
-            if (!fileJson.containsKey("dataTables")) {
+            if (!fileJson.getBoolean("tabularData", false)) {
                 xmlw.writeStartElement("otherMat");
                 xmlw.writeAttribute("ID", "f" + fileJson.getJsonNumber(("id").toString()));
                 if (fileJson.containsKey("pidUrl")){
@@ -1519,12 +1579,14 @@
     }

     private static void writeFileDescription(XMLStreamWriter xmlw, FileDTO fileDTo) throws XMLStreamException {
-        xmlw.writeStartElement("txt");
         String description = fileDTo.getDataFile().getDescription();
         if (description != null) {
+            xmlw.writeStartElement("txt");
             xmlw.writeCharacters(description);
+            xmlw.writeEndElement(); // txt
         }
-        xmlw.writeEndElement(); // txt
     }

@@ -1594,7 +1656,7 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) throws
             return;
         }

-        boolean tabularData = false;
+        boolean dataDscrWritten = false;

         // we're not writing the opening <dataDscr> tag until we find an actual
         // tabular datafile.
@@ -1608,44 +1670,97 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) throws
             * should instead use the "Data Variable Metadata Access" endpoint.)
             * These days we skip restricted files to avoid this exposure.
             */
-            if (fileJson.containsKey("restricted") && fileJson.getBoolean("restricted")) {
+            if (isFileRestricted(fileJson)) {
                continue;
            }
-            if (fileJson.containsKey("embargo")) {
-                String dateString = fileJson.getJsonObject("embargo").getString("dateAvailable");
-                LocalDate endDate = LocalDate.parse(dateString);
-                if (endDate != null && endDate.isAfter(LocalDate.now())) {
-                    //Embargo is active so skip
-                    continue;
-                }
-            }

             if (fileJson.containsKey("dataTables")) {
-                if (!tabularData) {
+                if (!dataDscrWritten) {
                     xmlw.writeStartElement("dataDscr");
-                    tabularData = true;
+                    dataDscrWritten = true;
                 }
-                if (fileJson.containsKey("varGroups")) {
-                    JsonArray varGroups = fileJson.getJsonArray("varGroups");
-                    for (int j = 0; j < varGroups.size(); j++) {
-                        createVarGroupDDI(xmlw, varGroups.getJsonObject(j));
-                    }
-                }
-                JsonObject dataTable = fileJson.getJsonArray("dataTables").getJsonObject(0);
-                JsonArray vars = dataTable.getJsonArray("dataVariables");
-                if (vars != null) {
-                    for (int j = 0; j < vars.size(); j++) {
-                        createVarDDI(xmlw, vars.getJsonObject(j), fileJson.getJsonNumber("id").toString(),
-                                fileJson.getJsonNumber("fileMetadataId").toString());
-                    }
-                }
+                createVariablesForDataFile(xmlw, fileJson);
             }
         }

-        if (tabularData) {
+        if (dataDscrWritten) {
             xmlw.writeEndElement(); // dataDscr
         }
     }

+    private static void createDataDscrInBatches(XMLStreamWriter xmlw, List<Long> varQuantityMap, ExportDataProvider exportDataProvider) throws XMLStreamException {
+        boolean dataDscrWritten = false;
+
+        try {
+            int dataTableStart = 0;
+            int dataTablesThisBatch = 0;
+            int varQuantityThisBatch = 0;
+
+            for (int dataTableCurrent = 0; dataTableCurrent < varQuantityMap.size(); dataTableCurrent++) {
+                varQuantityThisBatch += varQuantityMap.get(dataTableCurrent);
+                dataTablesThisBatch++;
+
+                if (varQuantityThisBatch >= DATAVARIABLES_BATCH_SIZE || dataTableCurrent == varQuantityMap.size() - 1) {
+                    JsonArray tabularFileDetails = exportDataProvider.getTabularDataDetails(
+                            ExportDataContext.context().withOffset(dataTableStart).withLength(dataTablesThisBatch));
+                    logger.fine("requested: " + dataTablesThisBatch + " tabular file data entries; retrieved: " + tabularFileDetails.size());
+                    logger.fine("total number of variables in this batch: " + varQuantityThisBatch);
+
+                    for (int i = 0; i < tabularFileDetails.size(); i++) {
+                        JsonObject fileJson = tabularFileDetails.getJsonObject(i);
+
+                        if (isFileRestricted(fileJson)) {
+                            continue;
+                        }
+
+                        if (fileJson.containsKey("dataTables")) {
+                            if (!dataDscrWritten) {
+                                xmlw.writeStartElement("dataDscr");
+                                dataDscrWritten = true;
+                            }
+                            createVariablesForDataFile(xmlw, fileJson);
+                        }
+                    }
+
+                    dataTableStart += dataTablesThisBatch;
+                    dataTablesThisBatch = 0;
+                    varQuantityThisBatch = 0;
+                }
+            }
+
+            if (dataDscrWritten) {
+                xmlw.writeEndElement(); // dataDscr
+            }
+        } catch (ExportException ee) {
+            if (dataDscrWritten) {
+                // Unfortunately, we've already written some output by the time
+                // this exception was caught. We have no other choice but to
+                // give up:
+                throw new XMLStreamException("Failed to write the dataDscr variable-level section using exportDataProvider.getTabularDataDetails()");
+            } else {
+                // Looks like we haven't written anything out yet. We can try
+                // to produce the dataDscr section using the classic, "all-at-once"
+                // approach instead:
+                createDataDscr(xmlw, exportDataProvider.getDatasetFileDetails());
+            }
+        }
+    }
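To make the cut-off rule concrete: a batch is closed as soon as the accumulated variable count reaches or exceeds DATAVARIABLES_BATCH_SIZE, so a single very wide table always ends its batch. A standalone simulation of the cut points (the variable counts are invented):

    import java.util.List;

    public class BatchCutDemo {
        static final int DATAVARIABLES_BATCH_SIZE = 10000;

        public static void main(String[] args) {
            // varQuantityMap as produced by createFileDscrs(): one entry per data table.
            List<Long> varQuantityMap = List.of(4000L, 7000L, 2000L, 9000L, 12000L);

            int start = 0, tables = 0;
            long vars = 0;
            for (int i = 0; i < varQuantityMap.size(); i++) {
                vars += varQuantityMap.get(i);
                tables++;
                if (vars >= DATAVARIABLES_BATCH_SIZE || i == varQuantityMap.size() - 1) {
                    System.out.println("batch: offset=" + start + " length=" + tables + " vars=" + vars);
                    start += tables;
                    tables = 0;
                    vars = 0;
                }
            }
            // prints: offset=0 length=2 vars=11000
            //         offset=2 length=2 vars=11000
            //         offset=4 length=1 vars=12000
        }
    }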
+    private static void createVariablesForDataFile(XMLStreamWriter xmlw, JsonObject fileJson) throws XMLStreamException {
+        if (fileJson.containsKey("varGroups")) {
+            JsonArray varGroups = fileJson.getJsonArray("varGroups");
+            for (int j = 0; j < varGroups.size(); j++) {
+                createVarGroupDDI(xmlw, varGroups.getJsonObject(j));
+            }
+        }
+        JsonObject dataTable = fileJson.getJsonArray("dataTables").getJsonObject(0);
+        JsonArray vars = dataTable.getJsonArray("dataVariables");
+        if (vars != null) {
+            logger.fine(vars.size() + " variables retrieved for file " + fileJson.getJsonNumber("id"));
+            for (int j = 0; j < vars.size(); j++) {
+                createVarDDI(xmlw, vars.getJsonObject(j), fileJson.getJsonNumber("id").toString(),
+                        fileJson.getJsonNumber("fileMetadataId").toString());
+            }
+        }
+    }

     private static void createVarGroupDDI(XMLStreamWriter xmlw, JsonObject varGrp) throws XMLStreamException {
         xmlw.writeStartElement("varGrp");
         xmlw.writeAttribute("ID", "VG" + varGrp.getJsonNumber("id").toString());
@@ -1908,46 +2023,57 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String fileId,
         xmlw.writeEndElement(); //var
     }
-
-    private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) throws XMLStreamException {
+
+    private static List<Long> createFileDscrs(XMLStreamWriter xmlw, List<FileDTO> fileDtos) throws XMLStreamException {
+        List<Long> ret = new ArrayList<>();
+        int counter = 0;
+        long totalVarQuantity = 0;
+
+        logger.fine("total " + fileDtos.size() + " file DTOs to process for fileDscr");
         String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic();
-        for (int i = 0; i < fileDetails.size(); i++) {
+        for (int i = 0; i < fileDtos.size(); i++) {
+            FileDTO fileDTo = fileDtos.get(i);
+
+            if (!isTabularData(fileDTo)) {
+                continue;
+            }
+
+            DataTableDTO dataTableDTO = fileDTo.getDataFile().getDataTables().get(0);
+            ret.add(dataTableDTO.getVarQuantity());
+            totalVarQuantity += dataTableDTO.getVarQuantity();
+
+            xmlw.writeStartElement("fileDscr");
+            XmlWriterUtil.writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId());

-            if (fileJson.containsKey("description")) {
+            if (fileDTo.getDataFile().getDescription() != null) {
                 xmlw.writeStartElement("notes");
                 xmlw.writeAttribute("level", LEVEL_FILE);
                 xmlw.writeAttribute("type", NOTE_TYPE_FILEDESCRIPTION);
                 xmlw.writeAttribute("subject", NOTE_SUBJECT_FILEDESCRIPTION);
-                xmlw.writeCharacters(fileJson.getString("description"));
+                xmlw.writeCharacters(fileDTo.getDataFile().getDescription());
                 xmlw.writeEndElement(); // notes
             }
             // TODO: add the remaining fileDscr elements!
             xmlw.writeEndElement(); // fileDscr
+            counter++;
         }
+
+        logger.fine("produced " + counter + " fileDscr entries; total number of variables found: " + totalVarQuantity);
+        return ret;
     }
-
-
-
-
-
+
     public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException {
         try {
@@ -2051,5 +2176,35 @@ public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStrea
     public static void injectSettingsService(SettingsServiceBean settingsSvc) {
         settingsService = settingsSvc;
     }
+
+    private static boolean isTabularData(FileDTO fileDTO) {
+        return !(fileDTO.getDataFile().getDataTables() == null || fileDTO.getDataFile().getDataTables().isEmpty());
+    }
+
+    /**
+     * Previously (in Dataverse 5.3 and below) the dataDscr section was included
+     * for restricted files, but that meant that summary statistics were exposed.
+     * (To get at these statistics, API users should instead use the "Data
+     * Variable Metadata Access" endpoint.) These days we skip restricted files,
+     * as well as files under an active embargo, to avoid this exposure.
+     * @param fileJson - a JsonObject representing one datafile/datatable-worth
+     * of tabular data.
+     */
+    private static boolean isFileRestricted(JsonObject fileJson) {
+        if (fileJson.containsKey("restricted") && fileJson.getBoolean("restricted")) {
+            return true;
+        }
+        if (fileJson.containsKey("embargo")) {
+            String dateString = fileJson.getJsonObject("embargo").getString("dateAvailable");
+            LocalDate endDate = LocalDate.parse(dateString);
+            if (endDate != null && endDate.isAfter(LocalDate.now())) {
+                // Embargo is active, so skip
+                return true;
+            }
+        }
+        return false;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java
index cc15d4c978b..6719d46adf0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java
@@ -259,9 +259,14 @@ public void exportAllFormats(Dataset dataset) {

     @TransactionAttribute(REQUIRES_NEW)
     public void exportAllFormatsInNewTransaction(Dataset dataset) throws ExportException {
+        exportFormatsInNewTransaction(dataset, null);
+    }
+
+    @TransactionAttribute(REQUIRES_NEW)
+    public void exportFormatsInNewTransaction(Dataset dataset, List<String> formatNames) throws ExportException {
         try {
             ExportService exportServiceInstance = ExportService.getInstance();
-            exportServiceInstance.exportAllFormats(dataset);
+            exportServiceInstance.exportFormats(dataset, formatNames);
             dataset = datasetService.merge(dataset);
         } catch (Exception e) {
             logger.log(Level.FINE, "Caught unknown exception while trying to export", e);
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
index 46a05fc93f2..ff0d986892b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
@@ -63,6 +63,7 @@
 import jakarta.ejb.Singleton;
 import jakarta.json.JsonArray;
 import jakarta.json.JsonObject;
+import java.math.BigDecimal;

 /**
  * Convert objects to Json.
@@ -523,6 +524,11 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List<String> anonymized
     }

     public static JsonObjectBuilder json(DatasetVersion dsv, List<String> anonymizedFieldTypeNamesList, boolean includeFiles, boolean returnOwners, boolean includeMetadataBlocks) {
+        return json(dsv, anonymizedFieldTypeNamesList, includeFiles, returnOwners, includeMetadataBlocks, false);
+    }
+
+    public static JsonObjectBuilder json(DatasetVersion dsv, List<String> anonymizedFieldTypeNamesList,
+            boolean includeFiles, boolean returnOwners, boolean includeMetadataBlocks, boolean forExportDataProvider) {
         Dataset dataset = dsv.getDataset();
         JsonObjectBuilder bld = jsonObjectBuilder()
                 .add("id", dsv.getId()).add("datasetId", dataset.getId())
@@ -580,7 +586,7 @@
             bld.add("isPartOf", getOwnersFromDvObject(dataset));
         }
         if (includeFiles) {
-            bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas()));
+            bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas(), forExportDataProvider));
         }

         return bld;
@@ -605,6 +611,22 @@ public static JsonObjectBuilder jsonDataFileList(List<DataFile> dataFiles){
         return bld;
     }

+    public static JsonObjectBuilder datasetAsJsonForDTO(DatasetVersion dsv) {
+        return datasetAsJsonForDTO(dsv, true);
+    }
+
+    /**
+     * Same as above, but gives an option to skip the file-level info
+     * @param dsv the DatasetVersion to convert
+     * @param includeFiles whether to include the file-level metadata
+     * @return a JsonObjectBuilder for the dataset, DTO-style
+     */
+    public static JsonObjectBuilder datasetAsJsonForDTO(DatasetVersion dsv, boolean includeFiles) {
+        JsonObjectBuilder jsonForDTO = JsonPrinter.json(dsv.getDataset());
+        jsonForDTO.add("datasetVersion", versionAsJsonForDTO(dsv, includeFiles));
+        return jsonForDTO;
+    }
+
     /**
      * Export formats such as DDI require the citation to be included. See
      * https://github.com/IQSS/dataverse/issues/2579 for more on DDI export.
@@ -613,34 +635,20 @@
      * to the regular `json` method for DatasetVersion? Will anything break?
      * Unit tests for that method could not be found.
      */
-    public static JsonObjectBuilder jsonWithCitation(DatasetVersion dsv, boolean includeFiles) {
-        JsonObjectBuilder dsvWithCitation = JsonPrinter.json(dsv, includeFiles);
+    private static JsonObjectBuilder versionAsJsonForDTO(DatasetVersion dsv, boolean includeFiles) {
+        JsonObjectBuilder dsvWithCitation = JsonPrinter.json(dsv, null, includeFiles, false, true, true);
         dsvWithCitation.add("citation", dsv.getCitation());

         return dsvWithCitation;
     }

-    /**
-     * Export formats such as DDI require the persistent identifier components
-     * such as "protocol", "authority" and "identifier" to be included so we
-     * create a JSON object we can convert to a DatasetDTO which can include a
-     * DatasetVersionDTO, which has all the metadata fields we need to export.
-     * See https://github.com/IQSS/dataverse/issues/2579 for more on DDI export.
-     *
-     * @todo Instead of having this separate method, should "datasetVersion" be
-     * added to the regular `json` method for Dataset? Will anything break? Unit
-     * tests for that method could not be found. If we keep this method as-is
-     * should the method be renamed?
-     */
-    public static JsonObjectBuilder jsonAsDatasetDto(DatasetVersion dsv) {
-        JsonObjectBuilder datasetDtoAsJson = JsonPrinter.json(dsv.getDataset());
-        datasetDtoAsJson.add("datasetVersion", jsonWithCitation(dsv, true));
-        return datasetDtoAsJson;
-    }
-
     public static JsonArrayBuilder jsonFileMetadatas(Collection<FileMetadata> fmds) {
+        return jsonFileMetadatas(fmds, false);
+    }
+
+    public static JsonArrayBuilder jsonFileMetadatas(Collection<FileMetadata> fmds, boolean forExportDataProvider) {
         JsonArrayBuilder filesArr = Json.createArrayBuilder();
         for (FileMetadata fmd : fmds) {
-            filesArr.add(JsonPrinter.json(fmd));
+            filesArr.add(JsonPrinter.json(fmd, false, false, forExportDataProvider));
         }

         return filesArr;
@@ -854,6 +862,10 @@ public static JsonObjectBuilder json(FileMetadata fmd){
     }

     public static JsonObjectBuilder json(FileMetadata fmd, boolean returnOwners, boolean printDatasetVersion) {
+        return json(fmd, returnOwners, printDatasetVersion, false);
+    }
+
+    public static JsonObjectBuilder json(FileMetadata fmd, boolean returnOwners, boolean printDatasetVersion, boolean forExportDataProvider) {
         NullSafeJsonBuilder builder = jsonObjectBuilder();

         // deprecated: .add("category", fmd.getCategory())
@@ -869,7 +881,7 @@
                 .add("version", fmd.getVersion())
                 .add("datasetVersionId", fmd.getDatasetVersion().getId())
                 .add("categories", getFileCategories(fmd))
-                .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, false, returnOwners));
+                .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, forExportDataProvider, returnOwners));

         if (printDatasetVersion) {
             builder.add("datasetVersion", json(fmd.getDatasetVersion(), false));
@@ -900,14 +912,14 @@
     }

     public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider, boolean returnOwners) {
-        // File names are no longer stored in the DataFile entity;
-        // (they are instead in the FileMetadata (as "labels") - this way
-        // the filename can change between versions...
-        // It does appear that for some historical purpose we still need the
-        // filename in the file DTO (?)... We rely on it to be there for the
-        // DDI export, for example. So we need to make sure this is is the
-        // *correct* file name - i.e., that it comes from the right version.
-        // (TODO...? L.A. 4.5, Aug 7 2016)
+        return json(df, fileMetadata, forExportDataProvider, returnOwners, false);
+    }
+
+    public static JsonObjectBuilder jsonDatafileWithDatatableForExport(DataFile df, FileMetadata fileMetadata) {
+        return json(df, fileMetadata, true, false, true);
+    }
+
+    public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider, boolean returnOwners, boolean includeVariables) {
         String fileName = null;

         if (fileMetadata == null){
@@ -971,14 +983,18 @@
          * The restricted state was not included prior to #9175 so to avoid backward
          * incompatibility, it is now only added when generating json for the
          * InternalExportDataProvider fileDetails.
+         * [update:] more fields have been added below that are only there
+         * when the json is requested by the InternalExportDataProvider.
          */
         if (forExportDataProvider) {
             builder.add("restricted", df.isRestricted())
-                    .add("fileMetadataId", fileMetadata.getId())
-                    .add("dataTables", df.getDataTables().isEmpty() ? null : JsonPrinter.jsonDT(df.getDataTables()))
-                    .add("varGroups", fileMetadata.getVarGroups().isEmpty()
-                            ? JsonPrinter.jsonVarGroup(fileMetadata.getVarGroups())
-                            : null);
+                    .add("fileMetadataId", fileMetadata.getId())
+                    .add("dataTables", df.getDataTables().isEmpty() ? null : jsonDT(df.getDataTables(), includeVariables));
+            if (includeVariables) {
+                builder.add("varGroups", fileMetadata.getVarGroups().isEmpty()
+                        ? null
+                        : JsonPrinter.jsonVarGroup(fileMetadata.getVarGroups()));
+            }
         }
         if (returnOwners){
             builder.add("isPartOf", getOwnersFromDvObject(df, fileMetadata.getDatasetVersion()));
@@ -987,22 +1003,24 @@
     }

     //Started from https://github.com/RENCI-NRIG/dataverse/, i.e. https://github.com/RENCI-NRIG/dataverse/commit/2b5a1225b42cf1caba85e18abfeb952171c6754a
-    public static JsonArrayBuilder jsonDT(List<DataTable> ldt) {
+    public static JsonArrayBuilder jsonDT(List<DataTable> ldt, boolean includeVariables) {
         JsonArrayBuilder ldtArr = Json.createArrayBuilder();
         for (DataTable dt : ldt) {
-            ldtArr.add(JsonPrinter.json(dt));
+            ldtArr.add(JsonPrinter.json(dt, includeVariables));
         }
         return ldtArr;
     }

-    public static JsonObjectBuilder json(DataTable dt) {
-        return jsonObjectBuilder()
+    public static JsonObjectBuilder json(DataTable dt, boolean includeVariables) {
+        JsonObjectBuilder builder = jsonObjectBuilder()
                 .add("varQuantity", dt.getVarQuantity())
                 .add("caseQuantity", dt.getCaseQuantity())
                 .add("recordsPerCase", dt.getRecordsPerCase())
-                .add("UNF", dt.getUnf())
-                .add("dataVariables", JsonPrinter.jsonDV(dt.getDataVariables()))
-                ;
+                .add("UNF", dt.getUnf());
+        if (includeVariables) {
+            builder.add("dataVariables", JsonPrinter.jsonDV(dt.getDataVariables()));
+        }
+        return builder;
     }

     public static JsonArrayBuilder jsonDV(List<DataVariable> dvl) {
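The net effect of the new flags on the JSON emitted for a tabular datafile can be summarized by the two entry points; in this sketch, dataFile and fileMetadata are assumed to be in scope:

    // For the dataDscr path (full variable-level metadata):
    JsonObject withVariables =
            JsonPrinter.jsonDatafileWithDatatableForExport(dataFile, fileMetadata).build();
    // contains "restricted", "fileMetadataId", "dataTables" (with "dataVariables") and "varGroups"

    // For the dataset-level export JSON (fileDscr path), variables are left out:
    JsonObject withoutVariables =
            JsonPrinter.json(dataFile, fileMetadata, true, false, false).build();
    // "dataTables" then carries only varQuantity, caseQuantity, recordsPerCase and UNF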