diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index 80d4ceaabe6..9ed535539db 100644
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
@@ -419,19 +419,25 @@
<id>unidata-all</id>
<name>Unidata All</name>
<url>https://artifacts.unidata.ucar.edu/repository/unidata-all/</url>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
-
+
+ -->
diff --git a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java
index 4197d978e79..282add42c58 100644
--- a/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java
+++ b/modules/dataverse-spi/src/main/java/io/gdcc/spi/export/ExportDataProvider.java
@@ -28,8 +28,10 @@ public interface ExportDataProvider {
* formatting it, since there can be a very large number of
* files in a dataset.
*/
+
JsonObject getDatasetJson(ExportDataContext... context);
+
/**
*
* @return - dataset metadata in the JSON-LD based OAI_ORE format used in
@@ -73,6 +75,7 @@ public interface ExportDataProvider {
* @throws ExportException
*/
JsonArray getTabularDataDetails(ExportDataContext ... context) throws ExportException;
+
/**
*
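
A note on the SPI change above: the varargs ExportDataContext parameter keeps the interface source-compatible for existing exporters, since calling getDatasetJson() with no arguments preserves the old behavior. A minimal consumer-side sketch (hypothetical exporter code; it assumes only the ExportDataContext builder calls that appear elsewhere in this patch):

    // No context: legacy behavior, the dataset json includes file-level metadata.
    JsonObject full = dataProvider.getDatasetJson();
    // With a context: request the dataset-level metadata only.
    JsonObject datasetOnly = dataProvider.getDatasetJson(
            ExportDataContext.context().withDatasetMetadataOnly());
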
diff --git a/pom.xml b/pom.xml
index ceb5ea28d84..cf5030cb137 100644
--- a/pom.xml
+++ b/pom.xml
@@ -680,7 +680,7 @@
io.gdcc
dataverse-spi
- 2.0.0
+ 2.1.0-SNAPSHOT
javax.cache
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
index cca9be7ce9d..bc4047fd835 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -761,7 +761,7 @@ public void exportAllDatasets(boolean forceReExport) {
|| dataset.getLastExportTime().before(publicationDate)))) {
countAll++;
try {
- recordService.exportAllFormatsInNewTransaction(dataset);
+ recordService.exportFormatsInNewTransaction(dataset, null);
exportLogger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString());
countSuccess++;
} catch (Exception ex) {
@@ -785,13 +785,18 @@ public void exportAllDatasets(boolean forceReExport) {
@Asynchronous
public void reExportDatasetAsync(Dataset dataset) {
- exportDataset(dataset, true);
+ exportDataset(dataset, true, null);
+ }
+
+ @Asynchronous
+ public void reExportDatasetAsync(Dataset dataset, List<String> formatNames) {
+ exportDataset(dataset, true, formatNames);
}
- public void exportDataset(Dataset dataset, boolean forceReExport) {
+ private void exportDataset(Dataset dataset, boolean forceReExport, List<String> formatNames) {
if (dataset != null) {
// Note that the logic for handling a dataset is similar to what is implemented in exportAllDatasets,
- // but when only one dataset is exported we do not log in a separate export logging file
+ // but when only one dataset is exported we do not use a dedicated log file
if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) {
// can't trust dataset.getPublicationDate(), no.
@@ -800,7 +805,7 @@ public void exportDataset(Dataset dataset, boolean forceReExport) {
&& (dataset.getLastExportTime() == null
|| dataset.getLastExportTime().before(publicationDate)))) {
try {
- recordService.exportAllFormatsInNewTransaction(dataset);
+ recordService.exportFormatsInNewTransaction(dataset, formatNames);
logger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString());
} catch (Exception ex) {
logger.log(Level.INFO, "Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + ex.getMessage(), ex);
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
index 27c91e8b312..4687d72bdbc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
@@ -180,6 +180,55 @@ public List<FileMetadata> getFileMetadatas(DatasetVersion datasetVersion, Intege
}
return typedQuery.getResultList();
}
+
+ /**
+ * Similar to the above, but dedicated to retrieving FileMetadatas of only
+ * tabular datafiles in the specified DatasetVersion. Used in the metadata
+ * export subsystem.
+ *
+ * @param datasetVersion the DatasetVersion to access
+ * @param limit for pagination, can be null
+ * @param offset for pagination, can be null
+ * @param publicFilesOnly whether to skip restricted, embargoed, etc. files
+ * @return a FileMetadata list from the specified DatasetVersion
+ */
+ public List<FileMetadata> getTabularDataFileMetadatas(DatasetVersion datasetVersion, Integer limit, Integer offset, boolean publicFilesOnly) {
+ CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder();
+ CriteriaQuery<FileMetadata> criteriaQuery = criteriaBuilder.createQuery(FileMetadata.class);
+
+ Root<FileMetadata> fileMetadataRoot = criteriaQuery.from(FileMetadata.class);
+ Predicate basePredicate = criteriaBuilder.equal(fileMetadataRoot.get("datasetVersion").get("id"), datasetVersion.getId());
+
+ Root<DataTable> dataTableRoot = criteriaQuery.from(DataTable.class);
+ Predicate tabularPredicate = criteriaBuilder.equal(dataTableRoot.get("dataFile"), fileMetadataRoot.get("dataFile"));
+
+ Predicate combinedPredicate;
+
+ if (publicFilesOnly) {
+ combinedPredicate = criteriaBuilder.and(basePredicate,
+ tabularPredicate,
+ createSearchCriteriaAccessStatusPredicate(FileSearchCriteria.FileAccessStatus.Public,
+ criteriaBuilder,
+ fileMetadataRoot));
+ } else {
+ combinedPredicate = criteriaBuilder.and(basePredicate, tabularPredicate);
+ }
+
+ criteriaQuery
+ .select(fileMetadataRoot)
+ .where(combinedPredicate)
+ .orderBy(criteriaBuilder.asc(fileMetadataRoot.get("label")));
+
+ TypedQuery<FileMetadata> typedQuery = em.createQuery(criteriaQuery);
+ if (limit != null) {
+ typedQuery.setMaxResults(limit);
+ }
+ if (offset != null) {
+ typedQuery.setFirstResult(offset);
+ }
+
+ return typedQuery.getResultList();
+ }
/**
* Returns the total download size of all files for a particular DatasetVersion
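
Since limit and offset are nullable, callers of the new method can either fetch everything at once or page through large datasets. A paging sketch (the batch size is illustrative, and datasetVersionFilesService stands for an injected instance of the bean above):

    int batchSize = 50;
    int offset = 0;
    List<FileMetadata> batch;
    do {
        batch = datasetVersionFilesService.getTabularDataFileMetadatas(
                datasetVersion, batchSize, offset, true); // public files only
        for (FileMetadata fmd : batch) {
            // process one tabular file's metadata
        }
        offset += batch.size();
    } while (batch.size() == batchSize);
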
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
index 0a1b19985a4..880bd91f0b0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
@@ -1064,7 +1064,7 @@ public Response getFileDataTables(@Context ContainerRequestContext crc, @PathPar
if (!dataFile.isTabularData()) {
return badRequest(BundleUtil.getStringFromBundle("files.api.only.tabular.supported"));
}
- return ok(jsonDT(dataFile.getDataTables()));
+ return ok(jsonDT(dataFile.getDataTables(), true));
}
@POST
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
index bd937878286..351409b39ba 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java
@@ -19,6 +19,9 @@
import edu.harvard.iq.dataverse.harvest.server.OAISetServiceBean;
import edu.harvard.iq.dataverse.harvest.server.OAISet;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
/**
*
@@ -64,10 +67,14 @@ public Response reExportAll() {
@GET
@Path("{id}/reExportDataset")
- public Response indexDatasetByPersistentId(@PathParam("id") String id) {
+ public Response indexDatasetByPersistentId(@PathParam("id") String id, @QueryParam("formats") String formats) {
try {
Dataset dataset = findDatasetOrDie(id);
- datasetService.reExportDatasetAsync(dataset);
+ List<String> formatNames = null;
+ if (formats != null) {
+ formatNames = new ArrayList<>(Arrays.asList(formats.split(",")));
+ }
+ datasetService.reExportDatasetAsync(dataset, formatNames);
return ok("export started");
} catch (WrappedResponse wr) {
return wr.getResponse();
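
With this change a re-export can be limited to specific formats via the optional formats query parameter; omitting it re-exports everything, as before. An illustrative call (the dataset id and format names are made up):

    curl "http://localhost:8080/api/admin/metadata/42/reExportDataset?formats=ddi,oai_dc"
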
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/dto/DataFileDTO.java b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataFileDTO.java
index 318bad4f3a3..f4537b0b7ad 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/dto/DataFileDTO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataFileDTO.java
@@ -20,6 +20,7 @@ public class DataFileDTO {
private String md5;
private String description;
private String pidURL;
+ private List<String> tabularTags;
public String getPidURL() {
return pidURL;
@@ -119,5 +120,11 @@ public void setDescription(String description) {
this.description = description;
}
+ public List<String> getTabularTags() {
+ return tabularTags;
+ }
+ public void setTabularTags(List<String> tabularTags) {
+ this.tabularTags = tabularTags;
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DCTermsExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DCTermsExporter.java
index f82c0d9ad3d..ad5010fdf50 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DCTermsExporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DCTermsExporter.java
@@ -8,6 +8,7 @@
import io.gdcc.spi.export.Exporter;
import io.gdcc.spi.export.XMLExporter;
import edu.harvard.iq.dataverse.util.BundleUtil;
+import io.gdcc.spi.export.ExportDataContext;
import java.io.OutputStream;
import java.util.Locale;
import java.util.Optional;
@@ -38,7 +39,7 @@ public String getDisplayName(Locale locale) {
@Override
public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
try {
- DublinCoreExportUtil.datasetJson2dublincore(dataProvider.getDatasetJson(), outputStream, DublinCoreExportUtil.DC_FLAVOR_DCTERMS);
+ DublinCoreExportUtil.datasetJson2dublincore(dataProvider.getDatasetJson(ExportDataContext.context().withDatasetMetadataOnly()), outputStream, DublinCoreExportUtil.DC_FLAVOR_DCTERMS);
} catch (XMLStreamException xse) {
throw new ExportException("Caught XMLStreamException performing DCTERMS export", xse);
}
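
ExportDataContext itself is not part of this diff; from the call sites it reads as a small fluent builder in io.gdcc.spi.export. A sketch of the shape implied by this patch - inferred from usage, not taken from the actual SPI source:

    public class ExportDataContext {
        private boolean datasetMetadataOnly = false;
        private boolean publicFilesOnly = false;
        private Integer offset = null;
        private Integer length = null;

        public static ExportDataContext context() { return new ExportDataContext(); }

        public ExportDataContext withDatasetMetadataOnly() { datasetMetadataOnly = true; return this; }
        // withPublicFilesOnly() is assumed by symmetry with isPublicFilesOnly();
        // it does not appear anywhere in this diff.
        public ExportDataContext withPublicFilesOnly() { publicFilesOnly = true; return this; }
        public ExportDataContext withOffset(int offset) { this.offset = offset; return this; }
        public ExportDataContext withLength(int length) { this.length = length; return this; }

        public boolean isDatasetMetadataOnly() { return datasetMetadataOnly; }
        public boolean isPublicFilesOnly() { return publicFilesOnly; }
        public Integer getOffset() { return offset; }
        public Integer getLength() { return length; }
    }
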
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java
index 0130c18b22b..e03363a155e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExporter.java
@@ -48,7 +48,7 @@ public String getDisplayName(Locale locale) {
@Override
public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
try {
- DdiExportUtil.datasetJson2ddi(dataProvider.getDatasetJson(), dataProvider.getDatasetFileDetails(),
+ DdiExportUtil.datasetJson2ddi(dataProvider.getDatasetJson(), dataProvider,
outputStream);
} catch (XMLStreamException xse) {
throw new ExportException("Caught XMLStreamException performing DDI export", xse);
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java
index 0fa32dd4bfa..db3c28deb78 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DublinCoreExporter.java
@@ -5,6 +5,7 @@
import edu.harvard.iq.dataverse.export.dublincore.DublinCoreExportUtil;
import io.gdcc.spi.export.ExportDataProvider;
import io.gdcc.spi.export.ExportException;
+import io.gdcc.spi.export.ExportDataContext;
import io.gdcc.spi.export.Exporter;
import io.gdcc.spi.export.XMLExporter;
import edu.harvard.iq.dataverse.util.BundleUtil;
@@ -38,7 +39,7 @@ public String getDisplayName(Locale locale) {
@Override
public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
try {
- DublinCoreExportUtil.datasetJson2dublincore(dataProvider.getDatasetJson(), outputStream,
+ DublinCoreExportUtil.datasetJson2dublincore(dataProvider.getDatasetJson(ExportDataContext.context().withDatasetMetadataOnly()), outputStream,
DublinCoreExportUtil.DC_FLAVOR_OAI);
} catch (XMLStreamException xse) {
throw new ExportException("Caught XMLStreamException performing DC export", xse);
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java b/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java
index e7bcf17d44b..5d5863798aa 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/ExportService.java
@@ -64,7 +64,7 @@ public class ExportService {
private Map<String, Exporter> exporterMap = new HashMap<>();
private static final Logger logger = Logger.getLogger(ExportService.class.getCanonicalName());
-
+
private ExportService() {
/*
* Step 1 - find the EXPORTERS dir and add all jar files there to a class loader
@@ -319,16 +319,72 @@ public void exportAllFormats(Dataset dataset) throws ExportException {
}
}
+
+ public void exportFormats(Dataset dataset, List<String> formatNames) throws ExportException {
+ try {
+ if (formatNames == null) {
+ clearAllCachedFormats(dataset);
+ } else {
+ clearCachedFormats(dataset, formatNames);
+ }
+ } catch (IOException ex) {
+ Logger.getLogger(ExportService.class.getName()).log(Level.SEVERE, null, ex);
+ }
- public void clearAllCachedFormats(Dataset dataset) throws IOException {
try {
+ DatasetVersion releasedVersion = dataset.getReleasedVersion();
+ if (releasedVersion == null) {
+ throw new ExportException("No released version for dataset " + dataset.getGlobalId().toString());
+ }
+ InternalExportDataProvider dataProvider = new InternalExportDataProvider(releasedVersion);
for (Exporter e : exporterMap.values()) {
String formatName = e.getFormatName();
- clearCachedExport(dataset, formatName);
+ if (formatNames == null || formatNames.contains(formatName)) {
+ if (e.getPrerequisiteFormatName().isPresent()) {
+ String prereqFormatName = e.getPrerequisiteFormatName().get();
+ try (InputStream preReqStream = getExport(releasedVersion, prereqFormatName)) {
+ dataProvider.setPrerequisiteInputStream(preReqStream);
+ cacheExport(dataset, dataProvider, formatName, e);
+ dataProvider.setPrerequisiteInputStream(null);
+ } catch (IOException ioe) {
+ throw new ExportException("Could not get prerequisite " + e.getPrerequisiteFormatName() + " to create " + formatName + "export for dataset " + dataset.getId(), ioe);
+ }
+ } else {
+ cacheExport(dataset, dataProvider, formatName, e);
+ }
+ }
}
+ // Finally, if we have been able to successfully export in all the
+ // requested formats, we'll update the "last exported" time stamp:
+ dataset.setLastExportTime(new Timestamp(new Date().getTime()));
+
+ } catch (ServiceConfigurationError serviceError) {
+ throw new ExportException("Service configuration error during export. " + serviceError.getMessage());
+ } catch (RuntimeException e) {
+ logger.log(Level.FINE, e.getMessage(), e);
+ throw new ExportException(
+ "Unknown runtime exception exporting metadata. " + (e.getMessage() == null ? "" : e.getMessage()));
+ }
+ }
- dataset.setLastExportTime(null);
+ public void clearAllCachedFormats(Dataset dataset) throws IOException {
+ List<String> formatNames = new ArrayList<>();
+
+ for (Exporter e : exporterMap.values()) {
+ formatNames.add(e.getFormatName());
+ }
+ clearCachedFormats(dataset, formatNames);
+ dataset.setLastExportTime(null);
+ }
+
+ public void clearCachedFormats(Dataset dataset, List<String> formatNames) throws IOException {
+ try {
+ for (String formatName : formatNames) {
+ clearCachedExport(dataset, formatName);
+ }
} catch (IOException ex) {
// not fatal
}
@@ -379,7 +435,7 @@ public void exportFormat(Dataset dataset, String formatName) throws ExportExcept
}
}
-
+
public Exporter getExporter(String formatName) throws ExportException {
Exporter e = exporterMap.get(formatName);
if (e != null) {
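
In the new entry point a null format list means "all registered formats", mirroring the old exportAllFormats(); a non-null list restricts the run to the named formats. A usage sketch (the format names are illustrative; they must match each Exporter.getFormatName()):

    ExportService exportService = ExportService.getInstance();
    exportService.exportFormats(dataset, null);                     // re-export everything
    exportService.exportFormats(dataset, List.of("ddi", "oai_dc")); // just these two
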
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java
index f0d77eb8b52..c0dc92b0f7e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java
@@ -3,6 +3,7 @@
import java.io.InputStream;
import java.util.Optional;
+import jakarta.enterprise.inject.spi.CDI;
import jakarta.json.Json;
import jakarta.json.JsonArray;
import jakarta.json.JsonArrayBuilder;
@@ -11,12 +12,18 @@
import edu.harvard.iq.dataverse.DataCitation;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.DatasetVersionFilesServiceBean;
import edu.harvard.iq.dataverse.FileMetadata;
+import edu.harvard.iq.dataverse.FileSearchCriteria;
import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService;
import io.gdcc.spi.export.ExportDataProvider;
import edu.harvard.iq.dataverse.util.bagit.OREMap;
import edu.harvard.iq.dataverse.util.json.JsonPrinter;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
+import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_INGESTED_FILE;
+import io.gdcc.spi.export.ExportException;
+import io.gdcc.spi.export.ExportDataContext;
+import java.util.List;
/**
* Provides all data necessary to create an export
@@ -24,10 +31,12 @@
*/
public class InternalExportDataProvider implements ExportDataProvider {
- private DatasetVersion dv;
+ private final DatasetVersion dv;
private JsonObject jsonRepresentation = null;
+ private JsonObject jsonRepresentationNoFiles = null;
private JsonObject schemaDotOrgRepresentation = null;
private JsonObject oreRepresentation = null;
+ private JsonArray fileAndDataDetails = null;
private InputStream is = null;
InternalExportDataProvider(DatasetVersion dv) {
@@ -40,16 +49,28 @@ public class InternalExportDataProvider implements ExportDataProvider {
}
@Override
- public JsonObject getDatasetJson() {
+ public JsonObject getDatasetJson(ExportDataContext... context) {
+ if (isOnlyDatasetLevelMetadataRequested(context)) {
+ // If we already have the "full" Json representation (with files)
+ // generated, should we return it (potentially moving MUCH more json
+ // than the client needs), or spend extra cycles generating the short
+ // form from scratch? - I'm choosing the latter.
+ if (jsonRepresentationNoFiles == null) {
+ final JsonObjectBuilder datasetAsJsonBuilder = JsonPrinter.datasetAsJsonForDTO(dv, false);
+ jsonRepresentationNoFiles = datasetAsJsonBuilder.build();
+ }
+ return jsonRepresentationNoFiles;
+ }
+
if (jsonRepresentation == null) {
- final JsonObjectBuilder datasetAsJsonBuilder = JsonPrinter.jsonAsDatasetDto(dv);
+ final JsonObjectBuilder datasetAsJsonBuilder = JsonPrinter.datasetAsJsonForDTO(dv);
jsonRepresentation = datasetAsJsonBuilder.build();
}
return jsonRepresentation;
}
-
+
@Override
- public JsonObject getDatasetSchemaDotOrg() {
+ public JsonObject getDatasetSchemaDotOrg(ExportDataContext... context) {
if (schemaDotOrgRepresentation == null) {
String jsonLdAsString = dv.getJsonLd();
schemaDotOrgRepresentation = JsonUtil.getJsonObject(jsonLdAsString);
@@ -58,7 +79,7 @@ public JsonObject getDatasetSchemaDotOrg() {
}
@Override
- public JsonObject getDatasetORE() {
+ public JsonObject getDatasetORE(ExportDataContext... context) {
if (oreRepresentation == null) {
oreRepresentation = new OREMap(dv).getOREMap();
}
@@ -66,27 +87,128 @@ public JsonObject getDatasetORE() {
}
@Override
- public String getDataCiteXml() {
+ public String getDataCiteXml(ExportDataContext... context) {
return DOIDataCiteRegisterService.getMetadataFromDvObject(
dv.getDataset().getGlobalId().asString(), new DataCitation(dv).getDataCiteMetadata(), dv.getDataset());
}
@Override
- public JsonArray getDatasetFileDetails() {
+ public JsonArray getDatasetFileDetails(ExportDataContext... context) {
+ if (fileAndDataDetails == null) {
+ JsonArrayBuilder jab = Json.createArrayBuilder();
+ for (FileMetadata fileMetadata : dv.getFileMetadatas()) {
+ DataFile dataFile = fileMetadata.getDataFile();
+ jab.add(JsonPrinter.json(dataFile, fileMetadata, true, false, true));
+ }
+ fileAndDataDetails = jab.build();
+ }
+ return fileAndDataDetails;
+ }
+
+ /**
+ * This new (as of dataverse-spi 2.1.0) method will attempt to retrieve
+ * the requested tabular metadata more efficiently, by calling the
+ * DatasetVersionFilesServiceBean method directly, which, among other things,
+ * makes it possible to retrieve this information in batches. If for whatever
+ * reason that fails - if, for example, the EJB is not available in this
+ * context - we will throw an ExportException, giving the exporter a chance
+ * to retrieve this information using the traditional all-at-once method
+ * getDatasetFileDetails() instead.
+ */
+ @Override
+ public JsonArray getTabularDataDetails(ExportDataContext... context) throws ExportException {
JsonArrayBuilder jab = Json.createArrayBuilder();
- for (FileMetadata fileMetadata : dv.getFileMetadatas()) {
+
+ List<FileMetadata> fileMetadatas;
+ DatasetVersionFilesServiceBean datasetVersionFilesService = null;
+ try {
+ datasetVersionFilesService = CDI.current().select(DatasetVersionFilesServiceBean.class).get();
+ } catch (java.lang.IllegalArgumentException | IllegalStateException ie) {
+ throw new ExportException("EJB DatasetVersionFilesService is not available; " + ie.getMessage());
+ }
+
+ if (datasetVersionFilesService == null) {
+ throw new ExportException("EJB DatasetVersionFilesService is not available");
+ }
+
+ fileMetadatas = datasetVersionFilesService.getTabularDataFileMetadatas(dv,
+ getLength(context),
+ getOffset(context),
+ isOnlyPublicMetadataRequested(context));
+
+ for (FileMetadata fileMetadata : fileMetadatas) {
DataFile dataFile = fileMetadata.getDataFile();
- jab.add(JsonPrinter.json(dataFile, fileMetadata, true));
+ jab.add(JsonPrinter.jsonDatafileWithDatatableForExport(dataFile, fileMetadata));
}
return jab.build();
}
@Override
- public Optional getPrerequisiteInputStream() {
+ public Optional getPrerequisiteInputStream(ExportDataContext... context) {
return Optional.ofNullable(is);
}
-
+
public void setPrerequisiteInputStream(InputStream prereqStream) {
this.is=prereqStream;
}
+
+ /**
+ * Only one context object is supported
+ * @param contexts
+ * @return
+ */
+ private boolean isOnlyDatasetLevelMetadataRequested(ExportDataContext... contexts) {
+ for (ExportDataContext context : contexts) {
+ return context.isDatasetMetadataOnly();
+ }
+
+ // By default, if no context is supplied, we pack both the dataset-level
+ // and the file-level metadata into that Json
+ return false;
+ }
+
+ /**
+ * Only one context object is supported
+ *
+ * @param contexts
+ * @return
+ */
+ private boolean isOnlyPublicMetadataRequested(ExportDataContext... contexts) {
+
+ for (ExportDataContext context : contexts) {
+ return context.isPublicFilesOnly();
+ }
+
+ // By default, if no context is supplied, we return the metadata for all
+ // files - embargoed, restricted, etc.:
+ return false;
+ }
+
+ /**
+ * Only one context object is supported
+ *
+ * @param contexts
+ * @return
+ */
+ private Integer getOffset(ExportDataContext... contexts) {
+ for (ExportDataContext context : contexts) {
+ return context.getOffset();
+ }
+ return null;
+ }
+
+ /**
+ * Only one context object is supported
+ *
+ * @param contexts
+ * @return
+ */
+ private Integer getLength(ExportDataContext... contexts) {
+ for (ExportDataContext context : contexts) {
+ return context.getLength();
+ }
+ return null;
+ }
+
}
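
A consumer-side sketch of the fallback contract documented on getTabularDataDetails() above (the offset/length values are illustrative):

    JsonArray tabularDetails;
    try {
        tabularDetails = dataProvider.getTabularDataDetails(
                ExportDataContext.context().withOffset(0).withLength(50));
    } catch (ExportException ee) {
        // The EJB lookup failed in this context; fall back to the
        // traditional all-at-once method.
        tabularDetails = dataProvider.getDatasetFileDetails();
    }
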
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
index f5cc86bf8ee..96a5fac2790 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
@@ -6,6 +6,7 @@
import edu.harvard.iq.dataverse.DatasetFieldConstant;
import edu.harvard.iq.dataverse.DvObjectContainer;
import edu.harvard.iq.dataverse.GlobalId;
+import edu.harvard.iq.dataverse.api.dto.DataTableDTO;
import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO;
import edu.harvard.iq.dataverse.api.dto.DatasetDTO;
import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO;
@@ -31,6 +32,9 @@
import edu.harvard.iq.dataverse.util.xml.XmlPrinter;
import edu.harvard.iq.dataverse.util.xml.XmlUtil;
import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil;
+import io.gdcc.spi.export.ExportDataContext;
+import io.gdcc.spi.export.ExportDataProvider;
+import io.gdcc.spi.export.ExportException;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -81,6 +85,8 @@ public class DdiExportUtil {
public static final String NOTE_TYPE_CONTENTTYPE = "DATAVERSE:CONTENTTYPE";
public static final String NOTE_SUBJECT_CONTENTTYPE = "Content/MIME Type";
public static final String CITATION_BLOCK_NAME = "citation";
+ public static final int DATATABLES_BATCH_SIZE = 50;
+ public static final int DATAVARIABLES_BATCH_SIZE = 10000; // todo: review
//Some tests don't send real PIDs that can be parsed
//Use constant empty PID in these cases
@@ -125,7 +131,13 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr
xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage());
}
createStdyDscr(xmlw, datasetDto);
- createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles());
+ if (datasetDto.getDatasetVersion().getFiles() != null) {
+ // We create "otherMat" sections with skipTabularFiles = false, because
+ // this is the short version of the DDI where all the files, whether
+ // ingested or not, are encoded as otherMats:
+
+ createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles(), false);
+ }
xmlw.writeEndElement(); // codeBook
xmlw.flush();
} finally {
@@ -142,11 +154,11 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr
// "full" ddi, with the the "" and "/" sections:
- public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDetails, OutputStream outputStream) throws XMLStreamException {
+ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, ExportDataProvider dataProvider, OutputStream outputStream) throws XMLStreamException {
logger.fine(JsonUtil.prettyPrint(datasetDtoAsJson.toString()));
Gson gson = new Gson();
DatasetDTO datasetDto = gson.fromJson(datasetDtoAsJson.toString(), DatasetDTO.class);
-
+
XMLStreamWriter xmlw = null;
try {
xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream);
@@ -160,9 +172,38 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDe
xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage());
}
createStdyDscr(xmlw, datasetDto);
- createFileDscr(xmlw, fileDetails);
- createDataDscr(xmlw, fileDetails);
- createOtherMatsFromFileMetadatas(xmlw, fileDetails);
+
+ // If there are no files in this dataset, we can stop here
+ if (datasetDto.getDatasetVersion().getFiles() != null) {
+
+ // The Files and Data section, for the rich metadata describing
+ // the "ingested" tabular data files.
+ // Note that as of 6.8, we are generating the fileDscr from the DTOs
+ // supplied by ExportDataProvider.getDatasetJson()
+ List<Long> varQuantityMap = createFileDscrs(xmlw, datasetDto.getDatasetVersion().getFiles());
+
+ if (varQuantityMap != null && !varQuantityMap.isEmpty()) {
+ // Now that we know that there is 1 or more ingested tabular file
+ // in the dataset, we can try and produce the dataDscr section.
+ // A dataset with a large number of ingested files may contain more
+ // variable-level metadata than is practical or desirable to pass
+ // around as a single chunk of json. As of dataverse-spi 2.1.0, the
+ // ExportDataProvider offers a more efficient method for retrieving
+ // this information in length/offset-defined batches of datatables.
+ if (isVarQuantityLimitExceeded(varQuantityMap)) {
+ createDataDscrInBatches(xmlw, varQuantityMap, dataProvider);
+ } else {
+ createDataDscr(xmlw, dataProvider.getDatasetFileDetails());
+ }
+ }
+ // otherMats section:
+ // Note that we are asking createOtherMats() to skip tabular files,
+ // since we have already created the fileDscr and dataDscr sections
+ // for those.
+ createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles(), true);
+ }
xmlw.writeEndElement(); // codeBook
xmlw.flush();
} finally {
@@ -177,6 +218,18 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDe
}
}
+ private static boolean isVarQuantityLimitExceeded(List<Long> varQuantityMap) {
+ if (varQuantityMap != null) {
+ long varQuantityCount = 0;
+ for (long varQuantity : varQuantityMap) {
+ varQuantityCount += varQuantity;
+ if (varQuantityCount > DATAVARIABLES_BATCH_SIZE) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
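
A worked example of the two thresholds (variable counts made up): with DATAVARIABLES_BATCH_SIZE = 10000 and per-table variable counts of 6000, 5000 and 200, the running total reaches 11000 on the second table, so isVarQuantityLimitExceeded() returns true and the batched path is taken; createDataDscrInBatches() below then flushes tables 0-1 as one batch (withOffset(0).withLength(2)) and table 2 as a final batch (withOffset(2).withLength(1)).
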
/**
* @todo This is just a stub, copied from DDIExportServiceBean. It should
* produce valid DDI based on
@@ -1427,7 +1480,7 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da
// see if there's more information that we could encode in this otherMat.
// contentType? Unfs and such? (in the "short" DDI that is being used for
// harvesting *all* files are encoded as otherMats; even tabular ones.
- private static void createOtherMats(XMLStreamWriter xmlw, List<FileDTO> fileDtos) throws XMLStreamException {
+ private static void createOtherMats(XMLStreamWriter xmlw, List<FileDTO> fileDtos, boolean skipTabularFiles) throws XMLStreamException {
// The preferred URL for this dataverse, for cooking up the file access API links:
String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic();
@@ -1435,7 +1488,7 @@ private static void createOtherMats(XMLStreamWriter xmlw, List<FileDTO> fileDtos
// We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat,
// tabular ones - in fileDscr sections. (fileDscr sections have special fields for numbers of variables
// and observations, etc.)
- if (fileDTo.getDataFile().getDataTables() == null || fileDTo.getDataFile().getDataTables().isEmpty()) {
+ if (!(skipTabularFiles && isTabularData(fileDTo))) {
xmlw.writeStartElement("otherMat");
XmlWriterUtil.writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId());
String pidURL = fileDTo.getDataFile().getPidURL();
@@ -1473,6 +1526,13 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos
// tell if this file is in fact tabular data - so that we know if it needs an
// otherMat, or a fileDscr section.
// -- L.A. 4.5
+ // [update:] Since the comment above was written, the method below was changed
+ // to operate on a JsonArray, as provided by the FileDetails method in the
+ // ExportDataProvider. However, as of 6.8, this method is no longer used at
+ // all, because the DTOs supplied by ExportDataProvider.getDatasetJson()
+ // DO in fact contain enough information to generate the otherMat sections
+ // properly, whether for the short or the full version of the DDI. I am,
+ // however, leaving this method here for reference.
private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonArray fileDetails) throws XMLStreamException {
// The preferred URL for this dataverse, for cooking up the file access API links:
@@ -1483,7 +1543,7 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA
// We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat,
// tabular ones - in fileDscr sections. (fileDscr sections have special fields for numbers of variables
// and observations, etc.)
- if (!fileJson.containsKey("dataTables")) {
+ if (!fileJson.getBoolean("tabularData", false)) {
xmlw.writeStartElement("otherMat");
xmlw.writeAttribute("ID", "f" + fileJson.getJsonNumber(("id").toString()));
if (fileJson.containsKey("pidUrl")){
@@ -1519,12 +1579,14 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA
}
private static void writeFileDescription(XMLStreamWriter xmlw, FileDTO fileDTo) throws XMLStreamException {
- xmlw.writeStartElement("txt");
String description = fileDTo.getDataFile().getDescription();
if (description != null) {
+ xmlw.writeStartElement("txt");
+
xmlw.writeCharacters(description);
+ xmlw.writeEndElement(); // txt
+
}
- xmlw.writeEndElement(); // txt
}
@@ -1594,7 +1656,7 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) t
return;
}
- boolean tabularData = false;
+ boolean dataDscrWritten = false;
// we're not writing the opening tag until we find an actual
// tabular datafile.
@@ -1608,44 +1670,97 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) t
* should instead use the "Data Variable Metadata Access" endpoint.)
* These days we skip restricted files to avoid this exposure.
*/
- if (fileJson.containsKey("restricted") && fileJson.getBoolean("restricted")) {
+ if (isFileRestricted(fileJson)) {
continue;
}
- if(fileJson.containsKey("embargo")) {
- String dateString = fileJson.getJsonObject("embargo").getString("dateAvailable");
- LocalDate endDate = LocalDate.parse(dateString);
- if (endDate != null && endDate.isAfter(LocalDate.now())) {
- //Embargo is active so skip
- continue;
- }
- }
if (fileJson.containsKey("dataTables")) {
- if (!tabularData) {
+ if (!dataDscrWritten) {
xmlw.writeStartElement("dataDscr");
- tabularData = true;
- }
- if(fileJson.containsKey("varGroups")) {
- JsonArray varGroups = fileJson.getJsonArray("varGroups");
- for (int j=0;j varQuantityMap, ExportDataProvider exportDataProvider) throws XMLStreamException {
+ boolean dataDscrWritten = false;
+
+ try {
+ int dataTableStart = 0;
+ int dataTablesThisBatch = 0;
+ int varQuantityThisBatch = 0;
+
+ for (int dataTableCurrent = 0; dataTableCurrent < varQuantityMap.size(); dataTableCurrent++) {
+ varQuantityThisBatch += varQuantityMap.get(dataTableCurrent);
+ dataTablesThisBatch++;
+
+ if (varQuantityThisBatch >= DATAVARIABLES_BATCH_SIZE || dataTableCurrent == varQuantityMap.size() - 1) {
+ JsonArray tabularFileDetails = exportDataProvider.getTabularDataDetails(ExportDataContext.context().withOffset(dataTableStart).withLength(dataTablesThisBatch));
+ logger.fine("requested: " + dataTablesThisBatch + " tabular file data entries; retrieved: " + tabularFileDetails.size());
+ logger.fine("total number of variables in this batch: " + varQuantityThisBatch);
+
+ for (int i = 0; i < tabularFileDetails.size(); i++) {
+ JsonObject fileJson = tabularFileDetails.getJsonObject(i);
+
+ if (isFileRestricted(fileJson)) {
+ continue;
+ }
+
+ if (fileJson.containsKey("dataTables")) {
+ if (!dataDscrWritten) {
+ xmlw.writeStartElement("dataDscr");
+ dataDscrWritten = true;
+ }
+
+ createVariablesForDataFile(xmlw, fileJson);
+ }
}
+
+ dataTableStart += dataTablesThisBatch;
+ dataTablesThisBatch = 0;
+ varQuantityThisBatch = 0;
}
}
+ } catch (ExportException ee) {
+ if (dataDscrWritten) {
+ // Unfortunately, we've already written some output by the time
+ // this exception was caught. We have no other choice but to
+ // give up
+ throw new XMLStreamException("Failed to write dataDscr variable-level section using exportDataProvider.getTabularData()");
+ } else {
+ // Looks like we haven't written anything out yet. We can try
+ // and produce the dataDscr section using the classic, "all-at-once"
+ // approach instead.
+ createDataDscr(xmlw, exportDataProvider.getDatasetFileDetails());
+ }
}
+ }
- if (tabularData) {
- xmlw.writeEndElement(); // dataDscr
+ private static void createVariablesForDataFile(XMLStreamWriter xmlw, JsonObject fileJson) throws XMLStreamException {
+ if (fileJson.containsKey("varGroups")) {
+ JsonArray varGroups = fileJson.getJsonArray("varGroups");
+ for (int j = 0; j < varGroups.size(); j++) {
+ createVarGroupDDI(xmlw, varGroups.getJsonObject(j));
+ }
+ }
+ JsonObject dataTable = fileJson.getJsonArray("dataTables").getJsonObject(0);
+ JsonArray vars = dataTable.getJsonArray("dataVariables");
+ if (vars != null) {
+ logger.fine(vars.size() + " variables retrieved for file " + fileJson.getJsonNumber("id"));
+ for (int j = 0; j < vars.size(); j++) {
+ createVarDDI(xmlw, vars.getJsonObject(j), fileJson.getJsonNumber("id").toString(),
+ fileJson.getJsonNumber("fileMetadataId").toString());
+ }
}
}
+
private static void createVarGroupDDI(XMLStreamWriter xmlw, JsonObject varGrp) throws XMLStreamException {
xmlw.writeStartElement("varGrp");
xmlw.writeAttribute("ID", "VG" + varGrp.getJsonNumber("id").toString());
@@ -1908,46 +2023,57 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f
xmlw.writeEndElement(); //var
}
-
- private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) throws XMLStreamException {
+
+ private static List<Long> createFileDscrs(XMLStreamWriter xmlw, List<FileDTO> fileDtos) throws XMLStreamException {
+ List<Long> ret = new ArrayList<>();
+
+ logger.fine("total " + fileDtos.size() + " file DTOs to process for fileDscr");
String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic();
- for (int i =0;i<fileDetails.size();i++) {
- JsonObject fileJson = fileDetails.getJsonObject(i);
+ int counter = 0;
+ long totalVarQuantity = 0;
+ for (FileDTO fileDTo : fileDtos) {
+ if (isTabularData(fileDTo)) {
[... remainder of the rewritten fileDscr loop lost in extraction; it writes the fileDscr and fileTxt elements from the DTO and adds each dataTable's varQuantity to ret, updating counter and totalVarQuantity ...]
+ // Adding a dedicated note for the description entry (for
+ // non-tabular files we format it under the <txt> field)
- if (fileJson.containsKey("description")) {
+ if (fileDTo.getDataFile().getDescription() != null) {
xmlw.writeStartElement("notes");
xmlw.writeAttribute("level", LEVEL_FILE);
xmlw.writeAttribute("type", NOTE_TYPE_FILEDESCRIPTION);
xmlw.writeAttribute("subject", NOTE_SUBJECT_FILEDESCRIPTION);
- xmlw.writeCharacters(fileJson.getString("description"));
+ xmlw.writeCharacters(fileDTo.getDataFile().getDescription());
xmlw.writeEndElement(); // notes
}
// TODO: add the remaining fileDscr elements!
xmlw.writeEndElement(); // fileDscr
+ counter++;
}
}
+ logger.fine("produced " + counter + " fileDscr entries; total number of variables found: " + totalVarQuantity);
+ return ret;
}
-
-
-
-
-
+
public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException {
try {
@@ -2051,5 +2176,35 @@ public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStrea
public static void injectSettingsService(SettingsServiceBean settingsSvc) {
settingsService=settingsSvc;
}
+
+ private static boolean isTabularData(FileDTO fileDTO) {
+ return !(fileDTO.getDataFile().getDataTables() == null || fileDTO.getDataFile().getDataTables().isEmpty());
+ }
+
+ /**
+ * Previously (in Dataverse 5.3 and below) the dataDscr section was included
+ * for restricted files but that meant that summary statistics were exposed.
+ * (To get at these statistics, API users should instead use the "Data
+ * Variable Metadata Access" endpoint.) These days we skip restricted files
+ * to avoid this exposure.
+ * @param fileJson - a JsonObject representing one datafile/datatable-worth
+ * of tabular data.
+ */
+ private static boolean isFileRestricted(JsonObject fileJson) {
+ if (fileJson.containsKey("restricted") && fileJson.getBoolean("restricted")) {
+ return true;
+ }
+ if (fileJson.containsKey("embargo")) {
+ String dateString = fileJson.getJsonObject("embargo").getString("dateAvailable");
+ LocalDate endDate = LocalDate.parse(dateString);
+ if (endDate != null && endDate.isAfter(LocalDate.now())) {
+ //Embargo is active so skip
+ return true;
+ }
+ }
+ return false;
+ }
+
+
}
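
For reference, the two JSON shapes isFileRestricted() reacts to, as produced by the JsonPrinter changes below (field values made up):

    // restricted flag set directly on the file entry:
    JsonObject restricted = Json.createObjectBuilder()
            .add("restricted", true).build();
    // active embargo, expressed as a nested object:
    JsonObject embargoed = Json.createObjectBuilder()
            .add("embargo", Json.createObjectBuilder()
                    .add("dateAvailable", "2030-01-01"))
            .build();
    // both entries would be skipped when writing the dataDscr section
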
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java
index cc15d4c978b..6719d46adf0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java
@@ -259,9 +259,14 @@ public void exportAllFormats(Dataset dataset) {
@TransactionAttribute(REQUIRES_NEW)
public void exportAllFormatsInNewTransaction(Dataset dataset) throws ExportException {
+ exportFormatsInNewTransaction(dataset, null);
+ }
+
+ @TransactionAttribute(REQUIRES_NEW)
+ public void exportFormatsInNewTransaction(Dataset dataset, List<String> formatNames) throws ExportException {
try {
ExportService exportServiceInstance = ExportService.getInstance();
- exportServiceInstance.exportAllFormats(dataset);
+ exportServiceInstance.exportFormats(dataset, formatNames);
dataset = datasetService.merge(dataset);
} catch (Exception e) {
logger.log(Level.FINE, "Caught unknown exception while trying to export", e);
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
index 46a05fc93f2..ff0d986892b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java
@@ -63,6 +63,7 @@
import jakarta.ejb.Singleton;
import jakarta.json.JsonArray;
import jakarta.json.JsonObject;
+import java.math.BigDecimal;
/**
* Convert objects to Json.
@@ -523,6 +524,11 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List<String> anonymized
}
public static JsonObjectBuilder json(DatasetVersion dsv, List<String> anonymizedFieldTypeNamesList,
boolean includeFiles, boolean returnOwners, boolean includeMetadataBlocks) {
+ return json(dsv, anonymizedFieldTypeNamesList, includeFiles, returnOwners, includeMetadataBlocks, false);
+ }
+
+ public static JsonObjectBuilder json(DatasetVersion dsv, List<String> anonymizedFieldTypeNamesList,
+ boolean includeFiles, boolean returnOwners, boolean includeMetadataBlocks, boolean forExportDataProvider) {
Dataset dataset = dsv.getDataset();
JsonObjectBuilder bld = jsonObjectBuilder()
.add("id", dsv.getId()).add("datasetId", dataset.getId())
@@ -580,7 +586,7 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List<String> anonymized
bld.add("isPartOf", getOwnersFromDvObject(dataset));
}
if (includeFiles) {
- bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas()));
+ bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas(), forExportDataProvider));
}
return bld;
@@ -605,6 +611,22 @@ public static JsonObjectBuilder jsonDataFileList(List<DataFile> dataFiles){
return bld;
}
+ public static JsonObjectBuilder datasetAsJsonForDTO(DatasetVersion dsv) {
+ return datasetAsJsonForDTO(dsv, true);
+ }
+
+ /**
+ * Same as above, but gives an option to skip the file-level info
+ * @param dsv
+ * @param includeFiles
+ * @return
+ */
+ public static JsonObjectBuilder datasetAsJsonForDTO(DatasetVersion dsv, boolean includeFiles) {
+ JsonObjectBuilder jsonForDTO = JsonPrinter.json(dsv.getDataset());
+ jsonForDTO.add("datasetVersion", versionAsJsonForDTO(dsv, includeFiles));
+ return jsonForDTO;
+ }
+
/**
* Export formats such as DDI require the citation to be included. See
* https://github.com/IQSS/dataverse/issues/2579 for more on DDI export.
@@ -613,34 +635,20 @@ public static JsonObjectBuilder jsonDataFileList(List<DataFile> dataFiles){
* to the regular `json` method for DatasetVersion? Will anything break?
* Unit tests for that method could not be found.
*/
- public static JsonObjectBuilder jsonWithCitation(DatasetVersion dsv, boolean includeFiles) {
- JsonObjectBuilder dsvWithCitation = JsonPrinter.json(dsv, includeFiles);
+ private static JsonObjectBuilder versionAsJsonForDTO(DatasetVersion dsv, boolean includeFiles) {
+ JsonObjectBuilder dsvWithCitation = JsonPrinter.json(dsv, null, includeFiles, false, true, true);
dsvWithCitation.add("citation", dsv.getCitation());
return dsvWithCitation;
}
- /**
- * Export formats such as DDI require the persistent identifier components
- * such as "protocol", "authority" and "identifier" to be included so we
- * create a JSON object we can convert to a DatasetDTO which can include a
- * DatasetVersionDTO, which has all the metadata fields we need to export.
- * See https://github.com/IQSS/dataverse/issues/2579 for more on DDI export.
- *
- * @todo Instead of having this separate method, should "datasetVersion" be
- * added to the regular `json` method for Dataset? Will anything break? Unit
- * tests for that method could not be found. If we keep this method as-is
- * should the method be renamed?
- */
- public static JsonObjectBuilder jsonAsDatasetDto(DatasetVersion dsv) {
- JsonObjectBuilder datasetDtoAsJson = JsonPrinter.json(dsv.getDataset());
- datasetDtoAsJson.add("datasetVersion", jsonWithCitation(dsv, true));
- return datasetDtoAsJson;
- }
-
public static JsonArrayBuilder jsonFileMetadatas(Collection<FileMetadata> fmds) {
+ return jsonFileMetadatas(fmds, false);
+ }
+
+ public static JsonArrayBuilder jsonFileMetadatas(Collection<FileMetadata> fmds, boolean forExportDataProvider) {
JsonArrayBuilder filesArr = Json.createArrayBuilder();
for (FileMetadata fmd : fmds) {
- filesArr.add(JsonPrinter.json(fmd));
+ filesArr.add(JsonPrinter.json(fmd, false, false, forExportDataProvider));
}
return filesArr;
@@ -854,6 +862,10 @@ public static JsonObjectBuilder json(FileMetadata fmd){
}
public static JsonObjectBuilder json(FileMetadata fmd, boolean returnOwners, boolean printDatasetVersion) {
+ return json(fmd, returnOwners, printDatasetVersion, false);
+ }
+
+ public static JsonObjectBuilder json(FileMetadata fmd, boolean returnOwners, boolean printDatasetVersion, boolean forExportDataProvider) {
NullSafeJsonBuilder builder = jsonObjectBuilder();
// deprecated: .add("category", fmd.getCategory())
@@ -869,7 +881,7 @@ public static JsonObjectBuilder json(FileMetadata fmd, boolean returnOwners, boo
.add("version", fmd.getVersion())
.add("datasetVersionId", fmd.getDatasetVersion().getId())
.add("categories", getFileCategories(fmd))
- .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, false, returnOwners));
+ .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, forExportDataProvider, returnOwners));
if (printDatasetVersion) {
builder.add("datasetVersion", json(fmd.getDatasetVersion(), false));
@@ -900,14 +912,14 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo
}
public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider, boolean returnOwners) {
- // File names are no longer stored in the DataFile entity;
- // (they are instead in the FileMetadata (as "labels") - this way
- // the filename can change between versions...
- // It does appear that for some historical purpose we still need the
- // filename in the file DTO (?)... We rely on it to be there for the
- // DDI export, for example. So we need to make sure this is is the
- // *correct* file name - i.e., that it comes from the right version.
- // (TODO...? L.A. 4.5, Aug 7 2016)
+ return json(df, fileMetadata, forExportDataProvider, returnOwners, false);
+ }
+
+ public static JsonObjectBuilder jsonDatafileWithDatatableForExport(DataFile df, FileMetadata fileMetadata) {
+ return json(df, fileMetadata, true, false, true);
+ }
+
+ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider, boolean returnOwners, boolean includeVariables) {
String fileName = null;
if (fileMetadata == null){
@@ -971,14 +983,18 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo
* The restricted state was not included prior to #9175 so to avoid backward
* incompatibility, it is now only added when generating json for the
* InternalExportDataProvider fileDetails.
+ * [update]: more fields have been added below that are only there
+ * when the json is requested by the InternalExportDataProvider.
*/
if (forExportDataProvider) {
builder.add("restricted", df.isRestricted())
- .add("fileMetadataId", fileMetadata.getId())
- .add("dataTables", df.getDataTables().isEmpty() ? null : JsonPrinter.jsonDT(df.getDataTables()))
- .add("varGroups", fileMetadata.getVarGroups().isEmpty()
- ? JsonPrinter.jsonVarGroup(fileMetadata.getVarGroups())
- : null);
+ .add("fileMetadataId", fileMetadata.getId())
+ .add("dataTables", df.getDataTables().isEmpty() ? null : jsonDT(df.getDataTables(), includeVariables));
+ if (includeVariables) {
+ builder.add("varGroups", fileMetadata.getVarGroups().isEmpty()
+ ? JsonPrinter.jsonVarGroup(fileMetadata.getVarGroups())
+ : null);
+ }
}
if (returnOwners){
builder.add("isPartOf", getOwnersFromDvObject(df, fileMetadata.getDatasetVersion()));
@@ -987,22 +1003,24 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo
}
//Started from https://github.com/RENCI-NRIG/dataverse/, i.e. https://github.com/RENCI-NRIG/dataverse/commit/2b5a1225b42cf1caba85e18abfeb952171c6754a
- public static JsonArrayBuilder jsonDT(List<DataTable> ldt) {
+ public static JsonArrayBuilder jsonDT(List<DataTable> ldt, boolean includeVariables) {
JsonArrayBuilder ldtArr = Json.createArrayBuilder();
for(DataTable dt: ldt){
- ldtArr.add(JsonPrinter.json(dt));
+ ldtArr.add(JsonPrinter.json(dt, includeVariables));
}
return ldtArr;
}
- public static JsonObjectBuilder json(DataTable dt) {
- return jsonObjectBuilder()
+ public static JsonObjectBuilder json(DataTable dt, boolean includeVariables) {
+ JsonObjectBuilder builder = jsonObjectBuilder()
.add("varQuantity", dt.getVarQuantity())
.add("caseQuantity", dt.getCaseQuantity())
.add("recordsPerCase", dt.getRecordsPerCase())
- .add("UNF", dt.getUnf())
- .add("dataVariables", JsonPrinter.jsonDV(dt.getDataVariables()))
- ;
+ .add("UNF", dt.getUnf());
+ if (includeVariables) {
+ builder.add("dataVariables", JsonPrinter.jsonDV(dt.getDataVariables()));
+ }
+ return builder;
}
public static JsonArrayBuilder jsonDV(List<DataVariable> dvl) {
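
The includeVariables flag keeps the potentially very large dataVariables array out of the dataTables json unless a caller (such as the export data provider, or the /dataTables API endpoint updated above) explicitly asks for it. A sketch:

    JsonArrayBuilder summary = JsonPrinter.jsonDT(df.getDataTables(), false); // varQuantity, caseQuantity, recordsPerCase, UNF
    JsonArrayBuilder withVars = JsonPrinter.jsonDT(df.getDataTables(), true); // ...plus the full dataVariables array
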