Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
8c8cc23
feat(pid): add dataset version pid configuration code #4499
poikilotherm Mar 22, 2023
d4a3ed3
refactor(model): switch JPA constraints from Hibernate internal to JS…
poikilotherm Mar 22, 2023
3cb82b4
feat(model): add dataset version PID conduct to Dataverse collection …
poikilotherm Mar 22, 2023
b4644a0
feat(collections): add business logic to determine dataset version pi…
poikilotherm Mar 22, 2023
b6eb3be
feat(pid): add initial extension points for dataset version PIDs
poikilotherm Mar 22, 2023
45dd5de
feat(api): add stub API endpoints for version pid conduct in collections
poikilotherm Mar 22, 2023
a503718
fix(model): make Dataverse.datasetVersionPidConduct require value
poikilotherm Mar 24, 2023
f340e12
refactor(api): simplify ConstraintViolationExceptionHandler and make …
poikilotherm Mar 24, 2023
db272fa
feat(api): make some Dataverse collection attributes changeable
poikilotherm Mar 24, 2023
21fd755
Merge branch 'develop' into 4499-version-doi
poikilotherm Mar 27, 2023
d94bc31
Merge branch 'develop' into 4499-version-doi
poikilotherm Mar 29, 2023
8a37a50
fix(model): add missing default for collection version pid conduct in…
poikilotherm Mar 29, 2023
2dc83b7
feat(model): add PID field to DatasetVersion model and include in DB …
poikilotherm Mar 29, 2023
d51d3bd
Merge branch 'develop' into 4499-version-doi
poikilotherm Apr 19, 2023
2d71f55
feat(pid): add version pid generation styles
poikilotherm Apr 19, 2023
7acbfb7
feat(pid): add version identifier methods to PID provider interface #…
poikilotherm Apr 19, 2023
5a6dc68
feat(pid): add default generator for version PIDs
poikilotherm Apr 19, 2023
4ca88b9
feat(pid): check dataset has identifier for version pid by suffix
poikilotherm Apr 20, 2023
5288bef
feat(pid): enable version PID generation when new version is created
poikilotherm Apr 20, 2023
37bcecc
feat(pid): add global id generation to versions and print
poikilotherm Apr 21, 2023
2885dab
fix(pid,sql): correct SQL default for version pid conduct in migratio…
poikilotherm Apr 21, 2023
e8d0023
Revert "feat(pid): enable version PID generation when new version is …
poikilotherm Apr 21, 2023
48b44d9
Merge branch 'develop' into 4499-version-doi
poikilotherm Apr 26, 2023
1eada89
refactor(pid): replace exception for GlobalIdServiceBean.createIdenti…
poikilotherm Apr 26, 2023
909b9f5
feat(pid): make DatasetVersion know about identifier registration status
poikilotherm May 2, 2023
4ecaa09
refactor(cmd): add NotImplementedException
poikilotherm May 2, 2023
4eac492
fix(pid): simple typo in javadoc of GlobalIdServiceBean.createIdentif…
poikilotherm May 2, 2023
6b891aa
feat(pid): restructure version identifier related methods in GlobalId…
poikilotherm May 2, 2023
3c18c73
feat(pid): restructure default version identifier generation
poikilotherm May 2, 2023
3c1433e
feat(pid): add create identifier interface in GlobalIdServiceBean for…
poikilotherm May 2, 2023
afc23d1
feat(pid): add fake version identifier methods to FakePidProviderServ…
poikilotherm May 2, 2023
d0bf0fd
feat(pid,cmd): create and publish version identifiers on dataset publ…
poikilotherm May 2, 2023
288e987
fix(pid): make DatasetVersion.persistentIdentifier non-unique in data…
poikilotherm May 2, 2023
e5dd1a6
Merge branch 'develop' into 4499-version-doi
poikilotherm Jun 13, 2023
877c728
fix(db): rename version PID db migration because of conflict and not …
poikilotherm Jun 14, 2023
c17cf27
refactor(pid): align collection conduct to be more aligned to JPA
poikilotherm Jun 14, 2023
77c9e5a
refactor(pid): align global and collection modes to review feedback
poikilotherm Jun 14, 2023
0b51e1b
feat(pid): update DataverseServiceBean.wantsDatasetVersionPids() call…
poikilotherm Jun 14, 2023
fa8d544
feat(pid): change business logic for DataverseServiceBean.wantsDatase…
poikilotherm Jun 14, 2023
d8f8534
docs(pid): fix Javadoc in VersionPidMode
poikilotherm Jun 19, 2023
4b9a3ba
Merge branch 'develop' into 4499-version-doi
poikilotherm Jun 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.pidproviders.VersionPidMode.GenStyle;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.InputStream;
Expand Down Expand Up @@ -155,6 +157,50 @@ public DvObject generateIdentifier(DvObject dvObject) {
return dvObject;
}

/**
 * Generate an identifier for a given dataset version, depending on the chosen (configured) generation style.
 * (See also {@link GenStyle} for available styles.)
 *
 * A minor version does not get an identifier of its own: the persistent identifier of the dataset's
 * released version is reused for it.
 *
 * @param datasetVersion The version of a dataset to create a PID for
 * @return The identifier. NOTE(review): for a minor version this is whatever the released version carries
 *         as persistent identifier - presumably non-null once version PIDs are in use; confirm.
 * @throws IllegalArgumentException If the version is null or already released, if a minor version has no
 *                                  released version to take the identifier from, if the configured style is
 *                                  missing or not supported by this generator, or if the owning dataset has
 *                                  no identifier while creating a suffix style identifier.
 */
@Override
public String generateDatasetVersionIdentifier(final DatasetVersion datasetVersion) throws IllegalArgumentException {
    if (datasetVersion == null || datasetVersion.isReleased()) {
        throw new IllegalArgumentException("Version may not be null or released");
    }

    // Keep the number boxed and null-check it before comparing, so an unset minor version number
    // cannot blow up on unboxing (assumes the getter may return null for unnumbered versions - TODO confirm).
    final Long minorVersionNumber = datasetVersion.getMinorVersionNumber();

    // If this is a minor version update, reuse the identifier of the last released version
    if (minorVersionNumber != null && minorVersionNumber > 0) {
        final DatasetVersion releasedVersion = datasetVersion.getDataset().getReleasedVersion();
        if (releasedVersion == null) {
            // Fail with the documented exception type instead of a NullPointerException
            throw new IllegalArgumentException("Minor version has no released version to reuse the identifier from");
        }
        return releasedVersion.getPersistentIdentifier();
    }

    try {
        final GenStyle style = JvmSettings.PID_VERSIONS_STYLE.lookup(GenStyle.class);

        if (style == GenStyle.DATASET) {
            // Same generation logic as used for the dataset itself
            return generateDatasetIdentifier(datasetVersion.getDataset());
        }

        if (style == GenStyle.SUFFIX) {
            final String datasetIdentifier = datasetVersion.getDataset().getIdentifier();
            if (datasetIdentifier == null || datasetIdentifier.isEmpty()) {
                throw new IllegalArgumentException("Dataset must not have empty identifier when creating dataset version identifier by suffix");
            }

            // <dataset identifier><delimiter><major version number>
            return datasetIdentifier + getVersionSuffixDelimiter() + datasetVersion.getVersionNumber();
        }

        // Nothing appropriate found - bail out
        throw new IllegalArgumentException("No supported version PID generation style configured");
    } catch (NoSuchElementException e) {
        // Lookup found no (valid) configuration value; report it as the documented exception type
        throw new IllegalArgumentException("No supported version PID generation style configured", e);
    }
}

//ToDo just send the DvObject.DType
public String generateDatasetIdentifier(Dataset dataset) {
//ToDo - track these in the bean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public boolean alreadyExists(GlobalId pid) {


@Override
public String createIdentifier(DvObject dvObject) throws Exception {
public String createIdentifier(DvObject dvObject) throws IOException {
logger.log(Level.FINE,"createIdentifier");
if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){
dvObject = generateIdentifier(dvObject);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.ucsb.nceas.ezid.EZIDException;
import edu.ucsb.nceas.ezid.EZIDService;

import java.io.IOException;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -240,7 +242,7 @@ public List<String> getProviderInformation(){
}

@Override
public String createIdentifier(DvObject dvObject) throws Throwable {
public String createIdentifier(DvObject dvObject) throws IOException {
logger.log(Level.FINE, "createIdentifier");
if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){
dvObject = generateIdentifier(dvObject);
Expand All @@ -265,7 +267,7 @@ public String createIdentifier(DvObject dvObject) throws Throwable {
logger.log(Level.WARNING, "cause", e.getCause());
logger.log(Level.WARNING, "message {0}", e.getMessage());
logger.log(Level.WARNING, "identifier: ", identifier);
throw e;
throw new IOException(e);
}
}

Expand Down
77 changes: 77 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,33 @@ public enum VersionState {
@Column(length = VERSION_NOTE_MAX_LENGTH)
private String versionNote;

/**
 * A persistent identifier (PID) for this version.
 * The version PID will always be dependent on the protocol and authority of the containing dataset.
 * This identifier may contain {@link edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key#Shoulder} if
 * configured, plus some more unique characters, depending on the admin's choice to make version PIDs
 * dependent on the dataset PID.
 *
 * The PID may be null (feature disabled, old entry, etc.). It is not guaranteed to be unique across versions,
 * as minor versions by default carry the identifier of their adjacent major version.
 */
@Column
private String persistentIdentifier;

/**
 * Cached {@link GlobalId} for this version; kept in a transient field to save repeated retrievals and
 * reformatting. Its value is based on {@link #persistentIdentifier} and details from {@link #dataset}
 * like protocol, authority and shoulder. Marked {@code @Transient}, so it is not written to the database.
 */
@Transient
private GlobalId globalId;

/**
 * Flag stored in the database recording whether this version's identifier has been registered before.
 * It exists to avoid re-registration (which would probably fail) and to switch to modification instead.
 * Starts out as {@code false}.
 */
private boolean identifierRegistered = false;

/*
* @todo versionState should never be null so when we are ready, uncomment
* the `nullable = false` below.
Expand Down Expand Up @@ -233,6 +260,56 @@ public Long getVersion() {

public void setVersion(Long version) {
    // No-op: the body is empty, so the passed value is not stored anywhere.
}

/**
 * Retrieve the persistent identifier stored for this version.
 *
 * @return The stored identifier, or null if none has been set.
 */
public String getPersistentIdentifier() {
    return persistentIdentifier;
}

/**
 * Set the persistent identifier of this version.
 *
 * Also discards the cached {@link GlobalId}: {@link #getGlobalId()} only builds the global id while its
 * cache is empty, so without the reset it would keep returning a value derived from the previous identifier.
 *
 * @param identifier The new identifier, may be null.
 */
public void setPersistentIdentifier(String identifier) {
    this.persistentIdentifier = identifier;
    // Invalidate the transient cache; it gets rebuilt lazily on the next getGlobalId() call
    this.globalId = null;
}

/**
 * Create a {@link GlobalId} from {@link #persistentIdentifier} and the owning {@link #dataset}
 * details of protocol and authority. This method is not free of side effects: it will cache
 * the generated value in a transient instance variable if not yet initialized.
 *
 * @return The global id for this version or null if no PID, owning dataset, protocol or authority present.
 */
public GlobalId getGlobalId() {
    // Serve from the cache once a value has been built
    if (this.globalId != null) {
        return this.globalId;
    }

    // Any missing ingredient means there is nothing to build; the dataset check keeps a version
    // without an owning dataset from failing with a NullPointerException.
    if (this.getPersistentIdentifier() == null || this.dataset == null
            || this.dataset.getProtocol() == null || this.dataset.getAuthority() == null) {
        return null;
    }

    this.globalId = PidUtil.parseAsGlobalID(
            this.dataset.getProtocol(),
            this.dataset.getAuthority(),
            this.getPersistentIdentifier());
    return this.globalId;
}

/**
 * Tell whether the identifier of this version has been marked as registered.
 *
 * @return true when marked as registered, false if not.
 */
public boolean isIdentifierRegistered() {
    return identifierRegistered;
}

/**
 * Mark the identifier of this version as registered.
 */
public void setIdentifierRegistered() {
    identifierRegistered = true;
}

/**
 * Replace the registration flag with the given value.
 *
 * @param status true to mark the identifier as registered, false to mark it as not registered
 */
public void setIdentifierRegistered(boolean status) {
    identifierRegistered = status;
}


public String getDataverseSiteUrl() {
return dataverseSiteUrl;
Expand Down
35 changes: 29 additions & 6 deletions src/main/java/edu/harvard/iq/dataverse/Dataverse.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.authorization.DataverseRole;
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.pidproviders.VersionPidMode.CollectionConduct;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
Expand Down Expand Up @@ -30,14 +30,13 @@
import javax.persistence.OneToOne;
import javax.persistence.OrderBy;
import javax.persistence.Table;
import javax.persistence.Transient;
import javax.validation.constraints.NotBlank;
import javax.validation.constraints.NotEmpty;
import javax.validation.constraints.NotNull;
import javax.validation.constraints.Pattern;
import javax.validation.constraints.Size;

import org.apache.commons.lang3.StringUtils;
import org.hibernate.validator.constraints.NotBlank;
import org.hibernate.validator.constraints.NotEmpty;

/**
*
Expand Down Expand Up @@ -182,8 +181,7 @@ public void setDefaultContributorRole(DataverseRole defaultContributorRole) {
private boolean facetRoot;
// By default, themeRoot should be true, as new dataverses should start with the default theme
private boolean themeRoot = true;
private boolean templateRoot;

private boolean templateRoot;

@OneToOne(mappedBy = "dataverse",cascade={ CascadeType.REMOVE, CascadeType.MERGE,CascadeType.PERSIST}, orphanRemoval=true)
private DataverseTheme dataverseTheme;
Expand Down Expand Up @@ -591,6 +589,31 @@ public void setCitationDatasetFieldTypes(List<DatasetFieldType> citationDatasetF
}



/**
 * Indicate if this Dataverse Collection wants to publicize PIDs for each (major) {@link DatasetVersion}
 * for any {@link Dataset} in it.
 *
 * Mapped with {@link EnumType#STRING}, i.e. by the name of the enum constant. The field itself may hold
 * null; {@link #getDatasetVersionPidConduct()} reports that case as {@link CollectionConduct#INHERIT}.
 *
 * @see edu.harvard.iq.dataverse.pidproviders.VersionPidMode#ALLOW_MAJOR
 * @see edu.harvard.iq.dataverse.pidproviders.VersionPidMode#ALLOW_MINOR
 * @see CollectionConduct
 */
@Enumerated(EnumType.STRING)
private CollectionConduct datasetVersionPidConduct;

/**
 * Store the version PID conduct mode for this collection.
 *
 * @param conduct The mode to store; taken as given, without any validation.
 */
public void setDatasetVersionPidConduct(CollectionConduct conduct) {
    datasetVersionPidConduct = conduct;
}

/**
 * Look up the version PID conduct mode of this collection.
 *
 * @return One of {@link CollectionConduct}. Never null: an unset mode is reported as
 *         {@link CollectionConduct#INHERIT}.
 */
public CollectionConduct getDatasetVersionPidConduct() {
    if (datasetVersionPidConduct == null) {
        // Nothing stored - fall back to the default
        return CollectionConduct.INHERIT;
    }
    return datasetVersionPidConduct;
}



public List<DataverseFacet> getDataverseFacets() {
return getDataverseFacets(false);
Expand Down
58 changes: 56 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.pidproviders.VersionPidMode;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrSearchResult;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.File;
Expand All @@ -28,9 +30,10 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.Properties;
import java.util.concurrent.Future;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.inject.Inject;
Expand Down Expand Up @@ -927,6 +930,57 @@ public List<Object[]> getDatasetTitlesWithinDataverse(Long dataverseId) {

return em.createNativeQuery(cqString).getResultList();
}


/**
 * Check if a given Dataverse Collection has been configured to generate PIDs for a new version of a dataset
 * contained in it. Will also respect the global version PID settings by an admin via
 * {@link JvmSettings#PID_VERSIONS_MODE}.
 *
 * The collection is checked first, then its owners one by one, until one of them states a conduct other
 * than inheriting. If none does, the global setting alone decides.
 *
 * @param collection The collection to check. May not be null (will throw NPE).
 * @param willBeMinorVersion Will the {@link DatasetVersion} to receive the PID be a minor version?
 *
 * @return true if enabled, false if disabled
 * @throws NullPointerException When the collection is null
 * @throws java.util.NoSuchElementException When no or invalid configuration for version PID mode is given
 */
public boolean wantsDatasetVersionPids(final Dataverse collection, boolean willBeMinorVersion) {
    Objects.requireNonNull(collection, "Collection parameter must not be null");

    // Deactivated by admin globally or no PID for minor version allowed?
    final VersionPidMode vpm = JvmSettings.PID_VERSIONS_MODE.lookup(VersionPidMode.class);
    if (VersionPidMode.OFF.equals(vpm) || (willBeMinorVersion && VersionPidMode.ALLOW_MAJOR.equals(vpm))) {
        return false;
    }

    // Check the collection itself; and potentially its ancestors.
    // The step to the owner lives in the loop header, so every iteration is guaranteed to move upwards
    // and a conduct value not listed below can never stall the loop.
    // Note: root dataverse has no owner, which will break the loop condition
    for (Dataverse c = collection; c != null; c = c.getOwner()) {
        // Note: the default behavior is INHERIT for the model class
        switch (c.getDatasetVersionPidConduct()) {
            case SKIP:
                logger.log(Level.FINE, "Collection {0} makes {1} skip version PIDs", new String[]{c.getAlias(), collection.getAlias()});
                return false;
            case MAJOR:
                logger.log(Level.FINE, "Collection {0} allows its sub {1} PIDs for major versions", new String[]{c.getAlias(), collection.getAlias()});
                return !willBeMinorVersion;
            case MINOR:
                if (VersionPidMode.ALLOW_MINOR.equals(vpm)) {
                    logger.log(Level.FINE, "Collection {0} allows its sub {1} PIDs for minor versions", new String[]{c.getAlias(), collection.getAlias()});
                    return true;
                }
                // In some cases, an admin might have switched the setting after someone already activated it.
                // The collection's conduct mode should be updated - we will still cap it as admin says no.
                logger.log(Level.INFO, "Collection {0} allows its sub {1} PIDs for minor versions, which is disabled globally. Please update conduct mode of {0}.", new String[]{c.getAlias(), collection.getAlias()});
                return !willBeMinorVersion;
            case INHERIT:
            default:
                // No decision at this level - continue with the owner
                break;
        }
    }

    // If the root dataverse did also not have a policy set, use what the admin configured.
    // Note: one could argue we should just return true here, as the below boolean expression is just the
    // negation of the one at the top and the collections didn't intervene. But better safe than sorry...
    return VersionPidMode.ALLOW_MINOR.equals(vpm) || (!willBeMinorVersion && VersionPidMode.ALLOW_MAJOR.equals(vpm));
}
}
Loading