From 1980c7508f527be8a8aa5822e40cf9ee4b7da0ca Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 27 Jan 2018 12:02:35 -0500 Subject: [PATCH 01/44] QDR-898 - support shoulders in generated DOIs --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 6 ++++++ .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 1 + 2 files changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index a03b71b6946..fca9427ce62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -252,6 +252,12 @@ public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idService return generateIdentifierAsRandomString(dataset, idServiceBean); case "sequentialNumber": return generateIdentifierAsSequentialNumber(dataset, idServiceBean); + case "shoulderWithRandomString": + String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); + return doiShoulder + generateIdentifierAsRandomString(dataset, idServiceBean); + case "shoulderWithSequentialNumber": + String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); + return doiShoulder + generateIdentifierAsSequentialNumber(dataset, idServiceBean); default: /* Should we throw an exception instead?? -- L.A. 
4.6.2 */ return generateIdentifierAsRandomString(dataset, idServiceBean); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 16e8a1b8f29..80b2c0e26c2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -181,6 +181,7 @@ public enum Key { /** DoiProvider for global id */ DoiProvider, DoiSeparator, + DoiShoulder, /* Removed for now - tried to add here but DOI Service Bean didn't like it at start-up DoiUsername, DoiPassword, From 94309e96fba7e317d3ec3882ca2c96e58f03f1e0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 29 Jan 2018 12:40:11 -0500 Subject: [PATCH 02/44] updates to test/warn about shoulders that contain the separator for this case, one can continue to append the shoulder to the Authority. --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 8 +++++--- .../iq/dataverse/settings/SettingsServiceBean.java | 4 ++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index fca9427ce62..f44c1fe9445 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -247,17 +247,19 @@ public Dataset findByGlobalId(String globalId) { public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); + String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); + if(doiShoulder.indexOf(settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, "/"))>=0) { + logger.warning("doiShoulder cannot contain / or doiSeparator"); + } 
switch (doiIdentifierType) { case "randomString": return generateIdentifierAsRandomString(dataset, idServiceBean); case "sequentialNumber": return generateIdentifierAsSequentialNumber(dataset, idServiceBean); case "shoulderWithRandomString": - String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); return doiShoulder + generateIdentifierAsRandomString(dataset, idServiceBean); case "shoulderWithSequentialNumber": - String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); - return doiShoulder + generateIdentifierAsSequentialNumber(dataset, idServiceBean); + return doiShoulder + generateIdentifierAsSequentialNumber(dataset, idServiceBean); default: /* Should we throw an exception instead?? -- L.A. 4.6.2 */ return generateIdentifierAsRandomString(dataset, idServiceBean); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 80b2c0e26c2..9253ca4aede 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -181,6 +181,10 @@ public enum Key { /** DoiProvider for global id */ DoiProvider, DoiSeparator, + /** DoiShoulder for global id - should not include DoiSeparator unless/until logic for separating authority and identifier is more robust. + * This case can be handled by combining the authority and shoulder as part of the Authority setting. Use this DoiShoulder for cases where the + * character separating the shoulder from the rest of th identifier is not '/' or DoiSeparator. 
+ */ DoiShoulder, /* Removed for now - tried to add here but DOI Service Bean didn't like it at start-up DoiUsername, From 03b631722fe0ecba7e335539f74a7521b5ceac18 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 26 Apr 2018 08:44:56 -0400 Subject: [PATCH 03/44] comment update --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 9253ca4aede..7b958b0c194 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -182,8 +182,9 @@ public enum Key { DoiProvider, DoiSeparator, /** DoiShoulder for global id - should not include DoiSeparator unless/until logic for separating authority and identifier is more robust. - * This case can be handled by combining the authority and shoulder as part of the Authority setting. Use this DoiShoulder for cases where the - * character separating the shoulder from the rest of th identifier is not '/' or DoiSeparator. + * This case can be handled by combining the authority and shoulder as part of the Authority setting. Use this DoiShoulder for cases + * where there is no character separating the shoulder from the rest of the identifier or where the + * character separating the shoulder from the rest of the identifier is not '/' or DoiSeparator. 
*/ DoiShoulder, /* Removed for now - tried to add here but DOI Service Bean didn't like it at start-up From 7d8b503cd8c4fe8a12be3786adbe8691150b08ae Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 4 May 2018 14:24:23 -0400 Subject: [PATCH 04/44] QDR-898 - to run with a database where all DOI shoulders have moved to the identifier part (out of the authority side) --- .../iq/dataverse/DatasetServiceBean.java | 1705 +++++++++-------- .../settings/SettingsServiceBean.java | 2 + 2 files changed, 861 insertions(+), 846 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index f44c1fe9445..3f574dbaee7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -49,855 +49,868 @@ * @author skraffmiller */ - @Stateless @Named public class DatasetServiceBean implements java.io.Serializable { - private static final Logger logger = Logger.getLogger(DatasetServiceBean.class.getCanonicalName()); - @EJB - IndexServiceBean indexService; - - @EJB - DOIEZIdServiceBean doiEZIdServiceBean; - - @EJB - SettingsServiceBean settingsService; - - @EJB - DatasetVersionServiceBean versionService; - - @EJB - AuthenticationServiceBean authentication; - - @EJB - DataFileServiceBean fileService; - - @EJB - PermissionServiceBean permissionService; - - @EJB - OAIRecordServiceBean recordService; - - private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - - @PersistenceContext(unitName = "VDCNet-ejbPU") - protected EntityManager em; - - public Dataset find(Object pk) { - return em.find(Dataset.class, pk); - } - - public List findByOwnerId(Long ownerId) { - return findByOwnerId(ownerId, false); - } - - public List findPublishedByOwnerId(Long ownerId) { - return findByOwnerId(ownerId, true); - } - - private List findByOwnerId(Long ownerId, boolean onlyPublished) { - 
List retList = new ArrayList<>(); - TypedQuery query = em.createQuery("select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class); - query.setParameter("ownerId", ownerId); - if (!onlyPublished) { - return query.getResultList(); - } else { - for (Dataset ds : query.getResultList()) { - if (ds.isReleased() && !ds.isDeaccessioned()) { - retList.add(ds); - } - } - return retList; - } - } - - public List findIdsByOwnerId(Long ownerId) { - return findIdsByOwnerId(ownerId, false); - } - - private List findIdsByOwnerId(Long ownerId, boolean onlyPublished) { - List retList = new ArrayList<>(); - if (!onlyPublished) { - TypedQuery query = em.createQuery("select o.id from Dataset as o where o.owner.id =:ownerId order by o.id", Long.class); - query.setParameter("ownerId", ownerId); - return query.getResultList(); - } else { - TypedQuery query = em.createQuery("select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class); - query.setParameter("ownerId", ownerId); - for (Dataset ds : query.getResultList()) { - if (ds.isReleased() && !ds.isDeaccessioned()) { - retList.add(ds.getId()); - } - } - return retList; - } - } - - public List findAll() { - return em.createQuery("select object(o) from Dataset as o order by o.id", Dataset.class).getResultList(); - } - - - public List findAllLocalDatasetIds() { - return em.createQuery("SELECT o.id FROM Dataset o WHERE o.harvestedFrom IS null ORDER BY o.id", Long.class).getResultList(); - } - - public List findAllUnindexed() { - return em.createQuery("SELECT o.id FROM Dataset o WHERE o.indexTime IS null ORDER BY o.id DESC", Long.class).getResultList(); - } - - /** - * For docs, see the equivalent method on the DataverseServiceBean. 
- * @param numPartitions - * @param partitionId - * @param skipIndexed - * @return a list of datasets - * @see DataverseServiceBean#findAllOrSubset(long, long, boolean) - */ - public List findAllOrSubset(long numPartitions, long partitionId, boolean skipIndexed) { - if (numPartitions < 1) { - long saneNumPartitions = 1; - numPartitions = saneNumPartitions; - } - String skipClause = skipIndexed ? "AND o.indexTime is null " : ""; - TypedQuery typedQuery = em.createQuery("SELECT o.id FROM Dataset o WHERE MOD( o.id, :numPartitions) = :partitionId " + - skipClause + - "ORDER BY o.id", Long.class); - typedQuery.setParameter("numPartitions", numPartitions); - typedQuery.setParameter("partitionId", partitionId); - return typedQuery.getResultList(); - } - - /** - * Merges the passed dataset to the persistence context. - * @param ds the dataset whose new state we want to persist. - * @return The managed entity representing {@code ds}. - */ - public Dataset merge( Dataset ds ) { - return em.merge(ds); - } - - public Dataset findByGlobalId(String globalId) { - - String protocol = ""; - String authority = ""; - String identifier = ""; - int index1 = globalId.indexOf(':'); - String nonNullDefaultIfKeyNotFound = ""; - // This is kind of wrong right here: we should not assume that this is *our* DOI - - // it can be somebody else's registered DOI that we harvested. And they can - // have their own separator characters defined - so we should not assume - // that everybody's DOIs will look like ours! - // Also, this separator character gets applied to handles lookups too, below. - // Which is probably wrong too... - // -- L.A. 4.2.4 - String separator = settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, nonNullDefaultIfKeyNotFound); - int index2 = globalId.indexOf(separator, index1 + 1); - int index3; - if (index1 == -1) { - logger.info("Error parsing identifier: " + globalId + ". 
':' not found in string"); - return null; - } else { - protocol = globalId.substring(0, index1); - } - if (index2 == -1 ) { - logger.info("Error parsing identifier: " + globalId + ". Second separator not found in string"); - return null; - } else { - authority = globalId.substring(index1 + 1, index2); - } - if (protocol.equals("doi")) { - - index3 = globalId.indexOf(separator, index2 + 1); - if (index3 == -1 ) { - // As of now (4.2.4, Feb. 2016) the ICPSR DOIs are the only - // use case where the authority has no "shoulder", so there's only - // 1 slash in the full global id string... hence, we get here. - // Their DOIs also have some lower case characters (for ex., - // 10.3886/ICPSR04599.v1), and that's how are they saved in the - // IQSS production database. So the .toUpperCase() below is - // causing a problem. -- L.A. - identifier = globalId.substring(index2 + 1); //.toUpperCase(); - } else { - if (index3 > -1) { - authority = globalId.substring(index1 + 1, index3); - identifier = globalId.substring(index3 + 1).toUpperCase(); - } - } - } else { - identifier = globalId.substring(index2 + 1).toUpperCase(); - } - String queryStr = "SELECT s from Dataset s where s.identifier = :identifier and s.protocol= :protocol and s.authority= :authority"; - Dataset foundDataset = null; - try { - Query query = em.createQuery(queryStr); - query.setParameter("identifier", identifier); - query.setParameter("protocol", protocol); - query.setParameter("authority", authority); - foundDataset = (Dataset) query.getSingleResult(); - } catch (javax.persistence.NoResultException e) { - // (set to .info, this can fill the log file with thousands of - // these messages during a large harvest run) - logger.fine("no ds found: " + globalId); - // DO nothing, just return null. 
- } - return foundDataset; - } - - public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); - if(doiShoulder.indexOf(settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, "/"))>=0) { - logger.warning("doiShoulder cannot contain / or doiSeparator"); - } - switch (doiIdentifierType) { - case "randomString": - return generateIdentifierAsRandomString(dataset, idServiceBean); - case "sequentialNumber": - return generateIdentifierAsSequentialNumber(dataset, idServiceBean); - case "shoulderWithRandomString": - return doiShoulder + generateIdentifierAsRandomString(dataset, idServiceBean); - case "shoulderWithSequentialNumber": - return doiShoulder + generateIdentifierAsSequentialNumber(dataset, idServiceBean); - default: - /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return generateIdentifierAsRandomString(dataset, idServiceBean); - } - } - - private String generateIdentifierAsRandomString(Dataset dataset, IdServiceBean idServiceBean) { - - String identifier = null; - do { - identifier = RandomStringUtils.randomAlphanumeric(6).toUpperCase(); - } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); - - return identifier; - } - - private String generateIdentifierAsSequentialNumber(Dataset dataset, IdServiceBean idServiceBean) { - - String identifier; - do { - StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierAsSequentialNumber"); - query.execute(); - Integer identifierNumeric = (Integer) query.getOutputParameterValue(1); - // some diagnostics here maybe - is it possible to determine that it's failing - // because the stored procedure hasn't been created in the database? 
- if (identifierNumeric == null) { - return null; - } - identifier = identifierNumeric.toString(); - } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); - - return identifier; - } - - /** - * Check that a identifier entered by the user is unique (not currently used - * for any other study in this Dataverse Network) alos check for duplicate - * in EZID if needed - * @param userIdentifier - * @param dataset - * @param idServiceBean - * @return */ - public boolean isIdentifierUniqueInDatabase(String userIdentifier, Dataset dataset, IdServiceBean idServiceBean) { - String query = "SELECT d FROM Dataset d WHERE d.identifier = '" + userIdentifier + "'"; - query += " and d.protocol ='" + dataset.getProtocol() + "'"; - query += " and d.authority = '" + dataset.getAuthority() + "'"; - boolean u = em.createQuery(query).getResultList().isEmpty(); - - try{ - if (idServiceBean.alreadyExists(dataset)) { - u = false; - } - } catch (Exception e){ - //we can live with failure - means identifier not found remotely - } - - - return u; - } - - public DatasetVersion storeVersion( DatasetVersion dsv ) { - em.persist(dsv); - return dsv; - } - - public String createCitationRIS(DatasetVersion version) { - return createCitationRIS(version, null); - } - - public String createCitationRIS(DatasetVersion version, FileMetadata fileMetadata) { - String publisher = version.getRootDataverseNameforCitation(); - List authorList = version.getDatasetAuthors(); - String retString = "Provider: " + publisher + "\r\n"; - retString += "Content: text/plain; charset=\"us-ascii\"" + "\r\n"; - // Using type "DBASE" - "Online Database", for consistency with - // EndNote (see the longer comment in the EndNote section below)> - - retString += "TY - DBASE" + "\r\n"; - retString += "T1 - " + version.getTitle() + "\r\n"; - for (DatasetAuthor author : authorList) { - retString += "AU - " + author.getName().getDisplayValue() + "\r\n"; - } - retString += "DO - " + 
version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier() + "\r\n"; - retString += "PY - " + version.getVersionYear() + "\r\n"; - retString += "UR - " + version.getDataset().getPersistentURL() + "\r\n"; - retString += "PB - " + publisher + "\r\n"; - - // a DataFile citation also includes filename und UNF, if applicable: - if (fileMetadata != null) { - retString += "C1 - " + fileMetadata.getLabel() + "\r\n"; - - if (fileMetadata.getDataFile().isTabularData()) { - if (fileMetadata.getDataFile().getUnf() != null) { - retString += "C2 - " + fileMetadata.getDataFile().getUnf() + "\r\n"; - } - } - } - - // closing element: - retString += "ER - \r\n"; - - return retString; - } - - - private XMLOutputFactory xmlOutputFactory = null; - - public String createCitationXML(DatasetVersion datasetVersion, FileMetadata fileMetadata) { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - createEndNoteCitation(outStream, datasetVersion, fileMetadata); - String xml = outStream.toString(); - return xml; - } - - public void createEndNoteCitation(OutputStream os, DatasetVersion datasetVersion, FileMetadata fileMetadata) { - - xmlOutputFactory = javax.xml.stream.XMLOutputFactory.newInstance(); - XMLStreamWriter xmlw = null; - try { - xmlw = xmlOutputFactory.createXMLStreamWriter(os); - xmlw.writeStartDocument(); - createEndNoteXML(xmlw, datasetVersion, fileMetadata); - xmlw.writeEndDocument(); - } catch (XMLStreamException ex) { - Logger.getLogger("global").log(Level.SEVERE, null, ex); - throw new EJBException("ERROR occurred during creating endnote xml.", ex); - } finally { - try { - if (xmlw != null) { - xmlw.close(); - } - } catch (XMLStreamException ex) { - } - } - } - - private void createEndNoteXML(XMLStreamWriter xmlw, DatasetVersion version, FileMetadata fileMetadata) throws XMLStreamException { - - String title = version.getTitle(); - String versionYear = 
version.getVersionYear(); - String publisher = version.getRootDataverseNameforCitation(); - - List authorList = version.getDatasetAuthors(); - - xmlw.writeStartElement("xml"); - xmlw.writeStartElement("records"); - - xmlw.writeStartElement("record"); - - // "Ref-type" indicates which of the (numerous!) available EndNote - // schemas this record will be interpreted as. - // This is relatively important. Certain fields with generic - // names like "custom1" and "custom2" become very specific things - // in specific schemas; for example, custom1 shows as "legal notice" - // in "Journal Article" (ref-type 84), or as "year published" in - // "Government Document". - // We don't want the UNF to show as a "legal notice"! - // We have found a ref-type that works ok for our purposes - - // "Online Database" (type 45). In this one, the fields Custom1 - // and Custom2 are not translated and just show as is. - // And "Custom1" still beats "legal notice". - // -- L.A. 12.12.2014 beta 10 - - xmlw.writeStartElement("ref-type"); - xmlw.writeAttribute("name", "Online Database"); - xmlw.writeCharacters("45"); - xmlw.writeEndElement(); // ref-type - - xmlw.writeStartElement("contributors"); - xmlw.writeStartElement("authors"); - for (DatasetAuthor author : authorList) { - xmlw.writeStartElement("author"); - xmlw.writeCharacters(author.getName().getDisplayValue()); - xmlw.writeEndElement(); // author - } - xmlw.writeEndElement(); // authors - xmlw.writeEndElement(); // contributors - - xmlw.writeStartElement("titles"); - xmlw.writeStartElement("title"); - xmlw.writeCharacters(title); - xmlw.writeEndElement(); // title - - xmlw.writeEndElement(); // titles - - xmlw.writeStartElement("section"); - String sectionString; - if (version.getDataset().isReleased()) { - sectionString = new SimpleDateFormat("yyyy-MM-dd").format(version.getDataset().getPublicationDate()); - } else { - sectionString = new SimpleDateFormat("yyyy-MM-dd").format(version.getLastUpdateTime()); - } - - 
xmlw.writeCharacters(sectionString); - xmlw.writeEndElement(); // publisher - - xmlw.writeStartElement("dates"); - xmlw.writeStartElement("year"); - xmlw.writeCharacters(versionYear); - xmlw.writeEndElement(); // year - xmlw.writeEndElement(); // dates - - xmlw.writeStartElement("publisher"); - xmlw.writeCharacters(publisher); - xmlw.writeEndElement(); // publisher - - xmlw.writeStartElement("urls"); - xmlw.writeStartElement("related-urls"); - xmlw.writeStartElement("url"); - xmlw.writeCharacters(version.getDataset().getPersistentURL()); - xmlw.writeEndElement(); // url - xmlw.writeEndElement(); // related-urls - xmlw.writeEndElement(); // urls - - // a DataFile citation also includes the filename and (for Tabular - // files) the UNF signature, that we put into the custom1 and custom2 - // fields respectively: - - - if (fileMetadata != null) { - xmlw.writeStartElement("custom1"); - xmlw.writeCharacters(fileMetadata.getLabel()); - xmlw.writeEndElement(); // custom1 - - if (fileMetadata.getDataFile().isTabularData()) { - if (fileMetadata.getDataFile().getUnf() != null) { - xmlw.writeStartElement("custom2"); - xmlw.writeCharacters(fileMetadata.getDataFile().getUnf()); - xmlw.writeEndElement(); // custom2 - } - } - } - - xmlw.writeStartElement("electronic-resource-num"); - String electResourceNum = version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier(); - xmlw.writeCharacters(electResourceNum); - xmlw.writeEndElement(); - //10.3886/ICPSR03259.v1 - xmlw.writeEndElement(); // record - - xmlw.writeEndElement(); // records - xmlw.writeEndElement(); // xml - - } - - public DatasetVersionUser getDatasetVersionUser(DatasetVersion version, User user) { - - DatasetVersionUser ddu = null; - Query query = em.createQuery("select object(o) from DatasetVersionUser as o " - + "where o.datasetVersion.id =:versionId and o.authenticatedUser.id =:userId"); - 
query.setParameter("versionId", version.getId()); - String identifier = user.getIdentifier(); - identifier = identifier.startsWith("@") ? identifier.substring(1) : identifier; - AuthenticatedUser au = authentication.getAuthenticatedUser(identifier); - query.setParameter("userId", au.getId()); - try { - ddu = (DatasetVersionUser) query.getSingleResult(); - } catch (javax.persistence.NoResultException e) { - // DO nothing, just return null. - } - return ddu; - } - - public boolean checkDatasetLock(Long datasetId) { - TypedQuery lockCounter = em.createNamedQuery("DatasetLock.getLocksByDatasetId", DatasetLock.class); - lockCounter.setParameter("datasetId", datasetId); - lockCounter.setMaxResults(1); - List lock = lockCounter.getResultList(); - return lock.size()>0; - } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public DatasetLock addDatasetLock(Dataset dataset, DatasetLock lock) { - lock.setDataset(dataset); - dataset.addLock(lock); - em.persist(lock); - em.merge(dataset); - return lock; - } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) /*?*/ - public DatasetLock addDatasetLock(Long datasetId, DatasetLock.Reason reason, Long userId, String info) { - - Dataset dataset = em.find(Dataset.class, datasetId); - - AuthenticatedUser user = null; - if (userId != null) { - user = em.find(AuthenticatedUser.class, userId); - } - - DatasetLock lock = new DatasetLock(reason, user); - lock.setDataset(dataset); - lock.setInfo(info); - lock.setStartTime(new Date()); - - if (userId != null) { - lock.setUser(user); - if (user.getDatasetLocks() == null) { - user.setDatasetLocks(new ArrayList<>()); - } - user.getDatasetLocks().add(lock); - } - - return addDatasetLock(dataset, lock); - } - - /** - * Removes all {@link DatasetLock}s for the dataset whose id is passed and reason - * is {@code aReason}. - * @param datasetId Id of the dataset whose locks will b removed. - * @param aReason The reason of the locks that will be removed. 
- */ - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void removeDatasetLocks(Long datasetId, DatasetLock.Reason aReason) { - Dataset dataset = em.find(Dataset.class, datasetId); - new HashSet<>(dataset.getLocks()).stream() - .filter( l -> l.getReason() == aReason ) - .forEach( lock -> { - dataset.removeLock(lock); - - AuthenticatedUser user = lock.getUser(); - user.getDatasetLocks().remove(lock); - - em.remove(lock); - }); - } - - /* - getTitleFromLatestVersion methods use native query to return a dataset title - - There are two versions: - 1) The version with datasetId param only will return the title regardless of version state - 2)The version with the param 'includeDraft' boolean will return the most recently published title if the param is set to false - If no Title found return empty string - protects against calling with - include draft = false with no published version - */ - - public String getTitleFromLatestVersion(Long datasetId){ - return getTitleFromLatestVersion(datasetId, true); - } - - public String getTitleFromLatestVersion(Long datasetId, boolean includeDraft){ - - String whereDraft = ""; - //This clause will exclude draft versions from the select - if (!includeDraft) { - whereDraft = " and v.versionstate !='DRAFT' "; - } - - try { - return (String) em.createNativeQuery("select dfv.value from dataset d " - + " join datasetversion v on d.id = v.dataset_id " - + " join datasetfield df on v.id = df.datasetversion_id " - + " join datasetfieldvalue dfv on df.id = dfv.datasetfield_id " - + " join datasetfieldtype dft on df.datasetfieldtype_id = dft.id " - + " where dft.name = '" + DatasetFieldConstant.title + "' and v.dataset_id =" + datasetId - + whereDraft - + " order by v.versionnumber desc, v.minorVersionNumber desc limit 1 " - + ";").getSingleResult(); - - } catch (Exception ex) { - logger.log(Level.INFO, "exception trying to get title from latest version: {0}", ex); - return ""; - } - - } - - public Dataset 
getDatasetByHarvestInfo(Dataverse dataverse, String harvestIdentifier) { - String queryStr = "SELECT d FROM Dataset d, DvObject o WHERE d.id = o.id AND o.owner.id = " + dataverse.getId() + " and d.harvestIdentifier = '" + harvestIdentifier + "'"; - Query query = em.createQuery(queryStr); - List resultList = query.getResultList(); - Dataset dataset = null; - if (resultList.size() > 1) { - throw new EJBException("More than one dataset found in the dataverse (id= " + dataverse.getId() + "), with harvestIdentifier= " + harvestIdentifier); - } - if (resultList.size() == 1) { - dataset = (Dataset) resultList.get(0); - } - return dataset; - - } - - public Long getDatasetVersionCardImage(Long versionId, User user) { - if (versionId == null) { - return null; - } - - - - return null; - } - - /** - * Used to identify and properly display Harvested objects on the dataverse page. - * - * @param datasetIds - * @return - */ - public Map getArchiveDescriptionsForHarvestedDatasets(Set datasetIds){ - if (datasetIds == null || datasetIds.size() < 1) { - return null; - } - - String datasetIdStr = Strings.join(datasetIds, ", "); - - String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + datasetIdStr + ")"; - List searchResults; - - try { - searchResults = em.createNativeQuery(qstr).getResultList(); - } catch (Exception ex) { - searchResults = null; - } - - if (searchResults == null) { - return null; - } - - Map ret = new HashMap<>(); - - for (Object[] result : searchResults) { - Long dsId; - if (result[0] != null) { - try { - dsId = (Long)result[0]; - } catch (Exception ex) { - dsId = null; - } - if (dsId == null) { - continue; - } - - ret.put(dsId, (String)result[1]); - } - } - - return ret; - } - - - - public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) { - if (datasetVersion == null) { - return false; - } - - // First, check if this dataset has a designated thumbnail 
image: - - if (datasetVersion.getDataset() != null) { - DataFile dataFile = datasetVersion.getDataset().getThumbnailFile(); - if (dataFile != null) { - return ImageThumbConverter.isThumbnailAvailable(dataFile, 48); - } - } - - // If not, we'll try to use one of the files in this dataset version: - // (the first file with an available thumbnail, really) - - List fileMetadatas = datasetVersion.getFileMetadatas(); - - for (FileMetadata fileMetadata : fileMetadatas) { - DataFile dataFile = fileMetadata.getDataFile(); - - // TODO: use permissionsWrapper here - ? - // (we are looking up these download permissions on individual files, - // true, and those are unique... but the wrapper may be able to save - // us some queries when it determines the download permission on the - // dataset as a whole? -- L.A. 4.2.1 - - if (fileService.isThumbnailAvailable(dataFile) && permissionService.userOn(user, dataFile).has(Permission.DownloadFile)) { //, user)) { - return true; - } - - } - - return false; - } - - - // reExportAll *forces* a reexport on all published datasets; whether they - // have the "last export" time stamp set or not. - @Asynchronous - public void reExportAllAsync() { - exportAllDatasets(true); - } - - public void reExportAll() { - exportAllDatasets(true); - } - - - // exportAll() will try to export the yet unexported datasets (it will honor - // and trust the "last export" time stamp). - - @Asynchronous - public void exportAllAsync() { - exportAllDatasets(false); - } - - public void exportAll() { - exportAllDatasets(false); - } - - public void exportAllDatasets(boolean forceReExport) { - Integer countAll = 0; - Integer countSuccess = 0; - Integer countError = 0; - String logTimestamp = logFormatter.format(new Date()); - Logger exportLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." 
+ "ExportAll" + logTimestamp); - String logFileName = "../logs" + File.separator + "export_" + logTimestamp + ".log"; - FileHandler fileHandler; - boolean fileHandlerSuceeded; - try { - fileHandler = new FileHandler(logFileName); - exportLogger.setUseParentHandlers(false); - fileHandlerSuceeded = true; - } catch (IOException | SecurityException ex) { - Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; - } - - if (fileHandlerSuceeded) { - exportLogger.addHandler(fileHandler); - } else { - exportLogger = logger; - } - - exportLogger.info("Starting an export all job"); - - for (Long datasetId : findAllLocalDatasetIds()) { - // Potentially, there's a godzillion datasets in this Dataverse. - // This is why we go through the list of ids here, and instantiate - // only one dataset at a time. - Dataset dataset = this.find(datasetId); - if (dataset != null) { - // Accurate "is published?" test - ? - // Answer: Yes, it is! We can't trust dataset.isReleased() alone; because it is a dvobject method - // that returns (publicationDate != null). And "publicationDate" is essentially - // "the first publication date"; that stays the same as versions get - // published and/or deaccessioned. But in combination with !isDeaccessioned() - // it is indeed an accurate test. - if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) { - - // can't trust dataset.getPublicationDate(), no. - Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! 
:) - if (forceReExport || (publicationDate != null - && (dataset.getLastExportTime() == null - || dataset.getLastExportTime().before(publicationDate)))) { - countAll++; - try { - recordService.exportAllFormatsInNewTransaction(dataset); - exportLogger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId()); - countSuccess++; - } catch (Exception ex) { - exportLogger.info("Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId() + "; " + ex.getMessage()); - countError++; - } - } - } - } - } - exportLogger.info("Datasets processed: " + countAll.toString()); - exportLogger.info("Datasets exported successfully: " + countSuccess.toString()); - exportLogger.info("Datasets failures: " + countError.toString()); - exportLogger.info("Finished export-all job."); - - if (fileHandlerSuceeded) { - fileHandler.close(); - } - - } - - public void updateLastExportTimeStamp(Long datasetId) { - Date now = new Date(); - em.createNativeQuery("UPDATE Dataset SET lastExportTime='"+now.toString()+"' WHERE id="+datasetId).executeUpdate(); - } - - public Dataset setNonDatasetFileAsThumbnail(Dataset dataset, InputStream inputStream) { - if (dataset == null) { - logger.fine("In setNonDatasetFileAsThumbnail but dataset is null! Returning null."); - return null; - } - if (inputStream == null) { - logger.fine("In setNonDatasetFileAsThumbnail but inputStream is null! Returning null."); - return null; - } - dataset = DatasetUtil.persistDatasetLogoToStorageAndCreateThumbnail(dataset, inputStream); - dataset.setThumbnailFile(null); - return merge(dataset); - } - - public Dataset setDatasetFileAsThumbnail(Dataset dataset, DataFile datasetFileThumbnailToSwitchTo) { - if (dataset == null) { - logger.fine("In setDatasetFileAsThumbnail but dataset is null! Returning null."); - return null; - } - if (datasetFileThumbnailToSwitchTo == null) { - logger.fine("In setDatasetFileAsThumbnail but dataset is null! 
Returning null."); - return null; - } - DatasetUtil.deleteDatasetLogo(dataset); - dataset.setThumbnailFile(datasetFileThumbnailToSwitchTo); - dataset.setUseGenericThumbnail(false); - return merge(dataset); - } - - public Dataset removeDatasetThumbnail(Dataset dataset) { - if (dataset == null) { - logger.fine("In removeDatasetThumbnail but dataset is null! Returning null."); - return null; - } - DatasetUtil.deleteDatasetLogo(dataset); - dataset.setThumbnailFile(null); - dataset.setUseGenericThumbnail(true); - return merge(dataset); - } - - // persist assigned thumbnail in a single one-field-update query: - // (the point is to avoid doing an em.merge() on an entire dataset object...) - public void assignDatasetThumbnailByNativeQuery(Long datasetId, Long dataFileId) { - try { - em.createNativeQuery("UPDATE dataset SET thumbnailfile_id=" + dataFileId + " WHERE id=" + datasetId).executeUpdate(); - } catch (Exception ex) { - // it's ok to just ignore... - } - } - - public void assignDatasetThumbnailByNativeQuery(Dataset dataset, DataFile dataFile) { - try { - em.createNativeQuery("UPDATE dataset SET thumbnailfile_id=" + dataFile.getId() + " WHERE id=" + dataset.getId()).executeUpdate(); - } catch (Exception ex) { - // it's ok to just ignore... 
- } - } - - public WorkflowComment addWorkflowComment(WorkflowComment workflowComment) { - em.persist(workflowComment); - return workflowComment; - } + private static final Logger logger = Logger.getLogger(DatasetServiceBean.class.getCanonicalName()); + @EJB + IndexServiceBean indexService; + + @EJB + DOIEZIdServiceBean doiEZIdServiceBean; + + @EJB + SettingsServiceBean settingsService; + + @EJB + DatasetVersionServiceBean versionService; + + @EJB + AuthenticationServiceBean authentication; + + @EJB + DataFileServiceBean fileService; + + @EJB + PermissionServiceBean permissionService; + + @EJB + OAIRecordServiceBean recordService; + + private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + + @PersistenceContext(unitName = "VDCNet-ejbPU") + protected EntityManager em; + + public Dataset find(Object pk) { + return em.find(Dataset.class, pk); + } + + public List findByOwnerId(Long ownerId) { + return findByOwnerId(ownerId, false); + } + + public List findPublishedByOwnerId(Long ownerId) { + return findByOwnerId(ownerId, true); + } + + private List findByOwnerId(Long ownerId, boolean onlyPublished) { + List retList = new ArrayList<>(); + TypedQuery query = em.createQuery( + "select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class); + query.setParameter("ownerId", ownerId); + if (!onlyPublished) { + return query.getResultList(); + } else { + for (Dataset ds : query.getResultList()) { + if (ds.isReleased() && !ds.isDeaccessioned()) { + retList.add(ds); + } + } + return retList; + } + } + + public List findIdsByOwnerId(Long ownerId) { + return findIdsByOwnerId(ownerId, false); + } + + private List findIdsByOwnerId(Long ownerId, boolean onlyPublished) { + List retList = new ArrayList<>(); + if (!onlyPublished) { + TypedQuery query = em + .createQuery("select o.id from Dataset as o where o.owner.id =:ownerId order by o.id", Long.class); + query.setParameter("ownerId", ownerId); + return 
query.getResultList(); + } else { + TypedQuery query = em.createQuery( + "select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class); + query.setParameter("ownerId", ownerId); + for (Dataset ds : query.getResultList()) { + if (ds.isReleased() && !ds.isDeaccessioned()) { + retList.add(ds.getId()); + } + } + return retList; + } + } + + public List findAll() { + return em.createQuery("select object(o) from Dataset as o order by o.id", Dataset.class).getResultList(); + } + + public List findAllLocalDatasetIds() { + return em.createQuery("SELECT o.id FROM Dataset o WHERE o.harvestedFrom IS null ORDER BY o.id", Long.class) + .getResultList(); + } + + public List findAllUnindexed() { + return em.createQuery("SELECT o.id FROM Dataset o WHERE o.indexTime IS null ORDER BY o.id DESC", Long.class) + .getResultList(); + } + + /** + * For docs, see the equivalent method on the DataverseServiceBean. + * + * @param numPartitions + * @param partitionId + * @param skipIndexed + * @return a list of datasets + * @see DataverseServiceBean#findAllOrSubset(long, long, boolean) + */ + public List findAllOrSubset(long numPartitions, long partitionId, boolean skipIndexed) { + if (numPartitions < 1) { + long saneNumPartitions = 1; + numPartitions = saneNumPartitions; + } + String skipClause = skipIndexed ? "AND o.indexTime is null " : ""; + TypedQuery typedQuery = em + .createQuery("SELECT o.id FROM Dataset o WHERE MOD( o.id, :numPartitions) = :partitionId " + skipClause + + "ORDER BY o.id", Long.class); + typedQuery.setParameter("numPartitions", numPartitions); + typedQuery.setParameter("partitionId", partitionId); + return typedQuery.getResultList(); + } + + /** + * Merges the passed dataset to the persistence context. + * + * @param ds + * the dataset whose new state we want to persist. + * @return The managed entity representing {@code ds}. 
+ */ + public Dataset merge(Dataset ds) { + return em.merge(ds); + } + + public Dataset findByGlobalId(String globalId) { + + String protocol = ""; + String authority = ""; + String identifier = ""; + int index1 = globalId.indexOf(':'); + String nonNullDefaultIfKeyNotFound = ""; + // This is kind of wrong right here: we should not assume that this is *our* DOI + // - + // it can be somebody else's registered DOI that we harvested. And they can + // have their own separator characters defined - so we should not assume + // that everybody's DOIs will look like ours! + // Also, this separator character gets applied to handles lookups too, below. + // Which is probably wrong too... + // -- L.A. 4.2.4 + String separator = settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, + nonNullDefaultIfKeyNotFound); + int index2 = globalId.indexOf(separator, index1 + 1); + int index3; + if (index1 == -1) { + logger.info("Error parsing identifier: " + globalId + ". ':' not found in string"); + return null; + } else { + protocol = globalId.substring(0, index1); + } + if (index2 == -1) { + logger.info("Error parsing identifier: " + globalId + ". Second separator not found in string"); + return null; + } else { + authority = globalId.substring(index1 + 1, index2); + } + if (protocol.equals("doi")) { + + // //ICPSR DOIs have some lower case characters (for ex., + // 10.3886/ICPSR04599.v1), and that's how are they saved in the + // IQSS production database. 
So .toUpperCase() is now optional + Boolean useMixedCase = settingsService.isTrueForKey(SettingsServiceBean.Key.DoiUseMixedCase, false); + + identifier = globalId.substring(index2 + 1); // .toUpperCase(); + if (!useMixedCase) { + identifier = identifier.toUpperCase(); + } + } + String queryStr = "SELECT s from Dataset s where s.identifier = :identifier and s.protocol= :protocol and s.authority= :authority"; + Dataset foundDataset = null; + try { + Query query = em.createQuery(queryStr); + query.setParameter("identifier", identifier); + query.setParameter("protocol", protocol); + query.setParameter("authority", authority); + foundDataset = (Dataset) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no ds found: " + globalId); + // DO nothing, just return null. + } + return foundDataset; + } + + public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { + String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, + "randomString"); + String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); + if (doiShoulder.indexOf(settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, "/")) >= 0) { + logger.warning("doiShoulder cannot contain / or doiSeparator"); + } + switch (doiIdentifierType) { + case "randomString": + return generateIdentifierAsRandomString(dataset, idServiceBean); + case "sequentialNumber": + return generateIdentifierAsSequentialNumber(dataset, idServiceBean); + case "shoulderWithRandomString": + return doiShoulder + generateIdentifierAsRandomString(dataset, idServiceBean); + case "shoulderWithSequentialNumber": + return doiShoulder + generateIdentifierAsSequentialNumber(dataset, idServiceBean); + default: + /* Should we throw an exception instead?? -- L.A. 
4.6.2 */ + return generateIdentifierAsRandomString(dataset, idServiceBean); + } + } + + private String generateIdentifierAsRandomString(Dataset dataset, IdServiceBean idServiceBean) { + + String identifier = null; + do { + identifier = RandomStringUtils.randomAlphanumeric(6).toUpperCase(); + } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); + + return identifier; + } + + private String generateIdentifierAsSequentialNumber(Dataset dataset, IdServiceBean idServiceBean) { + + String identifier; + do { + StoredProcedureQuery query = this.em + .createNamedStoredProcedureQuery("Dataset.generateIdentifierAsSequentialNumber"); + query.execute(); + Integer identifierNumeric = (Integer) query.getOutputParameterValue(1); + // some diagnostics here maybe - is it possible to determine that it's failing + // because the stored procedure hasn't been created in the database? + if (identifierNumeric == null) { + return null; + } + identifier = identifierNumeric.toString(); + } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); + + return identifier; + } + + /** + * Check that a identifier entered by the user is unique (not currently used for + * any other study in this Dataverse Network) alos check for duplicate in EZID + * if needed + * + * @param userIdentifier + * @param dataset + * @param idServiceBean + * @return + */ + public boolean isIdentifierUniqueInDatabase(String userIdentifier, Dataset dataset, IdServiceBean idServiceBean) { + String query = "SELECT d FROM Dataset d WHERE d.identifier = '" + userIdentifier + "'"; + query += " and d.protocol ='" + dataset.getProtocol() + "'"; + query += " and d.authority = '" + dataset.getAuthority() + "'"; + boolean u = em.createQuery(query).getResultList().isEmpty(); + + try { + if (idServiceBean.alreadyExists(dataset)) { + u = false; + } + } catch (Exception e) { + // we can live with failure - means identifier not found remotely + } + + return u; + } + + public DatasetVersion 
storeVersion(DatasetVersion dsv) { + em.persist(dsv); + return dsv; + } + + public String createCitationRIS(DatasetVersion version) { + return createCitationRIS(version, null); + } + + public String createCitationRIS(DatasetVersion version, FileMetadata fileMetadata) { + String publisher = version.getRootDataverseNameforCitation(); + List authorList = version.getDatasetAuthors(); + String retString = "Provider: " + publisher + "\r\n"; + retString += "Content: text/plain; charset=\"us-ascii\"" + "\r\n"; + // Using type "DBASE" - "Online Database", for consistency with + // EndNote (see the longer comment in the EndNote section below)> + + retString += "TY - DBASE" + "\r\n"; + retString += "T1 - " + version.getTitle() + "\r\n"; + for (DatasetAuthor author : authorList) { + retString += "AU - " + author.getName().getDisplayValue() + "\r\n"; + } + retString += "DO - " + version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier() + "\r\n"; + retString += "PY - " + version.getVersionYear() + "\r\n"; + retString += "UR - " + version.getDataset().getPersistentURL() + "\r\n"; + retString += "PB - " + publisher + "\r\n"; + + // a DataFile citation also includes filename und UNF, if applicable: + if (fileMetadata != null) { + retString += "C1 - " + fileMetadata.getLabel() + "\r\n"; + + if (fileMetadata.getDataFile().isTabularData()) { + if (fileMetadata.getDataFile().getUnf() != null) { + retString += "C2 - " + fileMetadata.getDataFile().getUnf() + "\r\n"; + } + } + } + + // closing element: + retString += "ER - \r\n"; + + return retString; + } + + private XMLOutputFactory xmlOutputFactory = null; + + public String createCitationXML(DatasetVersion datasetVersion, FileMetadata fileMetadata) { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + createEndNoteCitation(outStream, datasetVersion, fileMetadata); + String xml = outStream.toString(); + return xml; + } + 
+ public void createEndNoteCitation(OutputStream os, DatasetVersion datasetVersion, FileMetadata fileMetadata) { + + xmlOutputFactory = javax.xml.stream.XMLOutputFactory.newInstance(); + XMLStreamWriter xmlw = null; + try { + xmlw = xmlOutputFactory.createXMLStreamWriter(os); + xmlw.writeStartDocument(); + createEndNoteXML(xmlw, datasetVersion, fileMetadata); + xmlw.writeEndDocument(); + } catch (XMLStreamException ex) { + Logger.getLogger("global").log(Level.SEVERE, null, ex); + throw new EJBException("ERROR occurred during creating endnote xml.", ex); + } finally { + try { + if (xmlw != null) { + xmlw.close(); + } + } catch (XMLStreamException ex) { + } + } + } + + private void createEndNoteXML(XMLStreamWriter xmlw, DatasetVersion version, FileMetadata fileMetadata) + throws XMLStreamException { + + String title = version.getTitle(); + String versionYear = version.getVersionYear(); + String publisher = version.getRootDataverseNameforCitation(); + + List authorList = version.getDatasetAuthors(); + + xmlw.writeStartElement("xml"); + xmlw.writeStartElement("records"); + + xmlw.writeStartElement("record"); + + // "Ref-type" indicates which of the (numerous!) available EndNote + // schemas this record will be interpreted as. + // This is relatively important. Certain fields with generic + // names like "custom1" and "custom2" become very specific things + // in specific schemas; for example, custom1 shows as "legal notice" + // in "Journal Article" (ref-type 84), or as "year published" in + // "Government Document". + // We don't want the UNF to show as a "legal notice"! + // We have found a ref-type that works ok for our purposes - + // "Online Database" (type 45). In this one, the fields Custom1 + // and Custom2 are not translated and just show as is. + // And "Custom1" still beats "legal notice". + // -- L.A. 
12.12.2014 beta 10 + + xmlw.writeStartElement("ref-type"); + xmlw.writeAttribute("name", "Online Database"); + xmlw.writeCharacters("45"); + xmlw.writeEndElement(); // ref-type + + xmlw.writeStartElement("contributors"); + xmlw.writeStartElement("authors"); + for (DatasetAuthor author : authorList) { + xmlw.writeStartElement("author"); + xmlw.writeCharacters(author.getName().getDisplayValue()); + xmlw.writeEndElement(); // author + } + xmlw.writeEndElement(); // authors + xmlw.writeEndElement(); // contributors + + xmlw.writeStartElement("titles"); + xmlw.writeStartElement("title"); + xmlw.writeCharacters(title); + xmlw.writeEndElement(); // title + + xmlw.writeEndElement(); // titles + + xmlw.writeStartElement("section"); + String sectionString; + if (version.getDataset().isReleased()) { + sectionString = new SimpleDateFormat("yyyy-MM-dd").format(version.getDataset().getPublicationDate()); + } else { + sectionString = new SimpleDateFormat("yyyy-MM-dd").format(version.getLastUpdateTime()); + } + + xmlw.writeCharacters(sectionString); + xmlw.writeEndElement(); // publisher + + xmlw.writeStartElement("dates"); + xmlw.writeStartElement("year"); + xmlw.writeCharacters(versionYear); + xmlw.writeEndElement(); // year + xmlw.writeEndElement(); // dates + + xmlw.writeStartElement("publisher"); + xmlw.writeCharacters(publisher); + xmlw.writeEndElement(); // publisher + + xmlw.writeStartElement("urls"); + xmlw.writeStartElement("related-urls"); + xmlw.writeStartElement("url"); + xmlw.writeCharacters(version.getDataset().getPersistentURL()); + xmlw.writeEndElement(); // url + xmlw.writeEndElement(); // related-urls + xmlw.writeEndElement(); // urls + + // a DataFile citation also includes the filename and (for Tabular + // files) the UNF signature, that we put into the custom1 and custom2 + // fields respectively: + + if (fileMetadata != null) { + xmlw.writeStartElement("custom1"); + xmlw.writeCharacters(fileMetadata.getLabel()); + xmlw.writeEndElement(); // custom1 + + if 
(fileMetadata.getDataFile().isTabularData()) { + if (fileMetadata.getDataFile().getUnf() != null) { + xmlw.writeStartElement("custom2"); + xmlw.writeCharacters(fileMetadata.getDataFile().getUnf()); + xmlw.writeEndElement(); // custom2 + } + } + } + + xmlw.writeStartElement("electronic-resource-num"); + String electResourceNum = version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier(); + xmlw.writeCharacters(electResourceNum); + xmlw.writeEndElement(); + // 10.3886/ICPSR03259.v1 + xmlw.writeEndElement(); // record + + xmlw.writeEndElement(); // records + xmlw.writeEndElement(); // xml + + } + + public DatasetVersionUser getDatasetVersionUser(DatasetVersion version, User user) { + + DatasetVersionUser ddu = null; + Query query = em.createQuery("select object(o) from DatasetVersionUser as o " + + "where o.datasetVersion.id =:versionId and o.authenticatedUser.id =:userId"); + query.setParameter("versionId", version.getId()); + String identifier = user.getIdentifier(); + identifier = identifier.startsWith("@") ? identifier.substring(1) : identifier; + AuthenticatedUser au = authentication.getAuthenticatedUser(identifier); + query.setParameter("userId", au.getId()); + try { + ddu = (DatasetVersionUser) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // DO nothing, just return null. 
+ } + return ddu; + } + + public boolean checkDatasetLock(Long datasetId) { + TypedQuery lockCounter = em.createNamedQuery("DatasetLock.getLocksByDatasetId", DatasetLock.class); + lockCounter.setParameter("datasetId", datasetId); + lockCounter.setMaxResults(1); + List lock = lockCounter.getResultList(); + return lock.size() > 0; + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public DatasetLock addDatasetLock(Dataset dataset, DatasetLock lock) { + lock.setDataset(dataset); + dataset.addLock(lock); + em.persist(lock); + em.merge(dataset); + return lock; + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) /* ? */ + public DatasetLock addDatasetLock(Long datasetId, DatasetLock.Reason reason, Long userId, String info) { + + Dataset dataset = em.find(Dataset.class, datasetId); + + AuthenticatedUser user = null; + if (userId != null) { + user = em.find(AuthenticatedUser.class, userId); + } + + DatasetLock lock = new DatasetLock(reason, user); + lock.setDataset(dataset); + lock.setInfo(info); + lock.setStartTime(new Date()); + + if (userId != null) { + lock.setUser(user); + if (user.getDatasetLocks() == null) { + user.setDatasetLocks(new ArrayList<>()); + } + user.getDatasetLocks().add(lock); + } + + return addDatasetLock(dataset, lock); + } + + /** + * Removes all {@link DatasetLock}s for the dataset whose id is passed and + * reason is {@code aReason}. + * + * @param datasetId + * Id of the dataset whose locks will b removed. + * @param aReason + * The reason of the locks that will be removed. 
+ */ + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void removeDatasetLocks(Long datasetId, DatasetLock.Reason aReason) { + Dataset dataset = em.find(Dataset.class, datasetId); + new HashSet<>(dataset.getLocks()).stream().filter(l -> l.getReason() == aReason).forEach(lock -> { + dataset.removeLock(lock); + + AuthenticatedUser user = lock.getUser(); + user.getDatasetLocks().remove(lock); + + em.remove(lock); + }); + } + + /* + * getTitleFromLatestVersion methods use native query to return a dataset title + * + * There are two versions: 1) The version with datasetId param only will return + * the title regardless of version state 2)The version with the param + * 'includeDraft' boolean will return the most recently published title if the + * param is set to false If no Title found return empty string - protects + * against calling with include draft = false with no published version + */ + + public String getTitleFromLatestVersion(Long datasetId) { + return getTitleFromLatestVersion(datasetId, true); + } + + public String getTitleFromLatestVersion(Long datasetId, boolean includeDraft) { + + String whereDraft = ""; + // This clause will exclude draft versions from the select + if (!includeDraft) { + whereDraft = " and v.versionstate !='DRAFT' "; + } + + try { + return (String) em + .createNativeQuery("select dfv.value from dataset d " + + " join datasetversion v on d.id = v.dataset_id " + + " join datasetfield df on v.id = df.datasetversion_id " + + " join datasetfieldvalue dfv on df.id = dfv.datasetfield_id " + + " join datasetfieldtype dft on df.datasetfieldtype_id = dft.id " + " where dft.name = '" + + DatasetFieldConstant.title + "' and v.dataset_id =" + datasetId + whereDraft + + " order by v.versionnumber desc, v.minorVersionNumber desc limit 1 " + ";") + .getSingleResult(); + + } catch (Exception ex) { + logger.log(Level.INFO, "exception trying to get title from latest version: {0}", ex); + return ""; + } + + } + + public Dataset 
getDatasetByHarvestInfo(Dataverse dataverse, String harvestIdentifier) { + String queryStr = "SELECT d FROM Dataset d, DvObject o WHERE d.id = o.id AND o.owner.id = " + dataverse.getId() + + " and d.harvestIdentifier = '" + harvestIdentifier + "'"; + Query query = em.createQuery(queryStr); + List resultList = query.getResultList(); + Dataset dataset = null; + if (resultList.size() > 1) { + throw new EJBException("More than one dataset found in the dataverse (id= " + dataverse.getId() + + "), with harvestIdentifier= " + harvestIdentifier); + } + if (resultList.size() == 1) { + dataset = (Dataset) resultList.get(0); + } + return dataset; + + } + + public Long getDatasetVersionCardImage(Long versionId, User user) { + if (versionId == null) { + return null; + } + + return null; + } + + /** + * Used to identify and properly display Harvested objects on the dataverse + * page. + * + * @param datasetIds + * @return + */ + public Map getArchiveDescriptionsForHarvestedDatasets(Set datasetIds) { + if (datasetIds == null || datasetIds.size() < 1) { + return null; + } + + String datasetIdStr = Strings.join(datasetIds, ", "); + + String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + + datasetIdStr + ")"; + List searchResults; + + try { + searchResults = em.createNativeQuery(qstr).getResultList(); + } catch (Exception ex) { + searchResults = null; + } + + if (searchResults == null) { + return null; + } + + Map ret = new HashMap<>(); + + for (Object[] result : searchResults) { + Long dsId; + if (result[0] != null) { + try { + dsId = (Long) result[0]; + } catch (Exception ex) { + dsId = null; + } + if (dsId == null) { + continue; + } + + ret.put(dsId, (String) result[1]); + } + } + + return ret; + } + + public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) { + if (datasetVersion == null) { + return false; + } + + // First, check if this dataset has a designated 
thumbnail image: + + if (datasetVersion.getDataset() != null) { + DataFile dataFile = datasetVersion.getDataset().getThumbnailFile(); + if (dataFile != null) { + return ImageThumbConverter.isThumbnailAvailable(dataFile, 48); + } + } + + // If not, we'll try to use one of the files in this dataset version: + // (the first file with an available thumbnail, really) + + List fileMetadatas = datasetVersion.getFileMetadatas(); + + for (FileMetadata fileMetadata : fileMetadatas) { + DataFile dataFile = fileMetadata.getDataFile(); + + // TODO: use permissionsWrapper here - ? + // (we are looking up these download permissions on individual files, + // true, and those are unique... but the wrapper may be able to save + // us some queries when it determines the download permission on the + // dataset as a whole? -- L.A. 4.2.1 + + if (fileService.isThumbnailAvailable(dataFile) + && permissionService.userOn(user, dataFile).has(Permission.DownloadFile)) { // , user)) { + return true; + } + + } + + return false; + } + + // reExportAll *forces* a reexport on all published datasets; whether they + // have the "last export" time stamp set or not. + @Asynchronous + public void reExportAllAsync() { + exportAllDatasets(true); + } + + public void reExportAll() { + exportAllDatasets(true); + } + + // exportAll() will try to export the yet unexported datasets (it will honor + // and trust the "last export" time stamp). + + @Asynchronous + public void exportAllAsync() { + exportAllDatasets(false); + } + + public void exportAll() { + exportAllDatasets(false); + } + + public void exportAllDatasets(boolean forceReExport) { + Integer countAll = 0; + Integer countSuccess = 0; + Integer countError = 0; + String logTimestamp = logFormatter.format(new Date()); + Logger exportLogger = Logger + .getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." 
+ "ExportAll" + logTimestamp); + String logFileName = "../logs" + File.separator + "export_" + logTimestamp + ".log"; + FileHandler fileHandler; + boolean fileHandlerSuceeded; + try { + fileHandler = new FileHandler(logFileName); + exportLogger.setUseParentHandlers(false); + fileHandlerSuceeded = true; + } catch (IOException | SecurityException ex) { + Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); + return; + } + + if (fileHandlerSuceeded) { + exportLogger.addHandler(fileHandler); + } else { + exportLogger = logger; + } + + exportLogger.info("Starting an export all job"); + + for (Long datasetId : findAllLocalDatasetIds()) { + // Potentially, there's a godzillion datasets in this Dataverse. + // This is why we go through the list of ids here, and instantiate + // only one dataset at a time. + Dataset dataset = this.find(datasetId); + if (dataset != null) { + // Accurate "is published?" test - ? + // Answer: Yes, it is! We can't trust dataset.isReleased() alone; because it is + // a dvobject method + // that returns (publicationDate != null). And "publicationDate" is essentially + // "the first publication date"; that stays the same as versions get + // published and/or deaccessioned. But in combination with !isDeaccessioned() + // it is indeed an accurate test. + if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) { + + // can't trust dataset.getPublicationDate(), no. + Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a + // non-null released + // version! Maybe not - SEK + // 8/19 (We do now! 
:) + if (forceReExport || (publicationDate != null && (dataset.getLastExportTime() == null + || dataset.getLastExportTime().before(publicationDate)))) { + countAll++; + try { + recordService.exportAllFormatsInNewTransaction(dataset); + exportLogger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + + dataset.getGlobalId()); + countSuccess++; + } catch (Exception ex) { + exportLogger.info("Error exporting dataset: " + dataset.getDisplayName() + " " + + dataset.getGlobalId() + "; " + ex.getMessage()); + countError++; + } + } + } + } + } + exportLogger.info("Datasets processed: " + countAll.toString()); + exportLogger.info("Datasets exported successfully: " + countSuccess.toString()); + exportLogger.info("Datasets failures: " + countError.toString()); + exportLogger.info("Finished export-all job."); + + if (fileHandlerSuceeded) { + fileHandler.close(); + } + + } + + public void updateLastExportTimeStamp(Long datasetId) { + Date now = new Date(); + em.createNativeQuery("UPDATE Dataset SET lastExportTime='" + now.toString() + "' WHERE id=" + datasetId) + .executeUpdate(); + } + + public Dataset setNonDatasetFileAsThumbnail(Dataset dataset, InputStream inputStream) { + if (dataset == null) { + logger.fine("In setNonDatasetFileAsThumbnail but dataset is null! Returning null."); + return null; + } + if (inputStream == null) { + logger.fine("In setNonDatasetFileAsThumbnail but inputStream is null! Returning null."); + return null; + } + dataset = DatasetUtil.persistDatasetLogoToStorageAndCreateThumbnail(dataset, inputStream); + dataset.setThumbnailFile(null); + return merge(dataset); + } + + public Dataset setDatasetFileAsThumbnail(Dataset dataset, DataFile datasetFileThumbnailToSwitchTo) { + if (dataset == null) { + logger.fine("In setDatasetFileAsThumbnail but dataset is null! Returning null."); + return null; + } + if (datasetFileThumbnailToSwitchTo == null) { + logger.fine("In setDatasetFileAsThumbnail but dataset is null! 
Returning null."); + return null; + } + DatasetUtil.deleteDatasetLogo(dataset); + dataset.setThumbnailFile(datasetFileThumbnailToSwitchTo); + dataset.setUseGenericThumbnail(false); + return merge(dataset); + } + + public Dataset removeDatasetThumbnail(Dataset dataset) { + if (dataset == null) { + logger.fine("In removeDatasetThumbnail but dataset is null! Returning null."); + return null; + } + DatasetUtil.deleteDatasetLogo(dataset); + dataset.setThumbnailFile(null); + dataset.setUseGenericThumbnail(true); + return merge(dataset); + } + + // persist assigned thumbnail in a single one-field-update query: + // (the point is to avoid doing an em.merge() on an entire dataset object...) + public void assignDatasetThumbnailByNativeQuery(Long datasetId, Long dataFileId) { + try { + em.createNativeQuery("UPDATE dataset SET thumbnailfile_id=" + dataFileId + " WHERE id=" + datasetId) + .executeUpdate(); + } catch (Exception ex) { + // it's ok to just ignore... + } + } + + public void assignDatasetThumbnailByNativeQuery(Dataset dataset, DataFile dataFile) { + try { + em.createNativeQuery( + "UPDATE dataset SET thumbnailfile_id=" + dataFile.getId() + " WHERE id=" + dataset.getId()) + .executeUpdate(); + } catch (Exception ex) { + // it's ok to just ignore... + } + } + + public WorkflowComment addWorkflowComment(WorkflowComment workflowComment) { + em.persist(workflowComment); + return workflowComment; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 7b958b0c194..a94bd8e5c1f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -187,6 +187,8 @@ public enum Key { * character separating the shoulder from the rest of the identifier is not '/' or DoiSeparator. 
*/ DoiShoulder, + //Do not force DOIs to uppercase before searching in database + DoiUseMixedCase, /* Removed for now - tried to add here but DOI Service Bean didn't like it at start-up DoiUsername, DoiPassword, From d831b42a0ec4537618cb87c166c6d7b1532e16e1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 4 May 2018 15:49:02 -0400 Subject: [PATCH 05/44] cleaner diff --- .../iq/dataverse/DatasetServiceBean.java | 1646 ++++++++--------- 1 file changed, 812 insertions(+), 834 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 3f574dbaee7..cb2b9073d24 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -49,160 +49,152 @@ * @author skraffmiller */ + @Stateless @Named public class DatasetServiceBean implements java.io.Serializable { - private static final Logger logger = Logger.getLogger(DatasetServiceBean.class.getCanonicalName()); - @EJB - IndexServiceBean indexService; - - @EJB - DOIEZIdServiceBean doiEZIdServiceBean; - - @EJB - SettingsServiceBean settingsService; - - @EJB - DatasetVersionServiceBean versionService; - - @EJB - AuthenticationServiceBean authentication; - - @EJB - DataFileServiceBean fileService; - - @EJB - PermissionServiceBean permissionService; - - @EJB - OAIRecordServiceBean recordService; + private static final Logger logger = Logger.getLogger(DatasetServiceBean.class.getCanonicalName()); + @EJB + IndexServiceBean indexService; + + @EJB + DOIEZIdServiceBean doiEZIdServiceBean; + + @EJB + SettingsServiceBean settingsService; + + @EJB + DatasetVersionServiceBean versionService; + + @EJB + AuthenticationServiceBean authentication; + + @EJB + DataFileServiceBean fileService; + + @EJB + PermissionServiceBean permissionService; + + @EJB + OAIRecordServiceBean recordService; + + private static final SimpleDateFormat logFormatter = new 
SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + + @PersistenceContext(unitName = "VDCNet-ejbPU") + protected EntityManager em; + + public Dataset find(Object pk) { + return em.find(Dataset.class, pk); + } + + public List findByOwnerId(Long ownerId) { + return findByOwnerId(ownerId, false); + } + + public List findPublishedByOwnerId(Long ownerId) { + return findByOwnerId(ownerId, true); + } + + private List findByOwnerId(Long ownerId, boolean onlyPublished) { + List retList = new ArrayList<>(); + TypedQuery query = em.createQuery("select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class); + query.setParameter("ownerId", ownerId); + if (!onlyPublished) { + return query.getResultList(); + } else { + for (Dataset ds : query.getResultList()) { + if (ds.isReleased() && !ds.isDeaccessioned()) { + retList.add(ds); + } + } + return retList; + } + } + + public List findIdsByOwnerId(Long ownerId) { + return findIdsByOwnerId(ownerId, false); + } + + private List findIdsByOwnerId(Long ownerId, boolean onlyPublished) { + List retList = new ArrayList<>(); + if (!onlyPublished) { + TypedQuery query = em.createQuery("select o.id from Dataset as o where o.owner.id =:ownerId order by o.id", Long.class); + query.setParameter("ownerId", ownerId); + return query.getResultList(); + } else { + TypedQuery query = em.createQuery("select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class); + query.setParameter("ownerId", ownerId); + for (Dataset ds : query.getResultList()) { + if (ds.isReleased() && !ds.isDeaccessioned()) { + retList.add(ds.getId()); + } + } + return retList; + } + } + + public List findAll() { + return em.createQuery("select object(o) from Dataset as o order by o.id", Dataset.class).getResultList(); + } + + + public List findAllLocalDatasetIds() { + return em.createQuery("SELECT o.id FROM Dataset o WHERE o.harvestedFrom IS null ORDER BY o.id", Long.class).getResultList(); + } + + public List 
findAllUnindexed() { + return em.createQuery("SELECT o.id FROM Dataset o WHERE o.indexTime IS null ORDER BY o.id DESC", Long.class).getResultList(); + } + + /** + * For docs, see the equivalent method on the DataverseServiceBean. + * @param numPartitions + * @param partitionId + * @param skipIndexed + * @return a list of datasets + * @see DataverseServiceBean#findAllOrSubset(long, long, boolean) + */ + public List findAllOrSubset(long numPartitions, long partitionId, boolean skipIndexed) { + if (numPartitions < 1) { + long saneNumPartitions = 1; + numPartitions = saneNumPartitions; + } + String skipClause = skipIndexed ? "AND o.indexTime is null " : ""; + TypedQuery typedQuery = em.createQuery("SELECT o.id FROM Dataset o WHERE MOD( o.id, :numPartitions) = :partitionId " + + skipClause + + "ORDER BY o.id", Long.class); + typedQuery.setParameter("numPartitions", numPartitions); + typedQuery.setParameter("partitionId", partitionId); + return typedQuery.getResultList(); + } + + /** + * Merges the passed dataset to the persistence context. + * @param ds the dataset whose new state we want to persist. + * @return The managed entity representing {@code ds}. + */ + public Dataset merge( Dataset ds ) { + return em.merge(ds); + } + + public Dataset findByGlobalId(String globalId) { + + String protocol = ""; + String authority = ""; + String identifier = ""; + int index1 = globalId.indexOf(':'); + String nonNullDefaultIfKeyNotFound = ""; + // This is kind of wrong right here: we should not assume that this is *our* DOI - + // it can be somebody else's registered DOI that we harvested. And they can + // have their own separator characters defined - so we should not assume + // that everybody's DOIs will look like ours! + // Also, this separator character gets applied to handles lookups too, below. + // Which is probably wrong too... + // -- L.A. 
4.2.4 + String separator = settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, nonNullDefaultIfKeyNotFound); + int index2 = globalId.indexOf(separator, index1 + 1); - private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - - @PersistenceContext(unitName = "VDCNet-ejbPU") - protected EntityManager em; - - public Dataset find(Object pk) { - return em.find(Dataset.class, pk); - } - - public List findByOwnerId(Long ownerId) { - return findByOwnerId(ownerId, false); - } - - public List findPublishedByOwnerId(Long ownerId) { - return findByOwnerId(ownerId, true); - } - - private List findByOwnerId(Long ownerId, boolean onlyPublished) { - List retList = new ArrayList<>(); - TypedQuery query = em.createQuery( - "select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class); - query.setParameter("ownerId", ownerId); - if (!onlyPublished) { - return query.getResultList(); - } else { - for (Dataset ds : query.getResultList()) { - if (ds.isReleased() && !ds.isDeaccessioned()) { - retList.add(ds); - } - } - return retList; - } - } - - public List findIdsByOwnerId(Long ownerId) { - return findIdsByOwnerId(ownerId, false); - } - - private List findIdsByOwnerId(Long ownerId, boolean onlyPublished) { - List retList = new ArrayList<>(); - if (!onlyPublished) { - TypedQuery query = em - .createQuery("select o.id from Dataset as o where o.owner.id =:ownerId order by o.id", Long.class); - query.setParameter("ownerId", ownerId); - return query.getResultList(); - } else { - TypedQuery query = em.createQuery( - "select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class); - query.setParameter("ownerId", ownerId); - for (Dataset ds : query.getResultList()) { - if (ds.isReleased() && !ds.isDeaccessioned()) { - retList.add(ds.getId()); - } - } - return retList; - } - } - - public List findAll() { - return em.createQuery("select object(o) from Dataset as o order by 
o.id", Dataset.class).getResultList(); - } - - public List findAllLocalDatasetIds() { - return em.createQuery("SELECT o.id FROM Dataset o WHERE o.harvestedFrom IS null ORDER BY o.id", Long.class) - .getResultList(); - } - - public List findAllUnindexed() { - return em.createQuery("SELECT o.id FROM Dataset o WHERE o.indexTime IS null ORDER BY o.id DESC", Long.class) - .getResultList(); - } - - /** - * For docs, see the equivalent method on the DataverseServiceBean. - * - * @param numPartitions - * @param partitionId - * @param skipIndexed - * @return a list of datasets - * @see DataverseServiceBean#findAllOrSubset(long, long, boolean) - */ - public List findAllOrSubset(long numPartitions, long partitionId, boolean skipIndexed) { - if (numPartitions < 1) { - long saneNumPartitions = 1; - numPartitions = saneNumPartitions; - } - String skipClause = skipIndexed ? "AND o.indexTime is null " : ""; - TypedQuery typedQuery = em - .createQuery("SELECT o.id FROM Dataset o WHERE MOD( o.id, :numPartitions) = :partitionId " + skipClause - + "ORDER BY o.id", Long.class); - typedQuery.setParameter("numPartitions", numPartitions); - typedQuery.setParameter("partitionId", partitionId); - return typedQuery.getResultList(); - } - - /** - * Merges the passed dataset to the persistence context. - * - * @param ds - * the dataset whose new state we want to persist. - * @return The managed entity representing {@code ds}. - */ - public Dataset merge(Dataset ds) { - return em.merge(ds); - } - - public Dataset findByGlobalId(String globalId) { - - String protocol = ""; - String authority = ""; - String identifier = ""; - int index1 = globalId.indexOf(':'); - String nonNullDefaultIfKeyNotFound = ""; - // This is kind of wrong right here: we should not assume that this is *our* DOI - // - - // it can be somebody else's registered DOI that we harvested. And they can - // have their own separator characters defined - so we should not assume - // that everybody's DOIs will look like ours! 
- // Also, this separator character gets applied to handles lookups too, below. - // Which is probably wrong too... - // -- L.A. 4.2.4 - String separator = settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, - nonNullDefaultIfKeyNotFound); - int index2 = globalId.indexOf(separator, index1 + 1); - int index3; if (index1 == -1) { logger.info("Error parsing identifier: " + globalId + ". ':' not found in string"); return null; @@ -227,690 +219,676 @@ public Dataset findByGlobalId(String globalId) { identifier = identifier.toUpperCase(); } } - String queryStr = "SELECT s from Dataset s where s.identifier = :identifier and s.protocol= :protocol and s.authority= :authority"; - Dataset foundDataset = null; - try { - Query query = em.createQuery(queryStr); - query.setParameter("identifier", identifier); - query.setParameter("protocol", protocol); - query.setParameter("authority", authority); - foundDataset = (Dataset) query.getSingleResult(); - } catch (javax.persistence.NoResultException e) { - // (set to .info, this can fill the log file with thousands of - // these messages during a large harvest run) - logger.fine("no ds found: " + globalId); - // DO nothing, just return null. 
- } - return foundDataset; - } - - public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, - "randomString"); - String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); - if (doiShoulder.indexOf(settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, "/")) >= 0) { - logger.warning("doiShoulder cannot contain / or doiSeparator"); - } - switch (doiIdentifierType) { - case "randomString": - return generateIdentifierAsRandomString(dataset, idServiceBean); - case "sequentialNumber": - return generateIdentifierAsSequentialNumber(dataset, idServiceBean); - case "shoulderWithRandomString": - return doiShoulder + generateIdentifierAsRandomString(dataset, idServiceBean); - case "shoulderWithSequentialNumber": - return doiShoulder + generateIdentifierAsSequentialNumber(dataset, idServiceBean); - default: - /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return generateIdentifierAsRandomString(dataset, idServiceBean); - } - } - - private String generateIdentifierAsRandomString(Dataset dataset, IdServiceBean idServiceBean) { - - String identifier = null; - do { - identifier = RandomStringUtils.randomAlphanumeric(6).toUpperCase(); - } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); - - return identifier; - } - - private String generateIdentifierAsSequentialNumber(Dataset dataset, IdServiceBean idServiceBean) { - - String identifier; - do { - StoredProcedureQuery query = this.em - .createNamedStoredProcedureQuery("Dataset.generateIdentifierAsSequentialNumber"); - query.execute(); - Integer identifierNumeric = (Integer) query.getOutputParameterValue(1); - // some diagnostics here maybe - is it possible to determine that it's failing - // because the stored procedure hasn't been created in the database? 
- if (identifierNumeric == null) { - return null; - } - identifier = identifierNumeric.toString(); - } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); - - return identifier; - } - - /** - * Check that a identifier entered by the user is unique (not currently used for - * any other study in this Dataverse Network) alos check for duplicate in EZID - * if needed - * - * @param userIdentifier - * @param dataset - * @param idServiceBean - * @return - */ - public boolean isIdentifierUniqueInDatabase(String userIdentifier, Dataset dataset, IdServiceBean idServiceBean) { - String query = "SELECT d FROM Dataset d WHERE d.identifier = '" + userIdentifier + "'"; - query += " and d.protocol ='" + dataset.getProtocol() + "'"; - query += " and d.authority = '" + dataset.getAuthority() + "'"; - boolean u = em.createQuery(query).getResultList().isEmpty(); - - try { - if (idServiceBean.alreadyExists(dataset)) { - u = false; - } - } catch (Exception e) { - // we can live with failure - means identifier not found remotely - } - - return u; - } - - public DatasetVersion storeVersion(DatasetVersion dsv) { - em.persist(dsv); - return dsv; - } - - public String createCitationRIS(DatasetVersion version) { - return createCitationRIS(version, null); - } - - public String createCitationRIS(DatasetVersion version, FileMetadata fileMetadata) { - String publisher = version.getRootDataverseNameforCitation(); - List authorList = version.getDatasetAuthors(); - String retString = "Provider: " + publisher + "\r\n"; - retString += "Content: text/plain; charset=\"us-ascii\"" + "\r\n"; - // Using type "DBASE" - "Online Database", for consistency with - // EndNote (see the longer comment in the EndNote section below)> - - retString += "TY - DBASE" + "\r\n"; - retString += "T1 - " + version.getTitle() + "\r\n"; - for (DatasetAuthor author : authorList) { - retString += "AU - " + author.getName().getDisplayValue() + "\r\n"; - } - retString += "DO - " + 
version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() - + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier() + "\r\n"; - retString += "PY - " + version.getVersionYear() + "\r\n"; - retString += "UR - " + version.getDataset().getPersistentURL() + "\r\n"; - retString += "PB - " + publisher + "\r\n"; - - // a DataFile citation also includes filename und UNF, if applicable: - if (fileMetadata != null) { - retString += "C1 - " + fileMetadata.getLabel() + "\r\n"; - - if (fileMetadata.getDataFile().isTabularData()) { - if (fileMetadata.getDataFile().getUnf() != null) { - retString += "C2 - " + fileMetadata.getDataFile().getUnf() + "\r\n"; - } - } - } - - // closing element: - retString += "ER - \r\n"; - - return retString; - } - - private XMLOutputFactory xmlOutputFactory = null; - - public String createCitationXML(DatasetVersion datasetVersion, FileMetadata fileMetadata) { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - createEndNoteCitation(outStream, datasetVersion, fileMetadata); - String xml = outStream.toString(); - return xml; - } - - public void createEndNoteCitation(OutputStream os, DatasetVersion datasetVersion, FileMetadata fileMetadata) { - - xmlOutputFactory = javax.xml.stream.XMLOutputFactory.newInstance(); - XMLStreamWriter xmlw = null; - try { - xmlw = xmlOutputFactory.createXMLStreamWriter(os); - xmlw.writeStartDocument(); - createEndNoteXML(xmlw, datasetVersion, fileMetadata); - xmlw.writeEndDocument(); - } catch (XMLStreamException ex) { - Logger.getLogger("global").log(Level.SEVERE, null, ex); - throw new EJBException("ERROR occurred during creating endnote xml.", ex); - } finally { - try { - if (xmlw != null) { - xmlw.close(); - } - } catch (XMLStreamException ex) { - } - } - } - - private void createEndNoteXML(XMLStreamWriter xmlw, DatasetVersion version, FileMetadata fileMetadata) - throws XMLStreamException { - - String title = version.getTitle(); - String versionYear = 
version.getVersionYear(); - String publisher = version.getRootDataverseNameforCitation(); - - List authorList = version.getDatasetAuthors(); - - xmlw.writeStartElement("xml"); - xmlw.writeStartElement("records"); - - xmlw.writeStartElement("record"); - - // "Ref-type" indicates which of the (numerous!) available EndNote - // schemas this record will be interpreted as. - // This is relatively important. Certain fields with generic - // names like "custom1" and "custom2" become very specific things - // in specific schemas; for example, custom1 shows as "legal notice" - // in "Journal Article" (ref-type 84), or as "year published" in - // "Government Document". - // We don't want the UNF to show as a "legal notice"! - // We have found a ref-type that works ok for our purposes - - // "Online Database" (type 45). In this one, the fields Custom1 - // and Custom2 are not translated and just show as is. - // And "Custom1" still beats "legal notice". - // -- L.A. 12.12.2014 beta 10 - - xmlw.writeStartElement("ref-type"); - xmlw.writeAttribute("name", "Online Database"); - xmlw.writeCharacters("45"); - xmlw.writeEndElement(); // ref-type - - xmlw.writeStartElement("contributors"); - xmlw.writeStartElement("authors"); - for (DatasetAuthor author : authorList) { - xmlw.writeStartElement("author"); - xmlw.writeCharacters(author.getName().getDisplayValue()); - xmlw.writeEndElement(); // author - } - xmlw.writeEndElement(); // authors - xmlw.writeEndElement(); // contributors - - xmlw.writeStartElement("titles"); - xmlw.writeStartElement("title"); - xmlw.writeCharacters(title); - xmlw.writeEndElement(); // title - - xmlw.writeEndElement(); // titles - - xmlw.writeStartElement("section"); - String sectionString; - if (version.getDataset().isReleased()) { - sectionString = new SimpleDateFormat("yyyy-MM-dd").format(version.getDataset().getPublicationDate()); - } else { - sectionString = new SimpleDateFormat("yyyy-MM-dd").format(version.getLastUpdateTime()); - } - - 
xmlw.writeCharacters(sectionString); - xmlw.writeEndElement(); // publisher - - xmlw.writeStartElement("dates"); - xmlw.writeStartElement("year"); - xmlw.writeCharacters(versionYear); - xmlw.writeEndElement(); // year - xmlw.writeEndElement(); // dates - - xmlw.writeStartElement("publisher"); - xmlw.writeCharacters(publisher); - xmlw.writeEndElement(); // publisher - - xmlw.writeStartElement("urls"); - xmlw.writeStartElement("related-urls"); - xmlw.writeStartElement("url"); - xmlw.writeCharacters(version.getDataset().getPersistentURL()); - xmlw.writeEndElement(); // url - xmlw.writeEndElement(); // related-urls - xmlw.writeEndElement(); // urls - - // a DataFile citation also includes the filename and (for Tabular - // files) the UNF signature, that we put into the custom1 and custom2 - // fields respectively: - - if (fileMetadata != null) { - xmlw.writeStartElement("custom1"); - xmlw.writeCharacters(fileMetadata.getLabel()); - xmlw.writeEndElement(); // custom1 - - if (fileMetadata.getDataFile().isTabularData()) { - if (fileMetadata.getDataFile().getUnf() != null) { - xmlw.writeStartElement("custom2"); - xmlw.writeCharacters(fileMetadata.getDataFile().getUnf()); - xmlw.writeEndElement(); // custom2 - } - } - } - - xmlw.writeStartElement("electronic-resource-num"); - String electResourceNum = version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() - + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier(); - xmlw.writeCharacters(electResourceNum); - xmlw.writeEndElement(); - // 10.3886/ICPSR03259.v1 - xmlw.writeEndElement(); // record - - xmlw.writeEndElement(); // records - xmlw.writeEndElement(); // xml - - } - - public DatasetVersionUser getDatasetVersionUser(DatasetVersion version, User user) { - - DatasetVersionUser ddu = null; - Query query = em.createQuery("select object(o) from DatasetVersionUser as o " - + "where o.datasetVersion.id =:versionId and o.authenticatedUser.id =:userId"); - 
query.setParameter("versionId", version.getId()); - String identifier = user.getIdentifier(); - identifier = identifier.startsWith("@") ? identifier.substring(1) : identifier; - AuthenticatedUser au = authentication.getAuthenticatedUser(identifier); - query.setParameter("userId", au.getId()); - try { - ddu = (DatasetVersionUser) query.getSingleResult(); - } catch (javax.persistence.NoResultException e) { - // DO nothing, just return null. - } - return ddu; - } - - public boolean checkDatasetLock(Long datasetId) { - TypedQuery lockCounter = em.createNamedQuery("DatasetLock.getLocksByDatasetId", DatasetLock.class); - lockCounter.setParameter("datasetId", datasetId); - lockCounter.setMaxResults(1); - List lock = lockCounter.getResultList(); - return lock.size() > 0; - } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public DatasetLock addDatasetLock(Dataset dataset, DatasetLock lock) { - lock.setDataset(dataset); - dataset.addLock(lock); - em.persist(lock); - em.merge(dataset); - return lock; - } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) /* ? */ - public DatasetLock addDatasetLock(Long datasetId, DatasetLock.Reason reason, Long userId, String info) { - - Dataset dataset = em.find(Dataset.class, datasetId); - - AuthenticatedUser user = null; - if (userId != null) { - user = em.find(AuthenticatedUser.class, userId); - } - - DatasetLock lock = new DatasetLock(reason, user); - lock.setDataset(dataset); - lock.setInfo(info); - lock.setStartTime(new Date()); - - if (userId != null) { - lock.setUser(user); - if (user.getDatasetLocks() == null) { - user.setDatasetLocks(new ArrayList<>()); - } - user.getDatasetLocks().add(lock); - } - - return addDatasetLock(dataset, lock); - } - - /** - * Removes all {@link DatasetLock}s for the dataset whose id is passed and - * reason is {@code aReason}. - * - * @param datasetId - * Id of the dataset whose locks will b removed. - * @param aReason - * The reason of the locks that will be removed. 
- */ - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void removeDatasetLocks(Long datasetId, DatasetLock.Reason aReason) { - Dataset dataset = em.find(Dataset.class, datasetId); - new HashSet<>(dataset.getLocks()).stream().filter(l -> l.getReason() == aReason).forEach(lock -> { - dataset.removeLock(lock); - - AuthenticatedUser user = lock.getUser(); - user.getDatasetLocks().remove(lock); - - em.remove(lock); - }); - } - - /* - * getTitleFromLatestVersion methods use native query to return a dataset title - * - * There are two versions: 1) The version with datasetId param only will return - * the title regardless of version state 2)The version with the param - * 'includeDraft' boolean will return the most recently published title if the - * param is set to false If no Title found return empty string - protects - * against calling with include draft = false with no published version - */ - - public String getTitleFromLatestVersion(Long datasetId) { - return getTitleFromLatestVersion(datasetId, true); - } - - public String getTitleFromLatestVersion(Long datasetId, boolean includeDraft) { - - String whereDraft = ""; - // This clause will exclude draft versions from the select - if (!includeDraft) { - whereDraft = " and v.versionstate !='DRAFT' "; - } - - try { - return (String) em - .createNativeQuery("select dfv.value from dataset d " - + " join datasetversion v on d.id = v.dataset_id " - + " join datasetfield df on v.id = df.datasetversion_id " - + " join datasetfieldvalue dfv on df.id = dfv.datasetfield_id " - + " join datasetfieldtype dft on df.datasetfieldtype_id = dft.id " + " where dft.name = '" - + DatasetFieldConstant.title + "' and v.dataset_id =" + datasetId + whereDraft - + " order by v.versionnumber desc, v.minorVersionNumber desc limit 1 " + ";") - .getSingleResult(); - - } catch (Exception ex) { - logger.log(Level.INFO, "exception trying to get title from latest version: {0}", ex); - return ""; - } - - } - - public Dataset 
getDatasetByHarvestInfo(Dataverse dataverse, String harvestIdentifier) { - String queryStr = "SELECT d FROM Dataset d, DvObject o WHERE d.id = o.id AND o.owner.id = " + dataverse.getId() - + " and d.harvestIdentifier = '" + harvestIdentifier + "'"; - Query query = em.createQuery(queryStr); - List resultList = query.getResultList(); - Dataset dataset = null; - if (resultList.size() > 1) { - throw new EJBException("More than one dataset found in the dataverse (id= " + dataverse.getId() - + "), with harvestIdentifier= " + harvestIdentifier); - } - if (resultList.size() == 1) { - dataset = (Dataset) resultList.get(0); - } - return dataset; - - } - - public Long getDatasetVersionCardImage(Long versionId, User user) { - if (versionId == null) { - return null; - } - - return null; - } - - /** - * Used to identify and properly display Harvested objects on the dataverse - * page. - * - * @param datasetIds - * @return - */ - public Map getArchiveDescriptionsForHarvestedDatasets(Set datasetIds) { - if (datasetIds == null || datasetIds.size() < 1) { - return null; - } - - String datasetIdStr = Strings.join(datasetIds, ", "); - - String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" - + datasetIdStr + ")"; - List searchResults; - - try { - searchResults = em.createNativeQuery(qstr).getResultList(); - } catch (Exception ex) { - searchResults = null; - } - - if (searchResults == null) { - return null; - } - - Map ret = new HashMap<>(); - - for (Object[] result : searchResults) { - Long dsId; - if (result[0] != null) { - try { - dsId = (Long) result[0]; - } catch (Exception ex) { - dsId = null; - } - if (dsId == null) { - continue; - } - - ret.put(dsId, (String) result[1]); - } - } - - return ret; - } - - public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) { - if (datasetVersion == null) { - return false; - } - - // First, check if this dataset has a designated 
thumbnail image: - - if (datasetVersion.getDataset() != null) { - DataFile dataFile = datasetVersion.getDataset().getThumbnailFile(); - if (dataFile != null) { - return ImageThumbConverter.isThumbnailAvailable(dataFile, 48); - } - } - - // If not, we'll try to use one of the files in this dataset version: - // (the first file with an available thumbnail, really) - - List fileMetadatas = datasetVersion.getFileMetadatas(); - - for (FileMetadata fileMetadata : fileMetadatas) { - DataFile dataFile = fileMetadata.getDataFile(); - - // TODO: use permissionsWrapper here - ? - // (we are looking up these download permissions on individual files, - // true, and those are unique... but the wrapper may be able to save - // us some queries when it determines the download permission on the - // dataset as a whole? -- L.A. 4.2.1 - - if (fileService.isThumbnailAvailable(dataFile) - && permissionService.userOn(user, dataFile).has(Permission.DownloadFile)) { // , user)) { - return true; - } - - } - - return false; - } - - // reExportAll *forces* a reexport on all published datasets; whether they - // have the "last export" time stamp set or not. - @Asynchronous - public void reExportAllAsync() { - exportAllDatasets(true); - } - - public void reExportAll() { - exportAllDatasets(true); - } - - // exportAll() will try to export the yet unexported datasets (it will honor - // and trust the "last export" time stamp). - - @Asynchronous - public void exportAllAsync() { - exportAllDatasets(false); - } - - public void exportAll() { - exportAllDatasets(false); - } - - public void exportAllDatasets(boolean forceReExport) { - Integer countAll = 0; - Integer countSuccess = 0; - Integer countError = 0; - String logTimestamp = logFormatter.format(new Date()); - Logger exportLogger = Logger - .getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." 
+ "ExportAll" + logTimestamp); - String logFileName = "../logs" + File.separator + "export_" + logTimestamp + ".log"; - FileHandler fileHandler; - boolean fileHandlerSuceeded; - try { - fileHandler = new FileHandler(logFileName); - exportLogger.setUseParentHandlers(false); - fileHandlerSuceeded = true; - } catch (IOException | SecurityException ex) { - Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; - } - - if (fileHandlerSuceeded) { - exportLogger.addHandler(fileHandler); - } else { - exportLogger = logger; - } - - exportLogger.info("Starting an export all job"); - - for (Long datasetId : findAllLocalDatasetIds()) { - // Potentially, there's a godzillion datasets in this Dataverse. - // This is why we go through the list of ids here, and instantiate - // only one dataset at a time. - Dataset dataset = this.find(datasetId); - if (dataset != null) { - // Accurate "is published?" test - ? - // Answer: Yes, it is! We can't trust dataset.isReleased() alone; because it is - // a dvobject method - // that returns (publicationDate != null). And "publicationDate" is essentially - // "the first publication date"; that stays the same as versions get - // published and/or deaccessioned. But in combination with !isDeaccessioned() - // it is indeed an accurate test. - if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) { - - // can't trust dataset.getPublicationDate(), no. - Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a - // non-null released - // version! Maybe not - SEK - // 8/19 (We do now! 
:) - if (forceReExport || (publicationDate != null && (dataset.getLastExportTime() == null - || dataset.getLastExportTime().before(publicationDate)))) { - countAll++; - try { - recordService.exportAllFormatsInNewTransaction(dataset); - exportLogger.info("Success exporting dataset: " + dataset.getDisplayName() + " " - + dataset.getGlobalId()); - countSuccess++; - } catch (Exception ex) { - exportLogger.info("Error exporting dataset: " + dataset.getDisplayName() + " " - + dataset.getGlobalId() + "; " + ex.getMessage()); - countError++; - } - } - } - } - } - exportLogger.info("Datasets processed: " + countAll.toString()); - exportLogger.info("Datasets exported successfully: " + countSuccess.toString()); - exportLogger.info("Datasets failures: " + countError.toString()); - exportLogger.info("Finished export-all job."); - - if (fileHandlerSuceeded) { - fileHandler.close(); - } - - } - - public void updateLastExportTimeStamp(Long datasetId) { - Date now = new Date(); - em.createNativeQuery("UPDATE Dataset SET lastExportTime='" + now.toString() + "' WHERE id=" + datasetId) - .executeUpdate(); - } - - public Dataset setNonDatasetFileAsThumbnail(Dataset dataset, InputStream inputStream) { - if (dataset == null) { - logger.fine("In setNonDatasetFileAsThumbnail but dataset is null! Returning null."); - return null; - } - if (inputStream == null) { - logger.fine("In setNonDatasetFileAsThumbnail but inputStream is null! Returning null."); - return null; - } - dataset = DatasetUtil.persistDatasetLogoToStorageAndCreateThumbnail(dataset, inputStream); - dataset.setThumbnailFile(null); - return merge(dataset); - } - - public Dataset setDatasetFileAsThumbnail(Dataset dataset, DataFile datasetFileThumbnailToSwitchTo) { - if (dataset == null) { - logger.fine("In setDatasetFileAsThumbnail but dataset is null! Returning null."); - return null; - } - if (datasetFileThumbnailToSwitchTo == null) { - logger.fine("In setDatasetFileAsThumbnail but dataset is null! 
Returning null."); - return null; - } - DatasetUtil.deleteDatasetLogo(dataset); - dataset.setThumbnailFile(datasetFileThumbnailToSwitchTo); - dataset.setUseGenericThumbnail(false); - return merge(dataset); - } - - public Dataset removeDatasetThumbnail(Dataset dataset) { - if (dataset == null) { - logger.fine("In removeDatasetThumbnail but dataset is null! Returning null."); - return null; - } - DatasetUtil.deleteDatasetLogo(dataset); - dataset.setThumbnailFile(null); - dataset.setUseGenericThumbnail(true); - return merge(dataset); - } - - // persist assigned thumbnail in a single one-field-update query: - // (the point is to avoid doing an em.merge() on an entire dataset object...) - public void assignDatasetThumbnailByNativeQuery(Long datasetId, Long dataFileId) { - try { - em.createNativeQuery("UPDATE dataset SET thumbnailfile_id=" + dataFileId + " WHERE id=" + datasetId) - .executeUpdate(); - } catch (Exception ex) { - // it's ok to just ignore... - } - } - - public void assignDatasetThumbnailByNativeQuery(Dataset dataset, DataFile dataFile) { - try { - em.createNativeQuery( - "UPDATE dataset SET thumbnailfile_id=" + dataFile.getId() + " WHERE id=" + dataset.getId()) - .executeUpdate(); - } catch (Exception ex) { - // it's ok to just ignore... 
- } - } - - public WorkflowComment addWorkflowComment(WorkflowComment workflowComment) { - em.persist(workflowComment); - return workflowComment; - } + String queryStr = "SELECT s from Dataset s where s.identifier = :identifier and s.protocol= :protocol and s.authority= :authority"; + Dataset foundDataset = null; + try { + Query query = em.createQuery(queryStr); + query.setParameter("identifier", identifier); + query.setParameter("protocol", protocol); + query.setParameter("authority", authority); + foundDataset = (Dataset) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no ds found: " + globalId); + // DO nothing, just return null. + } + return foundDataset; + } + + public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { + String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); + String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); + if(doiShoulder.indexOf(settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, "/"))>=0) { + logger.warning("doiShoulder cannot contain / or doiSeparator"); + } + switch (doiIdentifierType) { + case "randomString": + return generateIdentifierAsRandomString(dataset, idServiceBean); + case "sequentialNumber": + return generateIdentifierAsSequentialNumber(dataset, idServiceBean); + case "shoulderWithRandomString": + return doiShoulder + generateIdentifierAsRandomString(dataset, idServiceBean); + case "shoulderWithSequentialNumber": + return doiShoulder + generateIdentifierAsSequentialNumber(dataset, idServiceBean); + default: + /* Should we throw an exception instead?? -- L.A. 
4.6.2 */ + return generateIdentifierAsRandomString(dataset, idServiceBean); + } + } + + private String generateIdentifierAsRandomString(Dataset dataset, IdServiceBean idServiceBean) { + + String identifier = null; + do { + identifier = RandomStringUtils.randomAlphanumeric(6).toUpperCase(); + } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); + + return identifier; + } + + private String generateIdentifierAsSequentialNumber(Dataset dataset, IdServiceBean idServiceBean) { + + String identifier; + do { + StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierAsSequentialNumber"); + query.execute(); + Integer identifierNumeric = (Integer) query.getOutputParameterValue(1); + // some diagnostics here maybe - is it possible to determine that it's failing + // because the stored procedure hasn't been created in the database? + if (identifierNumeric == null) { + return null; + } + identifier = identifierNumeric.toString(); + } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); + + return identifier; + } + + /** + * Check that a identifier entered by the user is unique (not currently used + * for any other study in this Dataverse Network) alos check for duplicate + * in EZID if needed + * @param userIdentifier + * @param dataset + * @param idServiceBean + * @return */ + public boolean isIdentifierUniqueInDatabase(String userIdentifier, Dataset dataset, IdServiceBean idServiceBean) { + String query = "SELECT d FROM Dataset d WHERE d.identifier = '" + userIdentifier + "'"; + query += " and d.protocol ='" + dataset.getProtocol() + "'"; + query += " and d.authority = '" + dataset.getAuthority() + "'"; + boolean u = em.createQuery(query).getResultList().isEmpty(); + + try{ + if (idServiceBean.alreadyExists(dataset)) { + u = false; + } + } catch (Exception e){ + //we can live with failure - means identifier not found remotely + } + + + return u; + } + + public DatasetVersion storeVersion( 
DatasetVersion dsv ) { + em.persist(dsv); + return dsv; + } + + public String createCitationRIS(DatasetVersion version) { + return createCitationRIS(version, null); + } + + public String createCitationRIS(DatasetVersion version, FileMetadata fileMetadata) { + String publisher = version.getRootDataverseNameforCitation(); + List authorList = version.getDatasetAuthors(); + String retString = "Provider: " + publisher + "\r\n"; + retString += "Content: text/plain; charset=\"us-ascii\"" + "\r\n"; + // Using type "DBASE" - "Online Database", for consistency with + // EndNote (see the longer comment in the EndNote section below)> + + retString += "TY - DBASE" + "\r\n"; + retString += "T1 - " + version.getTitle() + "\r\n"; + for (DatasetAuthor author : authorList) { + retString += "AU - " + author.getName().getDisplayValue() + "\r\n"; + } + retString += "DO - " + version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier() + "\r\n"; + retString += "PY - " + version.getVersionYear() + "\r\n"; + retString += "UR - " + version.getDataset().getPersistentURL() + "\r\n"; + retString += "PB - " + publisher + "\r\n"; + + // a DataFile citation also includes filename und UNF, if applicable: + if (fileMetadata != null) { + retString += "C1 - " + fileMetadata.getLabel() + "\r\n"; + + if (fileMetadata.getDataFile().isTabularData()) { + if (fileMetadata.getDataFile().getUnf() != null) { + retString += "C2 - " + fileMetadata.getDataFile().getUnf() + "\r\n"; + } + } + } + + // closing element: + retString += "ER - \r\n"; + + return retString; + } + + + private XMLOutputFactory xmlOutputFactory = null; + + public String createCitationXML(DatasetVersion datasetVersion, FileMetadata fileMetadata) { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + createEndNoteCitation(outStream, datasetVersion, fileMetadata); + String xml = outStream.toString(); + return xml; + } + + public 
void createEndNoteCitation(OutputStream os, DatasetVersion datasetVersion, FileMetadata fileMetadata) { + + xmlOutputFactory = javax.xml.stream.XMLOutputFactory.newInstance(); + XMLStreamWriter xmlw = null; + try { + xmlw = xmlOutputFactory.createXMLStreamWriter(os); + xmlw.writeStartDocument(); + createEndNoteXML(xmlw, datasetVersion, fileMetadata); + xmlw.writeEndDocument(); + } catch (XMLStreamException ex) { + Logger.getLogger("global").log(Level.SEVERE, null, ex); + throw new EJBException("ERROR occurred during creating endnote xml.", ex); + } finally { + try { + if (xmlw != null) { + xmlw.close(); + } + } catch (XMLStreamException ex) { + } + } + } + + private void createEndNoteXML(XMLStreamWriter xmlw, DatasetVersion version, FileMetadata fileMetadata) throws XMLStreamException { + + String title = version.getTitle(); + String versionYear = version.getVersionYear(); + String publisher = version.getRootDataverseNameforCitation(); + + List authorList = version.getDatasetAuthors(); + + xmlw.writeStartElement("xml"); + xmlw.writeStartElement("records"); + + xmlw.writeStartElement("record"); + + // "Ref-type" indicates which of the (numerous!) available EndNote + // schemas this record will be interpreted as. + // This is relatively important. Certain fields with generic + // names like "custom1" and "custom2" become very specific things + // in specific schemas; for example, custom1 shows as "legal notice" + // in "Journal Article" (ref-type 84), or as "year published" in + // "Government Document". + // We don't want the UNF to show as a "legal notice"! + // We have found a ref-type that works ok for our purposes - + // "Online Database" (type 45). In this one, the fields Custom1 + // and Custom2 are not translated and just show as is. + // And "Custom1" still beats "legal notice". + // -- L.A. 
12.12.2014 beta 10 + + xmlw.writeStartElement("ref-type"); + xmlw.writeAttribute("name", "Online Database"); + xmlw.writeCharacters("45"); + xmlw.writeEndElement(); // ref-type + + xmlw.writeStartElement("contributors"); + xmlw.writeStartElement("authors"); + for (DatasetAuthor author : authorList) { + xmlw.writeStartElement("author"); + xmlw.writeCharacters(author.getName().getDisplayValue()); + xmlw.writeEndElement(); // author + } + xmlw.writeEndElement(); // authors + xmlw.writeEndElement(); // contributors + + xmlw.writeStartElement("titles"); + xmlw.writeStartElement("title"); + xmlw.writeCharacters(title); + xmlw.writeEndElement(); // title + + xmlw.writeEndElement(); // titles + + xmlw.writeStartElement("section"); + String sectionString; + if (version.getDataset().isReleased()) { + sectionString = new SimpleDateFormat("yyyy-MM-dd").format(version.getDataset().getPublicationDate()); + } else { + sectionString = new SimpleDateFormat("yyyy-MM-dd").format(version.getLastUpdateTime()); + } + + xmlw.writeCharacters(sectionString); + xmlw.writeEndElement(); // publisher + + xmlw.writeStartElement("dates"); + xmlw.writeStartElement("year"); + xmlw.writeCharacters(versionYear); + xmlw.writeEndElement(); // year + xmlw.writeEndElement(); // dates + + xmlw.writeStartElement("publisher"); + xmlw.writeCharacters(publisher); + xmlw.writeEndElement(); // publisher + + xmlw.writeStartElement("urls"); + xmlw.writeStartElement("related-urls"); + xmlw.writeStartElement("url"); + xmlw.writeCharacters(version.getDataset().getPersistentURL()); + xmlw.writeEndElement(); // url + xmlw.writeEndElement(); // related-urls + xmlw.writeEndElement(); // urls + + // a DataFile citation also includes the filename and (for Tabular + // files) the UNF signature, that we put into the custom1 and custom2 + // fields respectively: + + + if (fileMetadata != null) { + xmlw.writeStartElement("custom1"); + xmlw.writeCharacters(fileMetadata.getLabel()); + xmlw.writeEndElement(); // custom1 + + if 
(fileMetadata.getDataFile().isTabularData()) { + if (fileMetadata.getDataFile().getUnf() != null) { + xmlw.writeStartElement("custom2"); + xmlw.writeCharacters(fileMetadata.getDataFile().getUnf()); + xmlw.writeEndElement(); // custom2 + } + } + } + + xmlw.writeStartElement("electronic-resource-num"); + String electResourceNum = version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier(); + xmlw.writeCharacters(electResourceNum); + xmlw.writeEndElement(); + //10.3886/ICPSR03259.v1 + xmlw.writeEndElement(); // record + + xmlw.writeEndElement(); // records + xmlw.writeEndElement(); // xml + + } + + public DatasetVersionUser getDatasetVersionUser(DatasetVersion version, User user) { + + DatasetVersionUser ddu = null; + Query query = em.createQuery("select object(o) from DatasetVersionUser as o " + + "where o.datasetVersion.id =:versionId and o.authenticatedUser.id =:userId"); + query.setParameter("versionId", version.getId()); + String identifier = user.getIdentifier(); + identifier = identifier.startsWith("@") ? identifier.substring(1) : identifier; + AuthenticatedUser au = authentication.getAuthenticatedUser(identifier); + query.setParameter("userId", au.getId()); + try { + ddu = (DatasetVersionUser) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // DO nothing, just return null. 
+ } + return ddu; + } + + public boolean checkDatasetLock(Long datasetId) { + TypedQuery lockCounter = em.createNamedQuery("DatasetLock.getLocksByDatasetId", DatasetLock.class); + lockCounter.setParameter("datasetId", datasetId); + lockCounter.setMaxResults(1); + List lock = lockCounter.getResultList(); + return lock.size()>0; + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public DatasetLock addDatasetLock(Dataset dataset, DatasetLock lock) { + lock.setDataset(dataset); + dataset.addLock(lock); + em.persist(lock); + em.merge(dataset); + return lock; + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) /*?*/ + public DatasetLock addDatasetLock(Long datasetId, DatasetLock.Reason reason, Long userId, String info) { + + Dataset dataset = em.find(Dataset.class, datasetId); + + AuthenticatedUser user = null; + if (userId != null) { + user = em.find(AuthenticatedUser.class, userId); + } + + DatasetLock lock = new DatasetLock(reason, user); + lock.setDataset(dataset); + lock.setInfo(info); + lock.setStartTime(new Date()); + + if (userId != null) { + lock.setUser(user); + if (user.getDatasetLocks() == null) { + user.setDatasetLocks(new ArrayList<>()); + } + user.getDatasetLocks().add(lock); + } + + return addDatasetLock(dataset, lock); + } + + /** + * Removes all {@link DatasetLock}s for the dataset whose id is passed and reason + * is {@code aReason}. + * @param datasetId Id of the dataset whose locks will b removed. + * @param aReason The reason of the locks that will be removed. 
+ */ + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void removeDatasetLocks(Long datasetId, DatasetLock.Reason aReason) { + Dataset dataset = em.find(Dataset.class, datasetId); + new HashSet<>(dataset.getLocks()).stream() + .filter( l -> l.getReason() == aReason ) + .forEach( lock -> { + dataset.removeLock(lock); + + AuthenticatedUser user = lock.getUser(); + user.getDatasetLocks().remove(lock); + + em.remove(lock); + }); + } + + /* + getTitleFromLatestVersion methods use native query to return a dataset title + + There are two versions: + 1) The version with datasetId param only will return the title regardless of version state + 2)The version with the param 'includeDraft' boolean will return the most recently published title if the param is set to false + If no Title found return empty string - protects against calling with + include draft = false with no published version + */ + + public String getTitleFromLatestVersion(Long datasetId){ + return getTitleFromLatestVersion(datasetId, true); + } + + public String getTitleFromLatestVersion(Long datasetId, boolean includeDraft){ + + String whereDraft = ""; + //This clause will exclude draft versions from the select + if (!includeDraft) { + whereDraft = " and v.versionstate !='DRAFT' "; + } + + try { + return (String) em.createNativeQuery("select dfv.value from dataset d " + + " join datasetversion v on d.id = v.dataset_id " + + " join datasetfield df on v.id = df.datasetversion_id " + + " join datasetfieldvalue dfv on df.id = dfv.datasetfield_id " + + " join datasetfieldtype dft on df.datasetfieldtype_id = dft.id " + + " where dft.name = '" + DatasetFieldConstant.title + "' and v.dataset_id =" + datasetId + + whereDraft + + " order by v.versionnumber desc, v.minorVersionNumber desc limit 1 " + + ";").getSingleResult(); + + } catch (Exception ex) { + logger.log(Level.INFO, "exception trying to get title from latest version: {0}", ex); + return ""; + } + + } + + public Dataset 
getDatasetByHarvestInfo(Dataverse dataverse, String harvestIdentifier) { + String queryStr = "SELECT d FROM Dataset d, DvObject o WHERE d.id = o.id AND o.owner.id = " + dataverse.getId() + " and d.harvestIdentifier = '" + harvestIdentifier + "'"; + Query query = em.createQuery(queryStr); + List resultList = query.getResultList(); + Dataset dataset = null; + if (resultList.size() > 1) { + throw new EJBException("More than one dataset found in the dataverse (id= " + dataverse.getId() + "), with harvestIdentifier= " + harvestIdentifier); + } + if (resultList.size() == 1) { + dataset = (Dataset) resultList.get(0); + } + return dataset; + + } + + public Long getDatasetVersionCardImage(Long versionId, User user) { + if (versionId == null) { + return null; + } + + + + return null; + } + + /** + * Used to identify and properly display Harvested objects on the dataverse page. + * + * @param datasetIds + * @return + */ + public Map getArchiveDescriptionsForHarvestedDatasets(Set datasetIds){ + if (datasetIds == null || datasetIds.size() < 1) { + return null; + } + + String datasetIdStr = Strings.join(datasetIds, ", "); + + String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + datasetIdStr + ")"; + List searchResults; + + try { + searchResults = em.createNativeQuery(qstr).getResultList(); + } catch (Exception ex) { + searchResults = null; + } + + if (searchResults == null) { + return null; + } + + Map ret = new HashMap<>(); + + for (Object[] result : searchResults) { + Long dsId; + if (result[0] != null) { + try { + dsId = (Long)result[0]; + } catch (Exception ex) { + dsId = null; + } + if (dsId == null) { + continue; + } + + ret.put(dsId, (String)result[1]); + } + } + + return ret; + } + + + + public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) { + if (datasetVersion == null) { + return false; + } + + // First, check if this dataset has a designated thumbnail 
image: + + if (datasetVersion.getDataset() != null) { + DataFile dataFile = datasetVersion.getDataset().getThumbnailFile(); + if (dataFile != null) { + return ImageThumbConverter.isThumbnailAvailable(dataFile, 48); + } + } + + // If not, we'll try to use one of the files in this dataset version: + // (the first file with an available thumbnail, really) + + List fileMetadatas = datasetVersion.getFileMetadatas(); + + for (FileMetadata fileMetadata : fileMetadatas) { + DataFile dataFile = fileMetadata.getDataFile(); + + // TODO: use permissionsWrapper here - ? + // (we are looking up these download permissions on individual files, + // true, and those are unique... but the wrapper may be able to save + // us some queries when it determines the download permission on the + // dataset as a whole? -- L.A. 4.2.1 + + if (fileService.isThumbnailAvailable(dataFile) && permissionService.userOn(user, dataFile).has(Permission.DownloadFile)) { //, user)) { + return true; + } + + } + + return false; + } + + + // reExportAll *forces* a reexport on all published datasets; whether they + // have the "last export" time stamp set or not. + @Asynchronous + public void reExportAllAsync() { + exportAllDatasets(true); + } + + public void reExportAll() { + exportAllDatasets(true); + } + + + // exportAll() will try to export the yet unexported datasets (it will honor + // and trust the "last export" time stamp). + + @Asynchronous + public void exportAllAsync() { + exportAllDatasets(false); + } + + public void exportAll() { + exportAllDatasets(false); + } + + public void exportAllDatasets(boolean forceReExport) { + Integer countAll = 0; + Integer countSuccess = 0; + Integer countError = 0; + String logTimestamp = logFormatter.format(new Date()); + Logger exportLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." 
+ "ExportAll" + logTimestamp); + String logFileName = "../logs" + File.separator + "export_" + logTimestamp + ".log"; + FileHandler fileHandler; + boolean fileHandlerSuceeded; + try { + fileHandler = new FileHandler(logFileName); + exportLogger.setUseParentHandlers(false); + fileHandlerSuceeded = true; + } catch (IOException | SecurityException ex) { + Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); + return; + } + + if (fileHandlerSuceeded) { + exportLogger.addHandler(fileHandler); + } else { + exportLogger = logger; + } + + exportLogger.info("Starting an export all job"); + + for (Long datasetId : findAllLocalDatasetIds()) { + // Potentially, there's a godzillion datasets in this Dataverse. + // This is why we go through the list of ids here, and instantiate + // only one dataset at a time. + Dataset dataset = this.find(datasetId); + if (dataset != null) { + // Accurate "is published?" test - ? + // Answer: Yes, it is! We can't trust dataset.isReleased() alone; because it is a dvobject method + // that returns (publicationDate != null). And "publicationDate" is essentially + // "the first publication date"; that stays the same as versions get + // published and/or deaccessioned. But in combination with !isDeaccessioned() + // it is indeed an accurate test. + if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) { + + // can't trust dataset.getPublicationDate(), no. + Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! 
:) + if (forceReExport || (publicationDate != null + && (dataset.getLastExportTime() == null + || dataset.getLastExportTime().before(publicationDate)))) { + countAll++; + try { + recordService.exportAllFormatsInNewTransaction(dataset); + exportLogger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId()); + countSuccess++; + } catch (Exception ex) { + exportLogger.info("Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId() + "; " + ex.getMessage()); + countError++; + } + } + } + } + } + exportLogger.info("Datasets processed: " + countAll.toString()); + exportLogger.info("Datasets exported successfully: " + countSuccess.toString()); + exportLogger.info("Datasets failures: " + countError.toString()); + exportLogger.info("Finished export-all job."); + + if (fileHandlerSuceeded) { + fileHandler.close(); + } + + } + + public void updateLastExportTimeStamp(Long datasetId) { + Date now = new Date(); + em.createNativeQuery("UPDATE Dataset SET lastExportTime='"+now.toString()+"' WHERE id="+datasetId).executeUpdate(); + } + + public Dataset setNonDatasetFileAsThumbnail(Dataset dataset, InputStream inputStream) { + if (dataset == null) { + logger.fine("In setNonDatasetFileAsThumbnail but dataset is null! Returning null."); + return null; + } + if (inputStream == null) { + logger.fine("In setNonDatasetFileAsThumbnail but inputStream is null! Returning null."); + return null; + } + dataset = DatasetUtil.persistDatasetLogoToStorageAndCreateThumbnail(dataset, inputStream); + dataset.setThumbnailFile(null); + return merge(dataset); + } + + public Dataset setDatasetFileAsThumbnail(Dataset dataset, DataFile datasetFileThumbnailToSwitchTo) { + if (dataset == null) { + logger.fine("In setDatasetFileAsThumbnail but dataset is null! Returning null."); + return null; + } + if (datasetFileThumbnailToSwitchTo == null) { + logger.fine("In setDatasetFileAsThumbnail but dataset is null! 
Returning null."); + return null; + } + DatasetUtil.deleteDatasetLogo(dataset); + dataset.setThumbnailFile(datasetFileThumbnailToSwitchTo); + dataset.setUseGenericThumbnail(false); + return merge(dataset); + } + + public Dataset removeDatasetThumbnail(Dataset dataset) { + if (dataset == null) { + logger.fine("In removeDatasetThumbnail but dataset is null! Returning null."); + return null; + } + DatasetUtil.deleteDatasetLogo(dataset); + dataset.setThumbnailFile(null); + dataset.setUseGenericThumbnail(true); + return merge(dataset); + } + + // persist assigned thumbnail in a single one-field-update query: + // (the point is to avoid doing an em.merge() on an entire dataset object...) + public void assignDatasetThumbnailByNativeQuery(Long datasetId, Long dataFileId) { + try { + em.createNativeQuery("UPDATE dataset SET thumbnailfile_id=" + dataFileId + " WHERE id=" + datasetId).executeUpdate(); + } catch (Exception ex) { + // it's ok to just ignore... + } + } + + public void assignDatasetThumbnailByNativeQuery(Dataset dataset, DataFile dataFile) { + try { + em.createNativeQuery("UPDATE dataset SET thumbnailfile_id=" + dataFile.getId() + " WHERE id=" + dataset.getId()).executeUpdate(); + } catch (Exception ex) { + // it's ok to just ignore... + } + } + + public WorkflowComment addWorkflowComment(WorkflowComment workflowComment) { + em.persist(workflowComment); + return workflowComment; + } } From 0b25d57f23fc1615ee440b430c01cde75192b3c3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 14 May 2018 21:43:13 -0400 Subject: [PATCH 06/44] Removing DoiSeparator key since the authority:shoulder separator is always '/' 'for DOIs and Handles, and any shoulder:identifier separator is now handled as part of the separator. Also fixed the bug I introduced case logic for Handles, which are case sensitive. 
--- .../edu/harvard/iq/dataverse/Dataset.java | 11 +----- .../edu/harvard/iq/dataverse/DatasetPage.java | 4 -- .../iq/dataverse/DatasetServiceBean.java | 37 +++++++------------ .../CollectionDepositManagerImpl.java | 2 - .../command/impl/CreateDatasetCommand.java | 4 +- .../settings/SettingsServiceBean.java | 6 +-- .../iq/dataverse/util/json/JsonParser.java | 1 - .../dataverse/util/json/JsonParserTest.java | 3 -- 8 files changed, 16 insertions(+), 52 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 84b4a4934bc..05f87c3c3ef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -84,8 +84,7 @@ public class Dataset extends DvObjectContainer { private String protocol; private String authority; - private String doiSeparator; - + @Temporal(value = TemporalType.TIMESTAMP) private Date globalIdCreateTime; @@ -240,14 +239,6 @@ public void setIdentifier(String identifier) { this.identifier = identifier; } - public String getDoiSeparator() { - return doiSeparator; - } - - public void setDoiSeparator(String doiSeparator) { - this.doiSeparator = doiSeparator; - } - public Date getGlobalIdCreateTime() { return globalIdCreateTime; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 7cf3bc40446..92a7121e34a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -220,7 +220,6 @@ public enum DisplayMode { private String version; private String protocol = ""; private String authority = ""; - private String separator = ""; private String customFields=""; private boolean noDVsAtAll = false; @@ -1388,7 +1387,6 @@ private String init(boolean initFull) { String nonNullDefaultIfKeyNotFound = ""; protocol = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Protocol, 
nonNullDefaultIfKeyNotFound); authority = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - separator = settingsWrapper.getValueForKey(SettingsServiceBean.Key.DoiSeparator, nonNullDefaultIfKeyNotFound); if (dataset.getId() != null || versionId != null || persistentId != null) { // view mode for a dataset @@ -1537,9 +1535,7 @@ private String init(boolean initFull) { dataset.setOwner(dataverseService.find(ownerId)); dataset.setProtocol(protocol); dataset.setAuthority(authority); - dataset.setDoiSeparator(separator); //Wait until the create command before actually getting an identifier - //dataset.setIdentifier(datasetService.generateDatasetIdentifier(protocol, authority, separator)); if (dataset.getOwner() == null) { return permissionsWrapper.notFound(); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index cb2b9073d24..e285fecdb00 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -184,16 +184,7 @@ public Dataset findByGlobalId(String globalId) { String authority = ""; String identifier = ""; int index1 = globalId.indexOf(':'); - String nonNullDefaultIfKeyNotFound = ""; - // This is kind of wrong right here: we should not assume that this is *our* DOI - - // it can be somebody else's registered DOI that we harvested. And they can - // have their own separator characters defined - so we should not assume - // that everybody's DOIs will look like ours! - // Also, this separator character gets applied to handles lookups too, below. - // Which is probably wrong too... - // -- L.A. 
4.2.4 - String separator = settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, nonNullDefaultIfKeyNotFound); - int index2 = globalId.indexOf(separator, index1 + 1); + int index2 = globalId.indexOf('/', index1 + 1); if (index1 == -1) { logger.info("Error parsing identifier: " + globalId + ". ':' not found in string"); @@ -202,22 +193,20 @@ public Dataset findByGlobalId(String globalId) { protocol = globalId.substring(0, index1); } if (index2 == -1) { - logger.info("Error parsing identifier: " + globalId + ". Second separator not found in string"); + logger.info("Error parsing identifier: " + globalId + ". '/' not found in string"); return null; } else { authority = globalId.substring(index1 + 1, index2); } - if (protocol.equals("doi")) { - - // //ICPSR DOIs have some lower case characters (for ex., - // 10.3886/ICPSR04599.v1), and that's how are they saved in the - // IQSS production database. So .toUpperCase() is now optional - Boolean useMixedCase = settingsService.isTrueForKey(SettingsServiceBean.Key.DoiUseMixedCase, false); - - identifier = globalId.substring(index2 + 1); // .toUpperCase(); - if (!useMixedCase) { - identifier = identifier.toUpperCase(); - } + // //ICPSR DOIs have some lower case characters (for ex., + // 10.3886/ICPSR04599.v1), and that's how are they saved in the + // IQSS production database. 
So .toUpperCase() is now optional + Boolean useMixedCase = settingsService.isTrueForKey(SettingsServiceBean.Key.DoiUseMixedCase, false); + + identifier = globalId.substring(index2 + 1); + //Handles are case sensitive, DOIs are not by default, but can be forced to be case sensitive with the useMixedCase flag + if (protocol.equals("doi") && !useMixedCase) { + identifier = identifier.toUpperCase(); } String queryStr = "SELECT s from Dataset s where s.identifier = :identifier and s.protocol= :protocol and s.authority= :authority"; Dataset foundDataset = null; @@ -239,8 +228,8 @@ public Dataset findByGlobalId(String globalId) { public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); - if(doiShoulder.indexOf(settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, "/"))>=0) { - logger.warning("doiShoulder cannot contain / or doiSeparator"); + if(doiShoulder.indexOf("/")>=0) { + logger.warning("doiShoulder cannot contain '/' "); } switch (doiIdentifierType) { case "randomString": diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java index 43d296edfee..1780ae9d1bd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java @@ -97,10 +97,8 @@ public DepositReceipt createNew(String collectionUri, Deposit deposit, AuthCrede String nonNullDefaultIfKeyNotFound = ""; String protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); String authority = 
settingsService.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - String separator = settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, nonNullDefaultIfKeyNotFound); dataset.setProtocol(protocol); dataset.setAuthority(authority); - dataset.setDoiSeparator(separator); //Wait until the create command before actually getting an identifier //dataset.setIdentifier(datasetService.generateDatasetIdentifier(protocol, authority, separator)); logger.log(Level.FINE, "DS Deposit identifier: {0}", dataset.getIdentifier()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java index 4fba6cf65d0..941268354fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java @@ -132,11 +132,9 @@ public Dataset execute(CommandContext ctxt) throws CommandException { String nonNullDefaultIfKeyNotFound = ""; String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); String authority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - String doiSeparator = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DoiSeparator, nonNullDefaultIfKeyNotFound); String doiProvider = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DoiProvider, nonNullDefaultIfKeyNotFound); if (theDataset.getProtocol()==null) theDataset.setProtocol(protocol); if (theDataset.getAuthority()==null) theDataset.setAuthority(authority); - if (theDataset.getDoiSeparator()==null) theDataset.setDoiSeparator(doiSeparator); if (theDataset.getStorageIdentifier() == null) { try { DataAccess.createNewStorageIO(theDataset, "placeholder"); @@ -144,7 +142,7 @@ public Dataset execute(CommandContext ctxt) throws 
CommandException { // if setting the storage identifier through createNewStorageIO fails, dataset creation // does not have to fail. we just set the storage id to a default -SF String storageDriver = (System.getProperty("dataverse.files.storage-driver-id") != null) ? System.getProperty("dataverse.files.storage-driver-id") : "file"; - theDataset.setStorageIdentifier(storageDriver + "://" + theDataset.getAuthority()+theDataset.getDoiSeparator()+theDataset.getIdentifier()); + theDataset.setStorageIdentifier(storageDriver + "://" + theDataset.getAuthority()+ "/" +theDataset.getIdentifier()); logger.info("Failed to create StorageIO. StorageIdentifier set to default. Not fatal." + "(" + ioex.getMessage() + ")"); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index a94bd8e5c1f..2650ce7302d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -180,11 +180,7 @@ public enum Key { Authority, /** DoiProvider for global id */ DoiProvider, - DoiSeparator, - /** DoiShoulder for global id - should not include DoiSeparator unless/until logic for separating authority and identifier is more robust. - * This case can be handled by combining the authority and shoulder as part of the Authority setting. Use this DoiShoulder for cases - * where there is no character separating the shoulder from the rest of the identifier or where the - * character separating the shoulder from the rest of the identifier is not '/' or DoiSeparator. + /** DoiShoulder for global id - should not include '/'. 
*/ DoiShoulder, //Do not force DOIs to uppercase before searching in database diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 5da814c5b36..5f0c3af09d3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -255,7 +255,6 @@ public Dataset parseDataset(JsonObject obj) throws JsonParseException { dataset.setAuthority(obj.getString("authority", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : obj.getString("authority")); dataset.setProtocol(obj.getString("protocol", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : obj.getString("protocol")); - dataset.setDoiSeparator(obj.getString("doiSeparator", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator) : obj.getString("doiSeparator")); dataset.setIdentifier(obj.getString("identifier",null)); DatasetVersion dsv = new DatasetVersion(); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java index 156bf6ee015..1c3ec6e9f1c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java @@ -422,7 +422,6 @@ public void testParseEmptyDataset() throws JsonParseException { System.out.println(dsJson != null); Dataset actual = sut.parseDataset(dsJson); assertEquals("10.5072/FK2", actual.getAuthority()); - assertEquals("/", actual.getDoiSeparator()); assertEquals("doi", actual.getProtocol()); } catch (IOException ioe) { throw new JsonParseException("Couldn't read test file", ioe); @@ -628,8 +627,6 @@ public String getValueForKey( Key key /*, String defaultValue */) { return "10.5072/FK2"; case Protocol: return "doi"; - case DoiSeparator: - return "/"; 
default: break; } From 356d37ad4439fc27fd07f6c277699c6ff1bcfd6e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 14 May 2018 21:46:57 -0400 Subject: [PATCH 07/44] shoulder can contain a '/' - the findByGlobalId logic looks for the first indexOf '/' only, so the restriction is now obsolete. --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index e285fecdb00..4f1868b0f8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -228,9 +228,7 @@ public Dataset findByGlobalId(String globalId) { public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); - if(doiShoulder.indexOf("/")>=0) { - logger.warning("doiShoulder cannot contain '/' "); - } + switch (doiIdentifierType) { case "randomString": return generateIdentifierAsRandomString(dataset, idServiceBean); From c77c8a99f02a410afcc492a2a8ae0dceb07e58bd Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 14 May 2018 21:50:16 -0400 Subject: [PATCH 08/44] comment update --- .../edu/harvard/iq/dataverse/settings/SettingsServiceBean.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 2650ce7302d..49d5ad815f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -180,8 +180,7 @@ public enum Key { 
Authority, /** DoiProvider for global id */ DoiProvider, - /** DoiShoulder for global id - should not include '/'. - */ + /** DoiShoulder for global id */ DoiShoulder, //Do not force DOIs to uppercase before searching in database DoiUseMixedCase, From 65c0c8df4d61b74ce1d8d581502513d0526a99e0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 10:14:30 -0400 Subject: [PATCH 09/44] remove doiSeparator key after merge with DOI for files code Both Handles and DOIs require '/' as the authority/identifier separator. Since any separator / sub-identifier separator is now just part of the shoulder, there is no need for a configurable value for that. It is also problematic to make the authority/identifier separator configurable at this point since there are many places in the code where '/' is hardcoded (reasonable given that Handle and DOI both require it), so supporting a new Id type that doesn't use this character requires more work than creating a new serviceBean and setting the key. Nominally, all identifiers will have '/' as the separator in the database and the separator column in the dataset table (and now for datafiles too) could be removed, but that would complicate id matching relative to current code which just concatenates and would add work to restore it if a different identifier with new separator is ever created. So - leaving this, and code to get/set it as is... 
--- .../edu/harvard/iq/dataverse/AbstractIdServiceBean.java | 3 ++- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 5 ++++- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 2 +- .../api/datadeposit/CollectionDepositManagerImpl.java | 3 +++ .../iq/dataverse/api/imports/ImportDDIServiceBean.java | 6 +++--- .../dataverse/engine/command/impl/CreateDatasetCommand.java | 4 ++++ .../engine/command/impl/RegisterDvObjectCommand.java | 4 +++- .../java/edu/harvard/iq/dataverse/util/json/JsonParser.java | 2 ++ .../edu/harvard/iq/dataverse/util/json/JsonParserTest.java | 1 + 9 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java index 1f5ab39a9a3..b6f1bf4f026 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java @@ -11,6 +11,7 @@ public abstract class AbstractIdServiceBean implements IdServiceBean { private static final Logger logger = Logger.getLogger(AbstractIdServiceBean.class.getCanonicalName()); + private static final String ID_SEPARATOR="/"; @EJB DataverseServiceBean dataverseService; @@ -96,7 +97,7 @@ public DvObject generateIdentifier(DvObject dvObject) { String protocol = dvObject.getProtocol() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : dvObject.getProtocol(); String authority = dvObject.getAuthority() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : dvObject.getAuthority(); - String doiSeparator = dvObject.getDoiSeparator() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator) : dvObject.getDoiSeparator(); + String doiSeparator = dvObject.getDoiSeparator() == null ? 
ID_SEPARATOR : dvObject.getDoiSeparator(); IdServiceBean idServiceBean = IdServiceBean.getBean(protocol, commandEngine.getContext()); if (dvObject.isInstanceofDataset()) { dvObject.setIdentifier(datasetService.generateDatasetIdentifier((Dataset) dvObject, idServiceBean)); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index ad16d4f65f3..bf2f84a6568 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -221,6 +221,7 @@ public enum DisplayMode { private String version; private String protocol = ""; private String authority = ""; + private String separator=""; private String customFields=""; private boolean noDVsAtAll = false; @@ -1366,7 +1367,8 @@ private String init(boolean initFull) { String nonNullDefaultIfKeyNotFound = ""; protocol = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); authority = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - + //Current code assumes the authority - identifier separator is '/' as is required by DOI and Handle + separator="/"; if (dataset.getId() != null || versionId != null || persistentId != null) { // view mode for a dataset DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse = null; @@ -1514,6 +1516,7 @@ private String init(boolean initFull) { dataset.setOwner(dataverseService.find(ownerId)); dataset.setProtocol(protocol); dataset.setAuthority(authority); + dataset.setDoiSeparator(separator); //Wait until the create command before actually getting an identifier if (dataset.getOwner() == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 001536c2a78..86300a2dad6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -957,7 +957,7 @@ public void obtainPersistentIdentifiersForDatafiles(Dataset dataset) { datafile.setAuthority(settingsService.getValueForKey(SettingsServiceBean.Key.Authority, "")); } if (datafile.getDoiSeparator() == null) { - datafile.setDoiSeparator(settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator, "")); + datafile.setDoiSeparator("/"); } logger.info("identifier: " + datafile.getIdentifier()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java index 1780ae9d1bd..28967a9b69b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java @@ -97,8 +97,11 @@ public DepositReceipt createNew(String collectionUri, Deposit deposit, AuthCrede String nonNullDefaultIfKeyNotFound = ""; String protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); String authority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); + dataset.setProtocol(protocol); dataset.setAuthority(authority); + // Current code assumes '/' as the authority - identifier separator as required by DOI and Handle + dataset.setDoiSeparator("/"); //Wait until the create command before actually getting an identifier //dataset.setIdentifier(datasetService.generateDatasetIdentifier(protocol, authority, separator)); logger.log(Level.FINE, "DS Deposit identifier: {0}", dataset.getIdentifier()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index 54ad825cb85..ee8a780d9de 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1587,7 +1587,7 @@ private void parseStudyIdHandle(String _id, DatasetDTO datasetDTO) { private void parseStudyIdDOI(String _id, DatasetDTO datasetDTO) throws ImportException{ int index1 = _id.indexOf(':'); - int index2 = _id.lastIndexOf('/'); + int index2 = _id.indexOf('/'); if (index1==-1) { throw new EJBException("Error parsing (DOI) IdNo: "+_id+". ':' not found in string"); } @@ -1608,11 +1608,11 @@ private void parseStudyIdDoiICPSRdara(String _id, DatasetDTO datasetDTO) throws /* dara/ICPSR DOIs are formatted without the hdl: prefix; for example - 10.3886/ICPSR06635.v1 - so we assume that everything before the last "/" is the authority, + so we assume that everything before the "/" is the authority, and everything past it - the identifier: */ - int index = _id.lastIndexOf('/'); + int index = _id.indexOf('/'); if (index == -1) { throw new ImportException("Error parsing ICPSR/dara DOI IdNo: "+_id+". 
'/' not found in string"); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java index ad2bf18475b..bbe7b8a9046 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java @@ -154,6 +154,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if (theDataset.getAuthority() == null) { theDataset.setAuthority(authority); } + if (theDataset.getDoiSeparator() == null) { + // '/', the required authority - identifier separator for DOI and Handle is hardcoded in many places + theDataset.setDoiSeparator("/"); + } if (theDataset.getStorageIdentifier() == null) { try { DataAccess.createNewStorageIO(theDataset, "placeholder"); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java index 239f1f3bd62..5997a6195f2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java @@ -37,7 +37,9 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { String nonNullDefaultIfKeyNotFound = ""; String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); String authority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - String doiSeparator = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DoiSeparator, nonNullDefaultIfKeyNotFound); + // Current code assumes in many places that the authority - identifier separator for new identifiers + // is '/' as is true for DOI and Handle + String doiSeparator = "/"; IdServiceBean 
idServiceBean = IdServiceBean.getBean(target.getProtocol(), ctxt); try { //Test to see if identifier already present diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 5f0c3af09d3..db470527d75 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -255,6 +255,8 @@ public Dataset parseDataset(JsonObject obj) throws JsonParseException { dataset.setAuthority(obj.getString("authority", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : obj.getString("authority")); dataset.setProtocol(obj.getString("protocol", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : obj.getString("protocol")); + // hardcoded '/' is the required Authority-identifier separator for DOI and Handle + dataset.setDoiSeparator(obj.getString("doiSeparator", null) == null ? 
"/" : obj.getString("doiSeparator")); dataset.setIdentifier(obj.getString("identifier",null)); DatasetVersion dsv = new DatasetVersion(); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java index 1c3ec6e9f1c..a569f9bfe1a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java @@ -422,6 +422,7 @@ public void testParseEmptyDataset() throws JsonParseException { System.out.println(dsJson != null); Dataset actual = sut.parseDataset(dsJson); assertEquals("10.5072/FK2", actual.getAuthority()); + assertEquals("/", actual.getDoiSeparator()); assertEquals("doi", actual.getProtocol()); } catch (IOException ioe) { throw new JsonParseException("Couldn't read test file", ioe); From d986c4c15225585882451d3c683ea59c4b45ec51 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 10:20:31 -0400 Subject: [PATCH 10/44] mixed case flag no longer needed --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 3351791dfee..4b8a0ba872a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -185,10 +185,8 @@ public enum Key { Authority, /** DoiProvider for global id */ DoiProvider, - /** DoiShoulder for global id */ + /** DoiShoulder for global id - used to create a common prefix on identifiers */ DoiShoulder, - //Do not force DOIs to uppercase before searching in database - DoiUseMixedCase, /* Removed for now - tried to add here but DOI Service Bean didn't like it at start-up DoiUsername, DoiPassword, From f5b4e57b2a1df25eb4763b89cd2c36c96020dc1c 
Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 10:26:06 -0400 Subject: [PATCH 11/44] when parsing IDs should now look for first '/' separator, not the last. --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 2 +- src/main/java/edu/harvard/iq/dataverse/GlobalId.java | 2 +- .../iq/dataverse/api/imports/ImportGenericServiceBean.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 86300a2dad6..9cd7fc9f77a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -298,7 +298,7 @@ public Long getMaximumExistingDatafileIdentifier(Dataset dataset) { if (idResults != null) { for (Object raw: idResults){ String identifier = (String) raw; - identifier = identifier.substring(identifier.lastIndexOf("/") + 1); + identifier = identifier.substring(identifier.indexOf("/") + 1); testVal = new Long(identifier) ; if (testVal > retVal){ retVal = testVal; diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 962ca02141b..8c906834d58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -126,7 +126,7 @@ private boolean parsePersistentId(String identifierString){ } int index1 = identifierString.indexOf(':'); - int index2 = identifierString.lastIndexOf('/'); + int index2 = identifierString.indexOf('/'); if (index1==-1) { return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index 442043ed397..2258ac0952c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -393,7 +393,7 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { private String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO datasetDTO) { int index1 = identifierString.indexOf(':'); - int index2 = identifierString.lastIndexOf('/'); + int index2 = identifierString.indexOf('/'); if (index1==-1) { logger.warning("Error parsing identifier: " + identifierString + ". ':' not found in string"); return null; @@ -420,7 +420,7 @@ private String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO logger.warning("HTTP Url in supplied as the identifier is neither a Handle nor DOI resolver: "+identifierString); return null; } - // index2 was already found as the *last* index of '/' - so it's still good. + // index2 was already found as the index of '/' - so it's still good. } else { logger.warning("Unknown identifier format: "+identifierString); return null; From 470af8651811324da0ed1ef5efee41dbf3885346 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 10:33:06 -0400 Subject: [PATCH 12/44] merge miss - this was deleted in develop after I branched. 
--- src/main/java/edu/harvard/iq/dataverse/Dataset.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index cdc1da2ca5f..77ddb51f908 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -93,7 +93,7 @@ public class Dataset extends DvObjectContainer { @Temporal(value = TemporalType.TIMESTAMP) private Date lastExportTime; - @NotBlank(message = "Please enter an identifier for your dataset.") + @Column(nullable = false) private String identifier; From dc2893d9b487cb6c5796a7726582200fc4e27783 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 13:08:51 -0400 Subject: [PATCH 13/44] Separating the choice of identifier generation style for datasets and files Given the code for generating datafile ids, it looks like the new ability to support a shoulder should only apply to datasets as a start. This commit splits the existing IdGenerationStyle key into two one for datasets and one for datafiles. Datafiles will continue to only support random or sequential identifiers (with an additional choice as to whether to those are independent or dependent (appended to the dataset identifier) while datasets can also use the shoulder with random or sequential options. It seems like allowing files to use a shoulder when they are independent would also make sense, but I don't think using that with dependent would make as much sense. If that's a reasonable choice, we might want to rethink having the dependent/independent choice as a separate key (datafiles would ultimately have 6 choices - random or sequential with dependent/independent or independent with shoulder and random or sequential). 
--- .../iq/dataverse/DataFileServiceBean.java | 2 +- .../harvard/iq/dataverse/DatasetServiceBean.java | 4 ++-- .../dataverse/settings/SettingsServiceBean.java | 3 ++- .../harvard/iq/dataverse/util/SystemConfig.java | 2 +- .../edu/harvard/iq/dataverse/api/DatasetsIT.java | 16 ++++++++-------- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 977138c2548..f4c9cc14893 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1532,7 +1532,7 @@ public List selectFilesWithMissingOriginalTypes() { public String generateDataFileIdentifier(DataFile datafile, IdServiceBean idServiceBean) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); + String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatafileIdentifierGenerationStyle, "randomString"); String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); String datasetIdentifer = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 9cd7fc9f77a..04f1bb03fa0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -208,7 +208,7 @@ public Dataset findByGlobalId(String globalId) { } public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); + String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle, "randomString"); String doiShoulder = 
settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); switch (doiIdentifierType) { @@ -298,7 +298,7 @@ public Long getMaximumExistingDatafileIdentifier(Dataset dataset) { if (idResults != null) { for (Object raw: idResults){ String identifier = (String) raw; - identifier = identifier.substring(identifier.indexOf("/") + 1); + identifier = identifier.substring(identifier.lastIndexOf("/") + 1); testVal = new Long(identifier) ; if (testVal > retVal){ retVal = testVal; diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 4b8a0ba872a..71b00019b6a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -73,7 +73,8 @@ public enum Key { * StorageSite database table. */ LocalDataAccessPath, - IdentifierGenerationStyle, + DatafileIdentifierGenerationStyle, + DatasetIdentifierGenerationStyle, OAuth2CallbackUrl, DefaultAuthProvider, FooterCopyright, diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 392ac4a75f4..20f0790b7db 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -950,7 +950,7 @@ public boolean isRsyncDownload() } public boolean isDataFilePIDSequentialDependent(){ - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); + String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatafileIdentifierGenerationStyle, "randomString"); String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); if (doiIdentifierType.equals("sequentialNumber") && doiDataFileFormat.equals("DEPENDENT")){ return true; 
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 90bd1e9ee47..bbcb46fda5d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -54,8 +54,8 @@ public class DatasetsIT { public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); - Response removeIdentifierGenerationStyle = UtilIT.deleteSetting(SettingsServiceBean.Key.IdentifierGenerationStyle); - removeIdentifierGenerationStyle.then().assertThat() + Response removeDatasetIdentifierGenerationStyle = UtilIT.deleteSetting(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle); + removeDatasetIdentifierGenerationStyle.then().assertThat() .statusCode(200); Response removeExcludeEmail = UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); @@ -74,8 +74,8 @@ public static void setUpClass() { @AfterClass public static void afterClass() { - Response removeIdentifierGenerationStyle = UtilIT.deleteSetting(SettingsServiceBean.Key.IdentifierGenerationStyle); - removeIdentifierGenerationStyle.then().assertThat() + Response removeDatasetIdentifierGenerationStyle = UtilIT.deleteSetting(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle); + removeDatasetIdentifierGenerationStyle.then().assertThat() .statusCode(200); Response removeExcludeEmail = UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); @@ -487,7 +487,7 @@ public void testExcludeEmail() { } @Test - public void testSequentialNumberAsIdentifierGenerationStyle() { + public void testSequentialNumberAsDatasetIdentifierGenerationStyle() { Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); @@ -498,8 +498,8 @@ public void testSequentialNumberAsIdentifierGenerationStyle() { createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response 
setSequentialNumberAsIdentifierGenerationStyle = UtilIT.setSetting(SettingsServiceBean.Key.IdentifierGenerationStyle, "sequentialNumber"); - setSequentialNumberAsIdentifierGenerationStyle.then().assertThat() + Response setSequentialNumberAsDatasetIdentifierGenerationStyle = UtilIT.setSetting(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle, "sequentialNumber"); + setSequentialNumberAsDatasetIdentifierGenerationStyle.then().assertThat() .statusCode(OK.getStatusCode()); Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); @@ -526,7 +526,7 @@ public void testSequentialNumberAsIdentifierGenerationStyle() { deleteUserResponse.prettyPrint(); assertEquals(200, deleteUserResponse.getStatusCode()); - Response remove = UtilIT.deleteSetting(SettingsServiceBean.Key.IdentifierGenerationStyle); + Response remove = UtilIT.deleteSetting(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle); remove.then().assertThat() .statusCode(200); From dded772d9474c6ae5011b320276cb20159901576 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 13:53:15 -0400 Subject: [PATCH 14/44] one more instance of doiSeparator --- .../batch/jobs/importer/filesystem/FileRecordWriter.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java index 49ecce4fdbb..8079a640afd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java @@ -365,7 +365,8 @@ private DataFile createPackageDataFile(List files) { String nonNullDefaultIfKeyNotFound = ""; String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); String authority = 
commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - String doiSeparator = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.DoiSeparator, nonNullDefaultIfKeyNotFound); + //Hardcoded as '/' which is what's required for DOIs and Handles as the authority - identifier separator. + String doiSeparator = "/"; if (packageFile.getProtocol() == null) { packageFile.setProtocol(protocol); } From a9cb711d277fcf17584c2003a47647035fe0607b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 14:04:20 -0400 Subject: [PATCH 15/44] Update test to expect shoulder as part of identifier --- .../java/edu/harvard/iq/dataverse/GlobalIdTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/GlobalIdTest.java b/src/test/java/edu/harvard/iq/dataverse/GlobalIdTest.java index 9bd987dd5ab..71ca9c181c4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/GlobalIdTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/GlobalIdTest.java @@ -50,8 +50,8 @@ public void testValidDOI() { GlobalId instance = new GlobalId("doi:10.5072/FK2/BYM3IW"); assertEquals("doi", instance.getProtocol()); - assertEquals("10.5072/FK2", instance.getAuthority()); - assertEquals("BYM3IW", instance.getIdentifier()); + assertEquals("10.5072", instance.getAuthority()); + assertEquals("FK2/BYM3IW", instance.getIdentifier()); // TODO review the generated test code and remove the default call to fail. 
} @@ -72,14 +72,14 @@ public void testContructFromDataset(){ Dataset testDS = new Dataset(); testDS.setProtocol("doi"); - testDS.setAuthority("10.5072/FK2"); - testDS.setIdentifier("BYM3IW"); + testDS.setAuthority("10.5072"); + testDS.setIdentifier("FK2/BYM3IW"); GlobalId instance = new GlobalId(testDS); assertEquals("doi", instance.getProtocol()); - assertEquals("10.5072/FK2", instance.getAuthority()); - assertEquals("BYM3IW", instance.getIdentifier()); + assertEquals("10.5072", instance.getAuthority()); + assertEquals("FK2/BYM3IW", instance.getIdentifier()); } From 7fc3b0f8087bd88cfeb11899181d42249d98f826 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 14:53:59 -0400 Subject: [PATCH 16/44] shoulder support for datafiles and fix for uniqueid checks with shoulder These updates fix an issue with checking the db for entries that match the new one so that tests include the shoulder. They also use a shoulder, if defined, for datafile, only when the ids for datafiles are being generated as 'INDEPENDENT'. When DEPENDENT, the shoulder is already used once in the part of the id coming from the dataset, so it is not used again. 
--- .../iq/dataverse/DataFileServiceBean.java | 19 +++++++++----- .../iq/dataverse/DatasetServiceBean.java | 26 +++++++++---------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index f4c9cc14893..f57ab395d43 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1534,6 +1534,7 @@ public List selectFilesWithMissingOriginalTypes() { public String generateDataFileIdentifier(DataFile datafile, IdServiceBean idServiceBean) { String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatafileIdentifierGenerationStyle, "randomString"); String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); + String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); String datasetIdentifer = ""; //If format is dependent then pre-pend the dataset identifier @@ -1543,30 +1544,34 @@ public String generateDataFileIdentifier(DataFile datafile, IdServiceBean idServ switch (doiIdentifierType) { case "randomString": - return datasetIdentifer + generateIdentifierAsRandomString(datafile, idServiceBean); + if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ + return datasetIdentifer + generateIdentifierAsRandomString(datafile, idServiceBean, shoulder); + } else { + return datasetIdentifer + generateIdentifierAsRandomString(datafile, idServiceBean, ""); + } case "sequentialNumber": if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ - return generateIdentifierAsIndependentSequentialNumber(datafile, idServiceBean); + return generateIdentifierAsIndependentSequentialNumber(datafile, idServiceBean, shoulder); } else { return generateIdentifierAsDependentSequentialNumber(datafile, 
idServiceBean); } default: /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return datasetIdentifer + generateIdentifierAsRandomString(datafile, idServiceBean); + return datasetIdentifer + generateIdentifierAsRandomString(datafile, idServiceBean, ""); } } - private String generateIdentifierAsRandomString(DataFile datafile, IdServiceBean idServiceBean) { + private String generateIdentifierAsRandomString(DataFile datafile, IdServiceBean idServiceBean, String shoulder) { String identifier = null; do { - identifier = RandomStringUtils.randomAlphanumeric(6).toUpperCase(); + identifier = shoulder + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); } while (!isIdentifierUniqueInDatabase(identifier, datafile, idServiceBean)); return identifier; } - private String generateIdentifierAsIndependentSequentialNumber(DataFile datafile, IdServiceBean idServiceBean) { + private String generateIdentifierAsIndependentSequentialNumber(DataFile datafile, IdServiceBean idServiceBean, String shoulder) { String identifier; do { @@ -1578,7 +1583,7 @@ private String generateIdentifierAsIndependentSequentialNumber(DataFile datafile if (identifierNumeric == null) { return null; } - identifier = identifierNumeric.toString(); + identifier = shoulder + identifierNumeric.toString(); } while (!isIdentifierUniqueInDatabase(identifier, datafile, idServiceBean)); return identifier; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 04f1bb03fa0..bfbc01249ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -208,35 +208,35 @@ public Dataset findByGlobalId(String globalId) { } public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle, "randomString"); 
- String doiShoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); + String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle, "randomString"); + String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); - switch (doiIdentifierType) { + switch (identifierType) { case "randomString": - return generateIdentifierAsRandomString(dataset, idServiceBean); + return generateIdentifierAsRandomString(dataset, idServiceBean, ""); case "sequentialNumber": - return generateIdentifierAsSequentialNumber(dataset, idServiceBean); + return generateIdentifierAsSequentialNumber(dataset, idServiceBean, ""); case "shoulderWithRandomString": - return doiShoulder + generateIdentifierAsRandomString(dataset, idServiceBean); + return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder); case "shoulderWithSequentialNumber": - return doiShoulder + generateIdentifierAsSequentialNumber(dataset, idServiceBean); + return generateIdentifierAsSequentialNumber(dataset, idServiceBean, shoulder); default: /* Should we throw an exception instead?? -- L.A. 
4.6.2 */ - return generateIdentifierAsRandomString(dataset, idServiceBean); + return generateIdentifierAsRandomString(dataset, idServiceBean,""); } } - private String generateIdentifierAsRandomString(Dataset dataset, IdServiceBean idServiceBean) { + private String generateIdentifierAsRandomString(Dataset dataset, IdServiceBean idServiceBean, String shoulder) { String identifier = null; do { - identifier = RandomStringUtils.randomAlphanumeric(6).toUpperCase(); + identifier = shoulder + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); return identifier; } - private String generateIdentifierAsSequentialNumber(Dataset dataset, IdServiceBean idServiceBean) { + private String generateIdentifierAsSequentialNumber(Dataset dataset, IdServiceBean idServiceBean, String shoulder) { String identifier; do { @@ -248,7 +248,7 @@ private String generateIdentifierAsSequentialNumber(Dataset dataset, IdServiceBe if (identifierNumeric == null) { return null; } - identifier = identifierNumeric.toString(); + identifier = shoulder + identifierNumeric.toString(); } while (!isIdentifierUniqueInDatabase(identifier, dataset, idServiceBean)); return identifier; @@ -256,7 +256,7 @@ private String generateIdentifierAsSequentialNumber(Dataset dataset, IdServiceBe /** * Check that a identifier entered by the user is unique (not currently used - * for any other study in this Dataverse Network) alos check for duplicate + * for any other study in this Dataverse Network) also check for duplicate * in EZID if needed * @param userIdentifier * @param dataset From 54d43a44ba690208d162a5f6462bd34a5d9faa83 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 17 May 2018 16:23:57 -0400 Subject: [PATCH 17/44] Simplifying - no need for new cases Having a shoulder set will cause it to be used. 
--- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index bfbc01249ab..dbb28f92315 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -213,13 +213,9 @@ public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idService switch (identifierType) { case "randomString": - return generateIdentifierAsRandomString(dataset, idServiceBean, ""); + return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder); case "sequentialNumber": - return generateIdentifierAsSequentialNumber(dataset, idServiceBean, ""); - case "shoulderWithRandomString": - return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder); - case "shoulderWithSequentialNumber": - return generateIdentifierAsSequentialNumber(dataset, idServiceBean, shoulder); + return generateIdentifierAsSequentialNumber(dataset, idServiceBean, shoulder); default: /* Should we throw an exception instead?? -- L.A. 4.6.2 */ return generateIdentifierAsRandomString(dataset, idServiceBean,""); From e3a7eab9f4ebc290af8a5138664aa6142ee0b221 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 18 May 2018 14:49:01 -0400 Subject: [PATCH 18/44] Single IdentifierGenerationStyle key and simplify prepend logic Datafile identifiers now prepend either the dataset identifier (dependent mode) or the shoulder (if defined, independent mode only) and the appropriate prepend string is sent into the appropriate generateId method so it can be prepended before the isIdentifierUniqueInDatabase call. 
--- .../iq/dataverse/DataFileServiceBean.java | 40 +++++++++---------- .../iq/dataverse/DatasetServiceBean.java | 2 +- .../settings/SettingsServiceBean.java | 3 +- .../iq/dataverse/util/SystemConfig.java | 2 +- .../harvard/iq/dataverse/api/DatasetsIT.java | 16 ++++---- 5 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index f57ab395d43..4dc45fbbd9a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1532,46 +1532,44 @@ public List selectFilesWithMissingOriginalTypes() { public String generateDataFileIdentifier(DataFile datafile, IdServiceBean idServiceBean) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatafileIdentifierGenerationStyle, "randomString"); + String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); - - String datasetIdentifer = ""; - //If format is dependent then pre-pend the dataset identifier - if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())){ - datasetIdentifer = datafile.getOwner().getIdentifier() + "/"; + + String prepend = ""; + if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())){ + //If format is dependent then pre-pend the dataset identifier + prepend = datafile.getOwner().getIdentifier() + "/"; + } else { + //If there's a shoulder prepend independent identifiers with it + prepend = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); } switch (doiIdentifierType) { case "randomString": - if 
(doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ - return datasetIdentifer + generateIdentifierAsRandomString(datafile, idServiceBean, shoulder); - } else { - return datasetIdentifer + generateIdentifierAsRandomString(datafile, idServiceBean, ""); - } + return generateIdentifierAsRandomString(datafile, idServiceBean, prepend); case "sequentialNumber": if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ - return generateIdentifierAsIndependentSequentialNumber(datafile, idServiceBean, shoulder); + return generateIdentifierAsIndependentSequentialNumber(datafile, idServiceBean, prepend); } else { - return generateIdentifierAsDependentSequentialNumber(datafile, idServiceBean); + return generateIdentifierAsDependentSequentialNumber(datafile, idServiceBean, prepend); } default: /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return datasetIdentifer + generateIdentifierAsRandomString(datafile, idServiceBean, ""); + return generateIdentifierAsRandomString(datafile, idServiceBean, prepend); } } - private String generateIdentifierAsRandomString(DataFile datafile, IdServiceBean idServiceBean, String shoulder) { + private String generateIdentifierAsRandomString(DataFile datafile, IdServiceBean idServiceBean, String prepend) { String identifier = null; do { - identifier = shoulder + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); + identifier = prepend + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); } while (!isIdentifierUniqueInDatabase(identifier, datafile, idServiceBean)); return identifier; } - private String generateIdentifierAsIndependentSequentialNumber(DataFile datafile, IdServiceBean idServiceBean, String shoulder) { + private String generateIdentifierAsIndependentSequentialNumber(DataFile datafile, IdServiceBean idServiceBean, String prepend) { String identifier; do { @@ -1583,13 +1581,13 @@ private String generateIdentifierAsIndependentSequentialNumber(DataFile datafile if 
(identifierNumeric == null) { return null; } - identifier = shoulder + identifierNumeric.toString(); + identifier = prepend + identifierNumeric.toString(); } while (!isIdentifierUniqueInDatabase(identifier, datafile, idServiceBean)); return identifier; } - private String generateIdentifierAsDependentSequentialNumber(DataFile datafile, IdServiceBean idServiceBean) { + private String generateIdentifierAsDependentSequentialNumber(DataFile datafile, IdServiceBean idServiceBean, String prepend) { String identifier; Long retVal; @@ -1599,7 +1597,7 @@ private String generateIdentifierAsDependentSequentialNumber(DataFile datafile, do { retVal++; - identifier = datafile.getOwner().getIdentifier() + "/" + retVal.toString(); + identifier = prepend + retVal.toString(); } while (!isIdentifierUniqueInDatabase(identifier, datafile, idServiceBean)); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index dbb28f92315..7c112c1b5c5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -208,7 +208,7 @@ public Dataset findByGlobalId(String globalId) { } public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { - String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle, "randomString"); + String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); switch (identifierType) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 71b00019b6a..4b8a0ba872a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -73,8 +73,7 @@ public enum Key { * StorageSite database table. */ LocalDataAccessPath, - DatafileIdentifierGenerationStyle, - DatasetIdentifierGenerationStyle, + IdentifierGenerationStyle, OAuth2CallbackUrl, DefaultAuthProvider, FooterCopyright, diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 20f0790b7db..392ac4a75f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -950,7 +950,7 @@ public boolean isRsyncDownload() } public boolean isDataFilePIDSequentialDependent(){ - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.DatafileIdentifierGenerationStyle, "randomString"); + String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); if (doiIdentifierType.equals("sequentialNumber") && doiDataFileFormat.equals("DEPENDENT")){ return true; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index bbcb46fda5d..90bd1e9ee47 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -54,8 +54,8 @@ public class DatasetsIT { public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); - Response removeDatasetIdentifierGenerationStyle = UtilIT.deleteSetting(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle); - removeDatasetIdentifierGenerationStyle.then().assertThat() + Response removeIdentifierGenerationStyle = UtilIT.deleteSetting(SettingsServiceBean.Key.IdentifierGenerationStyle); + 
removeIdentifierGenerationStyle.then().assertThat() .statusCode(200); Response removeExcludeEmail = UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); @@ -74,8 +74,8 @@ public static void setUpClass() { @AfterClass public static void afterClass() { - Response removeDatasetIdentifierGenerationStyle = UtilIT.deleteSetting(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle); - removeDatasetIdentifierGenerationStyle.then().assertThat() + Response removeIdentifierGenerationStyle = UtilIT.deleteSetting(SettingsServiceBean.Key.IdentifierGenerationStyle); + removeIdentifierGenerationStyle.then().assertThat() .statusCode(200); Response removeExcludeEmail = UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); @@ -487,7 +487,7 @@ public void testExcludeEmail() { } @Test - public void testSequentialNumberAsDatasetIdentifierGenerationStyle() { + public void testSequentialNumberAsIdentifierGenerationStyle() { Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); @@ -498,8 +498,8 @@ public void testSequentialNumberAsDatasetIdentifierGenerationStyle() { createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response setSequentialNumberAsDatasetIdentifierGenerationStyle = UtilIT.setSetting(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle, "sequentialNumber"); - setSequentialNumberAsDatasetIdentifierGenerationStyle.then().assertThat() + Response setSequentialNumberAsIdentifierGenerationStyle = UtilIT.setSetting(SettingsServiceBean.Key.IdentifierGenerationStyle, "sequentialNumber"); + setSequentialNumberAsIdentifierGenerationStyle.then().assertThat() .statusCode(OK.getStatusCode()); Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); @@ -526,7 +526,7 @@ public void testSequentialNumberAsDatasetIdentifierGenerationStyle() { deleteUserResponse.prettyPrint(); assertEquals(200, 
deleteUserResponse.getStatusCode()); - Response remove = UtilIT.deleteSetting(SettingsServiceBean.Key.DatasetIdentifierGenerationStyle); + Response remove = UtilIT.deleteSetting(SettingsServiceBean.Key.IdentifierGenerationStyle); remove.then().assertThat() .statusCode(200); From 9347a47188189adebd2e1fbb7a34d53752e4e121 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 18 May 2018 14:51:19 -0400 Subject: [PATCH 19/44] hardcode id separator in AbstractIdServiceBean --- .../java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java index b6f1bf4f026..ba9c46e04eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java @@ -11,7 +11,6 @@ public abstract class AbstractIdServiceBean implements IdServiceBean { private static final Logger logger = Logger.getLogger(AbstractIdServiceBean.class.getCanonicalName()); - private static final String ID_SEPARATOR="/"; @EJB DataverseServiceBean dataverseService; @@ -97,7 +96,7 @@ public DvObject generateIdentifier(DvObject dvObject) { String protocol = dvObject.getProtocol() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : dvObject.getProtocol(); String authority = dvObject.getAuthority() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : dvObject.getAuthority(); - String doiSeparator = dvObject.getDoiSeparator() == null ? ID_SEPARATOR : dvObject.getDoiSeparator(); + String doiSeparator = dvObject.getDoiSeparator() == null ? 
"/" : dvObject.getDoiSeparator(); IdServiceBean idServiceBean = IdServiceBean.getBean(protocol, commandEngine.getContext()); if (dvObject.isInstanceofDataset()) { dvObject.setIdentifier(datasetService.generateDatasetIdentifier((Dataset) dvObject, idServiceBean)); From 49176974daf60904182ac8ac3602f1c721802907 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 18 May 2018 18:29:19 -0400 Subject: [PATCH 20/44] hardcode id separator as '/' --- .../harvard/iq/dataverse/AbstractIdServiceBean.java | 8 ++------ .../harvard/iq/dataverse/DOIDataCiteServiceBean.java | 5 ++--- .../edu/harvard/iq/dataverse/DOIEZIdServiceBean.java | 9 ++++----- .../edu/harvard/iq/dataverse/DataFileServiceBean.java | 2 +- src/main/java/edu/harvard/iq/dataverse/Dataset.java | 2 +- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 4 ---- .../edu/harvard/iq/dataverse/DatasetServiceBean.java | 9 +++------ src/main/java/edu/harvard/iq/dataverse/DvObject.java | 9 --------- .../edu/harvard/iq/dataverse/HandlenetServiceBean.java | 2 +- .../java/edu/harvard/iq/dataverse/IdServiceBean.java | 5 ++--- .../api/datadeposit/CollectionDepositManagerImpl.java | 3 --- .../edu/harvard/iq/dataverse/api/dto/DatasetDTO.java | 9 --------- .../iq/dataverse/api/imports/ImportDDIServiceBean.java | 3 --- .../api/imports/ImportGenericServiceBean.java | 3 +-- .../jobs/importer/filesystem/FileRecordProcessor.java | 2 +- .../jobs/importer/filesystem/FileRecordWriter.java | 9 ++------- .../harvard/iq/dataverse/dataaccess/FileAccessIO.java | 2 +- .../harvard/iq/dataverse/dataaccess/SwiftAccessIO.java | 6 +++--- .../engine/command/impl/CreateDatasetCommand.java | 6 +----- .../engine/command/impl/RegisterDvObjectCommand.java | 10 ---------- .../edu/harvard/iq/dataverse/util/json/JsonParser.java | 2 -- .../harvard/iq/dataverse/util/json/JsonParserTest.java | 2 +- 22 files changed, 26 insertions(+), 86 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java 
b/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java index ba9c46e04eb..11a28bebe20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AbstractIdServiceBean.java @@ -26,9 +26,9 @@ public abstract class AbstractIdServiceBean implements IdServiceBean { SystemConfig systemConfig; @Override - public String getIdentifierForLookup(String protocol, String authority, String separator, String identifier) { + public String getIdentifierForLookup(String protocol, String authority, String identifier) { logger.log(Level.FINE,"getIdentifierForLookup"); - return protocol + ":" + authority + separator + identifier; + return protocol + ":" + authority + "/" + identifier; } @@ -96,7 +96,6 @@ public DvObject generateIdentifier(DvObject dvObject) { String protocol = dvObject.getProtocol() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : dvObject.getProtocol(); String authority = dvObject.getAuthority() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : dvObject.getAuthority(); - String doiSeparator = dvObject.getDoiSeparator() == null ? 
"/" : dvObject.getDoiSeparator(); IdServiceBean idServiceBean = IdServiceBean.getBean(protocol, commandEngine.getContext()); if (dvObject.isInstanceofDataset()) { dvObject.setIdentifier(datasetService.generateDatasetIdentifier((Dataset) dvObject, idServiceBean)); @@ -109,9 +108,6 @@ public DvObject generateIdentifier(DvObject dvObject) { if (dvObject.getAuthority() == null) { dvObject.setAuthority(authority); } - if (dvObject.getDoiSeparator() == null) { - dvObject.setDoiSeparator(doiSeparator); - } return dvObject; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java index 4f160f832e1..44a7e9efad9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java @@ -95,15 +95,14 @@ public HashMap getIdentifierMetadata(DvObject dvObject) { * Looks up the metadata for a Global Identifier * @param protocol the identifier system, e.g. "doi" * @param authority the namespace that the authority manages in the identifier system - * @param separator the string that separates authority from local identifier part * @param identifier the local identifier part * @return a Map of metadata. It is empty when the lookup failed, e.g. when * the identifier does not exist. 
*/ @Override - public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String separator, String identifier) { + public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier) { logger.log(Level.FINE,"lookupMetadataFromIdentifier"); - String identifierOut = getIdentifierForLookup(protocol, authority, separator, identifier); + String identifierOut = getIdentifierForLookup(protocol, authority, identifier); HashMap metadata = new HashMap<>(); try { metadata = doiDataCiteRegisterService.getMetadata(identifierOut); diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java index 62a1504d384..4ed7003252a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java @@ -99,16 +99,15 @@ public HashMap getIdentifierMetadata(DvObject dvObject) { * @param protocol the identifier system, e.g. "doi" * @param authority the namespace that the authority manages in the * identifier system - * @param separator the string that separates authority from local * identifier part * @param identifier the local identifier part * @return a Map of metadata. It is empty when the lookup failed, e.g. when * the identifier does not exist. 
*/ @Override - public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String separator, String identifier) { + public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier) { logger.log(Level.FINE,"lookupMetadataFromIdentifier"); - String identifierOut = getIdentifierForLookup(protocol, authority, separator, identifier); + String identifierOut = getIdentifierForLookup(protocol, authority, identifier); HashMap metadata = new HashMap<>(); try { metadata = ezidService.getMetadata(identifierOut); @@ -181,7 +180,7 @@ public void deleteIdentifier(DvObject dvObject) throws Exception { updateIdentifierStatus(dvObject, "unavailable | withdrawn by author"); HashMap metadata = new HashMap<>(); metadata.put("_target", "http://ezid.cdlib.org/id/" + dvObject.getProtocol() + ":" + dvObject.getAuthority() - + dvObject.getDoiSeparator() + dvObject.getIdentifier()); + + "/" + dvObject.getIdentifier()); try { modifyIdentifierTargetURL(dvObject); if (dvObject instanceof Dataset ) { @@ -189,7 +188,7 @@ public void deleteIdentifier(DvObject dvObject) throws Exception { for (DataFile df : dataset.getFiles()) { metadata = new HashMap<>(); metadata.put("_target", "http://ezid.cdlib.org/id/" + df.getProtocol() + ":" + df.getAuthority() - + df.getDoiSeparator() + df.getIdentifier()); + + "/" + df.getIdentifier()); modifyIdentifierTargetURL(df); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 4dc45fbbd9a..99ca8dc81da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -166,7 +166,7 @@ public DataFile find(Object pk) { public DataFile findByGlobalId(String globalId) { - String queryStr = "select s.id from dvobject s where Concat(s.protocol, ':' , s.authority , s.doiseparator , s.identifier) = '" + globalId + "'"; + String queryStr = 
"select s.id from dvobject s where Concat(s.protocol, ':' , s.authority , '/' , s.identifier) = '" + globalId + "'"; DataFile file = null; try { diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 77ddb51f908..e65b7f217da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -74,7 +74,7 @@ sequence. Used when the Dataverse is (optionally) configured to use @Table(indexes = { @Index(columnList = "guestbook_id"), @Index(columnList = "thumbnailfile_id")}, - uniqueConstraints = @UniqueConstraint(columnNames = {"authority,protocol,identifier,doiseparator"})) + uniqueConstraints = @UniqueConstraint(columnNames = {"authority,protocol,identifier"})) public class Dataset extends DvObjectContainer { public static final String TARGET_URL = "/citation?persistentId="; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index bf2f84a6568..cfdb6535be5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -221,7 +221,6 @@ public enum DisplayMode { private String version; private String protocol = ""; private String authority = ""; - private String separator=""; private String customFields=""; private boolean noDVsAtAll = false; @@ -1367,8 +1366,6 @@ private String init(boolean initFull) { String nonNullDefaultIfKeyNotFound = ""; protocol = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); authority = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - //Current code assumes the authority - identifier separator is '/' as is required by DOI and Handle - separator="/"; if (dataset.getId() != null || versionId != null || persistentId != null) { // view mode for a dataset 
DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse = null; @@ -1516,7 +1513,6 @@ private String init(boolean initFull) { dataset.setOwner(dataverseService.find(ownerId)); dataset.setProtocol(protocol); dataset.setAuthority(authority); - dataset.setDoiSeparator(separator); //Wait until the create command before actually getting an identifier if (dataset.getOwner() == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 7c112c1b5c5..15f38a11a0f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -192,7 +192,7 @@ public Dataset merge( Dataset ds ) { public Dataset findByGlobalId(String globalId) { - String queryStr = "select s.id from dvobject s where Concat(s.protocol, ':' , s.authority , s.doiseparator , s.identifier) = '" + globalId +"'"; + String queryStr = "select s.id from dvobject s where Concat(s.protocol, ':' , s.authority , '/' , s.identifier) = '" + globalId +"'"; Dataset foundDataset = null; try { Query query = em.createNativeQuery(queryStr); @@ -327,7 +327,7 @@ public String createCitationRIS(DatasetVersion version, FileMetadata fileMetadat for (DatasetAuthor author : authorList) { retString += "AU - " + author.getName().getDisplayValue() + "\r\n"; } - retString += "DO - " + version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier() + "\r\n"; + retString += "DO - " + version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + "/" + version.getDataset().getIdentifier() + "\r\n"; retString += "PY - " + version.getVersionYear() + "\r\n"; retString += "UR - " + version.getDataset().getPersistentURL() + "\r\n"; retString += "PB - " + publisher + "\r\n"; @@ -479,7 +479,7 @@ private void 
createEndNoteXML(XMLStreamWriter xmlw, DatasetVersion version, File } xmlw.writeStartElement("electronic-resource-num"); - String electResourceNum = version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + version.getDataset().getDoiSeparator() + version.getDataset().getIdentifier(); + String electResourceNum = version.getDataset().getProtocol() + "/" + version.getDataset().getAuthority() + "/" + version.getDataset().getIdentifier(); xmlw.writeCharacters(electResourceNum); xmlw.writeEndElement(); //10.3886/ICPSR03259.v1 @@ -952,9 +952,6 @@ public void obtainPersistentIdentifiersForDatafiles(Dataset dataset) { if (datafile.getAuthority() == null) { datafile.setAuthority(settingsService.getValueForKey(SettingsServiceBean.Key.Authority, "")); } - if (datafile.getDoiSeparator() == null) { - datafile.setDoiSeparator("/"); - } logger.info("identifier: " + datafile.getIdentifier()); diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 6b47f2052f1..8f9288cd42d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -118,7 +118,6 @@ public String visit(DataFile df) { private String protocol; private String authority; - private String doiSeparator; @Temporal(value = TemporalType.TIMESTAMP) private Date globalIdCreateTime; @@ -257,14 +256,6 @@ public void setAuthority(String authority) { this.authority = authority; } - public String getDoiSeparator() { - return doiSeparator; - } - - public void setDoiSeparator(String doiSeparator) { - this.doiSeparator = doiSeparator; - } - public Date getGlobalIdCreateTime() { return globalIdCreateTime; } diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index f9beef5fdce..9f0f4d17bbd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ -335,7 +335,7 @@ public HashMap getIdentifierMetadata(DvObject dvObject) { } @Override - public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String separator, String identifier) { + public HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier) { throw new NotImplementedException(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/IdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/IdServiceBean.java index 07d22ef5754..7994939c439 100644 --- a/src/main/java/edu/harvard/iq/dataverse/IdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/IdServiceBean.java @@ -21,17 +21,16 @@ public interface IdServiceBean { HashMap getIdentifierMetadata(DvObject dvObject); - HashMap lookupMetadataFromIdentifier(String protocol, String authority, String separator, String identifier); + HashMap lookupMetadataFromIdentifier(String protocol, String authority, String identifier); /** * Concatenate the parts that make up a Global Identifier. * @param protocol the identifier system, e.g. "doi" * @param authority the namespace that the authority manages in the identifier system - * @param separator the string that separates authority from local identifier part * @param identifier the local identifier part * @return the Global Identifier, e.g. 
"doi:10.12345/67890" */ - String getIdentifierForLookup(String protocol, String authority, String separator, String identifier); + String getIdentifierForLookup(String protocol, String authority, String identifier); String modifyIdentifierTargetURL(DvObject dvObject) throws Exception; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java index 28967a9b69b..6c1d2e0ca85 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java @@ -100,10 +100,7 @@ public DepositReceipt createNew(String collectionUri, Deposit deposit, AuthCrede dataset.setProtocol(protocol); dataset.setAuthority(authority); - // Current code assumes '/' as the authority - identifier separator as required by DOI and Handle - dataset.setDoiSeparator("/"); //Wait until the create command before actually getting an identifier - //dataset.setIdentifier(datasetService.generateDatasetIdentifier(protocol, authority, separator)); logger.log(Level.FINE, "DS Deposit identifier: {0}", dataset.getIdentifier()); CreateDatasetCommand createDatasetCommand = new CreateDatasetCommand(dataset, dvReq, false); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/dto/DatasetDTO.java b/src/main/java/edu/harvard/iq/dataverse/api/dto/DatasetDTO.java index e03135466bb..87564c96637 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/dto/DatasetDTO.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/dto/DatasetDTO.java @@ -13,7 +13,6 @@ public class DatasetDTO implements java.io.Serializable { private String protocol; private String authority; private String globalIdCreateTime; - private String doiSeparator; private String publisher; private String publicationDate; private DatasetVersionDTO datasetVersion; @@ -78,14 +77,6 @@ public void 
setDataFiles(List dataFiles) { this.dataFiles = dataFiles; } - public String getDoiSeparator() { - return doiSeparator; - } - - public void setDoiSeparator(String doiSeparator) { - this.doiSeparator = doiSeparator; - } - public String getPublisher() { return publisher; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index ee8a780d9de..df81b5dc76c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -1580,7 +1580,6 @@ private void parseStudyIdHandle(String _id, DatasetDTO datasetDTO) { } else { datasetDTO.setAuthority(_id.substring(index1+1, index2)); } - datasetDTO.setDoiSeparator("/"); datasetDTO.setProtocol("hdl"); datasetDTO.setIdentifier(_id.substring(index2+1)); } @@ -1599,7 +1598,6 @@ private void parseStudyIdDOI(String _id, DatasetDTO datasetDTO) throws ImportExc datasetDTO.setAuthority(_id.substring(index1+1, index2)); } datasetDTO.setProtocol("doi"); - datasetDTO.setDoiSeparator("/"); datasetDTO.setIdentifier(_id.substring(index2+1)); } @@ -1624,7 +1622,6 @@ private void parseStudyIdDoiICPSRdara(String _id, DatasetDTO datasetDTO) throws datasetDTO.setAuthority(_id.substring(0, index)); datasetDTO.setProtocol("doi"); - datasetDTO.setDoiSeparator("/"); datasetDTO.setIdentifier(_id.substring(index+1)); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index 2258ac0952c..b5554816c0a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -427,7 +427,7 @@ private String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO } if (index2 == -1) { - 
logger.warning("Error parsing identifier: " + identifierString + ". Second separator not found in string"); + logger.warning("Error parsing identifier: " + identifierString + ". Second '/' not found in string"); return null; } @@ -435,7 +435,6 @@ private String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO String identifier = identifierString.substring(index2 + 1); datasetDTO.setProtocol(protocol); - datasetDTO.setDoiSeparator("/"); datasetDTO.setAuthority(authority); datasetDTO.setIdentifier(identifier); diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordProcessor.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordProcessor.java index b0f9d84640e..af7caf32a7c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordProcessor.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordProcessor.java @@ -69,7 +69,7 @@ public Object processItem(Object object) throws Exception { DatasetVersion version = dataset.getLatestVersion(); String path = object.toString(); - String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier(); + String gid = dataset.getAuthority() + "/" + dataset.getIdentifier(); String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1); // skip if it already exists diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java index 8079a640afd..22bfd062d7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java @@ -235,7 +235,7 @@ private DataFile createPackageDataFile(List files) { totalSize = 0L; } - String gid = dataset.getAuthority() + dataset.getDoiSeparator() + 
dataset.getIdentifier(); + String gid = dataset.getAuthority() + "/" + dataset.getIdentifier(); packageFile.setChecksumType(DataFile.ChecksumType.SHA1); // initial default @@ -365,17 +365,12 @@ private DataFile createPackageDataFile(List files) { String nonNullDefaultIfKeyNotFound = ""; String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - //Hardcoded as '/' which is what's required for DOIs and Handles as the authority - identifier separator. - String doiSeparator = "/"; if (packageFile.getProtocol() == null) { packageFile.setProtocol(protocol); } if (packageFile.getAuthority() == null) { packageFile.setAuthority(authority); } - if (packageFile.getDoiSeparator() == null) { - packageFile.setDoiSeparator(doiSeparator); - } if (!packageFile.isIdentifierRegistered()) { String doiRetString = ""; @@ -408,7 +403,7 @@ private DataFile createDataFile(File file) { DatasetVersion version = dataset.getLatestVersion(); String path = file.getAbsolutePath(); - String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier(); + String gid = dataset.getAuthority() + "/" + dataset.getIdentifier(); String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1); DataFile datafile = new DataFile("application/octet-stream"); // we don't determine mime type diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 000f1cae6d3..05b5be8d6fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -155,7 +155,7 @@ public void open (DataAccessOption... 
options) throws IOException { if (dataset.getFileSystemDirectory() != null && !Files.exists(dataset.getFileSystemDirectory())) { Files.createDirectories(dataset.getFileSystemDirectory()); } - dataset.setStorageIdentifier("file://"+dataset.getAuthority()+dataset.getDoiSeparator()+dataset.getIdentifier()); + dataset.setStorageIdentifier("file://"+dataset.getAuthority()+"/"+dataset.getIdentifier()); } } else if (dvObject instanceof Dataverse) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 8621e83714c..a8a69e63cfe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -520,7 +520,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt //swiftFolderPath = dataFile.getOwner().getDisplayName(); String swiftFolderPathSeparator = "-"; - String authorityNoSlashes = owner.getAuthority().replace(owner.getDoiSeparator(), swiftFolderPathSeparator); + String authorityNoSlashes = owner.getAuthority().replace("/", swiftFolderPathSeparator); swiftFolderPath = owner.getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + owner.getIdentifier(); @@ -570,7 +570,7 @@ private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxIt Properties p = getSwiftProperties(); swiftEndPoint = p.getProperty("swift.default.endpoint"); String swiftFolderPathSeparator = "-"; - String authorityNoSlashes = dataset.getAuthority().replace(dataset.getDoiSeparator(), swiftFolderPathSeparator); + String authorityNoSlashes = dataset.getAuthority().replace("/", swiftFolderPathSeparator); swiftFolderPath = dataset.getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + dataset.getIdentifier(); @@ -830,7 
+830,7 @@ public String getSwiftContainerName() { swiftFolderPathSeparator = "_"; } if (dvObject instanceof DataFile) { - String authorityNoSlashes = this.getDataFile().getOwner().getAuthority().replace(this.getDataFile().getOwner().getDoiSeparator(), swiftFolderPathSeparator); + String authorityNoSlashes = this.getDataFile().getOwner().getAuthority().replace("/", swiftFolderPathSeparator); return this.getDataFile().getOwner().getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + this.getDataFile().getOwner().getIdentifier(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java index bbe7b8a9046..b76e4499959 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java @@ -154,10 +154,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if (theDataset.getAuthority() == null) { theDataset.setAuthority(authority); } - if (theDataset.getDoiSeparator() == null) { - // '/', the required authority - identifier separator for DOI and Handle is hardcoded in many places - theDataset.setDoiSeparator("/"); - } if (theDataset.getStorageIdentifier() == null) { try { DataAccess.createNewStorageIO(theDataset, "placeholder"); @@ -165,7 +161,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // if setting the storage identifier through createNewStorageIO fails, dataset creation // does not have to fail. we just set the storage id to a default -SF String storageDriver = (System.getProperty("dataverse.files.storage-driver-id") != null) ? 
System.getProperty("dataverse.files.storage-driver-id") : "file"; - theDataset.setStorageIdentifier(storageDriver + "://" + theDataset.getAuthority() + theDataset.getDoiSeparator() + theDataset.getIdentifier()); + theDataset.setStorageIdentifier(storageDriver + "://" + theDataset.getAuthority() + "/" + theDataset.getIdentifier()); logger.info("Failed to create StorageIO. StorageIdentifier set to default. Not fatal." + "(" + ioex.getMessage() + ")"); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java index 5997a6195f2..3d60844dfba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java @@ -37,9 +37,6 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { String nonNullDefaultIfKeyNotFound = ""; String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); String authority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - // Current code assumes in many places that the authority - identifier separator for new identifiers - // is '/' as is true for DOI and Handle - String doiSeparator = "/"; IdServiceBean idServiceBean = IdServiceBean.getBean(target.getProtocol(), ctxt); try { //Test to see if identifier already present @@ -57,10 +54,6 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { if (target.getAuthority() == null) { target.setAuthority(authority); } - if (target.getDoiSeparator() == null) { - target.setDoiSeparator(doiSeparator); - } - } if (idServiceBean.alreadyExists(target)) { @@ -90,9 +83,6 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { if (df.getAuthority() == null) { df.setAuthority(authority); } - if 
(df.getDoiSeparator() == null) { - df.setDoiSeparator(doiSeparator); - } } doiRetString = idServiceBean.createIdentifier(df); if (doiRetString != null && doiRetString.contains(df.getIdentifier())) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index db470527d75..5f0c3af09d3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -255,8 +255,6 @@ public Dataset parseDataset(JsonObject obj) throws JsonParseException { dataset.setAuthority(obj.getString("authority", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : obj.getString("authority")); dataset.setProtocol(obj.getString("protocol", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : obj.getString("protocol")); - // hardcoded '/' is the required Authority-identifier separator for DOI and Handle - dataset.setDoiSeparator(obj.getString("doiSeparator", null) == null ? 
"/" : obj.getString("doiSeparator")); dataset.setIdentifier(obj.getString("identifier",null)); DatasetVersion dsv = new DatasetVersion(); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java index a569f9bfe1a..c8200b9dcce 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java @@ -422,7 +422,6 @@ public void testParseEmptyDataset() throws JsonParseException { System.out.println(dsJson != null); Dataset actual = sut.parseDataset(dsJson); assertEquals("10.5072/FK2", actual.getAuthority()); - assertEquals("/", actual.getDoiSeparator()); assertEquals("doi", actual.getProtocol()); } catch (IOException ioe) { throw new JsonParseException("Couldn't read test file", ioe); @@ -430,6 +429,7 @@ public void testParseEmptyDataset() throws JsonParseException { } /** + * * Expect an exception when the dataset version JSON contains fields * that the {@link DatasetFieldService} doesn't know about. 
* @throws JsonParseException as expected From 88f4a022aeb1990d3ded1970f65c25bc55311982 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 19 May 2018 10:53:24 -0400 Subject: [PATCH 21/44] fixes per discussion --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 2 +- .../edu/harvard/iq/dataverse/util/json/JsonParserTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 062fd98f893..1422d4e0cb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -221,7 +221,7 @@ public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idService return generateIdentifierAsSequentialNumber(dataset, idServiceBean, shoulder); default: /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return generateIdentifierAsRandomString(dataset, idServiceBean,""); + return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java index c8200b9dcce..ff02a3eefa8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java @@ -421,7 +421,7 @@ public void testParseEmptyDataset() throws JsonParseException { dsJson = Json.createReader(reader).readObject(); System.out.println(dsJson != null); Dataset actual = sut.parseDataset(dsJson); - assertEquals("10.5072/FK2", actual.getAuthority()); + assertEquals("10.5072", actual.getAuthority()); assertEquals("doi", actual.getProtocol()); } catch (IOException ioe) { throw new JsonParseException("Couldn't read test file", ioe); @@ -625,7 +625,7 @@ private static class MockSettingsSvc extends SettingsServiceBean { public 
String getValueForKey( Key key /*, String defaultValue */) { switch (key) { case Authority: - return "10.5072/FK2"; + return "10.5072"; case Protocol: return "doi"; default: From c2c26bc69840e4e3a328adc8cee6e3dc2c71eeff Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 19 May 2018 11:02:29 -0400 Subject: [PATCH 22/44] removing authority and protocol from dataset --- src/main/java/edu/harvard/iq/dataverse/Dataset.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index e65b7f217da..02686f01bed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -39,7 +39,7 @@ */ @NamedQueries({ @NamedQuery(name = "Dataset.findByIdentifier", - query = "SELECT d FROM Dataset d WHERE d.identifier=:identifier"), + query = "SELECT d FROM DvObject d WHERE d.identifier=:identifier"), @NamedQuery(name = "Dataset.findByOwnerIdentifier", query = "SELECT o.identifier FROM DvObject o WHERE o.owner.id=:owner_id") }) @@ -74,7 +74,7 @@ sequence. 
Used when the Dataverse is (optionally) configured to use @Table(indexes = { @Index(columnList = "guestbook_id"), @Index(columnList = "thumbnailfile_id")}, - uniqueConstraints = @UniqueConstraint(columnNames = {"authority,protocol,identifier"})) + uniqueConstraints = @UniqueConstraint(columnNames = {"identifier"})) public class Dataset extends DvObjectContainer { public static final String TARGET_URL = "/citation?persistentId="; @@ -84,9 +84,6 @@ public class Dataset extends DvObjectContainer { @OrderBy("id") private List files = new ArrayList<>(); - private String protocol; - private String authority; - @Temporal(value = TemporalType.TIMESTAMP) private Date globalIdCreateTime; @@ -631,7 +628,7 @@ public String getRemoteArchiveURL() { // the study: //String icpsrId = identifier; //return this.getOwner().getHarvestingClient().getArchiveUrl() + "/icpsrweb/ICPSR/studies/"+icpsrId+"?q="+icpsrId+"&searchSource=icpsr-landing"; - return "http://doi.org/" + authority + "/" + identifier; + return "http://doi.org/" + this.getAuthority() + "/" + identifier; } else if (HarvestingClient.HARVEST_STYLE_NESSTAR.equals(this.getHarvestedFrom().getHarvestStyle())) { String nServerURL = this.getHarvestedFrom().getArchiveUrl(); // chop any trailing slashes in the server URL - or they will result From a3bcb9b99cec596bce1d17ab9c1d38617d2b4d37 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 22 May 2018 14:07:29 -0400 Subject: [PATCH 23/44] add statements to SQL upgrade script #3583 #898 --- scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 77d727ad48c..6d5730d71dc 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -54,3 +54,6 @@ ALTER TABLE dataset DROP COLUMN globalidcreatetime; ALTER TABLE dataset DROP COLUMN identifier; ALTER 
TABLE dataset DROP COLUMN protocol; +UPDATE dvObject SET identifier=substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier WHERE +strpos(authority,doiseparator)>0; +UPDATE dvObject SET authority=substring(authority from 0 for strpos(authority,doiseparator)) WHERE strpos(authority,doiseparator)>0; From 599a224f2f876f338bfa4fb5659cbb1475a1ccce Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 22 May 2018 15:01:48 -0400 Subject: [PATCH 24/44] more table changes and new findByGlobalId query --- .../iq/dataverse/DataFileServiceBean.java | 23 +---------- .../edu/harvard/iq/dataverse/Dataset.java | 8 +--- .../iq/dataverse/DatasetServiceBean.java | 21 ++-------- .../edu/harvard/iq/dataverse/DvObject.java | 7 +++- .../iq/dataverse/DvObjectServiceBean.java | 40 +++++++++++++++++++ 5 files changed, 52 insertions(+), 47 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 29f2046945b..79a01428e17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -53,7 +53,7 @@ public class DataFileServiceBean implements java.io.Serializable { private static final Logger logger = Logger.getLogger(DataFileServiceBean.class.getCanonicalName()); @EJB - DatasetServiceBean datasetService; + DvObjectServiceBean dvObjectService; @EJB PermissionServiceBean permissionService; @EJB @@ -165,26 +165,7 @@ public DataFile find(Object pk) { }*/ public DataFile findByGlobalId(String globalId) { - -/* - Concatenate pieces of global Id for selection until more permanent fix implemented - */ - String queryStr = "select s.id from dvobject s where s.protocol || ':' || s.authority || '/' || s.identifier = '" + globalId +"'"; - - DataFile file = null; - try { - Query query = em.createNativeQuery(queryStr); - Long fileId = new Long((Integer) query.getSingleResult()); - file = 
em.find(DataFile.class, fileId); - - } catch (javax.persistence.NoResultException e) { - // (set to .info, this can fill the log file with thousands of - // these messages during a large harvest run) - logger.fine("no file found: " + globalId); - // DO nothing, just return null. - } - return file; - + return (DataFile) dvObjectService.findByGlobalId(globalId, DataFile.class); } public DataFile findReplacementFile(Long previousFileId){ diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 02686f01bed..3ee3c5dab7a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -38,8 +38,6 @@ * @author skraffmiller */ @NamedQueries({ - @NamedQuery(name = "Dataset.findByIdentifier", - query = "SELECT d FROM DvObject d WHERE d.identifier=:identifier"), @NamedQuery(name = "Dataset.findByOwnerIdentifier", query = "SELECT o.identifier FROM DvObject o WHERE o.owner.id=:owner_id") }) @@ -73,8 +71,7 @@ sequence. 
Used when the Dataverse is (optionally) configured to use @Entity @Table(indexes = { @Index(columnList = "guestbook_id"), - @Index(columnList = "thumbnailfile_id")}, - uniqueConstraints = @UniqueConstraint(columnNames = {"identifier"})) + @Index(columnList = "thumbnailfile_id")}) public class Dataset extends DvObjectContainer { public static final String TARGET_URL = "/citation?persistentId="; @@ -84,9 +81,6 @@ public class Dataset extends DvObjectContainer { @OrderBy("id") private List files = new ArrayList<>(); - @Temporal(value = TemporalType.TIMESTAMP) - private Date globalIdCreateTime; - @Temporal(value = TemporalType.TIMESTAMP) private Date lastExportTime; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 1422d4e0cb9..8d7c152dac5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -73,6 +73,9 @@ public class DatasetServiceBean implements java.io.Serializable { @EJB DatasetVersionServiceBean versionService; + @EJB + DvObjectServiceBean dvObjectService; + @EJB AuthenticationServiceBean authentication; @@ -191,23 +194,7 @@ public Dataset merge( Dataset ds ) { } public Dataset findByGlobalId(String globalId) { - -/* - Concatenate pieces of global Id for selection until more permanent fix implemented - */ - String queryStr = "select s.id from dvobject s where s.protocol || ':' || s.authority || '/' || s.identifier = '" + globalId +"'"; - Dataset foundDataset = null; - try { - Query query = em.createNativeQuery(queryStr); - Long datasetId = new Long((Integer) query.getSingleResult()); - foundDataset = em.find(Dataset.class, datasetId); - } catch (javax.persistence.NoResultException e) { - // (set to .info, this can fill the log file with thousands of - // these messages during a large harvest run) - logger.fine("no ds found: " + globalId); - // DO nothing, just return null. 
- } - return foundDataset; + return (Dataset) dvObjectService.findByGlobalId(globalId, Dataset.class); } public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 8f9288cd42d..b8c5c938d7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -20,7 +20,9 @@ @NamedQuery(name = "DvObject.findById", query = "SELECT o FROM DvObject o WHERE o.id=:id"), @NamedQuery(name = "DvObject.ownedObjectsById", - query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id") + query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"), + @NamedQuery(name = "DvObject.findByGlobalId", + query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol") }) @Entity // Inheritance strategy "JOINED" will create 4 db tables - @@ -32,7 +34,8 @@ @Table(indexes = {@Index(columnList="dtype") , @Index(columnList="owner_id") , @Index(columnList="creator_id") - , @Index(columnList="releaseuser_id")}) + , @Index(columnList="releaseuser_id")}, + uniqueConstraints = @UniqueConstraint(columnNames = {"authority,protocol,identifier"})) public abstract class DvObject extends DataverseEntity implements java.io.Serializable { public static final String DATAVERSE_DTYPE_STRING = "Dataverse"; diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index 7e73066d476..89f4a9a3f2c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -59,6 +59,46 @@ public List findAll() { return em.createNamedQuery("DvObject.findAll", DvObject.class).getResultList(); } + public T findByGlobalId(String globalId, Class resultClass) { + + String protocol = 
""; + String authority = ""; + String identifier = ""; + int index1 = globalId.indexOf(':'); + if (index1 > 0) { // ':' found with one or more characters before it + int index2 = globalId.indexOf('/', index1 + 1); + if (index2 > 0 && (index2 + 1) < globalId.length()) { // '/' found with one or more characters between ':' + // and '/' and there are characters after '/' + protocol = globalId.substring(0, index1); + authority = globalId.substring(index1 + 1, index2); + identifier = globalId.substring(index2 + 1); + + DvObject foundDvObject = null; + try { + Query query; + query = em.createNamedQuery("DvObject.findByGlobalId", resultClass); + query.setParameter("identifier", identifier); + query.setParameter("protocol", protocol); + query.setParameter("authority", authority); + foundDvObject = (DvObject) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no dvObject found: " + globalId); + // DO nothing, just return null. 
+ } + return resultClass.cast(foundDvObject); + } else { + logger.info( + "Error parsing identifier: " + globalId + ": ':/' not found in string"); + return null; + } + } else { + logger.info("Error parsing identifier: " + globalId + ": ':' not found in string"); + return null; + } + } + public DvObject updateContentIndexTime(DvObject dvObject) { /** * @todo to avoid a possible OptimisticLockException, should we merge From 0c57f8a72271ad11f7c4e3b6907f1ca71ef471fb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 22 May 2018 15:11:28 -0400 Subject: [PATCH 25/44] don't create doiseparator column in dvobject --- .../upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 112 +++++++++--------- 1 file changed, 53 insertions(+), 59 deletions(-) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 6d5730d71dc..e3dbd755c75 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -1,59 +1,53 @@ -ALTER TABLE externaltool ADD COLUMN type character varying(255); -ALTER TABLE externaltool ALTER COLUMN type SET NOT NULL; --- Previously, the only explore tool was TwoRavens. We now persist the name of the tool. 
-UPDATE guestbookresponse SET downloadtype = 'TwoRavens' WHERE downloadtype = 'Explore'; -ALTER TABLE filemetadata ADD COLUMN prov_freeform text; --- ALTER TABLE datafile ADD COLUMN prov_cplid int; -ALTER TABLE datafile ADD COLUMN prov_entityname text; - --- Moves DOI fields from Dataset to DVObject --- so that Identifiers may be added to DataFiles - -ALTER TABLE dvobject ADD COLUMN - authority character varying(255), - ADD COLUMN doiseparator character varying(255), - ADD COLUMN globalidcreatetime timestamp without time zone, - ADD COLUMN identifierRegistered boolean, - ADD COLUMN identifier character varying(255), - ADD COLUMN protocol character varying(255); - - -UPDATE dvobject -SET authority=(SELECT dataset.authority -FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; - -UPDATE dvobject -SET doiseparator=(SELECT dataset.doiseparator -FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; - -UPDATE dvobject -SET globalidcreatetime=(SELECT dataset.globalidcreatetime -FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; - -UPDATE dvobject -SET identifierRegistered= true where globalidcreatetime is not null; - -UPDATE dvobject -SET identifier=(SELECT dataset.identifier -FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; - -UPDATE dvobject -SET protocol=(SELECT dataset.protocol -FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; - -ALTER TABLE dataset ALTER identifier DROP NOT NULL; - -ALTER TABLE dataset DROP COLUMN authority; -ALTER TABLE dataset DROP COLUMN doiseparator; -ALTER TABLE dataset DROP COLUMN globalidcreatetime; -ALTER TABLE dataset DROP COLUMN identifier; -ALTER TABLE dataset DROP COLUMN protocol; - -UPDATE dvObject SET identifier=substring(authority, strpos(authority,doiseparator)+1) || 
doiseparator || identifier WHERE -strpos(authority,doiseparator)>0; -UPDATE dvObject SET authority=substring(authority from 0 for strpos(authority,doiseparator)) WHERE strpos(authority,doiseparator)>0; +ALTER TABLE externaltool ADD COLUMN type character varying(255); +ALTER TABLE externaltool ALTER COLUMN type SET NOT NULL; +-- Previously, the only explore tool was TwoRavens. We now persist the name of the tool. +UPDATE guestbookresponse SET downloadtype = 'TwoRavens' WHERE downloadtype = 'Explore'; +ALTER TABLE filemetadata ADD COLUMN prov_freeform text; +-- ALTER TABLE datafile ADD COLUMN prov_cplid int; +ALTER TABLE datafile ADD COLUMN prov_entityname text; + +-- Moves DOI fields from Dataset to DVObject +-- so that Identifiers may be added to DataFiles + +ALTER TABLE dvobject ADD COLUMN + authority character varying(255), + ADD COLUMN globalidcreatetime timestamp without time zone, + ADD COLUMN identifierRegistered boolean, + ADD COLUMN identifier character varying(255), + ADD COLUMN protocol character varying(255); + + +UPDATE dvobject +SET authority=(SELECT dataset.authority +FROM dataset +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; + +UPDATE dvobject +SET globalidcreatetime=(SELECT dataset.globalidcreatetime +FROM dataset +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; + +UPDATE dvobject +SET identifierRegistered= true where globalidcreatetime is not null; + +UPDATE dvobject +SET identifier=(SELECT dataset.identifier +FROM dataset +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; + +UPDATE dvobject +SET protocol=(SELECT dataset.protocol +FROM dataset +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; + +ALTER TABLE dataset ALTER identifier DROP NOT NULL; + +ALTER TABLE dataset DROP COLUMN authority; +ALTER TABLE dataset DROP COLUMN doiseparator; +ALTER TABLE dataset DROP COLUMN 
globalidcreatetime; +ALTER TABLE dataset DROP COLUMN identifier; +ALTER TABLE dataset DROP COLUMN protocol; + +UPDATE dvObject SET identifier=substring(authority, strpos(authority,'/')+1) || '/' || identifier WHERE +strpos(authority,'/')>0; +UPDATE dvObject SET authority=substring(authority from 0 for strpos(authority,'/')) WHERE strpos(authority,'/')>0; From 31236d37398b372a009f7436bcc0d9a7ee66f36f Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 22 May 2018 16:12:02 -0400 Subject: [PATCH 26/44] Add migration for non-shoulder authority --- .../upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index e3dbd755c75..63bbdaa69c4 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -10,31 +10,45 @@ ALTER TABLE datafile ADD COLUMN prov_entityname text; -- so that Identifiers may be added to DataFiles ALTER TABLE dvobject ADD COLUMN - authority character varying(255), - ADD COLUMN globalidcreatetime timestamp without time zone, - ADD COLUMN identifierRegistered boolean, - ADD COLUMN identifier character varying(255), - ADD COLUMN protocol character varying(255); + authority character varying(255), + ADD COLUMN globalidcreatetime timestamp without time zone, + ADD COLUMN identifierRegistered boolean, + ADD COLUMN identifier character varying(255), + ADD COLUMN protocol character varying(255); +--add authority shoulder to identifier +UPDATE dvobject +SET identifier=(SELECT substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier +FROM dataset +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' and strpos(authority,doiseparator)>0) where dvobject.dtype='Dataset'; +--just copy if there's no shoulder UPDATE dvobject -SET authority=(SELECT dataset.authority +SET 
identifier=(SELECT identifier FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' and strpos(authority,doiseparator)=0) where dvobject.dtype='Dataset'; +--strip shoulder from authority UPDATE dvobject -SET globalidcreatetime=(SELECT dataset.globalidcreatetime +SET authority=(SELECT substring(authority from 0 for strpos(authority,doiseparator)) FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' and strpos(authority,doiseparator)>0) where dvobject.dtype='Dataset' ; +-- no shoulder UPDATE dvobject -SET identifierRegistered= true where globalidcreatetime is not null; +SET authority=(SELECT authority +FROM dataset +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' and strpos(authority,doiseparator)=0) where dvobject.dtype='Dataset'; + UPDATE dvobject -SET identifier=(SELECT dataset.identifier +SET globalidcreatetime=(SELECT dataset.globalidcreatetime FROM dataset WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; +UPDATE dvobject +SET identifierRegistered= true where globalidcreatetime is not null; + UPDATE dvobject SET protocol=(SELECT dataset.protocol FROM dataset @@ -47,7 +61,3 @@ ALTER TABLE dataset DROP COLUMN doiseparator; ALTER TABLE dataset DROP COLUMN globalidcreatetime; ALTER TABLE dataset DROP COLUMN identifier; ALTER TABLE dataset DROP COLUMN protocol; - -UPDATE dvObject SET identifier=substring(authority, strpos(authority,'/')+1) || '/' || identifier WHERE -strpos(authority,'/')>0; -UPDATE dvObject SET authority=substring(authority from 0 for strpos(authority,'/')) WHERE strpos(authority,'/')>0; From 59bbbae14a2c94e58ac11cd8a3cfd4cd0692dfa4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 22 May 2018 17:33:36 -0400 Subject: [PATCH 27/44] change query to explicitly check for dtype --- 
.../edu/harvard/iq/dataverse/DataFileServiceBean.java | 2 +- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 2 +- src/main/java/edu/harvard/iq/dataverse/DvObject.java | 4 +++- .../edu/harvard/iq/dataverse/DvObjectServiceBean.java | 8 +++++--- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 79a01428e17..6763a311ee1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -165,7 +165,7 @@ public DataFile find(Object pk) { }*/ public DataFile findByGlobalId(String globalId) { - return (DataFile) dvObjectService.findByGlobalId(globalId, DataFile.class); + return (DataFile) dvObjectService.findByGlobalId(globalId, DataFile.DATAFILE_DTYPE_STRING); } public DataFile findReplacementFile(Long previousFileId){ diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 8d7c152dac5..cdb371cdc8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -194,7 +194,7 @@ public Dataset merge( Dataset ds ) { } public Dataset findByGlobalId(String globalId) { - return (Dataset) dvObjectService.findByGlobalId(globalId, Dataset.class); + return (Dataset) dvObjectService.findByGlobalId(globalId, Dataset.DATASET_DTYPE_STRING); } public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index b8c5c938d7f..ef7ebee2df6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -22,7 +22,7 @@ @NamedQuery(name = "DvObject.ownedObjectsById", 
query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"), @NamedQuery(name = "DvObject.findByGlobalId", - query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol") + query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype") }) @Entity // Inheritance strategy "JOINED" will create 4 db tables - @@ -115,6 +115,8 @@ public String visit(DataFile df) { @Column private String storageIdentifier; + @Column(insertable = false, updatable = false) private String dtype; + /* * Add DOI related fields */ diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index 89f4a9a3f2c..4c1357ce28d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -59,7 +59,7 @@ public List findAll() { return em.createNamedQuery("DvObject.findAll", DvObject.class).getResultList(); } - public T findByGlobalId(String globalId, Class resultClass) { + public DvObject findByGlobalId(String globalId, String typeString) { String protocol = ""; String authority = ""; @@ -76,18 +76,20 @@ public T findByGlobalId(String globalId, Class resultCla DvObject foundDvObject = null; try { Query query; - query = em.createNamedQuery("DvObject.findByGlobalId", resultClass); + query = em.createNamedQuery("DvObject.findByGlobalId"); query.setParameter("identifier", identifier); query.setParameter("protocol", protocol); query.setParameter("authority", authority); + query.setParameter("dtype", typeString); foundDvObject = (DvObject) query.getSingleResult(); } catch (javax.persistence.NoResultException e) { // (set to .info, this can fill the log file with thousands of // these messages during a large harvest run) logger.fine("no dvObject found: " + globalId); // DO nothing, just return null. 
+ return null; } - return resultClass.cast(foundDvObject); + return foundDvObject; } else { logger.info( "Error parsing identifier: " + globalId + ": ':/' not found in string"); From 04b9dda2a4dbe7696d6a672d93323284bf844a97 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 23 May 2018 12:21:55 -0400 Subject: [PATCH 28/44] Remove identifier from Dataset --- src/main/java/edu/harvard/iq/dataverse/Dataset.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 3ee3c5dab7a..ee240029d0b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -84,9 +84,6 @@ public class Dataset extends DvObjectContainer { @Temporal(value = TemporalType.TIMESTAMP) private Date lastExportTime; - - @Column(nullable = false) - private String identifier; @OneToMany(mappedBy = "dataset", orphanRemoval = true, cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) @OrderBy("versionNumber DESC, minorVersionNumber DESC") @@ -622,7 +619,7 @@ public String getRemoteArchiveURL() { // the study: //String icpsrId = identifier; //return this.getOwner().getHarvestingClient().getArchiveUrl() + "/icpsrweb/ICPSR/studies/"+icpsrId+"?q="+icpsrId+"&searchSource=icpsr-landing"; - return "http://doi.org/" + this.getAuthority() + "/" + identifier; + return "http://doi.org/" + this.getAuthority() + "/" + this.getIdentifier(); } else if (HarvestingClient.HARVEST_STYLE_NESSTAR.equals(this.getHarvestedFrom().getHarvestStyle())) { String nServerURL = this.getHarvestedFrom().getArchiveUrl(); // chop any trailing slashes in the server URL - or they will result @@ -637,12 +634,12 @@ public String getRemoteArchiveURL() { String NesstarWebviewPage = nServerURL + "/webview/?mode=documentation&submode=abstract&studydoc=" + nServerURLencoded + "%2Fobj%2FfStudy%2F" - + identifier + + 
this.getIdentifier() + "&top=yes"; return NesstarWebviewPage; } else if (HarvestingClient.HARVEST_STYLE_ROPER.equals(this.getHarvestedFrom().getHarvestStyle())) { - return this.getHarvestedFrom().getArchiveUrl() + "/CFIDE/cf/action/catalog/abstract.cfm?archno=" + identifier; + return this.getHarvestedFrom().getArchiveUrl() + "/CFIDE/cf/action/catalog/abstract.cfm?archno=" + this.getIdentifier(); } else if (HarvestingClient.HARVEST_STYLE_HGL.equals(this.getHarvestedFrom().getHarvestStyle())) { // a bit of a hack, true. // HGL documents, when turned into Dataverse studies/datasets From 9b7fe11c9bb567def2f153a61902eb89af42ecd3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 23 May 2018 15:12:56 -0400 Subject: [PATCH 29/44] integrating/removing duplicate code... --- .../iq/dataverse/DvObjectServiceBean.java | 55 +++++++---------- .../edu/harvard/iq/dataverse/GlobalId.java | 60 ++++++++++--------- 2 files changed, 52 insertions(+), 63 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index 4c1357ce28d..6dcd854c607 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -59,44 +59,31 @@ public List findAll() { return em.createNamedQuery("DvObject.findAll", DvObject.class).getResultList(); } - public DvObject findByGlobalId(String globalId, String typeString) { + public DvObject findByGlobalId(String globalIdString, String typeString) { - String protocol = ""; - String authority = ""; - String identifier = ""; - int index1 = globalId.indexOf(':'); - if (index1 > 0) { // ':' found with one or more characters before it - int index2 = globalId.indexOf('/', index1 + 1); - if (index2 > 0 && (index2 + 1) < globalId.length()) { // '/' found with one or more characters between ':' - // and '/' and there are characters after '/' - protocol = globalId.substring(0, index1); - 
authority = globalId.substring(index1 + 1, index2); - identifier = globalId.substring(index2 + 1); + try { + GlobalId gid = new GlobalId(globalIdString); - DvObject foundDvObject = null; - try { - Query query; - query = em.createNamedQuery("DvObject.findByGlobalId"); - query.setParameter("identifier", identifier); - query.setParameter("protocol", protocol); - query.setParameter("authority", authority); - query.setParameter("dtype", typeString); - foundDvObject = (DvObject) query.getSingleResult(); - } catch (javax.persistence.NoResultException e) { - // (set to .info, this can fill the log file with thousands of - // these messages during a large harvest run) - logger.fine("no dvObject found: " + globalId); - // DO nothing, just return null. - return null; - } - return foundDvObject; - } else { - logger.info( - "Error parsing identifier: " + globalId + ": ':/' not found in string"); + DvObject foundDvObject = null; + try { + Query query; + query = em.createNamedQuery("DvObject.findByGlobalId"); + query.setParameter("identifier", gid.getIdentifier()); + query.setParameter("protocol", gid.getProtocol()); + query.setParameter("authority", gid.getAuthority()); + query.setParameter("dtype", typeString); + foundDvObject = (DvObject) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no dvObject found: " + globalIdString); + // DO nothing, just return null. 
return null; } - } else { - logger.info("Error parsing identifier: " + globalId + ": ':' not found in string"); + return foundDvObject; + + } catch (IllegalArgumentException iae) { + logger.info("Invalid identifier: " + globalIdString); return null; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 8c906834d58..06d18785058 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -12,6 +12,7 @@ import java.util.logging.Logger; import java.net.URL; import javax.ejb.EJB; +import javax.persistence.Query; /** * @@ -25,6 +26,8 @@ public class GlobalId implements java.io.Serializable { public static final String HDL_RESOLVER_URL = "https://hdl.handle.net/"; public static final String DOI_RESOLVER_URL = "https://doi.org/"; + private static final Logger logger = Logger.getLogger(GlobalId.class.getName()); + @EJB SettingsServiceBean settingsService; @@ -96,7 +99,7 @@ public URL toURL() { url = new URL(HDL_RESOLVER_URL + authority + "/" + identifier); } } catch (MalformedURLException ex) { - Logger.getLogger(GlobalId.class.getName()).log(Level.SEVERE, null, ex); + logger.log(Level.SEVERE, null, ex); } return url; } @@ -119,42 +122,41 @@ public URL toURL() { * */ - private boolean parsePersistentId(String identifierString){ + private boolean parsePersistentId(String identifierString) { - if (identifierString == null){ - return false; - } - - int index1 = identifierString.indexOf(':'); - int index2 = identifierString.indexOf('/'); - if (index1==-1) { - return false; - } - - String protocol = identifierString.substring(0, index1); - - if (!"doi".equals(protocol) && !"hdl".equals(protocol)) { + if (identifierString == null) { return false; } - - - if (index2 == -1) { - return false; - } - - this.protocol = protocol; - this.authority = formatIdentifierString(identifierString.substring(index1+1, index2)); - this.identifier = 
formatIdentifierString(identifierString.substring(index2+1)); - - if (this.protocol.equals(DOI_PROTOCOL)) { - if (!this.checkDOIAuthority(this.authority)) { + int index1 = identifierString.indexOf(':'); + if (index1 > 0) { // ':' found with one or more characters before it + int index2 = identifierString.indexOf('/', index1 + 1); + if (index2 > 0 && (index2 + 1) < identifierString.length()) { // '/' found with one or more characters + // between ':' + String protocol = identifierString.substring(0, index1); // and '/' and there are characters after '/' + if (!"doi".equals(protocol) && !"hdl".equals(protocol)) { + return false; + } + if (this.protocol.equals(DOI_PROTOCOL)) { + if (!this.checkDOIAuthority(this.authority)) { + return false; + } + } + // Passed all checks + this.protocol = protocol; + //Strip any whitespace, ; and ' from authority and identifier parts (should finding them cause a failure instead?) + this.authority = formatIdentifierString(identifierString.substring(index1 + 1, index2)); + this.identifier = formatIdentifierString(identifierString.substring(index2 + 1)); + } else { + logger.info("Error parsing identifier: " + identifierString + + ": ':/' not found in string"); return false; } + } else { + logger.info("Error parsing identifier: " + identifierString + ": ':' not found in string"); + return false; } return true; - } - private String formatIdentifierString(String str){ From 402ed5941fca5cee1d97c21d244c6a4c4f88da42 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 24 May 2018 09:13:31 -0400 Subject: [PATCH 30/44] if should use local variables, class vars not yet set --- src/main/java/edu/harvard/iq/dataverse/GlobalId.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 06d18785058..e7500089c06 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -136,8 +136,8 @@ private boolean parsePersistentId(String identifierString) { if (!"doi".equals(protocol) && !"hdl".equals(protocol)) { return false; } - if (this.protocol.equals(DOI_PROTOCOL)) { - if (!this.checkDOIAuthority(this.authority)) { + if (protocol.equals(DOI_PROTOCOL)) { + if (!this.checkDOIAuthority(authority)) { return false; } } From b2b9e55ede53a5f8dca0db883f66596c3cea7279 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 24 May 2018 09:25:50 -0400 Subject: [PATCH 31/44] authority not set before authority check --- src/main/java/edu/harvard/iq/dataverse/GlobalId.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index e7500089c06..31638c2decf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -132,20 +132,20 @@ private boolean parsePersistentId(String identifierString) { int index2 = identifierString.indexOf('/', index1 + 1); if (index2 > 0 && (index2 + 1) < identifierString.length()) { // '/' found with one or more characters // between ':' - String protocol = identifierString.substring(0, index1); // and '/' and there are characters after '/' + protocol = identifierString.substring(0, index1); // and '/' and there are characters after '/' if (!"doi".equals(protocol) && !"hdl".equals(protocol)) { return false; } + //Strip any whitespace, ; and ' from authority (should finding them cause a failure instead?) + authority = formatIdentifierString(identifierString.substring(index1 + 1, index2)); if (protocol.equals(DOI_PROTOCOL)) { if (!this.checkDOIAuthority(authority)) { return false; } } // Passed all checks - this.protocol = protocol; - //Strip any whitespace, ; and ' from authority and identifier parts (should finding them cause a failure instead?) 
- this.authority = formatIdentifierString(identifierString.substring(index1 + 1, index2)); - this.identifier = formatIdentifierString(identifierString.substring(index2 + 1)); + //Strip any whitespace, ; and ' from identifier (should finding them cause a failure instead?) + identifier = formatIdentifierString(identifierString.substring(index2 + 1)); } else { logger.info("Error parsing identifier: " + identifierString + ": ':/' not found in string"); From ed253f7f71af5e249d206afe8587abb61b44c3d1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 24 May 2018 09:26:39 -0400 Subject: [PATCH 32/44] update comment --- src/main/java/edu/harvard/iq/dataverse/GlobalId.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 31638c2decf..3b18c4072fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -110,8 +110,8 @@ public URL toURL() { * * Example 1: doi:10.5072/FK2/BYM3IW * protocol: doi - * authority: 10.5072/FK2 - * identifier: BYM3IW + * authority: 10.5072 + * identifier: FK2/BYM3IW * * Example 2: hdl:1902.1/111012 * protocol: hdl From 2159050c1c3965b757c97b482b9c159b74158e03 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 25 May 2018 10:05:24 -0400 Subject: [PATCH 33/44] #3583 Add updates to settings for DOI Shoulder and Authority --- scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 63bbdaa69c4..0e1e2b08d62 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -61,3 +61,13 @@ ALTER TABLE dataset DROP COLUMN doiseparator; ALTER TABLE dataset DROP COLUMN globalidcreatetime; ALTER TABLE dataset 
DROP COLUMN identifier; ALTER TABLE dataset DROP COLUMN protocol; + +--Add new setting into content for doishoulder +INSERT INTO setting(name, content) +VALUES (':DoiShoulder', (SELECT substring(content, strpos(content,'/')+1) || '/' from setting where name = ':Authority')); + + --strip shoulder from authority setting + UPDATE setting + SET content=(SELECT substring(content from 0 for strpos(content,'/')) + FROM setting + WHERE name=':Authority' and strpos(content,'/')>0) where name=':Authority'; From 83a76a090a56c80f548816f025d36767ff1484ad Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 25 May 2018 10:40:49 -0400 Subject: [PATCH 34/44] remove :DoiSeparator, add :DoiShoulder (misnomer, affects handles) #3583 #898 --- .../source/installation/config.rst | 18 +++++++++--------- scripts/api/setup-all.sh | 4 ++-- scripts/api/setup-optional-harvard.sh | 2 ++ 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 39f6733f51d..3ab3c5443c4 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -124,7 +124,7 @@ Out of the box, Dataverse is configured for DOIs. Here are the configuration opt - :ref:`:DoiProvider <:DoiProvider>` - :ref:`:Protocol <:Protocol>` - :ref:`:Authority <:Authority>` -- :ref:`:DoiSeparator <:DoiSeparator>` +- :ref:`:DoiShoulder <:DoiShoulder>` - :ref:`:IdentifierGenerationStyle <:IdentifierGenerationStyle>` (optional) - :ref:`:DataFilePIDFormat <:DataFilePIDFormat>` (optional) @@ -552,7 +552,7 @@ See also these related database settings below: - :ref:`:DoiProvider` - :ref:`:Protocol` - :ref:`:Authority` -- :ref:`:DoiSeparator` +- :ref:`:DoiShoulder` .. _doi.username: @@ -717,7 +717,7 @@ As of this writing "EZID" and "DataCite" are the only valid options. 
DoiProvider ``curl -X PUT -d EZID http://localhost:8080/api/admin/settings/:DoiProvider`` -This setting relates to the ``:Protocol``, ``:Authority``, ``:DoiSeparator``, and ``:IdentifierGenerationStyle`` database settings below as well as the following JVM options: +This setting relates to the ``:Protocol``, ``:Authority``, ``:DoiShoulder``, and ``:IdentifierGenerationStyle`` database settings below as well as the following JVM options: - :ref:`doi.baseurlstring` - :ref:`doi.username` @@ -741,16 +741,16 @@ Use the authority assigned to you by your DoiProvider or HandleProvider. ``curl -X PUT -d 10.xxxx http://localhost:8080/api/admin/settings/:Authority`` -.. _:DoiSeparator: +.. _:DoiShoulder: -:DoiSeparator -+++++++++++++ +:DoiShoulder +++++++++++++ -It is recommended that you keep this as a slash ("/"). +Out of the box, the DOI shoulder is set to "FK2/" but this is for testing only! When you apply for your DOI namespace, you may have requested a shoulder. The following is only an example and a trailing slash is optional. -``curl -X PUT -d "/" http://localhost:8080/api/admin/settings/:DoiSeparator`` +``curl -X PUT -d "MyShoulder/" http://localhost:8080/api/admin/settings/:DoiShoulder`` -**Note:** The name DoiSeparator is a misnomer. This setting is used by some **handles**-specific code too. It *must* be set to '/' when using handles. +**Note:** The name DoiShoulder is a misnomer. This setting is used by some **handles**-specific code too. It *must* be set to '/' when using handles. .. 
_:IdentifierGenerationStyle: diff --git a/scripts/api/setup-all.sh b/scripts/api/setup-all.sh index 56cc24b4af8..8dd81c6bdb5 100755 --- a/scripts/api/setup-all.sh +++ b/scripts/api/setup-all.sh @@ -49,9 +49,9 @@ curl -X PUT -d yes "$SERVER/admin/settings/:AllowSignUp" curl -X PUT -d /dataverseuser.xhtml?editMode=CREATE "$SERVER/admin/settings/:SignUpUrl" curl -X PUT -d doi "$SERVER/admin/settings/:Protocol" -curl -X PUT -d 10.5072/FK2 "$SERVER/admin/settings/:Authority" +curl -X PUT -d 10.5072 "$SERVER/admin/settings/:Authority" +curl -X PUT -d "FK2/" "$SERVER/admin/settings/:DoiShoulder" curl -X PUT -d EZID "$SERVER/admin/settings/:DoiProvider" -curl -X PUT -d / "$SERVER/admin/settings/:DoiSeparator" curl -X PUT -d burrito $SERVER/admin/settings/BuiltinUsers.KEY curl -X PUT -d localhost-only $SERVER/admin/settings/:BlockedApiPolicy echo diff --git a/scripts/api/setup-optional-harvard.sh b/scripts/api/setup-optional-harvard.sh index 2478ae139f9..83fedff1602 100755 --- a/scripts/api/setup-optional-harvard.sh +++ b/scripts/api/setup-optional-harvard.sh @@ -2,6 +2,8 @@ SERVER=http://localhost:8080/api echo "Setting up Harvard-specific settings" +curl -X PUT -d 10.7910 "$SERVER/admin/settings/:Authority" +curl -X PUT -d "DVN/" "$SERVER/admin/settings/:DoiShoulder" echo "- Application Status header" curl -s -X PUT -d 'Upgrade in progress...' $SERVER/admin/settings/:StatusMessageHeader echo "- Application Status message" From cf7a720181e1cfd66e46eb7be9f47bace07d3646 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 25 May 2018 11:44:18 -0400 Subject: [PATCH 35/44] Update documentation doiShoulder is a misnomer, it is called in the generation of new identifiers, regardless of the protocol. However, it is no longer required to be '/' as the misnamed doiSeparator was. I also updated the doc to note that the shoulder is applied to identifiers, including INDEPENDENT file ids. 
--- doc/sphinx-guides/source/installation/config.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 3ab3c5443c4..5439ba5ca91 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -750,16 +750,16 @@ Out of the box, the DOI shoulder is set to "FK2/" but this is for testing only! ``curl -X PUT -d "MyShoulder/" http://localhost:8080/api/admin/settings/:DoiShoulder`` -**Note:** The name DoiShoulder is a misnomer. This setting is used by some **handles**-specific code too. It *must* be set to '/' when using handles. +**Note:** The name DoiShoulder is a misnomer. This setting is applied in the generation of new **handles** as well. .. _:IdentifierGenerationStyle: :IdentifierGenerationStyle ++++++++++++++++++++++++++ -By default, Dataverse generates a random 6 character string to use as the identifier +By default, Dataverse generates a random 6 character string, pre-pended by the doiShoulder if set, to use as the identifier for a Dataset. Set this to ``sequentialNumber`` to use sequential numeric values -instead. (the assumed default setting is ``randomString``). +instead (again pre-pended by the doiShoulder if set). (the assumed default setting is ``randomString``). In addition to this setting, a database sequence must be created in the database. We provide the script below (downloadable :download:`here `). You may need to make some changes to suit your system setup, see the comments for more information: @@ -785,7 +785,7 @@ This setting controls the way that the "identifier" component of a file's persis By default the identifier for a file is dependent on its parent dataset. 
For example, if the identifier of a dataset is "TJCLKP", the identifier for a file within that dataset will consist of the parent dataset's identifier followed by a slash ("/"), followed by a random 6 character string, yielding "TJCLKP/MLGWJO". Identifiers in this format are what you should expect if you leave ``:DataFilePIDFormat`` undefined or set it to ``DEPENDENT`` and have not changed the ``:IdentifierGenerationStyle`` setting from its default. -Alternatively, the indentifier for File PIDs can be configured to be independent of Dataset PIDs using the setting "``INDEPENDENT``". In this case, file PIDs will not contain the PIDs of their parent datasets, and their PIDs will be generated the exact same way that datasets' PIDs are, based on the ``:IdentifierGenerationStyle`` setting described above (random 6 character strings or sequential numbers). +Alternatively, the identifier for File PIDs can be configured to be independent of Dataset PIDs using the setting "``INDEPENDENT``". In this case, file PIDs will not contain the PIDs of their parent datasets, and their PIDs will be generated the exact same way that datasets' PIDs are, based on the ``:IdentifierGenerationStyle`` setting described above (random 6 character strings or sequential numbers, pre-pended by any shoulder). The chart below shows examples from each possible combination of parameters from the two settings. ``:IdentifierGenerationStyle`` can be either ``randomString`` (the default) or ``sequentialNumber`` and ``:DataFilePIDFormat`` can be either ``DEPENDENT`` (the default) or ``INDEPENDENT``. In the examples below the "identifier" for the dataset is "TJCLKP" for "randomString" and "100001" for "sequentialNumber". From baa90527010ee04e7fa9caa81cbef4bfeb3c3479 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 25 May 2018 12:28:00 -0400 Subject: [PATCH 36/44] changing name of DoiShoulder to Shoulder since it applies to generated Handles as well. 
--- .../source/installation/config.rst | 18 ++++++++---------- scripts/api/setup-all.sh | 2 +- scripts/api/setup-optional-harvard.sh | 2 +- .../upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 4 ++-- .../iq/dataverse/DataFileServiceBean.java | 2 +- .../iq/dataverse/DatasetServiceBean.java | 2 +- .../settings/SettingsServiceBean.java | 4 ++-- 7 files changed, 16 insertions(+), 18 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5439ba5ca91..33579b3712c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -124,7 +124,7 @@ Out of the box, Dataverse is configured for DOIs. Here are the configuration opt - :ref:`:DoiProvider <:DoiProvider>` - :ref:`:Protocol <:Protocol>` - :ref:`:Authority <:Authority>` -- :ref:`:DoiShoulder <:DoiShoulder>` +- :ref:`:Shoulder <:Shoulder>` - :ref:`:IdentifierGenerationStyle <:IdentifierGenerationStyle>` (optional) - :ref:`:DataFilePIDFormat <:DataFilePIDFormat>` (optional) @@ -552,7 +552,7 @@ See also these related database settings below: - :ref:`:DoiProvider` - :ref:`:Protocol` - :ref:`:Authority` -- :ref:`:DoiShoulder` +- :ref:`:Shoulder` .. _doi.username: @@ -717,7 +717,7 @@ As of this writing "EZID" and "DataCite" are the only valid options. DoiProvider ``curl -X PUT -d EZID http://localhost:8080/api/admin/settings/:DoiProvider`` -This setting relates to the ``:Protocol``, ``:Authority``, ``:DoiShoulder``, and ``:IdentifierGenerationStyle`` database settings below as well as the following JVM options: +This setting relates to the ``:Protocol``, ``:Authority``, ``:Shoulder``, and ``:IdentifierGenerationStyle`` database settings below as well as the following JVM options: - :ref:`doi.baseurlstring` - :ref:`doi.username` @@ -741,25 +741,23 @@ Use the authority assigned to you by your DoiProvider or HandleProvider. ``curl -X PUT -d 10.xxxx http://localhost:8080/api/admin/settings/:Authority`` -.. 
_:DoiShoulder: +.. _:Shoulder: -:DoiShoulder +:Shoulder ++++++++++++ Out of the box, the DOI shoulder is set to "FK2/" but this is for testing only! When you apply for your DOI namespace, you may have requested a shoulder. The following is only an example and a trailing slash is optional. -``curl -X PUT -d "MyShoulder/" http://localhost:8080/api/admin/settings/:DoiShoulder`` - -**Note:** The name DoiShoulder is a misnomer. This setting is applied in the generation of new **handles** as well. +``curl -X PUT -d "MyShoulder/" http://localhost:8080/api/admin/settings/:Shoulder`` .. _:IdentifierGenerationStyle: :IdentifierGenerationStyle ++++++++++++++++++++++++++ -By default, Dataverse generates a random 6 character string, pre-pended by the doiShoulder if set, to use as the identifier +By default, Dataverse generates a random 6 character string, pre-pended by the Shoulder if set, to use as the identifier for a Dataset. Set this to ``sequentialNumber`` to use sequential numeric values -instead (again pre-pended by the doiShoulder if set). (the assumed default setting is ``randomString``). +instead (again pre-pended by the Shoulder if set). (the assumed default setting is ``randomString``). In addition to this setting, a database sequence must be created in the database. We provide the script below (downloadable :download:`here `). 
You may need to make some changes to suit your system setup, see the comments for more information: diff --git a/scripts/api/setup-all.sh b/scripts/api/setup-all.sh index 8dd81c6bdb5..03cb2252f2c 100755 --- a/scripts/api/setup-all.sh +++ b/scripts/api/setup-all.sh @@ -50,7 +50,7 @@ curl -X PUT -d /dataverseuser.xhtml?editMode=CREATE "$SERVER/admin/settings/:Sig curl -X PUT -d doi "$SERVER/admin/settings/:Protocol" curl -X PUT -d 10.5072 "$SERVER/admin/settings/:Authority" -curl -X PUT -d "FK2/" "$SERVER/admin/settings/:DoiShoulder" +curl -X PUT -d "FK2/" "$SERVER/admin/settings/:Shoulder" curl -X PUT -d EZID "$SERVER/admin/settings/:DoiProvider" curl -X PUT -d burrito $SERVER/admin/settings/BuiltinUsers.KEY curl -X PUT -d localhost-only $SERVER/admin/settings/:BlockedApiPolicy diff --git a/scripts/api/setup-optional-harvard.sh b/scripts/api/setup-optional-harvard.sh index 83fedff1602..72dd5cecd7f 100755 --- a/scripts/api/setup-optional-harvard.sh +++ b/scripts/api/setup-optional-harvard.sh @@ -3,7 +3,7 @@ SERVER=http://localhost:8080/api echo "Setting up Harvard-specific settings" curl -X PUT -d 10.7910 "$SERVER/admin/settings/:Authority" -curl -X PUT -d "DVN/" "$SERVER/admin/settings/:DoiShoulder" +curl -X PUT -d "DVN/" "$SERVER/admin/settings/:Shoulder" echo "- Application Status header" curl -s -X PUT -d 'Upgrade in progress...' 
$SERVER/admin/settings/:StatusMessageHeader echo "- Application Status message" diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 0e1e2b08d62..16962d5842e 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -62,9 +62,9 @@ ALTER TABLE dataset DROP COLUMN globalidcreatetime; ALTER TABLE dataset DROP COLUMN identifier; ALTER TABLE dataset DROP COLUMN protocol; ---Add new setting into content for doishoulder +--Add new setting into content for shoulder INSERT INTO setting(name, content) -VALUES (':DoiShoulder', (SELECT substring(content, strpos(content,'/')+1) || '/' from setting where name = ':Authority')); +VALUES (':Shoulder', (SELECT substring(content, strpos(content,'/')+1) || '/' from setting where name = ':Authority')); --strip shoulder from authority setting UPDATE setting diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 6763a311ee1..ea55ed918c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1525,7 +1525,7 @@ public String generateDataFileIdentifier(DataFile datafile, IdServiceBean idServ prepend = datafile.getOwner().getIdentifier() + "/"; } else { //If there's a shoulder prepend independent identifiers with it - prepend = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); + prepend = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); } switch (doiIdentifierType) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index cdb371cdc8b..8c7e2a6ab03 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -199,7 +199,7 @@ public Dataset findByGlobalId(String globalId) { public String generateDatasetIdentifier(Dataset dataset, IdServiceBean idServiceBean) { String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.DoiShoulder, ""); + String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); switch (identifierType) { case "randomString": diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 4b8a0ba872a..f7ccce74f7b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -185,8 +185,8 @@ public enum Key { Authority, /** DoiProvider for global id */ DoiProvider, - /** DoiShoulder for global id - used to create a common prefix on identifiers */ - DoiShoulder, + /** Shoulder for global id - used to create a common prefix on identifiers */ + Shoulder, /* Removed for now - tried to add here but DOI Service Bean didn't like it at start-up DoiUsername, DoiPassword, From d497dee11a8a563feede6ff3e3eb8c4dc7383f62 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 31 May 2018 08:04:57 -0400 Subject: [PATCH 37/44] don't set :Authority and :Shoulder in Harvard script #3583 #898 --- scripts/api/setup-optional-harvard.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/api/setup-optional-harvard.sh b/scripts/api/setup-optional-harvard.sh index 72dd5cecd7f..b4dd6b33b53 100755 --- a/scripts/api/setup-optional-harvard.sh +++ b/scripts/api/setup-optional-harvard.sh @@ -2,8 +2,9 @@ SERVER=http://localhost:8080/api echo "Setting up Harvard-specific settings" -curl -X PUT -d 10.7910 
"$SERVER/admin/settings/:Authority" -curl -X PUT -d "DVN/" "$SERVER/admin/settings/:Shoulder" +# :Authority and :Shoulder are commented out so this script can be used on test servers +#curl -X PUT -d 10.7910 "$SERVER/admin/settings/:Authority" +#curl -X PUT -d "DVN/" "$SERVER/admin/settings/:Shoulder" echo "- Application Status header" curl -s -X PUT -d 'Upgrade in progress...' $SERVER/admin/settings/:StatusMessageHeader echo "- Application Status message" From 57896b70bc4dc4d6a65d210046346c5756e36c2a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 31 May 2018 10:25:14 -0400 Subject: [PATCH 38/44] Redo migration script to migrate doi to dvobject then re-parse it there --- .../upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 16962d5842e..3d46ba2623d 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -16,43 +16,38 @@ ALTER TABLE dvobject ADD COLUMN ADD COLUMN identifier character varying(255), ADD COLUMN protocol character varying(255); ---add authority shoulder to identifier +--Migrate data from Dataset to DvObject UPDATE dvobject -SET identifier=(SELECT substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier +SET authority=(SELECT dataset.authority FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' and strpos(authority,doiseparator)>0) where dvobject.dtype='Dataset'; +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; ---just copy if there's no shoulder UPDATE dvobject -SET identifier=(SELECT identifier +SET doiseparator=(SELECT dataset.doiseparator FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' and strpos(authority,doiseparator)=0) where dvobject.dtype='Dataset'; 
+WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' ) where dvobject.dtype='Dataset'; ---strip shoulder from authority UPDATE dvobject -SET authority=(SELECT substring(authority from 0 for strpos(authority,doiseparator)) +SET globalidcreatetime=(SELECT dataset.globalidcreatetime FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' and strpos(authority,doiseparator)>0) where dvobject.dtype='Dataset' ; +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; --- no shoulder UPDATE dvobject -SET authority=(SELECT authority -FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' and strpos(authority,doiseparator)=0) where dvobject.dtype='Dataset'; - +SET identifierRegistered= true where globalidcreatetime is not null; UPDATE dvobject -SET globalidcreatetime=(SELECT dataset.globalidcreatetime +SET identifier=(SELECT dataset.identifier FROM dataset WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; -UPDATE dvobject -SET identifierRegistered= true where globalidcreatetime is not null; - UPDATE dvobject SET protocol=(SELECT dataset.protocol FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' ) where dvobject.dtype='Dataset'; + +--Once in DvObject re-parse identifier and authority +UPDATE dvobject SET identifier=substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier WHERE strpos(authority,doiseparator)>0; +UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,doiseparator)) WHERE strpos(authority,doiseparator)>0; ALTER TABLE dataset ALTER identifier DROP NOT NULL; From 3198d5208e3c87a8f22cda81399f95f9858dd2e0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 31 May 2018 11:22:49 -0400 Subject: [PATCH 39/44] doiseparator isn't used in dvobject --- 
scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 3d46ba2623d..4a6bcbec95c 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -22,11 +22,6 @@ SET authority=(SELECT dataset.authority FROM dataset WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; -UPDATE dvobject -SET doiseparator=(SELECT dataset.doiseparator -FROM dataset -WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' ) where dvobject.dtype='Dataset'; - UPDATE dvobject SET globalidcreatetime=(SELECT dataset.globalidcreatetime FROM dataset @@ -46,8 +41,8 @@ FROM dataset WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' ) where dvobject.dtype='Dataset'; --Once in DvObject re-parse identifier and authority -UPDATE dvobject SET identifier=substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier WHERE strpos(authority,doiseparator)>0; -UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,doiseparator)) WHERE strpos(authority,doiseparator)>0; +UPDATE dvobject SET identifier=substring(authority, strpos(authority,'/')+1) || '/' || identifier WHERE strpos(authority,'/')>0; +UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,'/')) WHERE strpos(authority,'/')>0; ALTER TABLE dataset ALTER identifier DROP NOT NULL; From 1872f7220d7ce0930d937c94f4c659e2b78352f2 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 31 May 2018 11:37:52 -0400 Subject: [PATCH 40/44] #3583 fix for generic separator --- scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql 
b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 4a6bcbec95c..0b31e0e8277 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -41,8 +41,8 @@ FROM dataset WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' ) where dvobject.dtype='Dataset'; --Once in DvObject re-parse identifier and authority -UPDATE dvobject SET identifier=substring(authority, strpos(authority,'/')+1) || '/' || identifier WHERE strpos(authority,'/')>0; -UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,'/')) WHERE strpos(authority,'/')>0; +UPDATE dvobject SET identifier=substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier WHERE strpos(authority,doiseparator)>0; +UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,doiseparator)) WHERE strpos(authority,doiseparator)>0; ALTER TABLE dataset ALTER identifier DROP NOT NULL; From bb4d64de4a37b815d422a5976f5cffda560457f5 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 31 May 2018 13:55:48 -0400 Subject: [PATCH 41/44] #3583 Add and remove doi separator from dvobject - for migration only --- scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 0b31e0e8277..ea9675eea0a 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -12,6 +12,7 @@ ALTER TABLE datafile ADD COLUMN prov_entityname text; ALTER TABLE dvobject ADD COLUMN authority character varying(255), ADD COLUMN globalidcreatetime timestamp without time zone, + ADD COLUMN doiseparator character varying(255), ADD COLUMN identifierRegistered boolean, ADD COLUMN identifier character varying(255), ADD COLUMN protocol character varying(255); @@ -52,6 +53,8 @@ ALTER 
TABLE dataset DROP COLUMN globalidcreatetime; ALTER TABLE dataset DROP COLUMN identifier; ALTER TABLE dataset DROP COLUMN protocol; +ALTER TABLE dvobject DROP COLUMN doiseparator; + --Add new setting into content for shoulder INSERT INTO setting(name, content) VALUES (':Shoulder', (SELECT substring(content, strpos(content,'/')+1) || '/' from setting where name = ':Authority')); From db5ea2dde6c053d6fc923c5f9194e8bad5ef0e1a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 31 May 2018 14:35:49 -0400 Subject: [PATCH 42/44] Update upgrade_v4.8.6_to_v4.9.0.sql --- scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index ea9675eea0a..141b25eeb16 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -28,6 +28,11 @@ SET globalidcreatetime=(SELECT dataset.globalidcreatetime FROM dataset WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; +UPDATE dvobject +SET doiseparator=(SELECT dataset.doiseparator +FROM dataset +WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset') where dvobject.dtype='Dataset'; + UPDATE dvobject SET identifierRegistered= true where globalidcreatetime is not null; From 715d4d8a3dbb0b6b8b815d6018a587a020036268 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 31 May 2018 15:20:51 -0400 Subject: [PATCH 43/44] check separator for null --- scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 141b25eeb16..8067d332909 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -47,8 +47,8 
@@ FROM dataset WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' ) where dvobject.dtype='Dataset'; --Once in DvObject re-parse identifier and authority -UPDATE dvobject SET identifier=substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier WHERE strpos(authority,doiseparator)>0; -UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,doiseparator)) WHERE strpos(authority,doiseparator)>0; +UPDATE dvobject SET identifier=substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier WHERE strpos(authority,doiseparator)>0 and doiseparator != '' and doiseparator is not null; +UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,doiseparator)) WHERE strpos(authority,doiseparator)>0 and doiseparator != '' and doiseparator is not null; ALTER TABLE dataset ALTER identifier DROP NOT NULL; From f646a8237d3417e05a5a71338a2f28e63a59d4f2 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 31 May 2018 16:51:42 -0400 Subject: [PATCH 44/44] Update upgrade_v4.8.6_to_v4.9.0.sql --- scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql index 8067d332909..d19a7f4e9c9 100644 --- a/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql +++ b/scripts/database/upgrades/upgrade_v4.8.6_to_v4.9.0.sql @@ -47,8 +47,8 @@ FROM dataset WHERE dataset.id=dvobject.id AND dvobject.dtype='Dataset' ) where dvobject.dtype='Dataset'; --Once in DvObject re-parse identifier and authority -UPDATE dvobject SET identifier=substring(authority, strpos(authority,doiseparator)+1) || doiseparator || identifier WHERE strpos(authority,doiseparator)>0 and doiseparator != '' and doiseparator is not null; -UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,doiseparator)) WHERE 
strpos(authority,doiseparator)>0 and doiseparator != '' and doiseparator is not null; +UPDATE dvobject SET identifier=substring(authority, strpos(authority,'/')+1) || '/' || identifier WHERE strpos(authority,'/')>0 ; +UPDATE dvobject SET authority=substring(authority from 0 for strpos(authority,'/')) WHERE strpos(authority,'/')>0; ALTER TABLE dataset ALTER identifier DROP NOT NULL;