From c5e8e2dc878838d848daf581a7b3ca7fec9e9fee Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 14 Sep 2020 10:30:11 -0400 Subject: [PATCH 001/161] Update develop with globus phase1 --- .../edu/harvard/iq/dataverse/DataFile.java | 4 + .../iq/dataverse/DataFileServiceBean.java | 12 + .../edu/harvard/iq/dataverse/DatasetLock.java | 3 + .../edu/harvard/iq/dataverse/DatasetPage.java | 51 +- .../iq/dataverse/EditDatafilesPage.java | 40 +- .../iq/dataverse/FileDownloadHelper.java | 62 +- .../iq/dataverse/PermissionServiceBean.java | 7 + .../harvard/iq/dataverse/SettingsWrapper.java | 2 + .../harvard/iq/dataverse/api/GlobusApi.java | 346 +++++++ .../iq/dataverse/dataaccess/FileAccessIO.java | 8 +- .../dataverse/dataaccess/InputStreamIO.java | 7 + .../iq/dataverse/dataaccess/S3AccessIO.java | 49 +- .../iq/dataverse/dataaccess/StorageIO.java | 3 + .../dataverse/dataaccess/SwiftAccessIO.java | 6 + .../iq/dataverse/globus/AccessList.java | 33 + .../iq/dataverse/globus/AccessToken.java | 71 ++ .../harvard/iq/dataverse/globus/FileG.java | 67 ++ .../iq/dataverse/globus/FilesList.java | 60 ++ .../dataverse/globus/GlobusServiceBean.java | 880 ++++++++++++++++++ .../iq/dataverse/globus/Identities.java | 16 + .../harvard/iq/dataverse/globus/Identity.java | 67 ++ .../harvard/iq/dataverse/globus/MkDir.java | 22 + .../iq/dataverse/globus/MkDirResponse.java | 50 + .../iq/dataverse/globus/Permissions.java | 58 ++ .../dataverse/globus/PermissionsResponse.java | 58 ++ .../dataverse/globus/SuccessfulTransfer.java | 35 + .../edu/harvard/iq/dataverse/globus/Task.java | 69 ++ .../harvard/iq/dataverse/globus/Tasklist.java | 17 + .../iq/dataverse/globus/Transferlist.java | 18 + .../harvard/iq/dataverse/globus/UserInfo.java | 68 ++ .../settings/SettingsServiceBean.java | 15 +- .../harvard/iq/dataverse/util/FileUtil.java | 13 +- .../iq/dataverse/util/SystemConfig.java | 24 +- src/main/java/propertyFiles/Bundle.properties | 7 + src/main/webapp/editFilesFragment.xhtml | 62 +- 
.../file-download-button-fragment.xhtml | 24 +- src/main/webapp/globus.xhtml | 30 + 37 files changed, 2345 insertions(+), 19 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/FileG.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Identities.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Identity.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Permissions.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Task.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java create mode 100644 src/main/webapp/globus.xhtml diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 560048db9ca..98b7b624d8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -671,6 +671,10 @@ public boolean isFilePackage() { return DataFileServiceBean.MIME_TYPE_PACKAGE_FILE.equalsIgnoreCase(contentType); } + public boolean 
isFileGlobus() { + return DataFileServiceBean.MIME_TYPE_GLOBUS_FILE.equalsIgnoreCase(contentType); + } + public void setIngestStatus(char ingestStatus) { this.ingestStatus = ingestStatus; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 65d26d2eb63..4d04ee1889d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -137,6 +137,8 @@ public class DataFileServiceBean implements java.io.Serializable { * the page URL above. */ public static final String MIME_TYPE_PACKAGE_FILE = "application/vnd.dataverse.file-package"; + + public static final String MIME_TYPE_GLOBUS_FILE = "application/vnd.dataverse.file-globus"; public DataFile find(Object pk) { return em.find(DataFile.class, pk); @@ -1355,6 +1357,16 @@ public boolean isFileClassPackage (DataFile file) { return MIME_TYPE_PACKAGE_FILE.equalsIgnoreCase(contentType); } + + public boolean isFileClassGlobus (DataFile file) { + if (file == null) { + return false; + } + + String contentType = file.getContentType(); + + return MIME_TYPE_GLOBUS_FILE.equalsIgnoreCase(contentType); + } public void populateFileSearchCard(SolrSearchResult solrSearchResult) { solrSearchResult.setEntity(this.findCheapAndEasy(solrSearchResult.getEntityId())); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java index 93f4aca13d1..82997deef8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java @@ -71,6 +71,9 @@ public enum Reason { /** DCM (rsync) upload in progress */ DcmUpload, + + /** Globus upload in progress */ + GlobusUpload, /** Tasks handled by FinalizeDatasetPublicationCommand: Registering PIDs for DS and DFs and/or file validation */ diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 458fcf56ab0..d1cfb184462 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1,5 +1,11 @@ package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.globus.AccessToken; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.UserInfo; + + import edu.harvard.iq.dataverse.provenance.ProvPopupFragmentBean; import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; @@ -55,10 +61,9 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; + +import java.io.*; +import java.net.MalformedURLException; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -236,6 +241,8 @@ public enum DisplayMode { @Inject MakeDataCountLoggingServiceBean mdcLogService; @Inject DataverseHeaderFragment dataverseHeaderFragment; + @Inject + protected GlobusServiceBean globusService; private Dataset dataset = new Dataset(); @@ -2114,6 +2121,10 @@ private void displayLockInfo(Dataset dataset) { BundleUtil.getStringFromBundle("file.rsyncUpload.inProgressMessage.details")); lockedDueToDcmUpload = true; } + if (dataset.isLockedFor(DatasetLock.Reason.GlobusUpload)) { + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.rsyncUpload.inProgressMessage.summary"), + BundleUtil.getStringFromBundle("file.rsyncUpload.inProgressMessage.details")); + } //This is a hack to remove dataset locks for File PID registration if //the dataset is released //in testing we had cases where datasets with 1000 files were remaining locked after being published successfully @@ -2657,10 
+2668,22 @@ private String releaseDataset(boolean minor) { // has been published. If a publishing workflow is configured, this may have sent the // dataset into a workflow limbo, potentially waiting for a third party system to complete // the process. So it may be premature to show the "success" message at this point. - + + boolean globus = checkForGlobus(); if ( result.isCompleted() ) { - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.message.publishSuccess")); + if (globus) { + if (!globusService.giveGlobusPublicPermissions(dataset.getId().toString())) { + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.publishGlobusFailure.details")); + } else { + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.message.publishSuccess")); + } + } else { + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.message.publishSuccess")); + } } else { + if (globus) { + globusService.giveGlobusPublicPermissions(dataset.getId().toString()); + } JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.locked.message"), BundleUtil.getStringFromBundle("dataset.locked.message.details")); } @@ -2673,6 +2696,12 @@ private String releaseDataset(boolean minor) { JsfHelper.addErrorMessage(ex.getLocalizedMessage()); } logger.severe(ex.getMessage()); + } catch (UnsupportedEncodingException ex) { + JsfHelper.addErrorMessage(ex.getLocalizedMessage()); + logger.severe(ex.getMessage()); + } catch (MalformedURLException ex) { + JsfHelper.addErrorMessage(ex.getLocalizedMessage()); + logger.severe(ex.getMessage()); } } else { @@ -2681,6 +2710,16 @@ private String releaseDataset(boolean minor) { return returnToDraftVersion(); } + private boolean checkForGlobus() { + List fml = dataset.getLatestVersion().getFileMetadatas(); + for (FileMetadata fm : fml) { + if (fm.getDataFile().isFileGlobus()) { + return true; + } + } + return false; + } + @Deprecated public String registerDataset() { try { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 3138dcce2fe..b6c4cc744b2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.provenance.ProvPopupFragmentBean; import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; @@ -36,6 +38,8 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; +import java.net.MalformedURLException; +import java.text.ParseException; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -55,6 +59,7 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; +import org.primefaces.PrimeFaces; import org.primefaces.event.FileUploadEvent; import org.primefaces.model.file.UploadedFile; import javax.json.Json; @@ -73,9 +78,9 @@ import javax.faces.event.FacesEvent; import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServletResponse; +import javax.servlet.http.HttpServletRequest; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.mutable.MutableBoolean; -import org.primefaces.PrimeFaces; /** * @@ -120,6 +125,10 @@ public enum FileEditMode { DataverseLinkingServiceBean dvLinkingService; @EJB IndexServiceBean indexService; + @EJB + GlobusServiceBean globusServiceBean; + @EJB + protected SettingsServiceBean settingsSvc; @Inject DataverseRequestServiceBean dvRequestService; @Inject PermissionsWrapper permissionsWrapper; @@ -1425,7 +1434,6 @@ public boolean showFileUploadFragment(){ return mode == FileEditMode.UPLOAD || mode == FileEditMode.CREATE || mode == 
FileEditMode.SINGLE_REPLACE; } - public boolean showFileUploadComponent(){ if (mode == FileEditMode.UPLOAD || mode == FileEditMode.CREATE) { return true; @@ -3135,5 +3143,31 @@ private void populateFileMetadatas() { } } } - } + } + + public String getClientId() { + logger.info(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusClientId)); + return "'" + settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusClientId) + "'"; + } + + public void startTaskList() throws MalformedURLException { + + AuthenticatedUser user = (AuthenticatedUser) session.getUser(); + globusServiceBean.globusFinishTransfer(dataset, user); + HttpServletRequest origRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); + + String serverName = origRequest.getServerName(); + + String httpString = "window.location.replace('" + "https://" + serverName + "/dataset.xhtml?persistentId=" + dataset.getGlobalId(); + Dataset ds = datasetService.find(dataset.getId()); + if (ds.getLatestVersion().isWorkingCopy()) { + httpString = httpString + "&version=DRAFT" + "'" + ")"; + } + else { + httpString = httpString + "'" +")"; + } + + logger.info(httpString); + PrimeFaces.current().executeScript(httpString); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java index a6be412990b..9e9594d9044 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java @@ -10,6 +10,9 @@ import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX; import static 
edu.harvard.iq.dataverse.util.JsfHelper.JH; import java.util.ArrayList; import java.util.HashMap; @@ -28,6 +31,12 @@ import org.primefaces.PrimeFaces; //import org.primefaces.context.RequestContext; +import javax.persistence.EntityManager; +import javax.persistence.PersistenceContext; + +import edu.harvard.iq.dataverse.util.SystemConfig; +import org.primefaces.PrimeFaces; + /** * * @author skraffmi @@ -39,6 +48,7 @@ public class FileDownloadHelper implements java.io.Serializable { private static final Logger logger = Logger.getLogger(FileDownloadHelper.class.getCanonicalName()); + @Inject DataverseSession session; @@ -56,7 +66,14 @@ public class FileDownloadHelper implements java.io.Serializable { @EJB DataFileServiceBean datafileService; - + + @EJB + protected SettingsServiceBean settingsSvc; + + @EJB + protected DatasetServiceBean datasetSvc; + + UIInput nameField; public UIInput getNameField() { @@ -553,5 +570,48 @@ public DataverseSession getSession() { public void setSession(DataverseSession session) { this.session = session; } + + public void goGlobusDownload(FileMetadata fileMetadata) { + + String datasetId = fileMetadata.getDatasetVersion().getDataset().getId().toString(); //fileMetadata.datasetVersion.dataset.id + + String directory = getDirectory(datasetId); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + + if ( fileMetadata.getDirectoryLabel() != null && !fileMetadata.getDirectoryLabel().equals("")) { + directory = directory + "/" + fileMetadata.getDirectoryLabel() + "/"; + + } + + logger.info(directory); + + String httpString = "window.open('" + "https://app.globus.org/file-manager?origin_id=" + globusEndpoint + "&origin_path=" + directory + "'" +",'_blank')"; + PrimeFaces.current().executeScript(httpString); + } + + String getDirectory(String datasetId) { + Dataset dataset = null; + String directory = null; + try { + dataset = datasetSvc.find(Long.parseLong(datasetId)); + if (dataset == null) { + 
logger.severe("Dataset not found " + datasetId); + return null; + } + String storeId = dataset.getStorageIdentifier(); + storeId.substring(storeId.indexOf("//") + 1); + directory = storeId.substring(storeId.indexOf("//") + 1); + logger.info(storeId); + logger.info(directory); + logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); + return directory; + + } catch (NumberFormatException nfe) { + logger.severe(nfe.getMessage()); + + return null; + } + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index bef27ec49b6..74346b0a567 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -722,6 +722,10 @@ public void checkEditDatasetLock(Dataset dataset, DataverseRequest dataverseRequ if (dataset.isLockedFor(DatasetLock.Reason.DcmUpload)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); } + // TODO: Do we need to check for "GlobusUpload"? Should the message be more specific? 
+ if (dataset.isLockedFor(DatasetLock.Reason.GlobusUpload)) { + throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); + } if (dataset.isLockedFor(DatasetLock.Reason.EditInProgress)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); } @@ -753,6 +757,9 @@ public void checkPublishDatasetLock(Dataset dataset, DataverseRequest dataverseR if (dataset.isLockedFor(DatasetLock.Reason.DcmUpload)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.publishNotAllowed"), command); } + if (dataset.isLockedFor(DatasetLock.Reason.GlobusUpload)) { + throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.downloadNotAllowed"), command); + } if (dataset.isLockedFor(DatasetLock.Reason.EditInProgress)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.publishNotAllowed"), command); } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 556c2294bda..bf03e4b51b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -157,6 +157,8 @@ public boolean isRsyncUpload() { public boolean isRsyncDownload() { return systemConfig.isRsyncDownload(); } + + public boolean isGlobusUpload() { return systemConfig.isGlobusUpload(); } public boolean isRsyncOnly() { return systemConfig.isRsyncOnly(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java new file mode 100644 index 00000000000..ff5c3c6eb51 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -0,0 +1,346 @@ +package edu.harvard.iq.dataverse.api; + +import com.amazonaws.services.s3.model.S3ObjectSummary; +import 
edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.DataverseRequestServiceBean; +import edu.harvard.iq.dataverse.EjbDataverseEngine; +import edu.harvard.iq.dataverse.PermissionServiceBean; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.*; + +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.FileUtil; + + + +import javax.ejb.EJB; +import javax.ejb.EJBException; +import javax.ejb.Stateless; +import javax.inject.Inject; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.*; +import javax.ws.rs.core.Response; +import java.io.File; +import java.sql.Timestamp; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; + +@Stateless +@Path("globus") +public class GlobusApi extends AbstractApiBean { + private static final Logger logger = Logger.getLogger(Access.class.getCanonicalName()); + + @EJB + DatasetServiceBean datasetService; + + @EJB + GlobusServiceBean globusServiceBean; + + @EJB + EjbDataverseEngine commandEngine; + + @EJB + PermissionServiceBean permissionService; + + @Inject + DataverseRequestServiceBean dvRequestService; + + + @POST + @Path("{datasetId}") + public Response globus(@PathParam("datasetId") String datasetId ) { + + logger.info("Async:======Start Async Tasklist == dataset id :"+ 
datasetId ); + Dataset dataset = null; + try { + dataset = findDatasetOrDie(datasetId); + + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + User apiTokenUser = checkAuth(dataset); + + if (apiTokenUser == null) { + return unauthorized("Access denied"); + } + + try { + + + /* + String lockInfoMessage = "Globus upload in progress"; + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, apiTokenUser != null ? ((AuthenticatedUser)apiTokenUser).getId() : null, lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + */ + + List fileMetadatas = new ArrayList<>(); + + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + + + String task_id = null; + + String timeWhenAsyncStarted = sdf.format(new Date(System.currentTimeMillis() + (5 * 60 * 60 * 1000))); // added 5 hrs to match output from globus api + + String endDateTime = sdf.format(new Date(System.currentTimeMillis() + (4 * 60 * 60 * 1000))); // the tasklist will be monitored for 4 hrs + Calendar cal1 = Calendar.getInstance(); + cal1.setTime(sdf.parse(endDateTime)); + + + do { + try { + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + + task_id = globusServiceBean.getTaskList(basicGlobusToken, dataset.getIdentifierForFileStorage(), timeWhenAsyncStarted); + //Thread.sleep(10000); + String currentDateTime = sdf.format(new Date(System.currentTimeMillis())); + Calendar cal2 = Calendar.getInstance(); + cal2.setTime(sdf.parse(currentDateTime)); + + if (cal2.after(cal1)) { + logger.info("Async:======Time exceeded " + endDateTime + " ====== " + currentDateTime + " ==== datasetId :" + datasetId); + break; + } else if (task_id != null) { + break; + } + + } catch (Exception ex) { + ex.printStackTrace(); + 
logger.info(ex.getMessage()); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id" ); + } + + } while (task_id == null); + + + logger.info("Async:======Found matching task id " + task_id + " ==== datasetId :" + datasetId); + + + DatasetVersion workingVersion = dataset.getEditVersion(); + + if (workingVersion.getCreateTime() != null) { + workingVersion.setCreateTime(new Timestamp(new Date().getTime())); + } + + + String directory = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); + + System.out.println("Async:======= directory ==== " + directory+ " ==== datasetId :" + datasetId); + Map checksumMapOld = new HashMap<>(); + + Iterator fmIt = workingVersion.getFileMetadatas().iterator(); + + while (fmIt.hasNext()) { + FileMetadata fm = fmIt.next(); + if (fm.getDataFile() != null && fm.getDataFile().getId() != null) { + String chksum = fm.getDataFile().getChecksumValue(); + if (chksum != null) { + checksumMapOld.put(chksum, 1); + } + } + } + + List dFileList = new ArrayList<>(); + for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { + + String s3ObjectKey = s3ObjectSummary.getKey(); + + String t = s3ObjectKey.replace(directory, ""); + + if (t.indexOf(".") > 0) { + long totalSize = s3ObjectSummary.getSize(); + String filePath = s3ObjectKey; + String checksumVal = s3ObjectSummary.getETag(); + + if ((checksumMapOld.get(checksumVal) != null)) { + logger.info("Async: ==== datasetId :" + datasetId + "======= filename ==== " + filePath + " == file already exists "); + } else if (!filePath.contains("cached")) { + + logger.info("Async: ==== datasetId :" + datasetId + "======= filename ==== " + filePath + " == new file "); + try { + + DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); //MIME_TYPE_GLOBUS + datafile.setModificationTime(new Timestamp(new Date().getTime())); + datafile.setCreateDate(new Timestamp(new Date().getTime())); + datafile.setPermissionModificationTime(new 
Timestamp(new Date().getTime())); + + FileMetadata fmd = new FileMetadata(); + + String fileName = filePath.split("/")[filePath.split("/").length - 1]; + fmd.setLabel(fileName); + fmd.setDirectoryLabel(filePath.replace(directory, "").replace(File.separator + fileName, "")); + + fmd.setDataFile(datafile); + + datafile.getFileMetadatas().add(fmd); + + FileUtil.generateS3PackageStorageIdentifier(datafile); + logger.info("Async: ==== datasetId :" + datasetId + "======= filename ==== " + filePath + " == added to datafile, filemetadata "); + + try { + // We persist "SHA1" rather than "SHA-1". + datafile.setChecksumType(DataFile.ChecksumType.SHA1); + datafile.setChecksumValue(checksumVal); + } catch (Exception cksumEx) { + logger.info("Async: ==== datasetId :" + datasetId + "======Could not calculate checksumType signature for the new file "); + } + + datafile.setFilesize(totalSize); + + dFileList.add(datafile); + + } catch (Exception ioex) { + logger.info("Async: ==== datasetId :" + datasetId + "======Failed to process and/or save the file " + ioex.getMessage()); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to do task_list" ); + + } + } + } + } + +/* + DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); + if (dcmLock == null) { + logger.info("Dataset not locked for DCM upload"); + } else { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); + dataset.removeLock(dcmLock); + } + logger.info(" ======= Remove Dataset Lock "); +*/ + + List filesAdded = new ArrayList<>(); + + if (dFileList != null && dFileList.size() > 0) { + + // Dataset dataset = version.getDataset(); + + for (DataFile dataFile : dFileList) { + + if (dataFile.getOwner() == null) { + dataFile.setOwner(dataset); + + workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); + dataFile.getFileMetadata().setDatasetVersion(workingVersion); + dataset.getFiles().add(dataFile); + + } + + filesAdded.add(dataFile); + + } + + logger.info("Async: 
==== datasetId :" + datasetId + " ===== Done! Finished saving new files to the dataset."); + } + + fileMetadatas.clear(); + for (DataFile addedFile : filesAdded) { + fileMetadatas.add(addedFile.getFileMetadata()); + } + filesAdded = null; + + if (workingVersion.isDraft()) { + + logger.info("Async: ==== datasetId :" + datasetId + " ==== inside draft version "); + + Timestamp updateTime = new Timestamp(new Date().getTime()); + + workingVersion.setLastUpdateTime(updateTime); + dataset.setModificationTime(updateTime); + + + for (FileMetadata fileMetadata : fileMetadatas) { + + if (fileMetadata.getDataFile().getCreateDate() == null) { + fileMetadata.getDataFile().setCreateDate(updateTime); + fileMetadata.getDataFile().setCreator((AuthenticatedUser) apiTokenUser); + } + fileMetadata.getDataFile().setModificationTime(updateTime); + } + + + } else { + logger.info("Async: ==== datasetId :" + datasetId + " ==== inside released version "); + + for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { + for (FileMetadata fileMetadata : fileMetadatas) { + if (fileMetadata.getDataFile().getStorageIdentifier() != null) { + + if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion.getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { + workingVersion.getFileMetadatas().set(i, fileMetadata); + } + } + } + } + + + } + + + try { + Command cmd; + logger.info("Async: ==== datasetId :" + datasetId + " ======= UpdateDatasetVersionCommand START in globus function "); + cmd = new UpdateDatasetVersionCommand(dataset,new DataverseRequest(apiTokenUser, (HttpServletRequest) null)); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + //new DataverseRequest(authenticatedUser, (HttpServletRequest) null) + //dvRequestService.getDataverseRequest() + commandEngine.submit(cmd); + } catch (CommandException ex) { + logger.log(Level.WARNING, "Async: ==== datasetId :" + datasetId + "======CommandException updating DatasetVersion from batch job: " 
+ ex.getMessage()); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to do task_list" ); + } + + logger.info("Async: ==== datasetId :" + datasetId + " ======= GLOBUS ASYNC CALL COMPLETED SUCCESSFULLY "); + + return ok("Async: ==== datasetId :" + datasetId + ": Finished task_list"); + } catch(Exception e) { + String message = e.getMessage(); + + logger.info("Async: ==== datasetId :" + datasetId + " ======= GLOBUS ASYNC CALL Exception ============== " + message); + e.printStackTrace(); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to do task_list" ); + //return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. Message was '" + message + "'."); + } + + + } + + private User checkAuth(Dataset dataset) { + + User apiTokenUser = null; + + try { + apiTokenUser = findUserOrDie(); + } catch (WrappedResponse wr) { + apiTokenUser = null; + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + } + + if (apiTokenUser != null) { + // used in an API context + if (!permissionService.requestOn(createDataverseRequest(apiTokenUser), dataset.getOwner()).has(Permission.EditDataset)) { + apiTokenUser = null; + } + } + + return apiTokenUser; + + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index bd0549622f0..46c80c0f984 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -46,6 +46,8 @@ import java.nio.file.StandardCopyOption; import java.util.ArrayList; +import com.amazonaws.services.s3.model.S3ObjectSummary; + public class FileAccessIO extends StorageIO { @@ -415,7 +417,11 @@ public void deleteAllAuxObjects() throws IOException { } } - + + @Override + public List listAuxObjects(String s) throws 
IOException { + return null; + } @Override public String getStorageLocation() { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index c9796d24b27..e244b8a788a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -16,6 +16,8 @@ import java.util.List; import java.util.logging.Logger; +import com.amazonaws.services.s3.model.S3ObjectSummary; + /** * * @author Leonid Andreev @@ -149,6 +151,11 @@ public OutputStream getOutputStream() throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: there is no output stream associated with this object."); } + @Override + public List listAuxObjects(String s) throws IOException { + return null; + } + @Override public InputStream getAuxFileAsInputStream(String auxItemTag) { throw new UnsupportedOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index c78b84233be..3e38d3cdc9c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -3,6 +3,8 @@ import com.amazonaws.AmazonClientException; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.auth.profile.ProfileCredentialsProvider; import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; @@ -103,6 +105,8 @@ public S3AccessIO(String storageLocation, String driverId) { minPartSize = getMinPartSize(driverId); key = storageLocation.substring(storageLocation.indexOf('/')+1); } + + public 
static String S3_IDENTIFIER_PREFIX = "s3"; //Used for tests only public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client, String driverId) { @@ -634,6 +638,46 @@ public List listAuxObjects() throws IOException { return ret; } + @Override + public List listAuxObjects(String s ) throws IOException { + if (!this.canWrite()) { + open(); + } + String prefix = getDestinationKey(""); + + List ret = new ArrayList<>(); + + System.out.println("======= bucketname ===== "+ bucketName); + System.out.println("======= prefix ===== "+ prefix); + + ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix); + ObjectListing storedAuxFilesList = null; + try { + storedAuxFilesList = s3.listObjects(req); + } catch (SdkClientException sce) { + throw new IOException ("S3 listAuxObjects: failed to get a listing for "+prefix); + } + if (storedAuxFilesList == null) { + return ret; + } + List storedAuxFilesSummary = storedAuxFilesList.getObjectSummaries(); + try { + while (storedAuxFilesList.isTruncated()) { + logger.fine("S3 listAuxObjects: going to next page of list"); + storedAuxFilesList = s3.listNextBatchOfObjects(storedAuxFilesList); + if (storedAuxFilesList != null) { + storedAuxFilesSummary.addAll(storedAuxFilesList.getObjectSummaries()); + } + } + } catch (AmazonClientException ase) { + //logger.warning("Caught an AmazonServiceException in S3AccessIO.listAuxObjects(): " + ase.getMessage()); + throw new IOException("S3AccessIO: Failed to get aux objects for listing."); + } + + + return storedAuxFilesSummary; + } + @Override public void deleteAuxObject(String auxItemTag) throws IOException { if (!this.canWrite()) { @@ -1056,7 +1100,10 @@ private static AmazonS3 getClient(String driverId) { // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. 
if (!s3CEUrl.isEmpty()) { - s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + // NOTE(review): removed hard-coded AWS access/secret keys that were committed here. + // Credentials must come from the default provider chain (environment/profile/IAM role), + // never from source control. The previously committed key pair must be rotated. + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl.trim(), s3CERegion.trim())); } /** * Pass in a boolean value if path style access should be used within the S3 client. diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 2f66eec5f4c..9bfd9154323 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -37,6 +37,7 @@ import java.util.Iterator; import java.util.List; +import com.amazonaws.services.s3.model.S3ObjectSummary; //import org.apache.commons.httpclient.Header; //import org.apache.commons.httpclient.methods.GetMethod; @@ -542,4 +543,6 @@ public boolean isBelowIngestSizeLimit() { return true; } } + + public abstract List<S3ObjectSummary> listAuxObjects(String s) throws IOException; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 3bc29cb9836..7f851f09450 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -32,6 +32,7 @@ import org.javaswift.joss.model.Container; import org.javaswift.joss.model.StoredObject; +import com.amazonaws.services.s3.model.S3ObjectSummary; /** * * @author leonid andreev @@ -874,6 +875,11 @@ public String getSwiftContainerName() { } return null; } + + @Override + public List listAuxObjects(String s) throws
IOException { + return null; + } //https://gist.github.com/ishikawa/88599 public static String toHexString(byte[] bytes) { diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java new file mode 100644 index 00000000000..9a963000541 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + +public class AccessList { + private int length; + private String endpoint; + private ArrayList DATA; + + public void setDATA(ArrayList DATA) { + this.DATA = DATA; + } + + public void setEndpoint(String endpoint) { + this.endpoint = endpoint; + } + + public void setLength(int length) { + this.length = length; + } + + public String getEndpoint() { + return endpoint; + } + + public ArrayList getDATA() { + return DATA; + } + + public int getLength() { + return length; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java new file mode 100644 index 00000000000..2d68c5c8839 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -0,0 +1,71 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + + +public class AccessToken implements java.io.Serializable { + + private String accessToken; + private String idToken; + private Long expiresIn; + private String resourceServer; + private String tokenType; + private String state; + private String scope; + private String refreshToken; + private ArrayList otherTokens; + + public String getAccessToken() { return accessToken; } + + String getIdToken() { return idToken; } + + Long getExpiresIn() { return expiresIn; } + + String getResourceServer() { return resourceServer; } + + String getTokenType() { return tokenType; } + + String getState() { return state; } + + String getScope() {return scope; } + + String 
getRefreshToken() { return refreshToken; } + + ArrayList getOtherTokens() { return otherTokens; } + + public void setAccessToken(String accessToken) { + this.accessToken = accessToken; + } + + public void setExpiresIn(Long expiresIn) { + this.expiresIn = expiresIn; + } + + public void setIdToken(String idToken) { + this.idToken = idToken; + } + + public void setOtherTokens(ArrayList otherTokens) { + this.otherTokens = otherTokens; + } + + public void setRefreshToken(String refreshToken) { + this.refreshToken = refreshToken; + } + + public void setResourceServer(String resourceServer) { + this.resourceServer = resourceServer; + } + + public void setScope(String scope) { + this.scope = scope; + } + + public void setState(String state) { + this.state = state; + } + + public void setTokenType(String tokenType) { + this.tokenType = tokenType; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java b/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java new file mode 100644 index 00000000000..bd6a4b3b881 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java @@ -0,0 +1,67 @@ +package edu.harvard.iq.dataverse.globus; + +public class FileG { + private String DATA_TYPE; + private String group; + private String name; + private String permissions; + private String size; + private String type; + private String user; + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public String getGroup() { + return group; + } + + public String getName() { + return name; + } + + public String getPermissions() { + return permissions; + } + + public String getSize() { + return size; + } + + public String getType() { + return type; + } + + public String getUser() { + return user; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public void setGroup(String group) { + this.group = group; + } + + public void setName(String name) { + this.name = name; + } + + public void setPermissions(String 
permissions) { + this.permissions = permissions; + } + + public void setSize(String size) { + this.size = size; + } + + public void setType(String type) { + this.type = type; + } + + public void setUser(String user) { + this.user = user; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java b/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java new file mode 100644 index 00000000000..777e37f9b80 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java @@ -0,0 +1,60 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + +public class FilesList { + private ArrayList DATA; + private String DATA_TYPE; + private String absolute_path; + private String endpoint; + private String length; + private String path; + + public String getEndpoint() { + return endpoint; + } + + public ArrayList getDATA() { + return DATA; + } + + public String getAbsolute_path() { + return absolute_path; + } + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public String getLength() { + return length; + } + + public String getPath() { + return path; + } + + public void setLength(String length) { + this.length = length; + } + + public void setEndpoint(String endpoint) { + this.endpoint = endpoint; + } + + public void setDATA(ArrayList DATA) { + this.DATA = DATA; + } + + public void setAbsolute_path(String absolute_path) { + this.absolute_path = absolute_path; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public void setPath(String path) { + this.path = path; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java new file mode 100644 index 00000000000..e060a5de59b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -0,0 +1,880 @@ +package edu.harvard.iq.dataverse.globus; + +import 
com.amazonaws.services.s3.model.S3ObjectSummary; +import com.google.gson.FieldNamingPolicy; +import com.google.gson.GsonBuilder; +import edu.harvard.iq.dataverse.*; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.faces.application.FacesMessage; +import javax.faces.context.FacesContext; +import javax.faces.view.ViewScoped; +import javax.inject.Inject; +import javax.inject.Named; + +import javax.persistence.EntityManager; +import javax.persistence.PersistenceContext; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.*; + +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; + +import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; +import com.google.gson.Gson; +import edu.harvard.iq.dataverse.api.AbstractApiBean; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.JsfHelper; +import edu.harvard.iq.dataverse.util.SystemConfig; +import org.primefaces.PrimeFaces; + +import static edu.harvard.iq.dataverse.util.JsfHelper.JH; + + +@Stateless 
+@Named("GlobusServiceBean") +public class GlobusServiceBean implements java.io.Serializable{ + + @EJB + protected DatasetServiceBean datasetSvc; + + @EJB + protected SettingsServiceBean settingsSvc; + + @Inject + DataverseSession session; + + @EJB + protected AuthenticationServiceBean authSvc; + + @EJB + EjbDataverseEngine commandEngine; + + private static final Logger logger = Logger.getLogger(FeaturedDataverseServiceBean.class.getCanonicalName()); + + private String code; + private String userTransferToken; + private String state; + + public String getState() { + return state; + } + + public void setState(String state) { + this.state = state; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getUserTransferToken() { + return userTransferToken; + } + + public void setUserTransferToken(String userTransferToken) { + this.userTransferToken = userTransferToken; + } + + public void onLoad() { + logger.info("Start Globus " + code); + logger.info("State " + state); + + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + if (globusEndpoint.equals("") || basicGlobusToken.equals("")) { + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + String datasetId = state; + logger.info("DatasetId = " + datasetId); + + String directory = getDirectory(datasetId); + if (directory == null) { + logger.severe("Cannot find directory"); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + HttpServletRequest origRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); + + logger.info(origRequest.getScheme()); + logger.info(origRequest.getServerName()); + + if (code != null ) { + + try { + AccessToken 
accessTokenUser = getAccessToken(origRequest, basicGlobusToken); + if (accessTokenUser == null) { + logger.severe("Cannot get access user token for code " + code); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } else { + setUserTransferToken(accessTokenUser.getOtherTokens().get(0).getAccessToken()); + } + + UserInfo usr = getUserInfo(accessTokenUser); + if (usr == null) { + logger.severe("Cannot get user info for " + accessTokenUser.getAccessToken()); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + logger.info(accessTokenUser.getAccessToken()); + logger.info(usr.getEmail()); + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + if (clientTokenUser == null) { + logger.severe("Cannot get client token "); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + logger.info(clientTokenUser.getAccessToken()); + + int status = createDirectory(clientTokenUser, directory, globusEndpoint); + if (status == 202) { + int perStatus = givePermission("identity", usr.getSub(), "rw", clientTokenUser, directory, globusEndpoint); + if (perStatus != 201 && perStatus != 200) { + logger.severe("Cannot get permissions "); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + } else if (status == 502) { //directory already exists + int perStatus = givePermission("identity", usr.getSub(), "rw", clientTokenUser, directory, globusEndpoint); + if (perStatus == 409) { + logger.info("permissions already exist"); + } else if (perStatus != 201 && perStatus != 200) { + logger.severe("Cannot get permissions "); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + } else { + logger.severe("Cannot create directory, status code " + status); + 
JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + // ProcessBuilder processBuilder = new ProcessBuilder(); + // AuthenticatedUser user = (AuthenticatedUser) session.getUser(); + // ApiToken token = authSvc.findApiTokenByUser(user); + // String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST https://" + origRequest.getServerName() + "/api/globus/" + datasetId; + // logger.info("====command ==== " + command); + // processBuilder.command("bash", "-c", command); + // logger.info("=== Start process"); + // Process process = processBuilder.start(); + // logger.info("=== Going globus"); + goGlobusUpload(directory, globusEndpoint); + logger.info("=== Finished globus"); + + + } catch (MalformedURLException ex) { + logger.severe(ex.getMessage()); + logger.severe(ex.getCause().toString()); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + } catch (UnsupportedEncodingException ex) { + logger.severe(ex.getMessage()); + logger.severe(ex.getCause().toString()); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + } catch (IOException ex) { + logger.severe(ex.getMessage()); + logger.severe(ex.getCause().toString()); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + } + + } + + } + + private void goGlobusUpload(String directory, String globusEndpoint ) { + + String httpString = "window.location.replace('" + "https://app.globus.org/file-manager?destination_id=" + globusEndpoint + "&destination_path=" + directory + "'" +")"; + PrimeFaces.current().executeScript(httpString); + } + + public void goGlobusDownload(String datasetId) { + + String directory = getDirectory(datasetId); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + String httpString = "window.location.replace('" + "https://app.globus.org/file-manager?origin_id=" 
+ globusEndpoint + "&origin_path=" + directory + "'" +")"; + PrimeFaces.current().executeScript(httpString); + } + + ArrayList checkPermisions( AccessToken clientTokenUser, String directory, String globusEndpoint, String principalType, String principal) throws MalformedURLException { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); + ArrayList ids = new ArrayList(); + if (result.status == 200) { + AccessList al = parseJson(result.jsonResponse, AccessList.class, false); + + for (int i = 0; i< al.getDATA().size(); i++) { + Permissions pr = al.getDATA().get(i); + if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory )) && pr.getPrincipalType().equals(principalType) && + ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) ) { + ids.add(pr.getId()); + } else { + continue; + } + } + } + + return ids; + } + + public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { + if (directory != null && !directory.equals("")) { + directory = "/" + directory + "/"; + } + logger.info("Start updating permissions." 
+ " Directory is " + directory); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, null); + logger.info("Size of rules " + rules.size()); + int count = 0; + while (count < rules.size()) { + logger.info("Start removing rules " + rules.get(count) ); + Permissions permissions = new Permissions(); + permissions.setDATA_TYPE("access"); + permissions.setPermissions(perm); + permissions.setPath(directory); + + Gson gson = new GsonBuilder().create(); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + rules.get(count)); + logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + rules.get(count)); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"PUT", gson.toJson(permissions)); + if (result.status != 200) { + logger.warning("Cannot update access rule " + rules.get(count)); + } else { + logger.info("Access rule " + rules.get(count) + " was updated"); + } + count++; + } + } + + public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, String directory, String globusEndpoint) throws MalformedURLException { + + ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, principal); + + + + Permissions permissions = new Permissions(); + permissions.setDATA_TYPE("access"); + permissions.setPrincipalType(principalType); + permissions.setPrincipal(principal); + permissions.setPath(directory + "/" ); + permissions.setPermissions(perm); + + Gson gson = new GsonBuilder().create(); + MakeRequestResponse result = null; + if (rules.size() == 0) { + logger.info("Start creating the rule"); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/"+ globusEndpoint + "/access"); + 
result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", gson.toJson(permissions)); + + if (result.status == 400) { + logger.severe("Path " + permissions.getPath() + " is not valid"); + } else if (result.status == 409) { + logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + } + + return result.status; + } else { + logger.info("Start Updating the rule"); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/"+ globusEndpoint + "/access/" + rules.get(0)); + result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", gson.toJson(permissions)); + + if (result.status == 400) { + logger.severe("Path " + permissions.getPath() + " is not valid"); + } else if (result.status == 409) { + logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + } + logger.info("Result status " + result.status); + } + + return result.status; + } + + private int createDirectory(AccessToken clientTokenUser, String directory, String globusEndpoint) throws MalformedURLException { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + globusEndpoint + "/mkdir"); + + MkDir mkDir = new MkDir(); + mkDir.setDataType("mkdir"); + mkDir.setPath(directory); + Gson gson = new GsonBuilder().create(); + + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"POST", gson.toJson(mkDir)); + logger.info(result.toString()); + + if (result.status == 502) { + logger.warning("Cannot create directory " + mkDir.getPath() + ", it already exists"); + } else if (result.status == 403) { + logger.severe("Cannot create directory " + mkDir.getPath() + ", permission denied"); + } else if (result.status == 202) { + logger.info("Directory created " + mkDir.getPath()); + } + + return result.status; + + } + + public String 
getTaskList(String basicGlobusToken, String identifierForFileStorage, String timeWhenAsyncStarted) throws MalformedURLException { + try + { + logger.info("1.getTaskList ====== timeWhenAsyncStarted = " + timeWhenAsyncStarted + " ====== identifierForFileStorage ====== " + identifierForFileStorage); + + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task_list?filter_endpoint="+globusEndpoint+"&filter_status=SUCCEEDED&filter_completion_time="+timeWhenAsyncStarted); + + //AccessToken accessTokenUser + //accessTokenUser.getOtherTokens().get(0).getAccessToken() + MakeRequestResponse result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); + //logger.info("==TEST ==" + result.toString()); + + + + //2019-12-01 18:34:37+00:00 + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + //SimpleDateFormat task_sdf = new SimpleDateFormat("yyyy-MM-ddTHH:mm:ss"); + + Calendar cal1 = Calendar.getInstance(); + cal1.setTime(sdf.parse(timeWhenAsyncStarted)); + + Calendar cal2 = Calendar.getInstance(); + + Tasklist tasklist = null; + //2019-12-01 18:34:37+00:00 + + if (result.status == 200) { + tasklist = parseJson(result.jsonResponse, Tasklist.class, false); + for (int i = 0; i< tasklist.getDATA().size(); i++) { + Task task = tasklist.getDATA().get(i); + Date tastTime = sdf.parse(task.getRequest_time().replace("T" , " ")); + cal2.setTime(tastTime); + + + if ( cal1.before(cal2)) { + + // get /task//successful_transfers + // verify datasetid in "destination_path": "/~/test_godata_copy/file1.txt", + // go to aws and get files and write to database tables + + logger.info("====== timeWhenAsyncStarted = " + timeWhenAsyncStarted + " ====== task.getRequest_time().toString() ====== " + task.getRequest_time()); + + boolean success = 
getSuccessfulTransfers(clientTokenUser, task.getTask_id() , identifierForFileStorage) ; + + if(success) + { + logger.info("SUCCESS ====== " + timeWhenAsyncStarted + " timeWhenAsyncStarted is before tastTime = TASK time = " + task.getTask_id()); + return task.getTask_id(); + } + } + else + { + //logger.info("====== " + timeWhenAsyncStarted + " timeWhenAsyncStarted is after tastTime = TASK time = " + task.getTask_id()); + //return task.getTask_id(); + } + } + } + } catch (MalformedURLException ex) { + logger.severe(ex.getMessage()); + logger.severe(ex.getCause().toString()); + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } + + public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId, String identifierForFileStorage) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId+"/successful_transfers"); + + MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), + "GET", null); + + Transferlist transferlist = null; + + if (result.status == 200) { + transferlist = parseJson(result.jsonResponse, Transferlist.class, false); + for (int i = 0; i < transferlist.getDATA().size(); i++) { + SuccessfulTransfer successfulTransfer = transferlist.getDATA().get(i); + String pathToVerify = successfulTransfer.getDestination_path(); + logger.info("getSuccessfulTransfers : ======pathToVerify === " + pathToVerify + " ====identifierForFileStorage === " + identifierForFileStorage); + if(pathToVerify.contains(identifierForFileStorage)) + { + logger.info(" SUCCESS ====== " + pathToVerify + " ==== " + identifierForFileStorage); + return true; + } + } + } + return false; + } + + + + public AccessToken getClientToken(String basicGlobusToken) throws MalformedURLException { + URL url = new 
URL("https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); + + MakeRequestResponse result = makeRequest(url, "Basic", + basicGlobusToken,"POST", null); + AccessToken clientTokenUser = null; + if (result.status == 200) { + clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + } + return clientTokenUser; + } + + public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGlobusToken ) throws UnsupportedEncodingException, MalformedURLException { + String serverName = origRequest.getServerName(); + if (serverName.equals("localhost")) { + serverName = "utl-192-123.library.utoronto.ca"; + } + + String redirectURL = "https://" + serverName + "/globus.xhtml"; + + redirectURL = URLEncoder.encode(redirectURL, "UTF-8"); + + URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL + + "&grant_type=authorization_code"); + logger.info(url.toString()); + + MakeRequestResponse result = makeRequest(url, "Basic", basicGlobusToken,"POST", null); + AccessToken accessTokenUser = null; + + if (result.status == 200) { + logger.info("Access Token: \n" + result.toString()); + accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + logger.info(accessTokenUser.getAccessToken()); + } + + return accessTokenUser; + + } + + public UserInfo getUserInfo(AccessToken accessTokenUser) throws MalformedURLException { + + URL url = new URL("https://auth.globus.org/v2/oauth2/userinfo"); + MakeRequestResponse result = makeRequest(url, "Bearer" , accessTokenUser.getAccessToken() , "GET", null); + UserInfo usr = null; + if (result.status == 200) { + usr = parseJson(result.jsonResponse, UserInfo.class, true); + } + + return usr; + } + + public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { + String str = null; + HttpURLConnection connection = 
null; + int status = 0; + try { + connection = (HttpURLConnection) url.openConnection(); + // NOTE(review): a live Basic-auth token was pasted in a comment here and has been removed; rotate that credential. + logger.fine(authType + " <credential redacted>"); + connection.setRequestProperty("Authorization", authType + " " + authCode); + //connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setRequestMethod(method); + if (jsonString != null) { + connection.setRequestProperty("Content-Type", "application/json"); + connection.setRequestProperty("Accept", "application/json"); + logger.info(jsonString); + connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); + wr.write(jsonString); + wr.flush(); + } + + status = connection.getResponseCode(); + logger.info("Status now " + status); + InputStream result = connection.getInputStream(); + if (result != null) { + logger.info("Result is not null"); + str = readResultJson(result).toString(); + logger.info("str is "); + logger.info(result.toString()); + } else { + logger.info("Result is null"); + str = null; + } + + logger.info("status: " + status); + } catch (IOException ex) { + logger.info("IO"); + logger.severe(ex.getMessage()); + logger.info(ex.getCause().toString()); + logger.info(ex.getStackTrace().toString()); + } finally { + if (connection != null) { + connection.disconnect(); + } + } + MakeRequestResponse r = new MakeRequestResponse(str, status); + return r; + + } + + private StringBuilder readResultJson(InputStream in) { + StringBuilder sb = null; + try { + + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + sb = new StringBuilder(); + String line; + while ((line = br.readLine()) != null) { + sb.append(line + "\n"); + } + br.close(); + logger.info(sb.toString()); + } catch (IOException e) { + sb = null; + logger.severe(e.getMessage()); + } + return sb; + } + + private T parseJson(String sb, Class
jsonParserClass, boolean namingPolicy) { + if (sb != null) { + Gson gson = null; + if (namingPolicy) { + gson = new GsonBuilder().setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES).create(); + + } else { + gson = new GsonBuilder().create(); + } + T jsonClass = gson.fromJson(sb, jsonParserClass); + return jsonClass; + } else { + logger.severe("Bad respond from token rquest"); + return null; + } + } + + String getDirectory(String datasetId) { + Dataset dataset = null; + String directory = null; + try { + dataset = datasetSvc.find(Long.parseLong(datasetId)); + if (dataset == null) { + logger.severe("Dataset not found " + datasetId); + return null; + } + String storeId = dataset.getStorageIdentifier(); + storeId.substring(storeId.indexOf("//") + 1); + directory = storeId.substring(storeId.indexOf("//") + 1); + logger.info(storeId); + logger.info(directory); + logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); + return directory; + + } catch (NumberFormatException nfe) { + logger.severe(nfe.getMessage()); + + return null; + } + + } + + class MakeRequestResponse { + public String jsonResponse; + public int status; + MakeRequestResponse(String jsonResponse, int status) { + this.jsonResponse = jsonResponse; + this.status = status; + } + + } + + private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) throws MalformedURLException { + URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint +"/ls?path=" + directory + "/"); + + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); + logger.info("find directory status:" + result.status); + + return result; + } + + public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedEncodingException, MalformedURLException { + + String globusEndpoint = 
settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + if (globusEndpoint.equals("") || basicGlobusToken.equals("")) { + return false; + } + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + if (clientTokenUser == null) { + logger.severe("Cannot get client token "); + return false; + } + + String directory = getDirectory(datasetId); + logger.info(directory); + + MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint); + + if (status.status == 200) { + + /* FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); + ArrayList files = fl.getDATA(); + if (files != null) { + for (FileG file: files) { + if (!file.getName().contains("cached") && !file.getName().contains(".thumb")) { + int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, + directory + "/" + file.getName(), globusEndpoint); + logger.info("givePermission status " + perStatus + " for " + file.getName()); + if (perStatus == 409) { + logger.info("Permissions already exist or limit was reached for " + file.getName()); + } else if (perStatus == 400) { + logger.info("No file in Globus " + file.getName()); + } else if (perStatus != 201) { + logger.info("Cannot get permission for " + file.getName()); + } + } + } + }*/ + + int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, globusEndpoint); + logger.info("givePermission status " + perStatus); + if (perStatus == 409) { + logger.info("Permissions already exist or limit was reached"); + } else if (perStatus == 400) { + logger.info("No directory in Globus"); + } else if (perStatus != 201 && perStatus != 200) { + logger.info("Cannot give read permission"); + return false; + } + + } else if (status.status == 404) { + logger.info("There is no globus directory"); + }else { + logger.severe("Cannot find directory in globus, 
status " + status ); + return false; + } + + return true; + } + + public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) throws MalformedURLException { + + logger.info("=====Tasklist == dataset id :" + dataset.getId()); + String directory = null; + + try { + + List fileMetadatas = new ArrayList<>(); + + StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + + DatasetVersion workingVersion = dataset.getEditVersion(); + + if (workingVersion.getCreateTime() != null) { + workingVersion.setCreateTime(new Timestamp(new Date().getTime())); + } + + + directory = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); + + System.out.println("======= directory ==== " + directory + " ==== datasetId :" + dataset.getId()); + Map checksumMapOld = new HashMap<>(); + + Iterator fmIt = workingVersion.getFileMetadatas().iterator(); + + while (fmIt.hasNext()) { + FileMetadata fm = fmIt.next(); + if (fm.getDataFile() != null && fm.getDataFile().getId() != null) { + String chksum = fm.getDataFile().getChecksumValue(); + if (chksum != null) { + checksumMapOld.put(chksum, 1); + } + } + } + + List dFileList = new ArrayList<>(); + boolean update = false; + for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { + + String s3ObjectKey = s3ObjectSummary.getKey(); + + String t = s3ObjectKey.replace(directory, ""); + + if (t.indexOf(".") > 0) { + long totalSize = s3ObjectSummary.getSize(); + String filePath = s3ObjectKey; + String checksumVal = s3ObjectSummary.getETag(); + + if ((checksumMapOld.get(checksumVal) != null)) { + logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == file already exists "); + } else if (filePath.contains("cached") || filePath.contains(".thumb")) { + logger.info(filePath + " is ignored"); + } else { + update = true; + logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == new file "); + try { + + DataFile datafile = new 
DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); //MIME_TYPE_GLOBUS + datafile.setModificationTime(new Timestamp(new Date().getTime())); + datafile.setCreateDate(new Timestamp(new Date().getTime())); + datafile.setPermissionModificationTime(new Timestamp(new Date().getTime())); + + FileMetadata fmd = new FileMetadata(); + + String fileName = filePath.split("/")[filePath.split("/").length - 1]; + fmd.setLabel(fileName); + fmd.setDirectoryLabel(filePath.replace(directory, "").replace(File.separator + fileName, "")); + + fmd.setDataFile(datafile); + + datafile.getFileMetadatas().add(fmd); + + FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); + logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == added to datafile, filemetadata "); + + try { + // We persist "SHA1" rather than "SHA-1". + datafile.setChecksumType(DataFile.ChecksumType.SHA1); + datafile.setChecksumValue(checksumVal); + } catch (Exception cksumEx) { + logger.info("==== datasetId :" + dataset.getId() + "======Could not calculate checksumType signature for the new file "); + } + + datafile.setFilesize(totalSize); + + dFileList.add(datafile); + + } catch (Exception ioex) { + logger.info("datasetId :" + dataset.getId() + "======Failed to process and/or save the file " + ioex.getMessage()); + return false; + + } + } + } + } + if (update) { + + List filesAdded = new ArrayList<>(); + + if (dFileList != null && dFileList.size() > 0) { + + // Dataset dataset = version.getDataset(); + + for (DataFile dataFile : dFileList) { + + if (dataFile.getOwner() == null) { + dataFile.setOwner(dataset); + + workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); + dataFile.getFileMetadata().setDatasetVersion(workingVersion); + dataset.getFiles().add(dataFile); + + } + + filesAdded.add(dataFile); + + } + + logger.info("==== datasetId :" + dataset.getId() + " ===== Done! 
Finished saving new files to the dataset."); + } + + fileMetadatas.clear(); + for (DataFile addedFile : filesAdded) { + fileMetadatas.add(addedFile.getFileMetadata()); + } + filesAdded = null; + + if (workingVersion.isDraft()) { + + logger.info("Async: ==== datasetId :" + dataset.getId() + " ==== inside draft version "); + + Timestamp updateTime = new Timestamp(new Date().getTime()); + + workingVersion.setLastUpdateTime(updateTime); + dataset.setModificationTime(updateTime); + + + for (FileMetadata fileMetadata : fileMetadatas) { + + if (fileMetadata.getDataFile().getCreateDate() == null) { + fileMetadata.getDataFile().setCreateDate(updateTime); + fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); + } + fileMetadata.getDataFile().setModificationTime(updateTime); + } + + + } else { + logger.info("datasetId :" + dataset.getId() + " ==== inside released version "); + + for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { + for (FileMetadata fileMetadata : fileMetadatas) { + if (fileMetadata.getDataFile().getStorageIdentifier() != null) { + + if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion.getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { + workingVersion.getFileMetadatas().set(i, fileMetadata); + } + } + } + } + + + } + + + try { + Command cmd; + logger.info("Async: ==== datasetId :" + dataset.getId() + " ======= UpdateDatasetVersionCommand START in globus function "); + cmd = new UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, (HttpServletRequest) null)); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + //new DataverseRequest(authenticatedUser, (HttpServletRequest) null) + //dvRequestService.getDataverseRequest() + commandEngine.submit(cmd); + } catch (CommandException ex) { + logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "======CommandException updating DatasetVersion from batch job: " + ex.getMessage()); + return false; + } + + 
logger.info("==== datasetId :" + dataset.getId() + " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); + + //return true; + } + + } catch (Exception e) { + String message = e.getMessage(); + + logger.info("==== datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); + e.printStackTrace(); + return false; + //return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. Message was '" + message + "'."); + } + + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + updatePermision(clientTokenUser, directory, "identity", "r"); + return true; + } + + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java b/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java new file mode 100644 index 00000000000..6411262b5c9 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java @@ -0,0 +1,16 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + + +public class Identities { + ArrayList identities; + + public void setIdentities(ArrayList identities) { + this.identities = identities; + } + + public ArrayList getIdentities() { + return identities; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java b/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java new file mode 100644 index 00000000000..265bd55217a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java @@ -0,0 +1,67 @@ +package edu.harvard.iq.dataverse.globus; + +public class Identity { + private String id; + private String username; + private String status; + private String name; + private String email; + private String identityProvider; + private String organization; + + public void setOrganization(String organization) { + 
/**
 * Request body for the Globus Transfer mkdir operation. Serialized to JSON
 * by Gson, so the field name DATA_TYPE must keep the exact wire spelling.
 */
public class MkDir {
    private String DATA_TYPE;
    private String path;

    public String getDataType() {
        return DATA_TYPE;
    }

    public void setDataType(String DATA_TYPE) {
        this.DATA_TYPE = DATA_TYPE;
    }

    public String getPath() {
        return path;
    }

    public void setPath(String path) {
        this.path = path;
    }
}
/**
 * JSON body for a Globus Transfer access (permission) rule. Field names use
 * the exact underscore wire format expected by the API (DATA_TYPE,
 * principal_type), so they are deliberately not renamed to Java conventions.
 */
public class Permissions {
    private String DATA_TYPE;
    private String principal_type;
    private String principal;
    private String id;
    private String path;
    private String permissions;

    public String getDATA_TYPE() {
        return DATA_TYPE;
    }

    public void setDATA_TYPE(String DATA_TYPE) {
        this.DATA_TYPE = DATA_TYPE;
    }

    /** e.g. "identity" or "all_authenticated_users" (see caller in GlobusServiceBean). */
    public String getPrincipalType() {
        return principal_type;
    }

    public void setPrincipalType(String principalType) {
        this.principal_type = principalType;
    }

    public String getPrincipal() {
        return principal;
    }

    public void setPrincipal(String principal) {
        this.principal = principal;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getPath() {
        return path;
    }

    public void setPath(String path) {
        this.path = path;
    }

    /** e.g. "r" for read-only access. */
    public String getPermissions() {
        return permissions;
    }

    public void setPermissions(String permissions) {
        this.permissions = permissions;
    }
}
/**
 * A single Globus Transfer task record as returned by the task-list API.
 * Field names follow the underscore wire format so Gson can bind them
 * directly; they must not be renamed.
 */
public class Task {

    private String DATA_TYPE;
    private String type;
    private String status;
    private String owner_id;
    private String request_time;
    private String task_id;
    private String destination_endpoint_display_name;

    public String getDATA_TYPE() {
        return DATA_TYPE;
    }

    public void setDATA_TYPE(String DATA_TYPE) {
        this.DATA_TYPE = DATA_TYPE;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getStatus() {
        return status;
    }

    public void setStatus(String status) {
        this.status = status;
    }

    public String getOwner_id() {
        return owner_id;
    }

    public void setOwner_id(String owner_id) {
        this.owner_id = owner_id;
    }

    public String getRequest_time() {
        return request_time;
    }

    public void setRequest_time(String request_time) {
        this.request_time = request_time;
    }

    public String getTask_id() {
        return task_id;
    }

    public void setTask_id(String task_id) {
        this.task_id = task_id;
    }

    public String getDestination_endpoint_display_name() {
        return destination_endpoint_display_name;
    }

    public void setDestination_endpoint_display_name(String destination_endpoint_display_name) {
        this.destination_endpoint_display_name = destination_endpoint_display_name;
    }
}
+ } + + public void setSub(String sub) { + this.sub = sub; + } + + public void setIdentityProvider(String identityProvider) { + this.identityProvider = identityProvider; + } + + public void setIdentityProviderDisplayName(String identityProviderDisplayName) { + this.identityProviderDisplayName = identityProviderDisplayName; + } + + public void setOrganization(String organization) { + this.organization = organization; + } + + public String getEmail() { + return email; + } + + public String getPreferredUsername() { + return preferredUsername; + } + + public String getSub() { + return sub; + } + + public String getName() { + return name; + } + + public String getIdentityProvider() { + return identityProvider; + } + + public String getIdentityProviderDisplayName() { + return identityProviderDisplayName; + } + + public String getOrganization() { + return organization; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index b2e82d92dc3..a0d6d7a9f62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -440,7 +440,20 @@ Whether Harvesting (OAI) service is enabled /** * Sort Date Facets Chronologically instead or presenting them in order of # of hits as other facets are. 
Default is true */ - ChronologicalDateFacets + ChronologicalDateFacets, + + /** + * BasicGlobusToken for Globus Application + */ + BasicGlobusToken, + /** + * GlobusEndpoint is Glopus endpoint for Globus application + */ + GlobusEndpoint, + /**Client id for Globus application + * + */ + GlobusClientId ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 02bf34f83c5..2706d840d21 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -20,7 +20,7 @@ package edu.harvard.iq.dataverse.util; - +import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.DataFileServiceBean; @@ -1337,6 +1337,17 @@ public static void generateS3PackageStorageIdentifier(DataFile dataFile) { String storageId = driverId + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); dataFile.setStorageIdentifier(storageId); } + + public static void generateS3PackageStorageIdentifierForGlobus(DataFile dataFile) { + String bucketName = System.getProperty("dataverse.files.s3-bucket-name"); + String storageId = null; + if ( dataFile.getFileMetadata().getDirectoryLabel() != null && !dataFile.getFileMetadata().getDirectoryLabel().equals("")) { + storageId = S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + dataFile.getFileMetadata().getDirectoryLabel() + "/" + dataFile.getFileMetadata().getLabel(); + } else { + storageId = S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); + } + dataFile.setStorageIdentifier(storageId); + } public static void generateStorageIdentifier(DataFile dataFile) { //Is it true that this is only used for temp files and we could safely prepend "tmp://" to indicate that? 
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 9c801f5197d..d98dfa8ab34 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -831,7 +831,14 @@ public enum FileUploadMethods { * Traditional Dataverse file handling, which tends to involve users * uploading and downloading files using a browser or APIs. */ - NATIVE("native/http"); + NATIVE("native/http"), + + /** + * Upload through Globus of large files + */ + + GLOBUS("globus") + ; private final String text; @@ -871,7 +878,9 @@ public enum FileDownloadMethods { * go through Glassfish. */ RSYNC("rsal/rsync"), - NATIVE("native/http"); + NATIVE("native/http"), + GLOBUS("globus") + ; private final String text; private FileDownloadMethods(final String text) { @@ -961,7 +970,11 @@ public boolean isPublicInstall(){ public boolean isRsyncUpload(){ return getUploadMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString()); } - + + public boolean isGlobusUpload(){ + return getUploadMethodAvailable(FileUploadMethods.GLOBUS.toString()); + } + // Controls if HTTP upload is enabled for both GUI and API. 
public boolean isHTTPUpload(){ return getUploadMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString()); @@ -993,6 +1006,11 @@ public boolean isHTTPDownload() { logger.warning("Download Methods:" + downloadMethods); return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.NATIVE.toString()); } + + public boolean isGlobusDownload() { + String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); + return downloadMethods !=null && downloadMethods.toLowerCase().contains(FileDownloadMethods.GLOBUS.toString()); + } private Boolean getUploadMethodAvailable(String method){ String uploadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.UploadMethods); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 8c70475953c..e723ce7c6c2 100755 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1395,6 +1395,9 @@ dataset.message.filesSuccess=The files for this dataset have been updated. dataset.message.addFiles.Failure=Failed to add files to the dataset. Please try uploading the file(s) again. dataset.message.addFiles.partialSuccess=Partial success: only {0} files out of {1} have been saved. Please try uploading the missing file(s) again. dataset.message.publishSuccess=This dataset has been published. +dataset.message.publishGlobusFailure.details=Could not publish Globus data. +dataset.message.publishGlobusFailure=Error with publishing data. +dataset.message.GlobusError=Cannot go to Globus. dataset.message.only.authenticatedUsers=Only authenticated users may release Datasets. dataset.message.deleteSuccess=This dataset has been deleted. dataset.message.bulkFileUpdateSuccess=The selected files have been updated. @@ -1479,10 +1482,14 @@ file.selectToAdd.tipLimit=File upload limit is {0} per file.
file.selectToAdd.tipMoreInformation=Select files or drag and drop into the upload widget. file.selectToAdd.dragdropMsg=Drag and drop files here. file.createUploadDisabled=Upload files using rsync via SSH. This method is recommended for large file transfers. The upload script will be available on the Upload Files page once you save this dataset. +file.createGlobusUploadDisabled=Upload files using Globus. This method is recommended for large file transfers. The "Upload with Globus" button will be available on the Upload Files page once you save this dataset. file.fromHTTP=Upload with HTTP via your browser file.fromDropbox=Upload from Dropbox file.fromDropbox.tip=Select files from Dropbox. file.fromRsync=Upload with rsync + SSH via Data Capture Module (DCM) +file.fromGlobus=Upload with Globus +file.finishGlobus=Globus Transfer has finished +file.downloadFromGlobus=Download through Globus file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. file.replace.original=Original File diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 3a69e21bbca..6e630edc5ea 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -276,7 +276,55 @@ - + +
Globus ++++
+
+
Globus
+ + +
+
+ + +

+ #{bundle['file.createGlobusUploadDisabled']} +

+
+
+ + +

+ + BEFORE YOU START: You will need to set up a free account with Globus and + have Globus Connect Personal running on your computer to transfer files to and from the service. +
+ + +
+
+ Once Globus transfer has finished, you will get an email notification. Please come back here and press the following button: +
+ + +
+
+ +

+ +
+ Click here to view the dataset page: #{EditDatafilesPage.dataset.displayName} . +
+
+
+
+
@@ -962,6 +1010,18 @@ }; Dropbox.choose(options); } + function openGlobus(datasetId, client_id) { + var res = location.protocol+'//'+location.hostname+(location.port ? ':'+location.port: ''); + + var scope = encodeURI("openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all", "UTF-8"); + + var new_url = "https://auth.globus.org/v2/oauth2/authorize?client_id=" + client_id + "&response_type=code&" + + "scope=" + scope + "&state=" + datasetId; + new_url = new_url + "&redirect_uri=" + res + "%2Fglobus.xhtml" ; + + + var myWindows = window.open(new_url); + } //]]> diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index b5ab1dbf759..f7d10c1cf60 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -60,6 +60,28 @@ #{bundle.download} + + + + + + #{bundle['file.downloadFromGlobus']} + + + + + + #{bundle.download} + @@ -545,4 +567,4 @@ #{bundle['file.compute']} - \ No newline at end of file + diff --git a/src/main/webapp/globus.xhtml b/src/main/webapp/globus.xhtml new file mode 100644 index 00000000000..f4eebd4babf --- /dev/null +++ b/src/main/webapp/globus.xhtml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + From beea5bc9fbd64e19f11ada4ebceab252e1287a0e Mon Sep 17 00:00:00 2001 From: lubitchv Date: Wed, 30 Sep 2020 09:48:32 -0400 Subject: [PATCH 002/161] Remove flyway --- .../db/migration/V1__flyway_schema_baseline.sql | 0 .../V4.11.0.1__5565-sanitize-directory-labels.sql | 9 --------- .../V4.11__5513-database-variablemetadata.sql | 5 ----- .../V4.12.0.1__4.13-re-sanitize-filemetadata.sql | 12 ------------ .../db/migration/V4.13.0.1__3575-usernames.sql | 1 - .../migration/V4.14.0.1__5822-export-var-meta.sql | 2 -- .../migration/V4.15.0.1__2043-split-gbr-table.sql | 10 ---------- .../V4.16.0.1__5303-addColumn-to-settingTable.sql | 13 ------------- .../migration/V4.16.0.2__5028-dataset-explore.sql | 3 --- 
.../V4.16.0.3__6156-FooterImageforSub-Dataverse.sql | 4 ---- .../migration/V4.17.0.1__5991-update-scribejava.sql | 1 - .../migration/V4.17.0.2__3578-file-page-preview.sql | 5 ----- .../V4.18.1.1__6459-contenttype-nullable.sql | 2 -- .../db/migration/V4.19.0.1__6485_multistore.sql | 3 --- .../V4.19.0.2__6644-update-editor-role-alias.sql | 2 -- ....1__2734-alter-data-table-add-orig-file-name.sql | 2 -- .../V4.20.0.2__6748-configure-dropdown-toolname.sql | 2 -- .../migration/V4.20.0.3__6558-file-validation.sql | 4 ---- .../migration/V4.20.0.4__6936-maildomain-groups.sql | 1 - .../migration/V4.20.0.5__6505-zipdownload-jobs.sql | 2 -- src/main/webapp/editFilesFragment.xhtml | 1 - 21 files changed, 84 deletions(-) delete mode 100644 src/main/resources/db/migration/V1__flyway_schema_baseline.sql delete mode 100644 src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql delete mode 100644 src/main/resources/db/migration/V4.11__5513-database-variablemetadata.sql delete mode 100644 src/main/resources/db/migration/V4.12.0.1__4.13-re-sanitize-filemetadata.sql delete mode 100644 src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql delete mode 100644 src/main/resources/db/migration/V4.14.0.1__5822-export-var-meta.sql delete mode 100644 src/main/resources/db/migration/V4.15.0.1__2043-split-gbr-table.sql delete mode 100644 src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql delete mode 100644 src/main/resources/db/migration/V4.16.0.2__5028-dataset-explore.sql delete mode 100644 src/main/resources/db/migration/V4.16.0.3__6156-FooterImageforSub-Dataverse.sql delete mode 100644 src/main/resources/db/migration/V4.17.0.1__5991-update-scribejava.sql delete mode 100644 src/main/resources/db/migration/V4.17.0.2__3578-file-page-preview.sql delete mode 100644 src/main/resources/db/migration/V4.18.1.1__6459-contenttype-nullable.sql delete mode 100644 src/main/resources/db/migration/V4.19.0.1__6485_multistore.sql delete mode 100644 
src/main/resources/db/migration/V4.19.0.2__6644-update-editor-role-alias.sql delete mode 100644 src/main/resources/db/migration/V4.20.0.1__2734-alter-data-table-add-orig-file-name.sql delete mode 100644 src/main/resources/db/migration/V4.20.0.2__6748-configure-dropdown-toolname.sql delete mode 100644 src/main/resources/db/migration/V4.20.0.3__6558-file-validation.sql delete mode 100644 src/main/resources/db/migration/V4.20.0.4__6936-maildomain-groups.sql delete mode 100644 src/main/resources/db/migration/V4.20.0.5__6505-zipdownload-jobs.sql diff --git a/src/main/resources/db/migration/V1__flyway_schema_baseline.sql b/src/main/resources/db/migration/V1__flyway_schema_baseline.sql deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql b/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql deleted file mode 100644 index 3d3ed777c9f..00000000000 --- a/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql +++ /dev/null @@ -1,9 +0,0 @@ --- replace any sequences of slashes and backslashes with a single slash: -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/\\][/\\]+', '/', 'g'); --- strip (and replace with a .) 
any characters that are no longer allowed in the directory labels: -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '\.\.+', '.', 'g'); --- now replace any sequences of .s with a single .: -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '\.\.+', '.', 'g'); --- get rid of any leading or trailing slashes, spaces, '-'s and '.'s: -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '^[/ .\-]+', '', ''); -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/ \.\-]+$', '', ''); diff --git a/src/main/resources/db/migration/V4.11__5513-database-variablemetadata.sql b/src/main/resources/db/migration/V4.11__5513-database-variablemetadata.sql deleted file mode 100644 index 3c29a974bae..00000000000 --- a/src/main/resources/db/migration/V4.11__5513-database-variablemetadata.sql +++ /dev/null @@ -1,5 +0,0 @@ --- universe is dropped since it is empty in the dataverse --- this column will be moved to variablemetadata table --- issue 5513 -ALTER TABLE datavariable -DROP COLUMN if exists universe; diff --git a/src/main/resources/db/migration/V4.12.0.1__4.13-re-sanitize-filemetadata.sql b/src/main/resources/db/migration/V4.12.0.1__4.13-re-sanitize-filemetadata.sql deleted file mode 100644 index 8623ed97b70..00000000000 --- a/src/main/resources/db/migration/V4.12.0.1__4.13-re-sanitize-filemetadata.sql +++ /dev/null @@ -1,12 +0,0 @@ --- let's try again and fix the existing directoryLabels: --- (the script shipped with 4.12 was missing the most important line; bad copy-and-paste) --- replace any sequences of slashes and backslashes with a single slash: -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/\\][/\\]+', '/', 'g'); --- strip (and replace with a .) any characters that are no longer allowed in the directory labels: --- (this line was missing from the script released with 4.12!!) 
-UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[^A-Za-z0-9_ ./-]+', '.', 'g'); --- now replace any sequences of .s with a single .: -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '\.\.+', '.', 'g'); --- get rid of any leading or trailing slashes, spaces, '-'s and '.'s: -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '^[/ .\-]+', '', ''); -UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/ \.\-]+$', '', ''); diff --git a/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql b/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql deleted file mode 100644 index 0b1804bdfc4..00000000000 --- a/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE UNIQUE INDEX index_authenticateduser_lower_useridentifier ON authenticateduser (lower(useridentifier)); diff --git a/src/main/resources/db/migration/V4.14.0.1__5822-export-var-meta.sql b/src/main/resources/db/migration/V4.14.0.1__5822-export-var-meta.sql deleted file mode 100644 index e65f52c7c91..00000000000 --- a/src/main/resources/db/migration/V4.14.0.1__5822-export-var-meta.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE variablemetadata -ADD COLUMN IF NOT EXISTS postquestion text; diff --git a/src/main/resources/db/migration/V4.15.0.1__2043-split-gbr-table.sql b/src/main/resources/db/migration/V4.15.0.1__2043-split-gbr-table.sql deleted file mode 100644 index adde91ee1b0..00000000000 --- a/src/main/resources/db/migration/V4.15.0.1__2043-split-gbr-table.sql +++ /dev/null @@ -1,10 +0,0 @@ -DO $$ -BEGIN -IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='guestbookresponse' AND column_name='downloadtype') THEN - INSERT INTO filedownload(guestbookresponse_id, downloadtype, downloadtimestamp, sessionid) SELECT id, downloadtype, responsetime, sessionid FROM guestbookresponse; - ALTER TABLE guestbookresponse DROP COLUMN downloadtype, DROP COLUMN 
sessionid; -END IF; -END -$$ - - diff --git a/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql b/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql deleted file mode 100644 index 8309dacf486..00000000000 --- a/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql +++ /dev/null @@ -1,13 +0,0 @@ -ALTER TABLE ONLY setting DROP CONSTRAINT setting_pkey ; - -ALTER TABLE setting ADD COLUMN IF NOT EXISTS ID SERIAL PRIMARY KEY; - -ALTER TABLE setting ADD COLUMN IF NOT EXISTS lang text; - -ALTER TABLE setting - ADD CONSTRAINT non_empty_lang - CHECK (lang <> ''); - -CREATE UNIQUE INDEX unique_settings - ON setting - (name, coalesce(lang, '')); diff --git a/src/main/resources/db/migration/V4.16.0.2__5028-dataset-explore.sql b/src/main/resources/db/migration/V4.16.0.2__5028-dataset-explore.sql deleted file mode 100644 index d880b1bddb4..00000000000 --- a/src/main/resources/db/migration/V4.16.0.2__5028-dataset-explore.sql +++ /dev/null @@ -1,3 +0,0 @@ -ALTER TABLE externaltool ADD COLUMN IF NOT EXISTS scope VARCHAR(255); -UPDATE externaltool SET scope = 'FILE'; -ALTER TABLE externaltool ALTER COLUMN scope SET NOT NULL; diff --git a/src/main/resources/db/migration/V4.16.0.3__6156-FooterImageforSub-Dataverse.sql b/src/main/resources/db/migration/V4.16.0.3__6156-FooterImageforSub-Dataverse.sql deleted file mode 100644 index 3951897279e..00000000000 --- a/src/main/resources/db/migration/V4.16.0.3__6156-FooterImageforSub-Dataverse.sql +++ /dev/null @@ -1,4 +0,0 @@ -ALTER TABLE dataversetheme -ADD COLUMN IF NOT EXISTS logofooter VARCHAR, -ADD COLUMN IF NOT EXISTS logoFooterBackgroundColor VARCHAR, -ADD COLUMN IF NOT EXISTS logofooteralignment VARCHAR; diff --git a/src/main/resources/db/migration/V4.17.0.1__5991-update-scribejava.sql b/src/main/resources/db/migration/V4.17.0.1__5991-update-scribejava.sql deleted file mode 100644 index 6762e1fc076..00000000000 --- 
a/src/main/resources/db/migration/V4.17.0.1__5991-update-scribejava.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE OAuth2TokenData DROP COLUMN IF EXISTS scope; \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.17.0.2__3578-file-page-preview.sql b/src/main/resources/db/migration/V4.17.0.2__3578-file-page-preview.sql deleted file mode 100644 index 152700ed96c..00000000000 --- a/src/main/resources/db/migration/V4.17.0.2__3578-file-page-preview.sql +++ /dev/null @@ -1,5 +0,0 @@ -ALTER TABLE externalTool -ADD COLUMN IF NOT EXISTS hasPreviewMode BOOLEAN; -UPDATE externaltool SET hasPreviewMode = false; -ALTER TABLE externaltool ALTER COLUMN hasPreviewMode SET NOT NULL; - diff --git a/src/main/resources/db/migration/V4.18.1.1__6459-contenttype-nullable.sql b/src/main/resources/db/migration/V4.18.1.1__6459-contenttype-nullable.sql deleted file mode 100644 index 79eab8583f0..00000000000 --- a/src/main/resources/db/migration/V4.18.1.1__6459-contenttype-nullable.sql +++ /dev/null @@ -1,2 +0,0 @@ --- contenttype can be null because dataset tools do not require it -ALTER TABLE externaltool ALTER contenttype DROP NOT NULL; diff --git a/src/main/resources/db/migration/V4.19.0.1__6485_multistore.sql b/src/main/resources/db/migration/V4.19.0.1__6485_multistore.sql deleted file mode 100644 index 84364169614..00000000000 --- a/src/main/resources/db/migration/V4.19.0.1__6485_multistore.sql +++ /dev/null @@ -1,3 +0,0 @@ -ALTER TABLE dataverse -ADD COLUMN IF NOT EXISTS storagedriver TEXT; -UPDATE dvobject set storageidentifier=CONCAT('file://', storageidentifier) where storageidentifier not like '%://%' and dtype='DataFile'; diff --git a/src/main/resources/db/migration/V4.19.0.2__6644-update-editor-role-alias.sql b/src/main/resources/db/migration/V4.19.0.2__6644-update-editor-role-alias.sql deleted file mode 100644 index 7eccdb5f3c4..00000000000 --- a/src/main/resources/db/migration/V4.19.0.2__6644-update-editor-role-alias.sql +++ /dev/null @@ -1,2 +0,0 @@ - 
-update dataverserole set alias = 'contributor' where alias = 'editor'; \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.1__2734-alter-data-table-add-orig-file-name.sql b/src/main/resources/db/migration/V4.20.0.1__2734-alter-data-table-add-orig-file-name.sql deleted file mode 100644 index edde8821045..00000000000 --- a/src/main/resources/db/migration/V4.20.0.1__2734-alter-data-table-add-orig-file-name.sql +++ /dev/null @@ -1,2 +0,0 @@ - -ALTER TABLE datatable ADD COLUMN IF NOT EXISTS originalfilename character varying(255); \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.2__6748-configure-dropdown-toolname.sql b/src/main/resources/db/migration/V4.20.0.2__6748-configure-dropdown-toolname.sql deleted file mode 100644 index e360b0adfb6..00000000000 --- a/src/main/resources/db/migration/V4.20.0.2__6748-configure-dropdown-toolname.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE externaltool -ADD COLUMN IF NOT EXISTS toolname VARCHAR(255); diff --git a/src/main/resources/db/migration/V4.20.0.3__6558-file-validation.sql b/src/main/resources/db/migration/V4.20.0.3__6558-file-validation.sql deleted file mode 100644 index 3e5e742968c..00000000000 --- a/src/main/resources/db/migration/V4.20.0.3__6558-file-validation.sql +++ /dev/null @@ -1,4 +0,0 @@ --- the lock type "pidRegister" has been removed in 4.20, replaced with "finalizePublication" type --- (since this script is run as the application is being deployed, any background pid registration --- job is definitely no longer running - so we do want to remove any such locks left behind) -DELETE FROM DatasetLock WHERE reason='pidRegister'; \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.4__6936-maildomain-groups.sql b/src/main/resources/db/migration/V4.20.0.4__6936-maildomain-groups.sql deleted file mode 100644 index 8c89b66fdec..00000000000 --- a/src/main/resources/db/migration/V4.20.0.4__6936-maildomain-groups.sql +++ /dev/null 
@@ -1 +0,0 @@ -ALTER TABLE persistedglobalgroup ADD COLUMN IF NOT EXISTS emaildomains text; \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.5__6505-zipdownload-jobs.sql b/src/main/resources/db/migration/V4.20.0.5__6505-zipdownload-jobs.sql deleted file mode 100644 index 484d5dd0784..00000000000 --- a/src/main/resources/db/migration/V4.20.0.5__6505-zipdownload-jobs.sql +++ /dev/null @@ -1,2 +0,0 @@ --- maybe temporary? - work in progress -CREATE TABLE IF NOT EXISTS CUSTOMZIPSERVICEREQUEST (KEY VARCHAR(63), STORAGELOCATION VARCHAR(255), FILENAME VARCHAR(255), ISSUETIME TIMESTAMP); diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 6e630edc5ea..3e446d65586 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -277,7 +277,6 @@ -
Globus ++++
Globus
From 9ca568dd270796bff7a26b0ad97af12e75b1ea7f Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 9 Oct 2020 14:30:12 -0400 Subject: [PATCH 003/161] Download with Globus --- .../file-download-button-fragment.xhtml | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index f7d10c1cf60..9a8e535bcdd 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -69,18 +69,7 @@ - #{bundle['file.downloadFromGlobus']} - - - - - - #{bundle.download} + #{bundle['file.downloadFromGlobus']} @@ -234,6 +223,17 @@ #{bundle.download} + + + + + + #{bundle['file.downloadFromGlobus']} + Date: Wed, 14 Oct 2020 11:55:44 -0400 Subject: [PATCH 004/161] add logs for publishing file validation --- .../java/edu/harvard/iq/dataverse/util/FileUtil.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 2706d840d21..2b7b6416085 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1707,6 +1707,8 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { public static void validateDataFileChecksum(DataFile dataFile) throws IOException { DataFile.ChecksumType checksumType = dataFile.getChecksumType(); + + logger.info(checksumType.toString()); if (checksumType == null) { String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString())); logger.log(Level.INFO, info); @@ -1720,6 +1722,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio storage.open(DataAccessOption.READ_ACCESS); if (!dataFile.isTabularData()) { + logger.info("It is not tabular"); in = 
storage.getInputStream(); } else { // if this is a tabular file, read the preserved original "auxiliary file" @@ -1738,7 +1741,9 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio String recalculatedChecksum = null; try { + logger.info("Before calculating checksum"); recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); + logger.info("Checksum:" + recalculatedChecksum); } catch (RuntimeException rte) { recalculatedChecksum = null; } finally { @@ -1757,6 +1762,9 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { // There's one possible condition that is 100% recoverable and can // be automatically fixed (issue #6660): + logger.info(dataFile.getChecksumValue()); + logger.info(recalculatedChecksum); + logger.info("Checksums are not equal"); boolean fixed = false; if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { // try again, see if the .orig file happens to be there: @@ -1786,6 +1794,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio } if (!fixed) { + logger.info("checksum cannot be fixed"); String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); logger.log(Level.INFO, info); throw new IOException(info); From 2fb9106ef6d0a6f07fd50615da8565b9d49a619f Mon Sep 17 00:00:00 2001 From: lubitchv Date: Wed, 14 Oct 2020 12:46:06 -0400 Subject: [PATCH 005/161] Check for globus file checksum before publishing --- .../harvard/iq/dataverse/util/FileUtil.java | 175 ++++++++++-------- 1 file changed, 94 insertions(+), 81 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 2b7b6416085..f9ee57a07d5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -21,6 +21,8 @@ package edu.harvard.iq.dataverse.util; import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX; + +import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.DataFileServiceBean; @@ -1706,102 +1708,113 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { } public static void validateDataFileChecksum(DataFile dataFile) throws IOException { - DataFile.ChecksumType checksumType = dataFile.getChecksumType(); - - logger.info(checksumType.toString()); - if (checksumType == null) { - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); - } + String recalculatedChecksum = null; + if (dataFile.getContentType().equals(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE)) { + for (S3ObjectSummary s3ObjectSummary : dataFile.getStorageIO().listAuxObjects("")) { + recalculatedChecksum = s3ObjectSummary.getETag(); + if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); + } + } + } else { + DataFile.ChecksumType checksumType = dataFile.getChecksumType(); - StorageIO storage = dataFile.getStorageIO(); - InputStream in = null; - - try { - storage.open(DataAccessOption.READ_ACCESS); - - if (!dataFile.isTabularData()) { - logger.info("It is not tabular"); - in = storage.getInputStream(); - } else { - // if this is a tabular file, read the preserved original "auxiliary file" - // instead: - in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); + 
logger.info(checksumType.toString()); + if (checksumType == null) { + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); } - } catch (IOException ioex) { - in = null; - } - if (in == null) { - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); - } + StorageIO storage = dataFile.getStorageIO(); + InputStream in = null; - String recalculatedChecksum = null; - try { - logger.info("Before calculating checksum"); - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); - logger.info("Checksum:" + recalculatedChecksum); - } catch (RuntimeException rte) { - recalculatedChecksum = null; - } finally { - IOUtils.closeQuietly(in); - } - - if (recalculatedChecksum == null) { - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); - } - - // TODO? What should we do if the datafile does not have a non-null checksum? - // Should we fail, or should we assume that the recalculated checksum - // is correct, and populate the checksumValue field with it? 
- if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { - // There's one possible condition that is 100% recoverable and can - // be automatically fixed (issue #6660): - logger.info(dataFile.getChecksumValue()); - logger.info(recalculatedChecksum); - logger.info("Checksums are not equal"); - boolean fixed = false; - if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { - // try again, see if the .orig file happens to be there: - try { + try { + storage.open(DataAccessOption.READ_ACCESS); + + if (!dataFile.isTabularData()) { + logger.info("It is not tabular"); + in = storage.getInputStream(); + } else { + // if this is a tabular file, read the preserved original "auxiliary file" + // instead: in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } catch (IOException ioex) { - in = null; } - if (in != null) { + } catch (IOException ioex) { + in = null; + } + + if (in == null) { + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); + } + + try { + logger.info("Before calculating checksum"); + recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); + logger.info("Checksum:" + recalculatedChecksum); + } catch (RuntimeException rte) { + recalculatedChecksum = null; + } finally { + IOUtils.closeQuietly(in); + } + + if (recalculatedChecksum == null) { + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); + } + + // TODO? What should we do if the datafile does not have a non-null checksum? + // Should we fail, or should we assume that the recalculated checksum + // is correct, and populate the checksumValue field with it? 
+ if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { + // There's one possible condition that is 100% recoverable and can + // be automatically fixed (issue #6660): + logger.info(dataFile.getChecksumValue()); + logger.info(recalculatedChecksum); + logger.info("Checksums are not equal"); + boolean fixed = false; + if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { + // try again, see if the .orig file happens to be there: try { - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); - } catch (RuntimeException rte) { - recalculatedChecksum = null; - } finally { - IOUtils.closeQuietly(in); + in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); + } catch (IOException ioex) { + in = null; } - // try again: - if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { - fixed = true; + if (in != null) { try { - storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } catch (IOException ioex) { - fixed = false; + recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); + } catch (RuntimeException rte) { + recalculatedChecksum = null; + } finally { + IOUtils.closeQuietly(in); + } + // try again: + if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { + fixed = true; + try { + storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); + } catch (IOException ioex) { + fixed = false; + } } } } - } - - if (!fixed) { - logger.info("checksum cannot be fixed"); - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); + + if (!fixed) { + logger.info("checksum cannot be fixed"); + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); + } 
} } - logger.log(Level.INFO, "successfully validated DataFile {0}; checksum {1}", new Object[]{dataFile.getId(), recalculatedChecksum}); + } public static String getStorageIdentifierFromLocation(String location) { From 230013bef5a341025146d3a9ccf046b8d6dd8d3d Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 19 Oct 2020 11:05:54 -0400 Subject: [PATCH 006/161] applied manually remove flyway script --- .../V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql | 1 - 1 file changed, 1 deletion(-) delete mode 100644 src/main/resources/db/migration/V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql diff --git a/src/main/resources/db/migration/V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql b/src/main/resources/db/migration/V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql deleted file mode 100644 index 453b2054c43..00000000000 --- a/src/main/resources/db/migration/V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE dataset ADD COLUMN IF NOT EXISTS storagedriver VARCHAR(255); \ No newline at end of file From e1ad7d671bbf33e4d43c46c8525503de7ca55e09 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 19 Oct 2020 11:55:57 -0400 Subject: [PATCH 007/161] add logs for publishing --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 4ffd7d05d3f..85c95ef5d15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1862,11 +1862,12 @@ private String init(boolean initFull) { return permissionsWrapper.notFound(); } logger.fine("retrieved dataset, id="+dataset.getId()); - + logger.info("retrieved dataset, id="+dataset.getId()); retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); 
//retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion(); logger.fine("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); + logger.info("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); } else if (this.getId() != null) { // Set Working Version and Dataset by Datasaet Id and Version From 5f754d15d5381eb305e884ffa6ab995cb1c0f50d Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 19 Oct 2020 13:06:29 -0400 Subject: [PATCH 008/161] Remove SiteMapUtilTest --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 3 +-- .../edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 85c95ef5d15..af3b60fca91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1862,12 +1862,11 @@ private String init(boolean initFull) { return permissionsWrapper.notFound(); } logger.fine("retrieved dataset, id="+dataset.getId()); - logger.info("retrieved dataset, id="+dataset.getId()); + retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion(); logger.fine("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); - logger.info("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); } else if (this.getId() != null) { // Set Working Version and
Dataset by Datasaet Id and Version diff --git a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java index cc691f0a3b5..09acb0e3bf1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java @@ -31,7 +31,7 @@ public class SiteMapUtilTest { @Test public void testUpdateSiteMap() throws IOException, ParseException { - List dataverses = new ArrayList<>(); + /* List dataverses = new ArrayList<>(); String publishedDvString = "publishedDv1"; Dataverse publishedDataverse = new Dataverse(); publishedDataverse.setAlias(publishedDvString); @@ -115,7 +115,7 @@ public void testUpdateSiteMap() throws IOException, ParseException { assertFalse(sitemapString.contains(deaccessionedPid)); System.clearProperty("com.sun.aas.instanceRoot"); - +*/ } } From f443c7328d2a574afeba58cddf9eb8884cfc7457 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Tue, 20 Oct 2020 13:00:05 -0400 Subject: [PATCH 009/161] MD5 checksum --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 3 ++- src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index e060a5de59b..23e4435e6f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -755,7 +755,8 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th try { // We persist "SHA1" rather than "SHA-1". 
- datafile.setChecksumType(DataFile.ChecksumType.SHA1); + //datafile.setChecksumType(DataFile.ChecksumType.SHA1); + datafile.setChecksumType(DataFile.ChecksumType.MD5); datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) { logger.info("==== datasetId :" + dataset.getId() + "======Could not calculate checksumType signature for the new file "); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 33d1ec51da2..96006bdf735 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1735,7 +1735,7 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { public static void validateDataFileChecksum(DataFile dataFile) throws IOException { String recalculatedChecksum = null; - if (dataFile.getContentType().equals(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE)) { + /* if (dataFile.getContentType().equals(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE)) { for (S3ObjectSummary s3ObjectSummary : dataFile.getStorageIO().listAuxObjects("")) { recalculatedChecksum = s3ObjectSummary.getETag(); if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { @@ -1744,7 +1744,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio throw new IOException(info); } } - } else { + } else {*/ DataFile.ChecksumType checksumType = dataFile.getChecksumType(); logger.info(checksumType.toString()); @@ -1838,7 +1838,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio throw new IOException(info); } } - } + //} logger.log(Level.INFO, "successfully validated DataFile {0}; checksum {1}", new Object[]{dataFile.getId(), recalculatedChecksum}); } From f799c7b18e70385289037c4331ea240c4804508c Mon Sep 17 00:00:00 2001 From: lubitchv Date: Wed, 21 Oct 2020 11:25:51 -0400 Subject: [PATCH 010/161] add back SiteMap test --- 
.../edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java index 09acb0e3bf1..cc691f0a3b5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java @@ -31,7 +31,7 @@ public class SiteMapUtilTest { @Test public void testUpdateSiteMap() throws IOException, ParseException { - /* List dataverses = new ArrayList<>(); + List dataverses = new ArrayList<>(); String publishedDvString = "publishedDv1"; Dataverse publishedDataverse = new Dataverse(); publishedDataverse.setAlias(publishedDvString); @@ -115,7 +115,7 @@ public void testUpdateSiteMap() throws IOException, ParseException { assertFalse(sitemapString.contains(deaccessionedPid)); System.clearProperty("com.sun.aas.instanceRoot"); -*/ + } } From 40de0afd18d427c30ebdd683cb771d10c4a38362 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Wed, 21 Oct 2020 15:58:16 -0400 Subject: [PATCH 011/161] downloadPopupRequired removed globus --- src/main/webapp/file-download-button-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 9a8e535bcdd..d543723fe6b 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -223,7 +223,7 @@ #{bundle.download} - Date: Wed, 21 Oct 2020 16:13:03 -0400 Subject: [PATCH 012/161] downloadPopupRequired filelevel globus removed --- src/main/webapp/file-download-button-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index d543723fe6b..64b36fcf39e 100644 --- 
a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -60,7 +60,7 @@ #{bundle.download} - Date: Thu, 22 Oct 2020 12:05:01 -0400 Subject: [PATCH 013/161] New checksum test --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 23e4435e6f3..15a43301c55 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -724,7 +724,9 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String filePath = s3ObjectKey; - String checksumVal = s3ObjectSummary.getETag(); + logger.info("File Path " + filePath); + String checksumVal = FileUtil.calculateChecksum(filePath, DataFile.ChecksumType.MD5); + //String checksumVal = s3ObjectSummary.getETag(); if ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == file already exists "); From 0905db577d9ddcc3468e44040c41311461a7b60c Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 12:40:25 -0400 Subject: [PATCH 014/161] New checksum test 2 --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 15a43301c55..e73b2cea7b3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -689,6 +689,7 @@ public boolean globusFinishTransfer(Dataset dataset, 
AuthenticatedUser user) th StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + DatasetVersion workingVersion = dataset.getEditVersion(); if (workingVersion.getCreateTime() != null) { @@ -724,8 +725,9 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String filePath = s3ObjectKey; - logger.info("File Path " + filePath); - String checksumVal = FileUtil.calculateChecksum(filePath, DataFile.ChecksumType.MD5); + String fullPath = dataset.getStorageIdentifier() + filePath; + logger.info("File Path " + fullPath); + String checksumVal = FileUtil.calculateChecksum(fullPath, DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); if ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == file already exists "); From 254c4b77c7d5ce1331bb3c70a1246522163aaf27 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 12:57:17 -0400 Subject: [PATCH 015/161] Storage location test --- .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index e73b2cea7b3..5b07bcb6616 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -725,6 +725,7 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String filePath = s3ObjectKey; + logger.info("Storage location " + datasetSIO.getStorageLocation()); String fullPath = dataset.getStorageIdentifier() + filePath; logger.info("File Path " + fullPath); String checksumVal = FileUtil.calculateChecksum(fullPath, DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); From 718d0eb96f5163f3239d7cd95d66f9419c7ba679 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020
13:10:07 -0400 Subject: [PATCH 016/161] Storage location test 3 --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 5b07bcb6616..dbd790ac3ad 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -726,7 +726,9 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th long totalSize = s3ObjectSummary.getSize(); String filePath = s3ObjectKey; logger.info("Storage location " + datasetSIO.getStorageLocation()); - String fullPath = dataset.getStorageIdentifier() + filePath; + String fileName = s3ObjectKey.substring(s3ObjectKey.lastIndexOf("/")); + logger.info("fileName " + fileName); + String fullPath = datasetSIO.getStorageLocation() + "/" + fileName; logger.info("File Path " + fullPath); String checksumVal = FileUtil.calculateChecksum(fullPath, DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); From 5418fb85b07c17bd59c6e25f7e72cd69cd5cb9a0 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 13:16:21 -0400 Subject: [PATCH 017/161] Storage location test 4 --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index dbd790ac3ad..6adab874601 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -726,7 +726,7 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th long totalSize = s3ObjectSummary.getSize(); String filePath = s3ObjectKey;
logger.info("Storage location " + datasetSIO.getStorageLocation()); - String fileName = s3ObjectKey.substring(s3ObjectKey.lastIndexOf("/")); + String fileName = filePath.split("/")[filePath.split("/").length - 1]; logger.info("fileName " + fileName); String fullPath = datasetSIO.getStorageLocation() + "/" + fileName; logger.info("File Path " + fullPath); @@ -749,7 +749,7 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th FileMetadata fmd = new FileMetadata(); - String fileName = filePath.split("/")[filePath.split("/").length - 1]; + fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory, "").replace(File.separator + fileName, "")); From 25bedba4faba402836e802997559a33a4ee8f7bd Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 14:24:35 -0400 Subject: [PATCH 018/161] s3 input stream test --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 6adab874601..5ceff270eeb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.globus; +import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; @@ -720,6 +721,7 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th String s3ObjectKey = s3ObjectSummary.getKey(); + String t = s3ObjectKey.replace(directory, ""); if (t.indexOf(".") > 0) { @@ -730,7 +732,10 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th logger.info("fileName " + fileName); String fullPath = datasetSIO.getStorageLocation() + "/" + fileName; 
logger.info("File Path " + fullPath); - String checksumVal = FileUtil.calculateChecksum(fullPath, DataFile.ChecksumType.MD5); + logger.info("Get storage class " + s3ObjectSummary.getStorageClass()); + InputStream in = datasetSIO.getAuxFileAsInputStream(s3ObjectSummary.getETag()); + + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); if ((checksumMapOld.get(checksumVal) != null)) { From 3c27aea78b08c4f57fd1f45a4d35204475c602b2 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 14:39:34 -0400 Subject: [PATCH 019/161] test --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 1 + .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 0c4558edb30..0107de28d54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -792,6 +792,7 @@ public OutputStream getOutputStream() throws UnsupportedDataAccessOperationExcep @Override public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { String destinationKey = getDestinationKey(auxItemTag); + logger.info("Destination key " + destinationKey); try { S3Object s3object = s3.getObject(new GetObjectRequest(bucketName, destinationKey)); if (s3object != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 5ceff270eeb..27518e7f3d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -733,7 +733,7 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th String 
fullPath = datasetSIO.getStorageLocation() + "/" + fileName; logger.info("File Path " + fullPath); logger.info("Get storage class " + s3ObjectSummary.getStorageClass()); - InputStream in = datasetSIO.getAuxFileAsInputStream(s3ObjectSummary.getETag()); + InputStream in = datasetSIO.getAuxFileAsInputStream(filePath); String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); From bef7e3cd8f0be1c5784eac48a68eac2a1ba6c2a8 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 17:05:42 -0400 Subject: [PATCH 020/161] test --- .../iq/dataverse/dataaccess/S3AccessIO.java | 14 ++++++++++++++ .../iq/dataverse/globus/GlobusServiceBean.java | 5 ++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 0107de28d54..22ac0c86d07 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -805,6 +805,20 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException } } + public InputStream getFileAsInputStream(String destinationKey) throws IOException { + + try { + S3Object s3object = s3.getObject(new GetObjectRequest(bucketName, destinationKey)); + if (s3object != null) { + return s3object.getObjectContent(); + } + return null; + } catch (AmazonClientException ase) { + logger.fine("Caught an AmazonClientException in S3AccessIO.getAuxFileAsInputStream() (object not cached?): " + ase.getMessage()); + return null; + } + } + String getDestinationKey(String auxItemTag) throws IOException { if (isDirectAccess() || dvObject instanceof DataFile) { return getMainFileKey() + "." 
+ auxItemTag; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 27518e7f3d8..d4398e85b30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -731,9 +731,8 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th String fileName = filePath.split("/")[filePath.split("/").length - 1]; logger.info("fileName " + fileName); String fullPath = datasetSIO.getStorageLocation() + "/" + fileName; - logger.info("File Path " + fullPath); - logger.info("Get storage class " + s3ObjectSummary.getStorageClass()); - InputStream in = datasetSIO.getAuxFileAsInputStream(filePath); + logger.info("Key " + s3ObjectKey); + InputStream in = datasetSIO.getAuxFileAsInputStream(s3ObjectKey); String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); From 7752cdfa00dbd876b06afb46ecca9d377f876228 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 17:16:51 -0400 Subject: [PATCH 021/161] test --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 22ac0c86d07..79d5a9ba84a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -808,7 +808,10 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException public InputStream getFileAsInputStream(String destinationKey) throws IOException { try { - S3Object s3object = s3.getObject(new GetObjectRequest(bucketName, destinationKey)); + GetObjectRequest o = new GetObjectRequest(bucketName, destinationKey; 
+ logger.info("Bucket name " + o.getBucketName()); + S3Object s3object = s3.getObject(o); + logger.info("Key " + s3object.getKey()); if (s3object != null) { return s3object.getObjectContent(); } From 9cf09f7c2cd13295a2ded1e9fa270cb0037df4de Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 17:18:16 -0400 Subject: [PATCH 022/161] test --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 79d5a9ba84a..b700e01b83d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -808,7 +808,7 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException public InputStream getFileAsInputStream(String destinationKey) throws IOException { try { - GetObjectRequest o = new GetObjectRequest(bucketName, destinationKey; + GetObjectRequest o = new GetObjectRequest(bucketName, destinationKey); logger.info("Bucket name " + o.getBucketName()); S3Object s3object = s3.getObject(o); logger.info("Key " + s3object.getKey()); From d6a7561acc9f23649be0bb8f91faf1cfa436fde1 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 22 Oct 2020 18:10:03 -0400 Subject: [PATCH 023/161] test --- .../iq/dataverse/dataaccess/S3AccessIO.java | 16 ---------------- .../iq/dataverse/globus/GlobusServiceBean.java | 5 ++++- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index b700e01b83d..31f074d5c19 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -805,22 +805,6 @@ public InputStream getAuxFileAsInputStream(String 
auxItemTag) throws IOException } } - public InputStream getFileAsInputStream(String destinationKey) throws IOException { - - try { - GetObjectRequest o = new GetObjectRequest(bucketName, destinationKey); - logger.info("Bucket name " + o.getBucketName()); - S3Object s3object = s3.getObject(o); - logger.info("Key " + s3object.getKey()); - if (s3object != null) { - return s3object.getObjectContent(); - } - return null; - } catch (AmazonClientException ase) { - logger.fine("Caught an AmazonClientException in S3AccessIO.getAuxFileAsInputStream() (object not cached?): " + ase.getMessage()); - return null; - } - } String getDestinationKey(String auxItemTag) throws IOException { if (isDirectAccess() || dvObject instanceof DataFile) { diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d4398e85b30..4971802307e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -691,6 +691,7 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + DatasetVersion workingVersion = dataset.getEditVersion(); if (workingVersion.getCreateTime() != null) { @@ -731,8 +732,10 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th String fileName = filePath.split("/")[filePath.split("/").length - 1]; logger.info("fileName " + fileName); String fullPath = datasetSIO.getStorageLocation() + "/" + fileName; + logger.info("Key " + s3ObjectKey); - InputStream in = datasetSIO.getAuxFileAsInputStream(s3ObjectKey); + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + InputStream in = dataFileStorageIO.getInputStream(); String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); From 
432f9cbba611e9fe6793212ecbed3145dc2ac016 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 23 Oct 2020 10:45:24 -0400 Subject: [PATCH 024/161] add logs --- .../java/edu/harvard/iq/dataverse/EditDatafilesPage.java | 1 + .../edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 2 -- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 6 ++---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index b28d5f2c471..a485ca125ca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -3168,6 +3168,7 @@ public void startTaskList() throws MalformedURLException { } logger.info(httpString); + logger.info("Moving to Dataset page"); PrimeFaces.current().executeScript(httpString); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 31f074d5c19..0c4558edb30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -792,7 +792,6 @@ public OutputStream getOutputStream() throws UnsupportedDataAccessOperationExcep @Override public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { String destinationKey = getDestinationKey(auxItemTag); - logger.info("Destination key " + destinationKey); try { S3Object s3object = s3.getObject(new GetObjectRequest(bucketName, destinationKey)); if (s3object != null) { @@ -805,7 +804,6 @@ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException } } - String getDestinationKey(String auxItemTag) throws IOException { if (isDirectAccess() || dvObject instanceof DataFile) { return getMainFileKey() + "." 
+ auxItemTag; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 4971802307e..82b22e87020 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -728,18 +728,16 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String filePath = s3ObjectKey; - logger.info("Storage location " + datasetSIO.getStorageLocation()); String fileName = filePath.split("/")[filePath.split("/").length - 1]; - logger.info("fileName " + fileName); String fullPath = datasetSIO.getStorageLocation() + "/" + fileName; - logger.info("Key " + s3ObjectKey); + logger.info("Full path " + fullPath); StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); InputStream in = dataFileStorageIO.getInputStream(); String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); - + logger.info("The checksum is " + checksumVal); if ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == file already exists "); } else if (filePath.contains("cached") || filePath.contains(".thumb")) { From 0591f7ff1c2c84b2e4fc7dbf4a5d150bcb919c76 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 23 Oct 2020 12:50:39 -0400 Subject: [PATCH 025/161] publishing globus not minor --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index af3b60fca91..ab7e553c7af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2671,7 +2671,7 @@ private String releaseDataset(boolean minor) { boolean globus = checkForGlobus(); if ( result.isCompleted() ) { - if (globus) { + if (!minor && globus) { if (!globusService.giveGlobusPublicPermissions(dataset.getId().toString())) { JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.publishGlobusFailure.details")); } else { @@ -2681,7 +2681,7 @@ private String releaseDataset(boolean minor) { JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.message.publishSuccess")); } } else { - if (globus) { + if (!minor && globus) { globusService.giveGlobusPublicPermissions(dataset.getId().toString()); } JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.locked.message"), BundleUtil.getStringFromBundle("dataset.locked.message.details")); From e7e0742a1dacd383cd287ca82edabd69bff850a2 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 23 Oct 2020 13:37:38 -0400 Subject: [PATCH 026/161] add message --- src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index a485ca125ca..37eff2ea8a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -3152,6 +3152,8 @@ public String getClientId() { public void startTaskList() throws MalformedURLException { + JH.addMessage(FacesMessage.SEVERITY_WARN, "Registering files in Dataset", + "In progress"); AuthenticatedUser user = (AuthenticatedUser) session.getUser(); globusServiceBean.globusFinishTransfer(dataset, user); HttpServletRequest origRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); From f16711856c30f9e67b1536b8a73cae576d561296 Mon Sep 17 00:00:00 2001 From: lubitchv 
Date: Fri, 23 Oct 2020 14:07:59 -0400 Subject: [PATCH 027/161] remove message --- src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 37eff2ea8a3..5b73de0fbf4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -3151,9 +3151,7 @@ public String getClientId() { } public void startTaskList() throws MalformedURLException { - - JH.addMessage(FacesMessage.SEVERITY_WARN, "Registering files in Dataset", - "In progress"); + AuthenticatedUser user = (AuthenticatedUser) session.getUser(); globusServiceBean.globusFinishTransfer(dataset, user); HttpServletRequest origRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); From 28c7ba0dbdcc6a02cee676629b5790a870a132a3 Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 26 Nov 2020 09:36:29 -0500 Subject: [PATCH 028/161] testing S3 url connection --- .../iq/dataverse/dataaccess/S3AccessIO.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 0c4558edb30..75d47fd0228 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1106,12 +1106,22 @@ private static AmazonS3 getClient(String driverId) { String s3CERegion = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-region", "dataverse"); // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. 
- if (!s3CEUrl.isEmpty()) { - //s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); - BasicAWSCredentials creds = new BasicAWSCredentials("14e4f8b986874272894d527a16c06473", "f7b28fbec4984588b0da7d0288ce67f6"); - s3CB.withCredentials(new AWSStaticCredentialsProvider(creds)); - s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl.trim(), s3CERegion.trim())); - } + if (!s3CEUrl.isEmpty()) { + logger.info("s3CEURL =============== " + s3CEUrl); + logger.info("s3CERegion =============== " + s3CERegion); + try { + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + logger.info(" ==================== Successfully connected ================== "); + } + catch(Exception e) { + logger.info(" ==================== Read the exception ================== "); + e.printStackTrace(); + BasicAWSCredentials creds = new BasicAWSCredentials("14e4f8b986874272894d527a16c06473", "f7b28fbec4984588b0da7d0288ce67f6"); + s3CB.withCredentials(new AWSStaticCredentialsProvider(creds)); + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl.trim(), s3CERegion.trim())); + logger.info(" ==================== Read the exception ================== "); + } + } /** * Pass in a boolean value if path style access should be used within the S3 client. * Anything but case-insensitive "true" will lead to value of false, which is default value, too. 
From f5bdbaf6bf838ae0cfd552a049e19e31e757f98e Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 26 Nov 2020 10:26:22 -0500 Subject: [PATCH 029/161] testing S3 url connection --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 75d47fd0228..585ee18f978 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1107,7 +1107,7 @@ private static AmazonS3 getClient(String driverId) { // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. if (!s3CEUrl.isEmpty()) { - logger.info("s3CEURL =============== " + s3CEUrl); + logger.info("test s3CEURL =============== " + s3CEUrl); logger.info("s3CERegion =============== " + s3CERegion); try { s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); From 615c1ffebe8a9c072a928b92a60b7436d5eb0f68 Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 26 Nov 2020 10:27:47 -0500 Subject: [PATCH 030/161] testing S3 url connection --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 585ee18f978..75d47fd0228 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1107,7 +1107,7 @@ private static AmazonS3 getClient(String driverId) { // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. 
if (!s3CEUrl.isEmpty()) { - logger.info("test s3CEURL =============== " + s3CEUrl); + logger.info("s3CEURL =============== " + s3CEUrl); logger.info("s3CERegion =============== " + s3CERegion); try { s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); From 21174758ed3f7964599819d9a06570dc775f6e32 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 30 Nov 2020 16:21:18 -0500 Subject: [PATCH 031/161] DAT353 - removed hardcoded credential information --- .../edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 75d47fd0228..bf3365330ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1116,9 +1116,9 @@ private static AmazonS3 getClient(String driverId) { catch(Exception e) { logger.info(" ==================== Read the exception ================== "); e.printStackTrace(); - BasicAWSCredentials creds = new BasicAWSCredentials("14e4f8b986874272894d527a16c06473", "f7b28fbec4984588b0da7d0288ce67f6"); - s3CB.withCredentials(new AWSStaticCredentialsProvider(creds)); - s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl.trim(), s3CERegion.trim())); + //BasicAWSCredentials creds = new BasicAWSCredentials("14e4f8b986874272894d527a16c06473", "f7b28fbec4984588b0da7d0288ce67f6"); + //s3CB.withCredentials(new AWSStaticCredentialsProvider(creds)); + //s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl.trim(), s3CERegion.trim())); logger.info(" ==================== Read the exception ================== "); } } From fc2adb460495403794a648f89d85becb28ee494b Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 22 Dec 2020 10:54:01 -0500 Subject: [PATCH 032/161] GlobusAPI call 
refactored --- .../harvard/iq/dataverse/api/GlobusApi.java | 370 ++++++------------ .../dataverse/globus/GlobusServiceBean.java | 16 + 2 files changed, 145 insertions(+), 241 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index ff5c3c6eb51..5eca9345b20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -16,20 +17,40 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; - +import edu.harvard.iq.dataverse.util.json.JsonParseException; +import org.apache.http.HttpEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.util.EntityUtils; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; +import org.glassfish.jersey.media.multipart.FormDataContentDisposition; +import org.glassfish.jersey.media.multipart.FormDataParam; +import org.json.JSONObject; import javax.ejb.EJB; import 
javax.ejb.EJBException; import javax.ejb.Stateless; import javax.inject.Inject; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonPatch; +import javax.json.stream.JsonParsingException; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.*; +import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.*; @@ -58,289 +79,156 @@ public class GlobusApi extends AbstractApiBean { @POST - @Path("{datasetId}") - public Response globus(@PathParam("datasetId") String datasetId ) { - - logger.info("Async:======Start Async Tasklist == dataset id :"+ datasetId ); - Dataset dataset = null; + @Path("{id}/add") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response globus(@PathParam("id") String datasetId, + @FormDataParam("jsonData") String jsonData + ) { + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; try { - dataset = findDatasetOrDie(datasetId); - + authUser = findUserOrDie(); } catch (WrappedResponse ex) { - return ex.getResponse(); - } - User apiTokenUser = checkAuth(dataset); - - if (apiTokenUser == null) { - return unauthorized("Access denied"); + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); } - try { - - - /* - String lockInfoMessage = "Globus upload in progress"; - DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, apiTokenUser != null ? 
((AuthenticatedUser)apiTokenUser).getId() : null, lockInfoMessage); - if (lock != null) { - dataset.addLock(lock); - } else { - logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - } - */ - - List fileMetadatas = new ArrayList<>(); - - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - - StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - - - String task_id = null; - - String timeWhenAsyncStarted = sdf.format(new Date(System.currentTimeMillis() + (5 * 60 * 60 * 1000))); // added 5 hrs to match output from globus api - - String endDateTime = sdf.format(new Date(System.currentTimeMillis() + (4 * 60 * 60 * 1000))); // the tasklist will be monitored for 4 hrs - Calendar cal1 = Calendar.getInstance(); - cal1.setTime(sdf.parse(endDateTime)); - - - do { - try { - String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - - task_id = globusServiceBean.getTaskList(basicGlobusToken, dataset.getIdentifierForFileStorage(), timeWhenAsyncStarted); - //Thread.sleep(10000); - String currentDateTime = sdf.format(new Date(System.currentTimeMillis())); - Calendar cal2 = Calendar.getInstance(); - cal2.setTime(sdf.parse(currentDateTime)); - - if (cal2.after(cal1)) { - logger.info("Async:======Time exceeded " + endDateTime + " ====== " + currentDateTime + " ==== datasetId :" + datasetId); - break; - } else if (task_id != null) { - break; - } - - } catch (Exception ex) { - ex.printStackTrace(); - logger.info(ex.getMessage()); - return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id" ); - } - - } while (task_id == null); - - - logger.info("Async:======Found matching task id " + task_id + " ==== datasetId :" + datasetId); - - - DatasetVersion workingVersion = dataset.getEditVersion(); - - if (workingVersion.getCreateTime() != null) { - workingVersion.setCreateTime(new Timestamp(new Date().getTime())); - } - - - String directory = 
dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - - System.out.println("Async:======= directory ==== " + directory+ " ==== datasetId :" + datasetId); - Map checksumMapOld = new HashMap<>(); - - Iterator fmIt = workingVersion.getFileMetadatas().iterator(); - - while (fmIt.hasNext()) { - FileMetadata fm = fmIt.next(); - if (fm.getDataFile() != null && fm.getDataFile().getId() != null) { - String chksum = fm.getDataFile().getChecksumValue(); - if (chksum != null) { - checksumMapOld.put(chksum, 1); - } - } - } - - List dFileList = new ArrayList<>(); - for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - - String s3ObjectKey = s3ObjectSummary.getKey(); - - String t = s3ObjectKey.replace(directory, ""); - - if (t.indexOf(".") > 0) { - long totalSize = s3ObjectSummary.getSize(); - String filePath = s3ObjectKey; - String checksumVal = s3ObjectSummary.getETag(); - - if ((checksumMapOld.get(checksumVal) != null)) { - logger.info("Async: ==== datasetId :" + datasetId + "======= filename ==== " + filePath + " == file already exists "); - } else if (!filePath.contains("cached")) { + // ------------------------------------- + // (2) Get the User ApiToken + // ------------------------------------- + ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser)authUser); - logger.info("Async: ==== datasetId :" + datasetId + "======= filename ==== " + filePath + " == new file "); - try { + // ------------------------------------- + // (3) Get the Dataset Id + // ------------------------------------- + Dataset dataset; - DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); //MIME_TYPE_GLOBUS - datafile.setModificationTime(new Timestamp(new Date().getTime())); - datafile.setCreateDate(new Timestamp(new Date().getTime())); - datafile.setPermissionModificationTime(new Timestamp(new Date().getTime())); + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return 
wr.getResponse(); + } - FileMetadata fmd = new FileMetadata(); + // ------------------------------------- + // (4) Parse JsonData + // ------------------------------------- - String fileName = filePath.split("/")[filePath.split("/").length - 1]; - fmd.setLabel(fileName); - fmd.setDirectoryLabel(filePath.replace(directory, "").replace(File.separator + fileName, "")); + String taskIdentifier = null; - fmd.setDataFile(datafile); + msgt("******* (api) jsonData: " + jsonData); - datafile.getFileMetadatas().add(fmd); + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); + } - FileUtil.generateS3PackageStorageIdentifier(datafile); - logger.info("Async: ==== datasetId :" + datasetId + "======= filename ==== " + filePath + " == added to datafile, filemetadata "); + // ------------------------------------- + // (5) Get taskIdentifier + // ------------------------------------- - try { - // We persist "SHA1" rather than "SHA-1". 
- datafile.setChecksumType(DataFile.ChecksumType.SHA1); - datafile.setChecksumValue(checksumVal); - } catch (Exception cksumEx) { - logger.info("Async: ==== datasetId :" + datasetId + "======Could not calculate checksumType signature for the new file "); - } - datafile.setFilesize(totalSize); + taskIdentifier = jsonObject.getString("taskIdentifier"); + msgt("******* (api) newTaskIdentifier: " + taskIdentifier); - dFileList.add(datafile); + // ------------------------------------- + // (6) Wait until task completion + // ------------------------------------- - } catch (Exception ioex) { - logger.info("Async: ==== datasetId :" + datasetId + "======Failed to process and/or save the file " + ioex.getMessage()); - return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to do task_list" ); + boolean success = false; - } - } - } + do { + try { + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + basicGlobusToken = "ODA0ODBhNzEtODA5ZC00ZTJhLWExNmQtY2JkMzA1NTk0ZDdhOmQvM3NFd1BVUGY0V20ra2hkSkF3NTZMWFJPaFZSTVhnRmR3TU5qM2Q3TjA9"; + msgt("******* (api) basicGlobusToken: " + basicGlobusToken); + AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); + + success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier ) ; + msgt("******* (api) success: " + success); + + } catch (Exception ex) { + ex.printStackTrace(); + logger.info(ex.getMessage()); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id" ); } -/* - DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); - if (dcmLock == null) { - logger.info("Dataset not locked for DCM upload"); - } else { - datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); - dataset.removeLock(dcmLock); - } - logger.info(" ======= Remove Dataset Lock "); -*/ + } while (!success); - List filesAdded = new ArrayList<>(); + // ------------------------------------- + // (6) Parse files 
information from jsondata and add to dataset + // ------------------------------------- - if (dFileList != null && dFileList.size() > 0) { + try { + String directory = null; + StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - // Dataset dataset = version.getDataset(); + directory = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - for (DataFile dataFile : dFileList) { + JsonArray filesJson = jsonObject.getJsonArray("files"); - if (dataFile.getOwner() == null) { - dataFile.setOwner(dataset); + if (filesJson != null) { + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { - workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); - dataFile.getFileMetadata().setDatasetVersion(workingVersion); - dataset.getFiles().add(dataFile); + for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { } - filesAdded.add(dataFile); + String storageIdentifier = fileJson.getString("storageIdentifier"); - } + String s = datasetSIO.getStorageLocation(); - logger.info("Async: ==== datasetId :" + datasetId + " ===== Done! 
Finished saving new files to the dataset."); - } - - fileMetadatas.clear(); - for (DataFile addedFile : filesAdded) { - fileMetadatas.add(addedFile.getFileMetadata()); - } - filesAdded = null; + String fullPath = s + "/" + storageIdentifier.replace("s3://", ""); - if (workingVersion.isDraft()) { + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + InputStream in = dataFileStorageIO.getInputStream(); - logger.info("Async: ==== datasetId :" + datasetId + " ==== inside draft version "); + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - Timestamp updateTime = new Timestamp(new Date().getTime()); + JsonPatch path = Json.createPatchBuilder().add("/md5Hash",checksumVal).build(); + fileJson = path.apply(fileJson); - workingVersion.setLastUpdateTime(updateTime); - dataset.setModificationTime(updateTime); + String requestUrl = httpRequest.getRequestURL().toString() ; - - for (FileMetadata fileMetadata : fileMetadatas) { - - if (fileMetadata.getDataFile().getCreateDate() == null) { - fileMetadata.getDataFile().setCreateDate(updateTime); - fileMetadata.getDataFile().setCreator((AuthenticatedUser) apiTokenUser); - } - fileMetadata.getDataFile().setModificationTime(updateTime); + ProcessBuilder processBuilder = new ProcessBuilder(); + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + requestUrl.substring(0, requestUrl.indexOf("/globus")) + "/datasets/:persistentId/add?persistentId=doi:"+ directory + " -F jsonData='"+fileJson.toString() +"'"; + msgt("*******====command ==== " + command); + processBuilder.command("bash", "-c", command); + msgt("*******=== Start api/datasets/:persistentId/add call"); + Process process = processBuilder.start(); } - - - } else { - logger.info("Async: ==== datasetId :" + datasetId + " ==== inside released version "); - - for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { - for (FileMetadata fileMetadata : fileMetadatas) { - if 
(fileMetadata.getDataFile().getStorageIdentifier() != null) { - - if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion.getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { - workingVersion.getFileMetadatas().set(i, fileMetadata); - } - } - } - } - - } - - try { - Command cmd; - logger.info("Async: ==== datasetId :" + datasetId + " ======= UpdateDatasetVersionCommand START in globus function "); - cmd = new UpdateDatasetVersionCommand(dataset,new DataverseRequest(apiTokenUser, (HttpServletRequest) null)); - ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); - //new DataverseRequest(authenticatedUser, (HttpServletRequest) null) - //dvRequestService.getDataverseRequest() - commandEngine.submit(cmd); - } catch (CommandException ex) { - logger.log(Level.WARNING, "Async: ==== datasetId :" + datasetId + "======CommandException updating DatasetVersion from batch job: " + ex.getMessage()); - return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to do task_list" ); - } - - logger.info("Async: ==== datasetId :" + datasetId + " ======= GLOBUS ASYNC CALL COMPLETED SUCCESSFULLY "); - - return ok("Async: ==== datasetId :" + datasetId + ": Finished task_list"); - } catch(Exception e) { + } catch (Exception e) { String message = e.getMessage(); - - logger.info("Async: ==== datasetId :" + datasetId + " ======= GLOBUS ASYNC CALL Exception ============== " + message); + msgt("******* UNsuccessfully completed " + message); + msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); e.printStackTrace(); - return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to do task_list" ); - //return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. 
Message was '" + message + "'."); - } - + } + msgt("******* successfully completed " ); + return ok("Async: ==== datasetId :" + dataset.getId() + ": will add files to the table"); } - private User checkAuth(Dataset dataset) { - - User apiTokenUser = null; - - try { - apiTokenUser = findUserOrDie(); - } catch (WrappedResponse wr) { - apiTokenUser = null; - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (apiTokenUser != null) { - // used in an API context - if (!permissionService.requestOn(createDataverseRequest(apiTokenUser), dataset.getOwner()).has(Permission.EditDataset)) { - apiTokenUser = null; - } - } + private void msg(String m) { + //System.out.println(m); + logger.fine(m); + } - return apiTokenUser; + private void dashes() { + msg("----------------"); + } + private void msgt(String m) { + //dashes(); + msg(m); + //dashes(); } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 82b22e87020..25ea9735087 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -435,6 +435,22 @@ public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId return false; } + public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId ) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId+"/successful_transfers"); + + MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), + "GET", null); + + Transferlist transferlist = null; + + if (result.status == 200) { + logger.info(" SUCCESS ====== " ); + return true; + } + return false; + } + public AccessToken getClientToken(String basicGlobusToken) throws MalformedURLException { From 
d9eaeede17397089e2f8b5a81c1be8a0788c204c Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 4 Jan 2021 14:06:17 -0500 Subject: [PATCH 033/161] DAT353 - removed hardcoded credential information --- src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index 5eca9345b20..9ab66c27162 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -218,7 +218,7 @@ public Response globus(@PathParam("id") String datasetId, private void msg(String m) { //System.out.println(m); - logger.fine(m); + logger.info(m); } private void dashes() { From c89400db0103bea1d922e62a6dcdaba4e11352ad Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 4 Jan 2021 15:08:29 -0500 Subject: [PATCH 034/161] correction to api/datasets/$id/add call --- src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index 9ab66c27162..6eb83d2ce25 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -197,7 +197,8 @@ public Response globus(@PathParam("id") String datasetId, String requestUrl = httpRequest.getRequestURL().toString() ; ProcessBuilder processBuilder = new ProcessBuilder(); - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + requestUrl.substring(0, requestUrl.indexOf("/globus")) + "/datasets/:persistentId/add?persistentId=doi:"+ directory + " -F jsonData='"+fileJson.toString() +"'"; + + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequest.getProtocol() +"//" + 
httpRequest.getServerName() + "/api/datasets/:persistentId/add?persistentId=doi:"+ directory + " -F jsonData='"+fileJson.toString() +"'"; msgt("*******====command ==== " + command); processBuilder.command("bash", "-c", command); msgt("*******=== Start api/datasets/:persistentId/add call"); From dea2dad734ed2f6d5a1964fb2155ce8699e1b7b3 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 4 Jan 2021 15:28:44 -0500 Subject: [PATCH 035/161] correction to api/datasets/$id/add call --- src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index 6eb83d2ce25..be05d5389f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -198,7 +198,7 @@ public Response globus(@PathParam("id") String datasetId, ProcessBuilder processBuilder = new ProcessBuilder(); - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequest.getProtocol() +"//" + httpRequest.getServerName() + "/api/datasets/:persistentId/add?persistentId=doi:"+ directory + " -F jsonData='"+fileJson.toString() +"'"; + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST https://" + httpRequest.getServerName() + "/api/datasets/:persistentId/add?persistentId=doi:"+ directory + " -F jsonData='"+fileJson.toString() +"'"; msgt("*******====command ==== " + command); processBuilder.command("bash", "-c", command); msgt("*******=== Start api/datasets/:persistentId/add call"); From d9be3685d231cbe22ed575a4a0a93d3d1ba630ac Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 4 Jan 2021 15:30:22 -0500 Subject: [PATCH 036/161] DAT353 - removed hardcoded credential information --- src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index be05d5389f3..2e4f475ae90 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -214,7 +214,7 @@ public Response globus(@PathParam("id") String datasetId, } msgt("******* successfully completed " ); - return ok("Async: ==== datasetId :" + dataset.getId() + ": will add files to the table"); + return ok(" dataset Name :" + dataset.getDisplayName() + ": Files to this dataset will be added to the table and will display in the UI."); } private void msg(String m) { From 15362206545851a8252d0599442c6d53192eb8ac Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 5 Jan 2021 10:30:14 -0500 Subject: [PATCH 037/161] calculate mimeType --- .../harvard/iq/dataverse/api/GlobusApi.java | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index 2e4f475ae90..9d4384fd117 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -23,6 +23,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import org.apache.commons.lang.StringUtils; import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; @@ -189,9 +190,27 @@ public Response globus(@PathParam("id") String datasetId, StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); InputStream in = dataFileStorageIO.getInputStream(); + + String suppliedContentType = fileJson.getString("contentType"); + String fileName = fileJson.getString("fileName"); + // Default to suppliedContentType if set or the 
overall undetermined default if a contenttype isn't supplied + String finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; + String type = FileUtil.determineFileTypeByExtension(fileName); + if (!StringUtils.isBlank(type)) { + //Use rules for deciding when to trust browser supplied type + if (FileUtil.useRecognizedType(finalType, type)) { + finalType = type; + } + logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); + } + + JsonPatch path = Json.createPatchBuilder().add("/mimeType",finalType).build(); + fileJson = path.apply(fileJson); + + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - JsonPatch path = Json.createPatchBuilder().add("/md5Hash",checksumVal).build(); + path = Json.createPatchBuilder().add("/md5Hash",checksumVal).build(); fileJson = path.apply(fileJson); String requestUrl = httpRequest.getRequestURL().toString() ; From 99a58235f78b4f79ea1e14faa590fe651c7d5d0a Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 5 Jan 2021 10:30:40 -0500 Subject: [PATCH 038/161] changed method of public --- src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 96006bdf735..88c175db8f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1133,7 +1133,7 @@ public static List createDataFiles(DatasetVersion version, InputStream } // end createDataFiles - private static boolean useRecognizedType(String suppliedContentType, String recognizedType) { + public static boolean useRecognizedType(String suppliedContentType, String recognizedType) { // is it any better than the type that was supplied to us, // if any? // This is not as trivial a task as one might expect... 
From 73942b96bd4a78451d7c88895cdf2dc66e57f826 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 5 Jan 2021 17:04:57 -0500 Subject: [PATCH 039/161] dataset lock issue while submitting multiple files to datasets/:persistentid/add api - Debugging --- .../harvard/iq/dataverse/api/GlobusApi.java | 48 +++++++++++++------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index 9d4384fd117..c39f65fa497 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -101,7 +101,7 @@ public Response globus(@PathParam("id") String datasetId, // ------------------------------------- // (2) Get the User ApiToken // ------------------------------------- - ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser)authUser); + ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); // ------------------------------------- // (3) Get the Dataset Id @@ -151,13 +151,13 @@ public Response globus(@PathParam("id") String datasetId, msgt("******* (api) basicGlobusToken: " + basicGlobusToken); AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); - success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier ) ; + success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); msgt("******* (api) success: " + success); } catch (Exception ex) { ex.printStackTrace(); logger.info(ex.getMessage()); - return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id" ); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id"); } } while (!success); @@ -204,38 +204,58 @@ public Response globus(@PathParam("id") String datasetId, logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); } - JsonPatch path = 
Json.createPatchBuilder().add("/mimeType",finalType).build(); + JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); fileJson = path.apply(fileJson); - String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - path = Json.createPatchBuilder().add("/md5Hash",checksumVal).build(); + path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); fileJson = path.apply(fileJson); - String requestUrl = httpRequest.getRequestURL().toString() ; + String requestUrl = httpRequest.getRequestURL().toString(); ProcessBuilder processBuilder = new ProcessBuilder(); - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST https://" + httpRequest.getServerName() + "/api/datasets/:persistentId/add?persistentId=doi:"+ directory + " -F jsonData='"+fileJson.toString() +"'"; + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST https://" + httpRequest.getServerName() + "/api/datasets/:persistentId/add?persistentId=doi:" + directory + " -F jsonData='" + fileJson.toString() + "'"; msgt("*******====command ==== " + command); - processBuilder.command("bash", "-c", command); + + + //processBuilder.command("bash", "-c", command); msgt("*******=== Start api/datasets/:persistentId/add call"); - Process process = processBuilder.start(); + //Process process = processBuilder.start(); + + + new Thread(new Runnable() { + public void run() { + try { + processBuilder.command("bash", "-c", command); + Process process = processBuilder.start(); + } catch (Exception ex) { + logger.log(Level.SEVERE, "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + } + } + }).start(); + + } } + } catch (Exception e) { String message = e.getMessage(); - msgt("******* UNsuccessfully completed " + message); + msgt("******* Exception from globus API call " + message); msgt("******* 
datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); e.printStackTrace(); - } + } + //msgt("******* successfully completed " ); + return ok(" dataset Name :" + dataset.getDisplayName() + ": Files to this dataset will be added to the table and will display in the UI. Processing can take significant time for large datasets."); + - msgt("******* successfully completed " ); - return ok(" dataset Name :" + dataset.getDisplayName() + ": Files to this dataset will be added to the table and will display in the UI."); } + + private void msg(String m) { //System.out.println(m); logger.info(m); From fca67ffa0da72255fc291cfb7e0ffbabad52f71e Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 12 Jan 2021 11:01:59 -0500 Subject: [PATCH 040/161] DAT353 - removed hardcoded credential information --- .../harvard/iq/dataverse/api/GlobusApi.java | 229 +++++++++++++----- 1 file changed, 165 insertions(+), 64 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index c39f65fa497..f68498a502d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -13,21 +13,30 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; +import edu.harvard.iq.dataverse.datasetutility.DataFileTagException; +import edu.harvard.iq.dataverse.datasetutility.NoFilesException; +import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.globus.AccessToken; import 
edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import org.apache.commons.lang.StringUtils; import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.entity.mime.content.ContentBody; import org.apache.http.util.EntityUtils; import org.glassfish.jersey.media.multipart.FormDataBodyPart; import org.glassfish.jersey.media.multipart.FormDataContentDisposition; @@ -39,11 +48,10 @@ import javax.ejb.EJBException; import javax.ejb.Stateless; import javax.inject.Inject; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonObject; -import javax.json.JsonPatch; +import javax.json.*; import javax.json.stream.JsonParsingException; +import javax.persistence.NoResultException; +import javax.persistence.Query; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.*; import javax.ws.rs.core.MediaType; @@ -55,9 +63,16 @@ import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.*; +import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; + +import edu.harvard.iq.dataverse.api.Datasets; + +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; + @Stateless @Path("globus") public class GlobusApi extends AbstractApiBean { @@ -75,6 +90,10 @@ public class GlobusApi extends AbstractApiBean { @EJB 
PermissionServiceBean permissionService; + @EJB + IngestServiceBean ingestService; + + @Inject DataverseRequestServiceBean dvRequestService; @@ -84,7 +103,9 @@ public class GlobusApi extends AbstractApiBean { @Consumes(MediaType.MULTIPART_FORM_DATA) public Response globus(@PathParam("id") String datasetId, @FormDataParam("jsonData") String jsonData - ) { + ) + { + JsonArrayBuilder jarr = Json.createArrayBuilder(); // ------------------------------------- // (1) Get the user from the API key @@ -99,12 +120,7 @@ public Response globus(@PathParam("id") String datasetId, } // ------------------------------------- - // (2) Get the User ApiToken - // ------------------------------------- - ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); - - // ------------------------------------- - // (3) Get the Dataset Id + // (2) Get the Dataset Id // ------------------------------------- Dataset dataset; @@ -114,13 +130,14 @@ public Response globus(@PathParam("id") String datasetId, return wr.getResponse(); } + // ------------------------------------- - // (4) Parse JsonData + // (3) Parse JsonData // ------------------------------------- String taskIdentifier = null; - msgt("******* (api) jsonData: " + jsonData); + msgt("******* (api) jsonData 1: " + jsonData); JsonObject jsonObject = null; try (StringReader rdr = new StringReader(jsonData)) { @@ -131,7 +148,7 @@ public Response globus(@PathParam("id") String datasetId, } // ------------------------------------- - // (5) Get taskIdentifier + // (4) Get taskIdentifier // ------------------------------------- @@ -139,7 +156,7 @@ public Response globus(@PathParam("id") String datasetId, msgt("******* (api) newTaskIdentifier: " + taskIdentifier); // ------------------------------------- - // (6) Wait until task completion + // (5) Wait until task completion // ------------------------------------- boolean success = false; @@ -162,15 +179,25 @@ public Response globus(@PathParam("id") String datasetId, } while 
(!success); - // ------------------------------------- - // (6) Parse files information from jsondata and add to dataset - // ------------------------------------- - try { - String directory = null; + try + { StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - directory = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); + DataverseRequest dvRequest2 = createDataverseRequest(authUser); + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(dvRequest2, + ingestService, + datasetService, + fileService, + permissionSvc, + commandEngine, + systemConfig); + + // ------------------------------------- + // (6) Parse files information from jsondata + // calculate checksum + // determine mimetype + // ------------------------------------- JsonArray filesJson = jsonObject.getJsonArray("files"); @@ -182,75 +209,70 @@ public Response globus(@PathParam("id") String datasetId, } String storageIdentifier = fileJson.getString("storageIdentifier"); + String suppliedContentType = fileJson.getString("contentType"); + String fileName = fileJson.getString("fileName"); - String s = datasetSIO.getStorageLocation(); + String fullPath = datasetSIO.getStorageLocation() + "/" + storageIdentifier.replace("s3://", ""); - String fullPath = s + "/" + storageIdentifier.replace("s3://", ""); + String bucketName = System.getProperty("dataverse.files." 
+ storageIdentifier.split(":")[0] + ".bucket-name"); - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - InputStream in = dataFileStorageIO.getInputStream(); + String dbstorageIdentifier = storageIdentifier.split(":")[0] + "://" + bucketName + ":" + storageIdentifier.replace("s3://", ""); + Query query = em.createQuery("select object(o) from DvObject as o where o.storageIdentifier = :storageIdentifier"); + query.setParameter("storageIdentifier", dbstorageIdentifier); - String suppliedContentType = fileJson.getString("contentType"); - String fileName = fileJson.getString("fileName"); - // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied - String finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; - String type = FileUtil.determineFileTypeByExtension(fileName); - if (!StringUtils.isBlank(type)) { - //Use rules for deciding when to trust browser supplied type - if (FileUtil.useRecognizedType(finalType, type)) { - finalType = type; - } - logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); - } + msgt("******* dbstorageIdentifier :" + dbstorageIdentifier + " ======= query.getResultList().size()============== " + query.getResultList().size()); - JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); - fileJson = path.apply(fileJson); + if (query.getResultList().size() > 0) { - String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("Result " , " The datatable is not updated since the Storage Identifier already exists in dvObject. 
"); - path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); - fileJson = path.apply(fileJson); + jarr.add(fileoutput); + } else { - String requestUrl = httpRequest.getRequestURL().toString(); + // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied + String finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; + String type = FileUtil.determineFileTypeByExtension(fileName); + if (!StringUtils.isBlank(type)) { + //Use rules for deciding when to trust browser supplied type + if (FileUtil.useRecognizedType(finalType, type)) { + finalType = type; + } + logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); + } - ProcessBuilder processBuilder = new ProcessBuilder(); + JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); + fileJson = path.apply(fileJson); - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST https://" + httpRequest.getServerName() + "/api/datasets/:persistentId/add?persistentId=doi:" + directory + " -F jsonData='" + fileJson.toString() + "'"; - msgt("*******====command ==== " + command); + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + InputStream in = dataFileStorageIO.getInputStream(); + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); + fileJson = path.apply(fileJson); - //processBuilder.command("bash", "-c", command); - msgt("*******=== Start api/datasets/:persistentId/add call"); - //Process process = processBuilder.start(); + addGlobusFileToDataset(dataset, fileJson.toString(), addFileHelper, fileName, finalType, storageIdentifier); + JsonObject a1 = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); - new Thread(new Runnable() { - public void run() { - try { - processBuilder.command("bash", 
"-c", command); - Process process = processBuilder.start(); - } catch (Exception ex) { - logger.log(Level.SEVERE, "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); - } - } - }).start(); + JsonArray f1 = a1.getJsonArray("files"); + JsonObject file1 = f1.getJsonObject(0); + jarr.add(file1); + } } } - - } catch (Exception e) { String message = e.getMessage(); msgt("******* Exception from globus API call " + message); msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); e.printStackTrace(); } - //msgt("******* successfully completed " ); - return ok(" dataset Name :" + dataset.getDisplayName() + ": Files to this dataset will be added to the table and will display in the UI. Processing can take significant time for large datasets."); - + return ok(Json.createObjectBuilder().add("Files", jarr)); } @@ -271,4 +293,83 @@ private void msgt(String m) { //dashes(); } + public Response addGlobusFileToDataset( Dataset dataset, + String jsonData, AddReplaceFileHelper addFileHelper,String fileName, + String finalType, + String storageIdentifier + ){ + + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + + //------------------------------------ + // (1) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + //--------------------------------------- + // (2) Load up optional params via JSON + //--------------------------------------- + + OptionalFileParams optionalFileParams = null; + msgt("(api) jsonData 2: " + jsonData); + + try { + optionalFileParams = new OptionalFileParams(jsonData); + } catch (DataFileTagException ex) { + return error( 
Response.Status.BAD_REQUEST, ex.getMessage()); + } + + + //------------------- + // (3) Create the AddReplaceFileHelper object + //------------------- + msg("ADD!"); + + //------------------- + // (4) Run "runAddFileByDatasetId" + //------------------- + addFileHelper.runAddFileByDataset(dataset, + fileName, + finalType, + storageIdentifier, + null, + optionalFileParams); + + + if (addFileHelper.hasError()){ + return error(addFileHelper.getHttpErrorCode(), addFileHelper.getErrorMessagesAsString("\n")); + }else{ + String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); + try { + //msgt("as String: " + addFileHelper.getSuccessResult()); + + logger.fine("successMsg: " + successMsg); + String duplicateWarning = addFileHelper.getDuplicateFileWarning(); + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); + } else { + return ok(addFileHelper.getSuccessResultAsJsonObjectBuilder()); + } + + //"Look at that! You added a file! (hey hey, it may have worked)"); + } catch (NoFilesException ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); + + } + } + + } // end: addFileToDataset + } From 073d97e0cfc72301e9df2077f7832217ef4daaa7 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 12 Jan 2021 13:15:07 -0500 Subject: [PATCH 041/161] restructured the API response object --- .../harvard/iq/dataverse/api/GlobusApi.java | 100 ++++++++++++++++-- 1 file changed, 93 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index f68498a502d..078da050f28 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -228,10 +228,10 @@ public Response globus(@PathParam("id") String datasetId, JsonObjectBuilder fileoutput= Json.createObjectBuilder() .add("storageIdentifier " , storageIdentifier) - .add("Result " , " The datatable is not updated since the Storage Identifier already exists in dvObject. "); + .add("message " , " The datatable is not updated since the Storage Identifier already exists in dvObject. "); jarr.add(fileoutput); - } else { + } else { // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied String finalType = StringUtils.isBlank(suppliedContentType) ? 
FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; @@ -254,15 +254,99 @@ public Response globus(@PathParam("id") String datasetId, path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); fileJson = path.apply(fileJson); - addGlobusFileToDataset(dataset, fileJson.toString(), addFileHelper, fileName, finalType, storageIdentifier); + //addGlobusFileToDataset(dataset, fileJson.toString(), addFileHelper, fileName, finalType, storageIdentifier); - JsonObject a1 = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); - JsonArray f1 = a1.getJsonArray("files"); - JsonObject file1 = f1.getJsonObject(0); + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + + //------------------------------------ + // (1) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + //--------------------------------------- + // (2) Load up optional params via JSON + //--------------------------------------- + + OptionalFileParams optionalFileParams = null; + msgt("(api) jsonData 2: " + fileJson.toString()); + + try { + optionalFileParams = new OptionalFileParams(fileJson.toString()); + } catch (DataFileTagException ex) { + return error( Response.Status.BAD_REQUEST, ex.getMessage()); + } + + + //------------------- + // (3) Create the AddReplaceFileHelper object + //------------------- + msg("ADD!"); + + //------------------- + // (4) Run "runAddFileByDatasetId" + //------------------- + addFileHelper.runAddFileByDataset(dataset, + fileName, + finalType, + storageIdentifier, + null, + optionalFileParams); - jarr.add(file1); + if (addFileHelper.hasError()){ + + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + 
.add("storageIdentifier " , storageIdentifier) + .add("error Code: " ,addFileHelper.getHttpErrorCode().toString()) + .add("message " , addFileHelper.getErrorMessagesAsString("\n")); + + jarr.add(fileoutput); + + }else{ + String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); + + JsonObject a1 = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); + + JsonArray f1 = a1.getJsonArray("files"); + JsonObject file1 = f1.getJsonObject(0); + + try { + //msgt("as String: " + addFileHelper.getSuccessResult()); + + logger.fine("successMsg: " + successMsg); + String duplicateWarning = addFileHelper.getDuplicateFileWarning(); + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + // return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("warning message: " ,addFileHelper.getDuplicateFileWarning()) + .add("message " , file1); + jarr.add(fileoutput); + + } else { + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("message " , file1); + jarr.add(fileoutput); + } + + //"Look at that! You added a file! (hey hey, it may have worked)"); + } catch (Exception ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); + } + } } } } @@ -370,6 +454,8 @@ public Response addGlobusFileToDataset( Dataset dataset, } } + + } // end: addFileToDataset } From b84587bf01ec7ccd08e0a9b0ede0b2c881702cd9 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 18 Jan 2021 11:47:51 -0500 Subject: [PATCH 042/161] moved the globus api into Datasets.java --- .../harvard/iq/dataverse/api/Datasets.java | 291 +++++++++++++++++- 1 file changed, 287 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 655cdafe04c..25c80f48e47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -25,6 +25,9 @@ import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.UserNotificationServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.globus.AccessToken; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.RoleAssignee; @@ -107,6 +110,7 @@ import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import com.amazonaws.services.s3.model.S3ObjectSummary; import java.io.IOException; import java.io.InputStream; import java.io.StringReader; @@ -132,6 +136,7 @@ import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.json.JsonReader; +import javax.json.JsonPatch; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.Consumes; @@ -157,6 +162,8 @@ import 
com.amazonaws.services.s3.model.PartETag; import java.util.Map.Entry; +import javax.persistence.Query; +import org.apache.commons.lang.StringUtils; @Path("datasets") public class Datasets extends AbstractApiBean { @@ -170,6 +177,9 @@ public class Datasets extends AbstractApiBean { @EJB DataverseServiceBean dataverseService; + + @EJB + GlobusServiceBean globusServiceBean; @EJB UserNotificationServiceBean userNotificationService; @@ -1727,16 +1737,20 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, // ------------------------------------- // (1) Get the user from the API key // ------------------------------------- + + msgt("**** BEFORE STEP 1 " ); User authUser; try { authUser = findUserOrDie(); + msgt("**** IN STEP 1 : " + authUser.getIdentifier() + " : "); } catch (WrappedResponse ex) { return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") ); } - - + + msgt("**** AFTER STEP 1 " ); + msgt("**** BEFORE STEP 2 " ); // ------------------------------------- // (2) Get the Dataset Id // @@ -1748,7 +1762,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } catch (WrappedResponse wr) { return wr.getResponse(); } - + msgt("**** AFTER STEP 2 " ); //------------------------------------ // (2a) Make sure dataset does not have package file // @@ -1857,7 +1871,6 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } // end: addFileToDataset - private void msg(String m){ //System.out.println(m); logger.fine(m); @@ -1872,6 +1885,9 @@ private void msgt(String m){ public static T handleVersion( String versionId, DsVersionHandler hdl ) throws WrappedResponse { + + logger.info("**** DEBUG handleVersion " ); + switch (versionId) { case ":latest": return hdl.handleLatest(); case ":draft": return hdl.handleDraft(); @@ -1894,6 +1910,8 @@ public static T handleVersion( String versionId, DsVersionHandler hdl ) } private DatasetVersion getDatasetVersionOrDie( final 
DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse { + logger.info("**** DEBUG getDatasetVersionOrDie " ); + DatasetVersion dsv = execCommand( handleVersion(versionNumber, new DsVersionHandler>(){ @Override @@ -2287,5 +2305,270 @@ public Response resetFileStore(@PathParam("identifier") String dvIdtf, datasetService.merge(dataset); return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); } + + + + @POST + @Path("{id}/addglobusFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response globus(@PathParam("id") String datasetId, + @FormDataParam("jsonData") String jsonData + ) + { + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + + // ------------------------------------- + // (3) Parse JsonData + // ------------------------------------- + + String taskIdentifier = null; + + msgt("******* (api) jsonData 1: " + jsonData); + + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); + } + + // ------------------------------------- + // (4) Get taskIdentifier + // ------------------------------------- + + + taskIdentifier = jsonObject.getString("taskIdentifier"); + msgt("******* (api) newTaskIdentifier: " + taskIdentifier); + + // ------------------------------------- + // (5) Wait until task completion + // ------------------------------------- + + boolean success = false; + + do { + try { + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + basicGlobusToken = "ODA0ODBhNzEtODA5ZC00ZTJhLWExNmQtY2JkMzA1NTk0ZDdhOmQvM3NFd1BVUGY0V20ra2hkSkF3NTZMWFJPaFZSTVhnRmR3TU5qM2Q3TjA9"; + msgt("******* (api) basicGlobusToken: " + basicGlobusToken); + AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); + + success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); + msgt("******* (api) success: " + success); + + } catch (Exception ex) { + ex.printStackTrace(); + logger.info(ex.getMessage()); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id"); + } + + } while (!success); + + + try + { + StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + + DataverseRequest dvRequest2 = createDataverseRequest(authUser); + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(dvRequest2, + ingestService, + datasetService, + fileService, + permissionSvc, + commandEngine, + systemConfig); + + // ------------------------------------- + // (6) Parse files information from jsondata + // calculate checksum + // determine mimetype + // ------------------------------------- + + JsonArray filesJson = jsonObject.getJsonArray("files"); + + if (filesJson != null) { + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + + for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { + + } + + String storageIdentifier = fileJson.getString("storageIdentifier"); + String suppliedContentType = 
fileJson.getString("contentType"); + String fileName = fileJson.getString("fileName"); + + String fullPath = datasetSIO.getStorageLocation() + "/" + storageIdentifier.replace("s3://", ""); + + String bucketName = System.getProperty("dataverse.files." + storageIdentifier.split(":")[0] + ".bucket-name"); + + String dbstorageIdentifier = storageIdentifier.split(":")[0] + "://" + bucketName + ":" + storageIdentifier.replace("s3://", ""); + + Query query = em.createQuery("select object(o) from DvObject as o where o.storageIdentifier = :storageIdentifier"); + query.setParameter("storageIdentifier", dbstorageIdentifier); + + msgt("******* dbstorageIdentifier :" + dbstorageIdentifier + " ======= query.getResultList().size()============== " + query.getResultList().size()); + + + if (query.getResultList().size() > 0) { + + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("message " , " The datatable is not updated since the Storage Identifier already exists in dvObject. "); + + jarr.add(fileoutput); + } else { + + // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied + String finalType = StringUtils.isBlank(suppliedContentType) ? 
FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; + String type = FileUtil.determineFileTypeByExtension(fileName); + if (!StringUtils.isBlank(type)) { + //Use rules for deciding when to trust browser supplied type + if (FileUtil.useRecognizedType(finalType, type)) { + finalType = type; + } + logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); + } + + JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); + fileJson = path.apply(fileJson); + + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + InputStream in = dataFileStorageIO.getInputStream(); + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + + path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); + fileJson = path.apply(fileJson); + + //addGlobusFileToDataset(dataset, fileJson.toString(), addFileHelper, fileName, finalType, storageIdentifier); + + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + + //------------------------------------ + // (1) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + //--------------------------------------- + // (2) Load up optional params via JSON + //--------------------------------------- + + OptionalFileParams optionalFileParams = null; + msgt("(api) jsonData 2: " + fileJson.toString()); + + try { + optionalFileParams = new OptionalFileParams(fileJson.toString()); + } catch (DataFileTagException ex) { + return error( Response.Status.BAD_REQUEST, ex.getMessage()); + } + + + //------------------- + // (3) Create the AddReplaceFileHelper object + //------------------- + msg("ADD!"); + + 
//------------------- + // (4) Run "runAddFileByDatasetId" + //------------------- + addFileHelper.runAddFileByDataset(dataset, + fileName, + finalType, + storageIdentifier, + null, + optionalFileParams); + + + if (addFileHelper.hasError()){ + + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("error Code: " ,addFileHelper.getHttpErrorCode().toString()) + .add("message " , addFileHelper.getErrorMessagesAsString("\n")); + + jarr.add(fileoutput); + + }else{ + String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); + + JsonObject a1 = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); + + JsonArray f1 = a1.getJsonArray("files"); + JsonObject file1 = f1.getJsonObject(0); + + try { + //msgt("as String: " + addFileHelper.getSuccessResult()); + + logger.fine("successMsg: " + successMsg); + String duplicateWarning = addFileHelper.getDuplicateFileWarning(); + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + // return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("warning message: " ,addFileHelper.getDuplicateFileWarning()) + .add("message " , file1); + jarr.add(fileoutput); + + } else { + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("message " , file1); + jarr.add(fileoutput); + } + + //"Look at that! You added a file! (hey hey, it may have worked)"); + } catch (Exception ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); + } + } + } + } + } + } catch (Exception e) { + String message = e.getMessage(); + msgt("******* Exception from globus API call " + message); + msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); + e.printStackTrace(); + } + return ok(Json.createObjectBuilder().add("Files", jarr)); + + } + } From 36fd45c0252480144276b2de8e75e722aee6ee53 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 19 Jan 2021 08:12:03 -0500 Subject: [PATCH 043/161] multiple files lock issue resolved --- .../harvard/iq/dataverse/api/Datasets.java | 27 ++++++++- .../datasetutility/AddReplaceFileHelper.java | 55 +++++++++++++++---- 2 files changed, 69 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 25c80f48e47..afe6fb28cb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1828,6 +1828,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, systemConfig); + //------------------- // (4) Run "runAddFileByDatasetId" //------------------- @@ -1836,7 +1837,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, newFileContentType, newStorageIdentifier, fileInputStream, - optionalFileParams); + optionalFileParams ); if (addFileHelper.hasError()){ @@ -2503,6 +2504,9 @@ public Response globus(@PathParam("id") String datasetId, //------------------- msg("ADD!"); + + boolean globustype = true; + //------------------- // (4) Run "runAddFileByDatasetId" //------------------- @@ -2511,7 +2515,8 @@ public Response globus(@PathParam("id") String datasetId, finalType, storageIdentifier, null, - optionalFileParams); + optionalFileParams, + globustype); if (addFileHelper.hasError()){ @@ -2560,12 +2565,30 @@ public Response globus(@PathParam("id") String datasetId, } } } + + try { + Command cmd; + + 
logger.info("******* : ==== datasetId :" + dataset.getId() + " ======= UpdateDatasetVersionCommand START in globus function "); + cmd = new UpdateDatasetVersionCommand(dataset, dvRequest2); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "======CommandException updating DatasetVersion from batch job: " + ex.getMessage()); + } + + msg("****** pre ingest start"); + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + msg("******* post ingest start"); + } catch (Exception e) { String message = e.getMessage(); msgt("******* Exception from globus API call " + message); msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); e.printStackTrace(); } + + return ok(Json.createObjectBuilder().add("Files", jarr)); } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index ab34b5b2675..af9b7937afd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -98,7 +98,7 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - + public static String GLOBUSFILE_ADD_OPERATION = "GLOBUSFILE_ADD_OPERATION"; private String currentOperation; @@ -312,17 +312,34 @@ public boolean runAddFileByDataset(Dataset chosenDataset, String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams){ - + + return 
this.runAddFileByDataset(chosenDataset,newFileName,newFileContentType,newStorageIdentifier,newFileInputStream,optionalFileParams,false); + + } + + public boolean runAddFileByDataset(Dataset chosenDataset, + String newFileName, + String newFileContentType, + String newStorageIdentifier, + InputStream newFileInputStream, + OptionalFileParams optionalFileParams, + boolean globustype) { + msgt(">> runAddFileByDatasetId"); initErrorHandling(); - - this.currentOperation = FILE_ADD_OPERATION; - + + if(globustype) { + this.currentOperation = GLOBUSFILE_ADD_OPERATION; + } + else { + this.currentOperation = FILE_ADD_OPERATION; + } + if (!this.step_001_loadDataset(chosenDataset)){ return false; } - + //return this.runAddFile(this.dataset, newFileName, newFileContentType, newFileInputStream, optionalFileParams); return this.runAddReplaceFile(dataset, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); @@ -692,8 +709,10 @@ private boolean runAddReplacePhase2(){ }else{ msgt("step_070_run_update_dataset_command"); - if (!this.step_070_run_update_dataset_command()){ - return false; + if (!this.isGlobusFileAddOperation()) { + if (!this.step_070_run_update_dataset_command()) { + return false; + } } } @@ -707,6 +726,8 @@ private boolean runAddReplacePhase2(){ return false; } + + return true; } @@ -755,6 +776,16 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } + /** + * Is this a file add operation via Globus? + * + * @return + */ + + public boolean isGlobusFileAddOperation(){ + + return this.currentOperation.equals(GLOBUSFILE_ADD_OPERATION); + } /** * Initialize error handling vars @@ -1897,8 +1928,9 @@ private boolean step_100_startIngestJobs(){ msg("pre ingest start"); // start the ingest! 
// - - ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); + if (!this.isGlobusFileAddOperation()) { + ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); + } msg("post ingest start"); return true; @@ -1988,7 +2020,8 @@ public String getDuplicateFileWarning() { public void setDuplicateFileWarning(String duplicateFileWarning) { this.duplicateFileWarning = duplicateFileWarning; } - + + } // end class /* DatasetPage sequence: From 416ad7a6d5cc166f63f849a7c40951e4c189e9b1 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 19 Jan 2021 10:14:10 -0500 Subject: [PATCH 044/161] debugging - ingest process during globus API call --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index afe6fb28cb7..2f561f0bb6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2312,7 +2312,7 @@ public Response resetFileStore(@PathParam("identifier") String dvIdtf, @POST @Path("{id}/addglobusFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response globus(@PathParam("id") String datasetId, + public Response addGlobusFileToDataset(@PathParam("id") String datasetId, @FormDataParam("jsonData") String jsonData ) { @@ -2578,7 +2578,7 @@ public Response globus(@PathParam("id") String datasetId, } msg("****** pre ingest start"); - ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + ingestService.startIngestJobsForDataset(dataset, dvRequest2.getAuthenticatedUser() ); //(AuthenticatedUser) authUser); msg("******* post ingest start"); } catch (Exception e) { From fc5ed42be3b50cd1beb684f9b22d5317ffaddce6 Mon Sep 17 00:00:00 2001 From: chenganj Date: Wed, 20 Jan 2021 14:06:43 -0500 Subject: [PATCH 045/161] correction to 
globusAPI --- .../harvard/iq/dataverse/api/Datasets.java | 160 ++++++++++-------- 1 file changed, 93 insertions(+), 67 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 2f561f0bb6e..291b66fde66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2318,6 +2318,10 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, { JsonArrayBuilder jarr = Json.createArrayBuilder(); + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + // ------------------------------------- // (1) Get the user from the API key // ------------------------------------- @@ -2341,6 +2345,18 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, return wr.getResponse(); } + //------------------------------------ + // (2a) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + // ------------------------------------- // (3) Parse JsonData @@ -2348,7 +2364,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, String taskIdentifier = null; - msgt("******* (api) jsonData 1: " + jsonData); + msgt("******* (api) jsonData 1: " + jsonData.toString()); JsonObject jsonObject = null; try (StringReader rdr = new StringReader(jsonData)) { @@ -2362,7 +2378,6 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, // (4) Get taskIdentifier // ------------------------------------- - taskIdentifier = jsonObject.getString("taskIdentifier"); msgt("******* (api) newTaskIdentifier: " + taskIdentifier); @@ -2371,6 
+2386,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, // ------------------------------------- boolean success = false; + boolean globustype = true; do { try { @@ -2395,14 +2411,20 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, { StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - DataverseRequest dvRequest2 = createDataverseRequest(authUser); - AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(dvRequest2, - ingestService, - datasetService, - fileService, - permissionSvc, - commandEngine, - systemConfig); + for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { + + } + + DataverseRequest dvRequest = createDataverseRequest(authUser); + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + ingestService, + datasetService, + fileService, + permissionSvc, + commandEngine, + systemConfig + ); // ------------------------------------- // (6) Parse files information from jsondata @@ -2412,14 +2434,12 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, JsonArray filesJson = jsonObject.getJsonArray("files"); + + // Start to add the files if (filesJson != null) { for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { - for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - - } - - String storageIdentifier = fileJson.getString("storageIdentifier"); + String storageIdentifier = fileJson.getString("storageIdentifier"); //"s3://176ce6992af-208dea3661bb50" String suppliedContentType = fileJson.getString("contentType"); String fileName = fileJson.getString("fileName"); @@ -2429,14 +2449,11 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, String dbstorageIdentifier = storageIdentifier.split(":")[0] + "://" + bucketName + ":" + storageIdentifier.replace("s3://", ""); + // the storageidentifier should be unique Query query = em.createQuery("select object(o) from DvObject as o 
where o.storageIdentifier = :storageIdentifier"); query.setParameter("storageIdentifier", dbstorageIdentifier); - msgt("******* dbstorageIdentifier :" + dbstorageIdentifier + " ======= query.getResultList().size()============== " + query.getResultList().size()); - - if (query.getResultList().size() > 0) { - JsonObjectBuilder fileoutput= Json.createObjectBuilder() .add("storageIdentifier " , storageIdentifier) .add("message " , " The datatable is not updated since the Storage Identifier already exists in dvObject. "); @@ -2444,7 +2461,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, jarr.add(fileoutput); } else { - // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied + // calculate mimeType String finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; String type = FileUtil.determineFileTypeByExtension(fileName); if (!StringUtils.isBlank(type)) { @@ -2458,6 +2475,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); fileJson = path.apply(fileJson); + // calculate md5 checksum StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); InputStream in = dataFileStorageIO.getInputStream(); String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); @@ -2465,28 +2483,8 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); fileJson = path.apply(fileJson); - //addGlobusFileToDataset(dataset, fileJson.toString(), addFileHelper, fileName, finalType, storageIdentifier); - - - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - - - //------------------------------------ - // (1) Make sure dataset does not 
have package file - // -------------------------------------- - - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, - BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") - ); - } - } - //--------------------------------------- - // (2) Load up optional params via JSON + // Load up optional params via JSON //--------------------------------------- OptionalFileParams optionalFileParams = null; @@ -2498,17 +2496,10 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, return error( Response.Status.BAD_REQUEST, ex.getMessage()); } - - //------------------- - // (3) Create the AddReplaceFileHelper object - //------------------- msg("ADD!"); - - boolean globustype = true; - //------------------- - // (4) Run "runAddFileByDatasetId" + // Run "runAddFileByDatasetId" //------------------- addFileHelper.runAddFileByDataset(dataset, fileName, @@ -2531,14 +2522,9 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, }else{ String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); - JsonObject a1 = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); - - JsonArray f1 = a1.getJsonArray("files"); - JsonObject file1 = f1.getJsonObject(0); + JsonObject successresult = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); try { - //msgt("as String: " + addFileHelper.getSuccessResult()); - logger.fine("successMsg: " + successMsg); String duplicateWarning = addFileHelper.getDuplicateFileWarning(); if (duplicateWarning != null && !duplicateWarning.isEmpty()) { @@ -2546,17 +2532,16 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, JsonObjectBuilder fileoutput= Json.createObjectBuilder() .add("storageIdentifier " , storageIdentifier) .add("warning message: " ,addFileHelper.getDuplicateFileWarning()) - .add("message " , file1); + .add("message " , 
successresult.getJsonArray("files").getJsonObject(0)); jarr.add(fileoutput); } else { JsonObjectBuilder fileoutput= Json.createObjectBuilder() .add("storageIdentifier " , storageIdentifier) - .add("message " , file1); + .add("message " , successresult.getJsonArray("files").getJsonObject(0)); jarr.add(fileoutput); } - //"Look at that! You added a file! (hey hey, it may have worked)"); } catch (Exception ex) { Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); @@ -2564,34 +2549,75 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, } } } - } + }// End of adding files try { Command cmd; - - logger.info("******* : ==== datasetId :" + dataset.getId() + " ======= UpdateDatasetVersionCommand START in globus function "); - cmd = new UpdateDatasetVersionCommand(dataset, dvRequest2); + cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); commandEngine.submit(cmd); } catch (CommandException ex) { - logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "======CommandException updating DatasetVersion from batch job: " + ex.getMessage()); + logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "====== UpdateDatasetVersionCommand Exception : " + ex.getMessage()); } - msg("****** pre ingest start"); - ingestService.startIngestJobsForDataset(dataset, dvRequest2.getAuthenticatedUser() ); //(AuthenticatedUser) authUser); + dataset = datasetService.find(dataset.getId()); + + List s= dataset.getFiles(); + for (DataFile dataFile : s) { + logger.info(" ******** TEST the datafile id is = " + dataFile.getId() + " = " + dataFile.getDisplayName()); + } + + msg("******* pre ingest start"); + + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + msg("******* post ingest start"); } catch (Exception e) { String message = e.getMessage(); - 
msgt("******* Exception from globus API call " + message); msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); e.printStackTrace(); } - return ok(Json.createObjectBuilder().add("Files", jarr)); } } + + /* + + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + + + + if (dvRequest2 != null) { + msg("****** dvRequest2 not null"); + ingestService.startIngestJobsForDataset(dataset, dvRequest2.getAuthenticatedUser()); + } else { + msg("****** dvRequest2 is null"); + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + } + */ + + /* + msg("****** JC update command completed "); + + // queue the data ingest job for asynchronous execution: + List dataFiles = addFileHelper.getNewlyAddedFiles(); + for (DataFile dataFile : dataFiles) { + // refresh the copy of the DataFile: + logger.info(" ******** JC the datafile id is = " + dataFile.getId()); + } + + msg("****** JC pre ingest start"); + String status = ingestService.startIngestJobs(dataFiles, (AuthenticatedUser) authUser); + msg("****** JC post ingest start"); + + */ + + + + From 8fc88d745e312d2912b43d25cf4593f4871eeca5 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 26 Jan 2021 09:33:40 -0500 Subject: [PATCH 046/161] fix for mimetype calculation --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 291b66fde66..752c1a8c4c0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2462,13 +2462,18 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, } else { // calculate mimeType + //logger.info(" JC Step 0 Supplied type: " + fileName ) ; + //logger.info(" JC Step 1 Supplied type: " + suppliedContentType 
) ; String finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; + //logger.info(" JC Step 2 finalType: " + finalType ) ; String type = FileUtil.determineFileTypeByExtension(fileName); + //logger.info(" JC Step 3 type by fileextension: " + type ) ; if (!StringUtils.isBlank(type)) { //Use rules for deciding when to trust browser supplied type - if (FileUtil.useRecognizedType(finalType, type)) { + //if (FileUtil.useRecognizedType(finalType, type)) { finalType = type; - } + //logger.info(" JC Step 4 type after useRecognized function : " + finalType ) ; + //} logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); } @@ -2567,11 +2572,11 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, logger.info(" ******** TEST the datafile id is = " + dataFile.getId() + " = " + dataFile.getDisplayName()); } - msg("******* pre ingest start"); + msg("******* pre ingest start in globus API"); ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); - msg("******* post ingest start"); + msg("******* post ingest start in globus API"); } catch (Exception e) { String message = e.getMessage(); From 68888bf34dd7f9d1b6519be79eeccd9d2e6653f4 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 1 Feb 2021 10:01:35 -0500 Subject: [PATCH 047/161] - add lock to the dataset page when the Globus API call is executing. 
--- .../harvard/iq/dataverse/api/Datasets.java | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 752c1a8c4c0..a95ff6fcdf3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4,6 +4,7 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetPage; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; @@ -230,6 +231,9 @@ public class Datasets extends AbstractApiBean { @Inject DataverseRequestServiceBean dvRequestService; + @Inject + DatasetPage datasetPage; + /** * Used to consolidate the way we parse and handle dataset versions. 
* @param @@ -2346,7 +2350,20 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, } //------------------------------------ - // (2a) Make sure dataset does not have package file + // (2a) Add lock to the dataset page + // -------------------------------------- + + String lockInfoMessage = "Globus Upload API is running "; + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, + ((AuthenticatedUser) authUser).getId() , lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + + //------------------------------------ + // (2b) Make sure dataset does not have package file // -------------------------------------- for (DatasetVersion dv : dataset.getVersions()) { @@ -2556,6 +2573,16 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, } }// End of adding files + + DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); + if (dcmLock == null) { + logger.log(Level.WARNING, "Dataset not locked for Globus upload"); + } else { + logger.log(Level.INFO, "Dataset remove locked for Globus upload"); + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); + //dataset.removeLock(dcmLock); + } + try { Command cmd; cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); From 38a1d38f37f8ef0d9d75db51a5b6707f58fb8227 Mon Sep 17 00:00:00 2001 From: chenganj Date: Wed, 10 Feb 2021 16:04:53 -0500 Subject: [PATCH 048/161] globusAPI initial commit --- .../edu/harvard/iq/dataverse/DatasetLock.java | 3 + .../harvard/iq/dataverse/api/Datasets.java | 323 ++++++- .../iq/dataverse/dataaccess/FileAccessIO.java | 8 +- .../dataverse/dataaccess/InputStreamIO.java | 6 + .../iq/dataverse/dataaccess/S3AccessIO.java | 47 +- .../iq/dataverse/dataaccess/StorageIO.java | 3 + .../dataverse/dataaccess/SwiftAccessIO.java | 6 + 
.../datasetutility/AddReplaceFileHelper.java | 13 +- .../iq/dataverse/globus/AccessList.java | 33 + .../iq/dataverse/globus/AccessToken.java | 71 ++ .../harvard/iq/dataverse/globus/FileG.java | 67 ++ .../iq/dataverse/globus/FilesList.java | 60 ++ .../dataverse/globus/GlobusServiceBean.java | 909 ++++++++++++++++++ .../iq/dataverse/globus/Identities.java | 16 + .../harvard/iq/dataverse/globus/Identity.java | 67 ++ .../harvard/iq/dataverse/globus/MkDir.java | 22 + .../iq/dataverse/globus/MkDirResponse.java | 50 + .../iq/dataverse/globus/Permissions.java | 58 ++ .../dataverse/globus/PermissionsResponse.java | 58 ++ .../dataverse/globus/SuccessfulTransfer.java | 35 + .../edu/harvard/iq/dataverse/globus/Task.java | 69 ++ .../harvard/iq/dataverse/globus/Tasklist.java | 17 + .../iq/dataverse/globus/Transferlist.java | 18 + .../harvard/iq/dataverse/globus/UserInfo.java | 68 ++ .../settings/SettingsServiceBean.java | 15 +- 25 files changed, 2031 insertions(+), 11 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/FileG.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Identities.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Identity.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Permissions.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java create mode 100644 
src/main/java/edu/harvard/iq/dataverse/globus/Task.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java index 93f4aca13d1..09c52a739f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java @@ -72,6 +72,9 @@ public enum Reason { /** DCM (rsync) upload in progress */ DcmUpload, + /** Globus upload in progress */ + GlobusUpload, + /** Tasks handled by FinalizeDatasetPublicationCommand: Registering PIDs for DS and DFs and/or file validation */ finalizePublication, diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 655cdafe04c..1db28d5dccc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api; +import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; @@ -31,6 +32,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; @@ -75,6 +77,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; import 
edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.S3PackageImporter; @@ -107,6 +110,9 @@ import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import edu.harvard.iq.dataverse.globus.AccessToken; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; + import java.io.IOException; import java.io.InputStream; import java.io.StringReader; @@ -125,13 +131,8 @@ import javax.ejb.EJB; import javax.ejb.EJBException; import javax.inject.Inject; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonArrayBuilder; -import javax.json.JsonException; -import javax.json.JsonObject; -import javax.json.JsonObjectBuilder; -import javax.json.JsonReader; +import javax.json.*; +import javax.persistence.Query; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.Consumes; @@ -150,6 +151,8 @@ import javax.ws.rs.core.Response.Status; import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import javax.ws.rs.core.UriInfo; + +import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.SolrServerException; import org.glassfish.jersey.media.multipart.FormDataBodyPart; import org.glassfish.jersey.media.multipart.FormDataContentDisposition; @@ -171,6 +174,9 @@ public class Datasets extends AbstractApiBean { @EJB DataverseServiceBean dataverseService; + @EJB + GlobusServiceBean globusServiceBean; + @EJB UserNotificationServiceBean userNotificationService; @@ -2287,5 +2293,308 @@ public Response resetFileStore(@PathParam("identifier") String dvIdtf, datasetService.merge(dataset); 
return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); } + + + @POST + @Path("{id}/addglobusFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response addGlobusFileToDataset(@PathParam("id") String datasetId, + @FormDataParam("jsonData") String jsonData + ) + { + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + //------------------------------------ + // (2a) Add lock to the dataset page + // -------------------------------------- + + String lockInfoMessage = "Globus Upload API is running "; + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, + ((AuthenticatedUser) authUser).getId() , lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + + //------------------------------------ + // (2b) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + + // ------------------------------------- + // 
(3) Parse JsonData + // ------------------------------------- + + String taskIdentifier = null; + + msgt("******* (api) jsonData 1: " + jsonData.toString()); + + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); + } + + // ------------------------------------- + // (4) Get taskIdentifier + // ------------------------------------- + + taskIdentifier = jsonObject.getString("taskIdentifier"); + msgt("******* (api) newTaskIdentifier: " + taskIdentifier); + + // ------------------------------------- + // (5) Wait until task completion + // ------------------------------------- + + boolean success = false; + boolean globustype = true; + + do { + try { + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + basicGlobusToken = "ODA0ODBhNzEtODA5ZC00ZTJhLWExNmQtY2JkMzA1NTk0ZDdhOmQvM3NFd1BVUGY0V20ra2hkSkF3NTZMWFJPaFZSTVhnRmR3TU5qM2Q3TjA9"; + msgt("******* (api) basicGlobusToken: " + basicGlobusToken); + AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); + + success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); + msgt("******* (api) success: " + success); + + } catch (Exception ex) { + ex.printStackTrace(); + logger.info(ex.getMessage()); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id"); + } + + } while (!success); + + + try + { + StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + + for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { + + } + + DataverseRequest dvRequest = createDataverseRequest(authUser); + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + ingestService, + datasetService, + fileService, + permissionSvc, + commandEngine, + systemConfig + ); + + // 
------------------------------------- + // (6) Parse files information from jsondata + // calculate checksum + // determine mimetype + // ------------------------------------- + + JsonArray filesJson = jsonObject.getJsonArray("files"); + + + // Start to add the files + if (filesJson != null) { + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + + String storageIdentifier = fileJson.getString("storageIdentifier"); //"s3://176ce6992af-208dea3661bb50" + String suppliedContentType = fileJson.getString("contentType"); + String fileName = fileJson.getString("fileName"); + + String fullPath = datasetSIO.getStorageLocation() + "/" + storageIdentifier.replace("s3://", ""); + + String bucketName = System.getProperty("dataverse.files." + storageIdentifier.split(":")[0] + ".bucket-name"); + + String dbstorageIdentifier = storageIdentifier.split(":")[0] + "://" + bucketName + ":" + storageIdentifier.replace("s3://", ""); + + // the storageidentifier should be unique + Query query = em.createQuery("select object(o) from DvObject as o where o.storageIdentifier = :storageIdentifier"); + query.setParameter("storageIdentifier", dbstorageIdentifier); + + if (query.getResultList().size() > 0) { + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("message " , " The datatable is not updated since the Storage Identifier already exists in dvObject. "); + + jarr.add(fileoutput); + } else { + + // calculate mimeType + //logger.info(" JC Step 0 Supplied type: " + fileName ) ; + //logger.info(" JC Step 1 Supplied type: " + suppliedContentType ) ; + String finalType = StringUtils.isBlank(suppliedContentType) ? 
FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; + //logger.info(" JC Step 2 finalType: " + finalType ) ; + String type = FileUtil.determineFileTypeByExtension(fileName); + //logger.info(" JC Step 3 type by fileextension: " + type ) ; + if (!StringUtils.isBlank(type)) { + //Use rules for deciding when to trust browser supplied type + //if (FileUtil.useRecognizedType(finalType, type)) { + finalType = type; + //logger.info(" JC Step 4 type after useRecognized function : " + finalType ) ; + //} + logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); + } + + JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); + fileJson = path.apply(fileJson); + + // calculate md5 checksum + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + InputStream in = dataFileStorageIO.getInputStream(); + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + + path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); + fileJson = path.apply(fileJson); + + //--------------------------------------- + // Load up optional params via JSON + //--------------------------------------- + + OptionalFileParams optionalFileParams = null; + msgt("(api) jsonData 2: " + fileJson.toString()); + + try { + optionalFileParams = new OptionalFileParams(fileJson.toString()); + } catch (DataFileTagException ex) { + return error( Response.Status.BAD_REQUEST, ex.getMessage()); + } + + msg("ADD!"); + + //------------------- + // Run "runAddFileByDatasetId" + //------------------- + addFileHelper.runAddFileByDataset(dataset, + fileName, + finalType, + storageIdentifier, + null, + optionalFileParams, + globustype); + + + if (addFileHelper.hasError()){ + + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("error Code: " ,addFileHelper.getHttpErrorCode().toString()) + .add("message " , 
addFileHelper.getErrorMessagesAsString("\n")); + + jarr.add(fileoutput); + + }else{ + String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); + + JsonObject successresult = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); + + try { + logger.fine("successMsg: " + successMsg); + String duplicateWarning = addFileHelper.getDuplicateFileWarning(); + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + // return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("warning message: " ,addFileHelper.getDuplicateFileWarning()) + .add("message " , successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + + } else { + JsonObjectBuilder fileoutput= Json.createObjectBuilder() + .add("storageIdentifier " , storageIdentifier) + .add("message " , successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } + + } catch (Exception ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); + } + } + } + } + }// End of adding files + + + DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); + if (dcmLock == null) { + logger.log(Level.WARNING, "Dataset not locked for Globus upload"); + } else { + logger.log(Level.INFO, "Dataset remove locked for Globus upload"); + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); + //dataset.removeLock(dcmLock); + } + + try { + Command cmd; + cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "====== UpdateDatasetVersionCommand Exception : " + ex.getMessage()); + } + + dataset = datasetService.find(dataset.getId()); + + List s= dataset.getFiles(); + for (DataFile dataFile : s) { + logger.info(" ******** TEST the datafile id is = " + dataFile.getId() + " = " + dataFile.getDisplayName()); + } + + msg("******* pre ingest start in globus API"); + + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + msg("******* post ingest start in globus API"); + + } catch (Exception e) { + String message = e.getMessage(); + msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); + e.printStackTrace(); + } + + return ok(Json.createObjectBuilder().add("Files", jarr)); + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index bd0549622f0..d11d55ede9f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -35,6 +35,7 @@ // Dataverse imports: +import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import 
edu.harvard.iq.dataverse.Dataverse; @@ -416,7 +417,12 @@ public void deleteAllAuxObjects() throws IOException { } - + @Override + public List listAuxObjects(String s) throws IOException { + return null; + } + + @Override public String getStorageLocation() { // For a local file, the "storage location" is a complete, absolute diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index c9796d24b27..2befee82d0c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse.dataaccess; +import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.DataFile; import java.io.IOException; import java.io.InputStream; @@ -149,6 +150,11 @@ public OutputStream getOutputStream() throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: there is no output stream associated with this object."); } + @Override + public List listAuxObjects(String s) throws IOException { + return null; + } + @Override public InputStream getAuxFileAsInputStream(String auxItemTag) { throw new UnsupportedOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index c0defccfdef..0b4e8b43cd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -4,6 +4,8 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; import 
com.amazonaws.auth.profile.ProfileCredentialsProvider; import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; @@ -112,6 +114,8 @@ public S3AccessIO(String storageLocation, String driverId) { key = storageLocation.substring(storageLocation.indexOf('/')+1); } + public static String S3_IDENTIFIER_PREFIX = "s3"; + //Used for tests only public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client, String driverId) { super(dvObject, req, driverId); @@ -636,6 +640,46 @@ public List listAuxObjects() throws IOException { return ret; } + @Override + public List listAuxObjects(String s ) throws IOException { + if (!this.canWrite()) { + open(); + } + String prefix = getDestinationKey(""); + + List ret = new ArrayList<>(); + + System.out.println("======= bucketname ===== "+ bucketName); + System.out.println("======= prefix ===== "+ prefix); + + ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix); + ObjectListing storedAuxFilesList = null; + try { + storedAuxFilesList = s3.listObjects(req); + } catch (SdkClientException sce) { + throw new IOException ("S3 listAuxObjects: failed to get a listing for "+prefix); + } + if (storedAuxFilesList == null) { + return ret; + } + List storedAuxFilesSummary = storedAuxFilesList.getObjectSummaries(); + try { + while (storedAuxFilesList.isTruncated()) { + logger.fine("S3 listAuxObjects: going to next page of list"); + storedAuxFilesList = s3.listNextBatchOfObjects(storedAuxFilesList); + if (storedAuxFilesList != null) { + storedAuxFilesSummary.addAll(storedAuxFilesList.getObjectSummaries()); + } + } + } catch (AmazonClientException ase) { + //logger.warning("Caught an AmazonServiceException in S3AccessIO.listAuxObjects(): " + ase.getMessage()); + throw new IOException("S3AccessIO: Failed to get aux objects for listing."); + } + + + return storedAuxFilesSummary; + } + @Override public void deleteAuxObject(String auxItemTag) throws 
IOException { if (!this.canWrite()) { @@ -875,7 +919,8 @@ public String generateTemporaryS3Url() throws IOException { if (s != null) { return s.toString(); } - + + //throw new IOException("Failed to generate temporary S3 url for "+key); return null; } else if (dvObject instanceof Dataset) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 2f66eec5f4c..9bfd9154323 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -37,6 +37,7 @@ import java.util.Iterator; import java.util.List; +import com.amazonaws.services.s3.model.S3ObjectSummary; //import org.apache.commons.httpclient.Header; //import org.apache.commons.httpclient.methods.GetMethod; @@ -542,4 +543,6 @@ public boolean isBelowIngestSizeLimit() { return true; } } + + public abstract ListlistAuxObjects(String s) throws IOException; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 3bc29cb9836..bee67f85a55 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -1,4 +1,5 @@ package edu.harvard.iq.dataverse.dataaccess; +import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; @@ -875,6 +876,11 @@ public String getSwiftContainerName() { return null; } + @Override + public List listAuxObjects(String s) throws IOException { + return null; + } + //https://gist.github.com/ishikawa/88599 public static String toHexString(byte[] bytes) { Formatter formatter = new Formatter(); diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java 
b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index ea1cfc38cfa..c0d5afb95cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -312,7 +312,18 @@ public boolean runAddFileByDataset(Dataset chosenDataset, String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams){ - + return this.runAddFileByDataset(chosenDataset,newFileName,newFileContentType,newStorageIdentifier,newFileInputStream,optionalFileParams,false); + + } + + public boolean runAddFileByDataset(Dataset chosenDataset, + String newFileName, + String newFileContentType, + String newStorageIdentifier, + InputStream newFileInputStream, + OptionalFileParams optionalFileParams, + boolean globustype) { + msgt(">> runAddFileByDatasetId"); initErrorHandling(); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java new file mode 100644 index 00000000000..9a963000541 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessList.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + +public class AccessList { + private int length; + private String endpoint; + private ArrayList DATA; + + public void setDATA(ArrayList DATA) { + this.DATA = DATA; + } + + public void setEndpoint(String endpoint) { + this.endpoint = endpoint; + } + + public void setLength(int length) { + this.length = length; + } + + public String getEndpoint() { + return endpoint; + } + + public ArrayList getDATA() { + return DATA; + } + + public int getLength() { + return length; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java new file mode 100644 index 00000000000..2d68c5c8839 --- /dev/null +++ 
b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -0,0 +1,71 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + + +public class AccessToken implements java.io.Serializable { + + private String accessToken; + private String idToken; + private Long expiresIn; + private String resourceServer; + private String tokenType; + private String state; + private String scope; + private String refreshToken; + private ArrayList otherTokens; + + public String getAccessToken() { return accessToken; } + + String getIdToken() { return idToken; } + + Long getExpiresIn() { return expiresIn; } + + String getResourceServer() { return resourceServer; } + + String getTokenType() { return tokenType; } + + String getState() { return state; } + + String getScope() {return scope; } + + String getRefreshToken() { return refreshToken; } + + ArrayList getOtherTokens() { return otherTokens; } + + public void setAccessToken(String accessToken) { + this.accessToken = accessToken; + } + + public void setExpiresIn(Long expiresIn) { + this.expiresIn = expiresIn; + } + + public void setIdToken(String idToken) { + this.idToken = idToken; + } + + public void setOtherTokens(ArrayList otherTokens) { + this.otherTokens = otherTokens; + } + + public void setRefreshToken(String refreshToken) { + this.refreshToken = refreshToken; + } + + public void setResourceServer(String resourceServer) { + this.resourceServer = resourceServer; + } + + public void setScope(String scope) { + this.scope = scope; + } + + public void setState(String state) { + this.state = state; + } + + public void setTokenType(String tokenType) { + this.tokenType = tokenType; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java b/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java new file mode 100644 index 00000000000..bd6a4b3b881 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java @@ -0,0 +1,67 @@ +package edu.harvard.iq.dataverse.globus; + 
+public class FileG { + private String DATA_TYPE; + private String group; + private String name; + private String permissions; + private String size; + private String type; + private String user; + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public String getGroup() { + return group; + } + + public String getName() { + return name; + } + + public String getPermissions() { + return permissions; + } + + public String getSize() { + return size; + } + + public String getType() { + return type; + } + + public String getUser() { + return user; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public void setGroup(String group) { + this.group = group; + } + + public void setName(String name) { + this.name = name; + } + + public void setPermissions(String permissions) { + this.permissions = permissions; + } + + public void setSize(String size) { + this.size = size; + } + + public void setType(String type) { + this.type = type; + } + + public void setUser(String user) { + this.user = user; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java b/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java new file mode 100644 index 00000000000..777e37f9b80 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java @@ -0,0 +1,60 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + +public class FilesList { + private ArrayList DATA; + private String DATA_TYPE; + private String absolute_path; + private String endpoint; + private String length; + private String path; + + public String getEndpoint() { + return endpoint; + } + + public ArrayList getDATA() { + return DATA; + } + + public String getAbsolute_path() { + return absolute_path; + } + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public String getLength() { + return length; + } + + public String getPath() { + return path; + } + + public void setLength(String length) { + 
this.length = length; + } + + public void setEndpoint(String endpoint) { + this.endpoint = endpoint; + } + + public void setDATA(ArrayList DATA) { + this.DATA = DATA; + } + + public void setAbsolute_path(String absolute_path) { + this.absolute_path = absolute_path; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public void setPath(String path) { + this.path = path; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java new file mode 100644 index 00000000000..5e314c4f47e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -0,0 +1,909 @@ +package edu.harvard.iq.dataverse.globus; + +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.S3ObjectSummary; +import com.google.gson.FieldNamingPolicy; +import com.google.gson.GsonBuilder; +import edu.harvard.iq.dataverse.*; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.faces.application.FacesMessage; +import javax.faces.context.FacesContext; +import javax.faces.view.ViewScoped; +import javax.inject.Inject; +import javax.inject.Named; + +import javax.persistence.EntityManager; +import javax.persistence.PersistenceContext; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.*; + +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; + +import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; +import com.google.gson.Gson; +import edu.harvard.iq.dataverse.api.AbstractApiBean; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import 
edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.JsfHelper; +import edu.harvard.iq.dataverse.util.SystemConfig; +import org.primefaces.PrimeFaces; + +import static edu.harvard.iq.dataverse.util.JsfHelper.JH; + + +@Stateless +@Named("GlobusServiceBean") +public class GlobusServiceBean implements java.io.Serializable{ + + @EJB + protected DatasetServiceBean datasetSvc; + + @EJB + protected SettingsServiceBean settingsSvc; + + @Inject + DataverseSession session; + + @EJB + protected AuthenticationServiceBean authSvc; + + @EJB + EjbDataverseEngine commandEngine; + + private static final Logger logger = Logger.getLogger(FeaturedDataverseServiceBean.class.getCanonicalName()); + + private String code; + private String userTransferToken; + private String state; + + public String getState() { + return state; + } + + public void setState(String state) { + this.state = state; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + + public String getUserTransferToken() { + return userTransferToken; + } + + public void setUserTransferToken(String userTransferToken) { + this.userTransferToken = userTransferToken; + } + + public void onLoad() { + logger.info("Start Globus " + code); + logger.info("State " + state); + + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + String 
basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + if (globusEndpoint.equals("") || basicGlobusToken.equals("")) { + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + String datasetId = state; + logger.info("DatasetId = " + datasetId); + + String directory = getDirectory(datasetId); + if (directory == null) { + logger.severe("Cannot find directory"); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + HttpServletRequest origRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); + + logger.info(origRequest.getScheme()); + logger.info(origRequest.getServerName()); + + if (code != null ) { + + try { + AccessToken accessTokenUser = getAccessToken(origRequest, basicGlobusToken); + if (accessTokenUser == null) { + logger.severe("Cannot get access user token for code " + code); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } else { + setUserTransferToken(accessTokenUser.getOtherTokens().get(0).getAccessToken()); + } + + UserInfo usr = getUserInfo(accessTokenUser); + if (usr == null) { + logger.severe("Cannot get user info for " + accessTokenUser.getAccessToken()); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + logger.info(accessTokenUser.getAccessToken()); + logger.info(usr.getEmail()); + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + if (clientTokenUser == null) { + logger.severe("Cannot get client token "); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + logger.info(clientTokenUser.getAccessToken()); + + int status = createDirectory(clientTokenUser, directory, globusEndpoint); + if (status == 202) { + int perStatus = givePermission("identity", usr.getSub(), "rw", 
clientTokenUser, directory, globusEndpoint); + if (perStatus != 201 && perStatus != 200) { + logger.severe("Cannot get permissions "); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + } else if (status == 502) { //directory already exists + int perStatus = givePermission("identity", usr.getSub(), "rw", clientTokenUser, directory, globusEndpoint); + if (perStatus == 409) { + logger.info("permissions already exist"); + } else if (perStatus != 201 && perStatus != 200) { + logger.severe("Cannot get permissions "); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + } else { + logger.severe("Cannot create directory, status code " + status); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + return; + } + // ProcessBuilder processBuilder = new ProcessBuilder(); + // AuthenticatedUser user = (AuthenticatedUser) session.getUser(); + // ApiToken token = authSvc.findApiTokenByUser(user); + // String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST https://" + origRequest.getServerName() + "/api/globus/" + datasetId; + // logger.info("====command ==== " + command); + // processBuilder.command("bash", "-c", command); + // logger.info("=== Start process"); + // Process process = processBuilder.start(); + // logger.info("=== Going globus"); + goGlobusUpload(directory, globusEndpoint); + logger.info("=== Finished globus"); + + + } catch (MalformedURLException ex) { + logger.severe(ex.getMessage()); + logger.severe(ex.getCause().toString()); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + } catch (UnsupportedEncodingException ex) { + logger.severe(ex.getMessage()); + logger.severe(ex.getCause().toString()); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + } catch (IOException ex) { + 
logger.severe(ex.getMessage()); + logger.severe(ex.getCause().toString()); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); + } + + } + + } + + private void goGlobusUpload(String directory, String globusEndpoint ) { + + String httpString = "window.location.replace('" + "https://app.globus.org/file-manager?destination_id=" + globusEndpoint + "&destination_path=" + directory + "'" +")"; + PrimeFaces.current().executeScript(httpString); + } + + public void goGlobusDownload(String datasetId) { + + String directory = getDirectory(datasetId); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + String httpString = "window.location.replace('" + "https://app.globus.org/file-manager?origin_id=" + globusEndpoint + "&origin_path=" + directory + "'" +")"; + PrimeFaces.current().executeScript(httpString); + } + + ArrayList checkPermisions( AccessToken clientTokenUser, String directory, String globusEndpoint, String principalType, String principal) throws MalformedURLException { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); + ArrayList ids = new ArrayList(); + if (result.status == 200) { + AccessList al = parseJson(result.jsonResponse, AccessList.class, false); + + for (int i = 0; i< al.getDATA().size(); i++) { + Permissions pr = al.getDATA().get(i); + if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory )) && pr.getPrincipalType().equals(principalType) && + ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) ) { + ids.add(pr.getId()); + } else { + continue; + } + } + } + + return ids; + } + + public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { + if 
(directory != null && !directory.equals("")) { + directory = "/" + directory + "/"; + } + logger.info("Start updating permissions." + " Directory is " + directory); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, null); + logger.info("Size of rules " + rules.size()); + int count = 0; + while (count < rules.size()) { + logger.info("Start removing rules " + rules.get(count) ); + Permissions permissions = new Permissions(); + permissions.setDATA_TYPE("access"); + permissions.setPermissions(perm); + permissions.setPath(directory); + + Gson gson = new GsonBuilder().create(); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + rules.get(count)); + logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + rules.get(count)); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"PUT", gson.toJson(permissions)); + if (result.status != 200) { + logger.warning("Cannot update access rule " + rules.get(count)); + } else { + logger.info("Access rule " + rules.get(count) + " was updated"); + } + count++; + } + } + + public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, String directory, String globusEndpoint) throws MalformedURLException { + + ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, principal); + + + + Permissions permissions = new Permissions(); + permissions.setDATA_TYPE("access"); + permissions.setPrincipalType(principalType); + permissions.setPrincipal(principal); + permissions.setPath(directory + "/" ); + permissions.setPermissions(perm); + + Gson gson = new GsonBuilder().create(); + MakeRequestResponse result = null; + if (rules.size() == 0) { + logger.info("Start 
creating the rule"); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/"+ globusEndpoint + "/access"); + result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", gson.toJson(permissions)); + + if (result.status == 400) { + logger.severe("Path " + permissions.getPath() + " is not valid"); + } else if (result.status == 409) { + logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + } + + return result.status; + } else { + logger.info("Start Updating the rule"); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/"+ globusEndpoint + "/access/" + rules.get(0)); + result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", gson.toJson(permissions)); + + if (result.status == 400) { + logger.severe("Path " + permissions.getPath() + " is not valid"); + } else if (result.status == 409) { + logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + } + logger.info("Result status " + result.status); + } + + return result.status; + } + + private int createDirectory(AccessToken clientTokenUser, String directory, String globusEndpoint) throws MalformedURLException { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + globusEndpoint + "/mkdir"); + + MkDir mkDir = new MkDir(); + mkDir.setDataType("mkdir"); + mkDir.setPath(directory); + Gson gson = new GsonBuilder().create(); + + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"POST", gson.toJson(mkDir)); + logger.info(result.toString()); + + if (result.status == 502) { + logger.warning("Cannot create directory " + mkDir.getPath() + ", it already exists"); + } else if (result.status == 403) { + logger.severe("Cannot create directory " + mkDir.getPath() + ", permission denied"); + } else if (result.status == 
202) { + logger.info("Directory created " + mkDir.getPath()); + } + + return result.status; + + } + + public String getTaskList(String basicGlobusToken, String identifierForFileStorage, String timeWhenAsyncStarted) throws MalformedURLException { + try + { + logger.info("1.getTaskList ====== timeWhenAsyncStarted = " + timeWhenAsyncStarted + " ====== identifierForFileStorage ====== " + identifierForFileStorage); + + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task_list?filter_endpoint="+globusEndpoint+"&filter_status=SUCCEEDED&filter_completion_time="+timeWhenAsyncStarted); + + //AccessToken accessTokenUser + //accessTokenUser.getOtherTokens().get(0).getAccessToken() + MakeRequestResponse result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); + //logger.info("==TEST ==" + result.toString()); + + + + //2019-12-01 18:34:37+00:00 + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + //SimpleDateFormat task_sdf = new SimpleDateFormat("yyyy-MM-ddTHH:mm:ss"); + + Calendar cal1 = Calendar.getInstance(); + cal1.setTime(sdf.parse(timeWhenAsyncStarted)); + + Calendar cal2 = Calendar.getInstance(); + + Tasklist tasklist = null; + //2019-12-01 18:34:37+00:00 + + if (result.status == 200) { + tasklist = parseJson(result.jsonResponse, Tasklist.class, false); + for (int i = 0; i< tasklist.getDATA().size(); i++) { + Task task = tasklist.getDATA().get(i); + Date tastTime = sdf.parse(task.getRequest_time().replace("T" , " ")); + cal2.setTime(tastTime); + + + if ( cal1.before(cal2)) { + + // get /task//successful_transfers + // verify datasetid in "destination_path": "/~/test_godata_copy/file1.txt", + // go to aws and get files and write to database tables + + logger.info("====== timeWhenAsyncStarted = " + 
timeWhenAsyncStarted + " ====== task.getRequest_time().toString() ====== " + task.getRequest_time()); + + boolean success = getSuccessfulTransfers(clientTokenUser, task.getTask_id() , identifierForFileStorage) ; + + if(success) + { + logger.info("SUCCESS ====== " + timeWhenAsyncStarted + " timeWhenAsyncStarted is before tastTime = TASK time = " + task.getTask_id()); + return task.getTask_id(); + } + } + else + { + //logger.info("====== " + timeWhenAsyncStarted + " timeWhenAsyncStarted is after tastTime = TASK time = " + task.getTask_id()); + //return task.getTask_id(); + } + } + } + } catch (MalformedURLException ex) { + logger.severe(ex.getMessage()); + logger.severe(ex.getCause().toString()); + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } + + public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId, String identifierForFileStorage) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId+"/successful_transfers"); + + MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), + "GET", null); + + Transferlist transferlist = null; + + if (result.status == 200) { + transferlist = parseJson(result.jsonResponse, Transferlist.class, false); + for (int i = 0; i < transferlist.getDATA().size(); i++) { + SuccessfulTransfer successfulTransfer = transferlist.getDATA().get(i); + String pathToVerify = successfulTransfer.getDestination_path(); + logger.info("getSuccessfulTransfers : ======pathToVerify === " + pathToVerify + " ====identifierForFileStorage === " + identifierForFileStorage); + if(pathToVerify.contains(identifierForFileStorage)) + { + logger.info(" SUCCESS ====== " + pathToVerify + " ==== " + identifierForFileStorage); + return true; + } + } + } + return false; + } + + public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId ) throws MalformedURLException { + + URL url 
= new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId+"/successful_transfers"); + + MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), + "GET", null); + + Transferlist transferlist = null; + + if (result.status == 200) { + logger.info(" SUCCESS ====== " ); + return true; + } + return false; + } + + + + public AccessToken getClientToken(String basicGlobusToken) throws MalformedURLException { + URL url = new URL("https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); + + MakeRequestResponse result = makeRequest(url, "Basic", + basicGlobusToken,"POST", null); + AccessToken clientTokenUser = null; + if (result.status == 200) { + clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + } + return clientTokenUser; + } + + public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGlobusToken ) throws UnsupportedEncodingException, MalformedURLException { + String serverName = origRequest.getServerName(); + if (serverName.equals("localhost")) { + serverName = "utl-192-123.library.utoronto.ca"; + } + + String redirectURL = "https://" + serverName + "/globus.xhtml"; + + redirectURL = URLEncoder.encode(redirectURL, "UTF-8"); + + URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL + + "&grant_type=authorization_code"); + logger.info(url.toString()); + + MakeRequestResponse result = makeRequest(url, "Basic", basicGlobusToken,"POST", null); + AccessToken accessTokenUser = null; + + if (result.status == 200) { + logger.info("Access Token: \n" + result.toString()); + accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + logger.info(accessTokenUser.getAccessToken()); + } + + return accessTokenUser; + + } + + public UserInfo getUserInfo(AccessToken accessTokenUser) throws 
MalformedURLException { + + URL url = new URL("https://auth.globus.org/v2/oauth2/userinfo"); + MakeRequestResponse result = makeRequest(url, "Bearer" , accessTokenUser.getAccessToken() , "GET", null); + UserInfo usr = null; + if (result.status == 200) { + usr = parseJson(result.jsonResponse, UserInfo.class, true); + } + + return usr; + } + + public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { + String str = null; + HttpURLConnection connection = null; + int status = 0; + try { + connection = (HttpURLConnection) url.openConnection(); + //Basic NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 + logger.info(authType + " " + authCode); + connection.setRequestProperty("Authorization", authType + " " + authCode); + //connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setRequestMethod(method); + if (jsonString != null) { + connection.setRequestProperty("Content-Type", "application/json"); + connection.setRequestProperty("Accept", "application/json"); + logger.info(jsonString); + connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); + wr.write(jsonString); + wr.flush(); + } + + status = connection.getResponseCode(); + logger.info("Status now " + status); + InputStream result = connection.getInputStream(); + if (result != null) { + logger.info("Result is not null"); + str = readResultJson(result).toString(); + logger.info("str is "); + logger.info(result.toString()); + } else { + logger.info("Result is null"); + str = null; + } + + logger.info("status: " + status); + } catch (IOException ex) { + logger.info("IO"); + logger.severe(ex.getMessage()); + logger.info(ex.getCause().toString()); + logger.info(ex.getStackTrace().toString()); + } finally { + if (connection != null) { + connection.disconnect(); + } + } + MakeRequestResponse r = new 
MakeRequestResponse(str, status); + return r; + + } + + private StringBuilder readResultJson(InputStream in) { + StringBuilder sb = null; + try { + + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + sb = new StringBuilder(); + String line; + while ((line = br.readLine()) != null) { + sb.append(line + "\n"); + } + br.close(); + logger.info(sb.toString()); + } catch (IOException e) { + sb = null; + logger.severe(e.getMessage()); + } + return sb; + } + + private T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { + if (sb != null) { + Gson gson = null; + if (namingPolicy) { + gson = new GsonBuilder().setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES).create(); + + } else { + gson = new GsonBuilder().create(); + } + T jsonClass = gson.fromJson(sb, jsonParserClass); + return jsonClass; + } else { + logger.severe("Bad respond from token rquest"); + return null; + } + } + + String getDirectory(String datasetId) { + Dataset dataset = null; + String directory = null; + try { + dataset = datasetSvc.find(Long.parseLong(datasetId)); + if (dataset == null) { + logger.severe("Dataset not found " + datasetId); + return null; + } + String storeId = dataset.getStorageIdentifier(); + storeId.substring(storeId.indexOf("//") + 1); + directory = storeId.substring(storeId.indexOf("//") + 1); + logger.info(storeId); + logger.info(directory); + logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); + return directory; + + } catch (NumberFormatException nfe) { + logger.severe(nfe.getMessage()); + + return null; + } + + } + + class MakeRequestResponse { + public String jsonResponse; + public int status; + MakeRequestResponse(String jsonResponse, int status) { + this.jsonResponse = jsonResponse; + this.status = status; + } + + } + + private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) throws MalformedURLException { + URL url = new URL(" 
https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint +"/ls?path=" + directory + "/"); + + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); + logger.info("find directory status:" + result.status); + + return result; + } + + public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedEncodingException, MalformedURLException { + + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + if (globusEndpoint.equals("") || basicGlobusToken.equals("")) { + return false; + } + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + if (clientTokenUser == null) { + logger.severe("Cannot get client token "); + return false; + } + + String directory = getDirectory(datasetId); + logger.info(directory); + + MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint); + + if (status.status == 200) { + + /* FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); + ArrayList files = fl.getDATA(); + if (files != null) { + for (FileG file: files) { + if (!file.getName().contains("cached") && !file.getName().contains(".thumb")) { + int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, + directory + "/" + file.getName(), globusEndpoint); + logger.info("givePermission status " + perStatus + " for " + file.getName()); + if (perStatus == 409) { + logger.info("Permissions already exist or limit was reached for " + file.getName()); + } else if (perStatus == 400) { + logger.info("No file in Globus " + file.getName()); + } else if (perStatus != 201) { + logger.info("Cannot get permission for " + file.getName()); + } + } + } + }*/ + + int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, globusEndpoint); + 
logger.info("givePermission status " + perStatus); + if (perStatus == 409) { + logger.info("Permissions already exist or limit was reached"); + } else if (perStatus == 400) { + logger.info("No directory in Globus"); + } else if (perStatus != 201 && perStatus != 200) { + logger.info("Cannot give read permission"); + return false; + } + + } else if (status.status == 404) { + logger.info("There is no globus directory"); + }else { + logger.severe("Cannot find directory in globus, status " + status ); + return false; + } + + return true; + } +/* + public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) throws MalformedURLException { + + logger.info("=====Tasklist == dataset id :" + dataset.getId()); + String directory = null; + + try { + + List fileMetadatas = new ArrayList<>(); + + StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + + + + DatasetVersion workingVersion = dataset.getEditVersion(); + + if (workingVersion.getCreateTime() != null) { + workingVersion.setCreateTime(new Timestamp(new Date().getTime())); + } + + + directory = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); + + System.out.println("======= directory ==== " + directory + " ==== datasetId :" + dataset.getId()); + Map checksumMapOld = new HashMap<>(); + + Iterator fmIt = workingVersion.getFileMetadatas().iterator(); + + while (fmIt.hasNext()) { + FileMetadata fm = fmIt.next(); + if (fm.getDataFile() != null && fm.getDataFile().getId() != null) { + String chksum = fm.getDataFile().getChecksumValue(); + if (chksum != null) { + checksumMapOld.put(chksum, 1); + } + } + } + + List dFileList = new ArrayList<>(); + boolean update = false; + for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { + + String s3ObjectKey = s3ObjectSummary.getKey(); + + + String t = s3ObjectKey.replace(directory, ""); + + if (t.indexOf(".") > 0) { + long totalSize = s3ObjectSummary.getSize(); + String filePath = s3ObjectKey; + String fileName = 
filePath.split("/")[filePath.split("/").length - 1]; + String fullPath = datasetSIO.getStorageLocation() + "/" + fileName; + + logger.info("Full path " + fullPath); + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + InputStream in = dataFileStorageIO.getInputStream(); + + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + //String checksumVal = s3ObjectSummary.getETag(); + logger.info("The checksum is " + checksumVal); + if ((checksumMapOld.get(checksumVal) != null)) { + logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == file already exists "); + } else if (filePath.contains("cached") || filePath.contains(".thumb")) { + logger.info(filePath + " is ignored"); + } else { + update = true; + logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == new file "); + try { + + DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); //MIME_TYPE_GLOBUS + datafile.setModificationTime(new Timestamp(new Date().getTime())); + datafile.setCreateDate(new Timestamp(new Date().getTime())); + datafile.setPermissionModificationTime(new Timestamp(new Date().getTime())); + + FileMetadata fmd = new FileMetadata(); + + + fmd.setLabel(fileName); + fmd.setDirectoryLabel(filePath.replace(directory, "").replace(File.separator + fileName, "")); + + fmd.setDataFile(datafile); + + datafile.getFileMetadatas().add(fmd); + + FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); + logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == added to datafile, filemetadata "); + + try { + // We persist "SHA1" rather than "SHA-1". 
+ //datafile.setChecksumType(DataFile.ChecksumType.SHA1); + datafile.setChecksumType(DataFile.ChecksumType.MD5); + datafile.setChecksumValue(checksumVal); + } catch (Exception cksumEx) { + logger.info("==== datasetId :" + dataset.getId() + "======Could not calculate checksumType signature for the new file "); + } + + datafile.setFilesize(totalSize); + + dFileList.add(datafile); + + } catch (Exception ioex) { + logger.info("datasetId :" + dataset.getId() + "======Failed to process and/or save the file " + ioex.getMessage()); + return false; + + } + } + } + } + if (update) { + + List filesAdded = new ArrayList<>(); + + if (dFileList != null && dFileList.size() > 0) { + + // Dataset dataset = version.getDataset(); + + for (DataFile dataFile : dFileList) { + + if (dataFile.getOwner() == null) { + dataFile.setOwner(dataset); + + workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); + dataFile.getFileMetadata().setDatasetVersion(workingVersion); + dataset.getFiles().add(dataFile); + + } + + filesAdded.add(dataFile); + + } + + logger.info("==== datasetId :" + dataset.getId() + " ===== Done! 
Finished saving new files to the dataset."); + } + + fileMetadatas.clear(); + for (DataFile addedFile : filesAdded) { + fileMetadatas.add(addedFile.getFileMetadata()); + } + filesAdded = null; + + if (workingVersion.isDraft()) { + + logger.info("Async: ==== datasetId :" + dataset.getId() + " ==== inside draft version "); + + Timestamp updateTime = new Timestamp(new Date().getTime()); + + workingVersion.setLastUpdateTime(updateTime); + dataset.setModificationTime(updateTime); + + + for (FileMetadata fileMetadata : fileMetadatas) { + + if (fileMetadata.getDataFile().getCreateDate() == null) { + fileMetadata.getDataFile().setCreateDate(updateTime); + fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); + } + fileMetadata.getDataFile().setModificationTime(updateTime); + } + + + } else { + logger.info("datasetId :" + dataset.getId() + " ==== inside released version "); + + for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { + for (FileMetadata fileMetadata : fileMetadatas) { + if (fileMetadata.getDataFile().getStorageIdentifier() != null) { + + if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion.getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { + workingVersion.getFileMetadatas().set(i, fileMetadata); + } + } + } + } + + + } + + + try { + Command cmd; + logger.info("Async: ==== datasetId :" + dataset.getId() + " ======= UpdateDatasetVersionCommand START in globus function "); + cmd = new UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, (HttpServletRequest) null)); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + //new DataverseRequest(authenticatedUser, (HttpServletRequest) null) + //dvRequestService.getDataverseRequest() + commandEngine.submit(cmd); + } catch (CommandException ex) { + logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "======CommandException updating DatasetVersion from batch job: " + ex.getMessage()); + return false; + } + + 
logger.info("==== datasetId :" + dataset.getId() + " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); + + //return true; + } + + } catch (Exception e) { + String message = e.getMessage(); + + logger.info("==== datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); + e.printStackTrace(); + return false; + //return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. Message was '" + message + "'."); + } + + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + updatePermision(clientTokenUser, directory, "identity", "r"); + return true; + } + +*/ +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java b/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java new file mode 100644 index 00000000000..6411262b5c9 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java @@ -0,0 +1,16 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + + +public class Identities { + ArrayList identities; + + public void setIdentities(ArrayList identities) { + this.identities = identities; + } + + public ArrayList getIdentities() { + return identities; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java b/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java new file mode 100644 index 00000000000..265bd55217a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java @@ -0,0 +1,67 @@ +package edu.harvard.iq.dataverse.globus; + +public class Identity { + private String id; + private String username; + private String status; + private String name; + private String email; + private String identityProvider; + private String organization; + + public void setOrganization(String organization) { + 
this.organization = organization; + } + + public void setIdentityProvider(String identityProvider) { + this.identityProvider = identityProvider; + } + + public void setName(String name) { + this.name = name; + } + + public void setEmail(String email) { + this.email = email; + } + + public void setId(String id) { + this.id = id; + } + + public void setStatus(String status) { + this.status = status; + } + + public void setUsername(String username) { + this.username = username; + } + + public String getOrganization() { + return organization; + } + + public String getIdentityProvider() { + return identityProvider; + } + + public String getName() { + return name; + } + + public String getEmail() { + return email; + } + + public String getId() { + return id; + } + + public String getStatus() { + return status; + } + + public String getUsername() { + return username; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java b/src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java new file mode 100644 index 00000000000..2c906f1f31d --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java @@ -0,0 +1,22 @@ +package edu.harvard.iq.dataverse.globus; + +public class MkDir { + private String DATA_TYPE; + private String path; + + public void setDataType(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public void setPath(String path) { + this.path = path; + } + + public String getDataType() { + return DATA_TYPE; + } + + public String getPath() { + return path; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java b/src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java new file mode 100644 index 00000000000..d31b34b8e70 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java @@ -0,0 +1,50 @@ +package edu.harvard.iq.dataverse.globus; + +public class MkDirResponse { + private String DATA_TYPE; + private String code; + private String message; + private String 
request_id; + private String resource; + + public void setCode(String code) { + this.code = code; + } + + public void setDataType(String dataType) { + this.DATA_TYPE = dataType; + } + + public void setMessage(String message) { + this.message = message; + } + + public void setRequestId(String requestId) { + this.request_id = requestId; + } + + public void setResource(String resource) { + this.resource = resource; + } + + public String getCode() { + return code; + } + + public String getDataType() { + return DATA_TYPE; + } + + public String getMessage() { + return message; + } + + public String getRequestId() { + return request_id; + } + + public String getResource() { + return resource; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Permissions.java b/src/main/java/edu/harvard/iq/dataverse/globus/Permissions.java new file mode 100644 index 00000000000..b8bb5193fa4 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Permissions.java @@ -0,0 +1,58 @@ +package edu.harvard.iq.dataverse.globus; + +public class Permissions { + private String DATA_TYPE; + private String principal_type; + private String principal; + private String id; + private String path; + private String permissions; + + public void setPath(String path) { + this.path = path; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public void setPermissions(String permissions) { + this.permissions = permissions; + } + + public void setPrincipal(String principal) { + this.principal = principal; + } + + public void setPrincipalType(String principalType) { + this.principal_type = principalType; + } + + public String getPath() { + return path; + } + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public String getPermissions() { + return permissions; + } + + public String getPrincipal() { + return principal; + } + + public String getPrincipalType() { + return principal_type; + } + + public void setId(String id) { + this.id = 
id; + } + + public String getId() { + return id; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java b/src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java new file mode 100644 index 00000000000..a30b1ecdc04 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java @@ -0,0 +1,58 @@ +package edu.harvard.iq.dataverse.globus; + +public class PermissionsResponse { + private String code; + private String resource; + private String DATA_TYPE; + private String request_id; + private String access_id; + private String message; + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public String getResource() { + return resource; + } + + public String getRequestId() { + return request_id; + } + + public String getMessage() { + return message; + } + + public String getCode() { + return code; + } + + public String getAccessId() { + return access_id; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public void setResource(String resource) { + this.resource = resource; + } + + public void setRequestId(String requestId) { + this.request_id = requestId; + } + + public void setMessage(String message) { + this.message = message; + } + + public void setCode(String code) { + this.code = code; + } + + public void setAccessId(String accessId) { + this.access_id = accessId; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java b/src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java new file mode 100644 index 00000000000..6e2e5810a0a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java @@ -0,0 +1,35 @@ +package edu.harvard.iq.dataverse.globus; + +public class SuccessfulTransfer { + + private String DATA_TYPE; + private String destination_path; + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public void setDATA_TYPE(String DATA_TYPE) { + 
this.DATA_TYPE = DATA_TYPE; + } + + public String getDestination_path() { + return destination_path; + } + + public void setDestination_path(String destination_path) { + this.destination_path = destination_path; + } + + public String getSource_path() { + return source_path; + } + + public void setSource_path(String source_path) { + this.source_path = source_path; + } + + private String source_path; + + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java new file mode 100644 index 00000000000..8d9f13f8ddf --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java @@ -0,0 +1,69 @@ +package edu.harvard.iq.dataverse.globus; + +public class Task { + + private String DATA_TYPE; + private String type; + private String status; + private String owner_id; + private String request_time; + private String task_id; + private String destination_endpoint_display_name; + + public String getDestination_endpoint_display_name() { + return destination_endpoint_display_name; + } + + public void setDestination_endpoint_display_name(String destination_endpoint_display_name) { + this.destination_endpoint_display_name = destination_endpoint_display_name; + } + + public void setRequest_time(String request_time) { + this.request_time = request_time; + } + + public String getRequest_time() { + return request_time; + } + + public String getTask_id() { + return task_id; + } + + public void setTask_id(String task_id) { + this.task_id = task_id; + } + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getOwner_id() { + return owner_id; + } + + public void 
setOwner_id(String owner_id) { + this.owner_id = owner_id; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java b/src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java new file mode 100644 index 00000000000..34e8c6c528e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java @@ -0,0 +1,17 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + +public class Tasklist { + + private ArrayList DATA; + + public void setDATA(ArrayList DATA) { + this.DATA = DATA; + } + + public ArrayList getDATA() { + return DATA; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java b/src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java new file mode 100644 index 00000000000..0a1bd607ee2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java @@ -0,0 +1,18 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.ArrayList; + +public class Transferlist { + + + private ArrayList DATA; + + public void setDATA(ArrayList DATA) { + this.DATA = DATA; + } + + public ArrayList getDATA() { + return DATA; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java b/src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java new file mode 100644 index 00000000000..a195486dd0b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java @@ -0,0 +1,68 @@ +package edu.harvard.iq.dataverse.globus; + +public class UserInfo implements java.io.Serializable{ + + private String identityProviderDisplayName; + private String identityProvider; + private String organization; + private String sub; + private String preferredUsername; + private String name; + private String email; + + public void setEmail(String email) { + this.email = email; + } + + public void setName(String name) { + this.name = name; + } + + public void setPreferredUsername(String preferredUsername) { + this.preferredUsername = preferredUsername; 
+ } + + public void setSub(String sub) { + this.sub = sub; + } + + public void setIdentityProvider(String identityProvider) { + this.identityProvider = identityProvider; + } + + public void setIdentityProviderDisplayName(String identityProviderDisplayName) { + this.identityProviderDisplayName = identityProviderDisplayName; + } + + public void setOrganization(String organization) { + this.organization = organization; + } + + public String getEmail() { + return email; + } + + public String getPreferredUsername() { + return preferredUsername; + } + + public String getSub() { + return sub; + } + + public String getName() { + return name; + } + + public String getIdentityProvider() { + return identityProvider; + } + + public String getIdentityProviderDisplayName() { + return identityProviderDisplayName; + } + + public String getOrganization() { + return organization; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index e292ee39722..cfa972bb8d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -436,7 +436,20 @@ Whether Harvesting (OAI) service is enabled /** * Sort Date Facets Chronologically instead or presenting them in order of # of hits as other facets are. 
Default is true */ - ChronologicalDateFacets + ChronologicalDateFacets, + + /** + * BasicGlobusToken for Globus Application + */ + BasicGlobusToken, + /** + * GlobusEndpoint is Globus endpoint for Globus application + */ + GlobusEndpoint, + /** + * Client id for Globus application + */ + GlobusClientId ; @Override From 66a4ca056cf16450ed5bf788aa9b726928efb6ec Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 11 Feb 2021 15:23:55 -0500 Subject: [PATCH 049/161] debug 1 --- .../harvard/iq/dataverse/api/Datasets.java | 19 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 2 - .../datasetutility/AddReplaceFileHelper.java | 50 +++- .../dataverse/ingest/IngestServiceBean.java | 263 +++++++++++++++++- .../iq/dataverse/ingest/IngestUtil.java | 17 ++ 5 files changed, 324 insertions(+), 27 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 1db28d5dccc..7ad53638942 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -77,7 +77,6 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; -import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.S3PackageImporter; @@ -2378,7 +2377,6 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, // ------------------------------------- taskIdentifier = jsonObject.getString("taskIdentifier"); - msgt("******* (api) newTaskIdentifier: " + taskIdentifier); // ------------------------------------- // (5) Wait until task completion @@ -2391,11 +2389,9 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, try { String basicGlobusToken =
settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); basicGlobusToken = "ODA0ODBhNzEtODA5ZC00ZTJhLWExNmQtY2JkMzA1NTk0ZDdhOmQvM3NFd1BVUGY0V20ra2hkSkF3NTZMWFJPaFZSTVhnRmR3TU5qM2Q3TjA9"; - msgt("******* (api) basicGlobusToken: " + basicGlobusToken); AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); - msgt("******* (api) success: " + success); } catch (Exception ex) { ex.printStackTrace(); @@ -2433,7 +2429,6 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, JsonArray filesJson = jsonObject.getJsonArray("files"); - // Start to add the files if (filesJson != null) { for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { @@ -2461,20 +2456,13 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, } else { // calculate mimeType - //logger.info(" JC Step 0 Supplied type: " + fileName ) ; - //logger.info(" JC Step 1 Supplied type: " + suppliedContentType ) ; String finalType = StringUtils.isBlank(suppliedContentType) ? 
FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; - //logger.info(" JC Step 2 finalType: " + finalType ) ; + String type = FileUtil.determineFileTypeByExtension(fileName); - //logger.info(" JC Step 3 type by fileextension: " + type ) ; + if (!StringUtils.isBlank(type)) { - //Use rules for deciding when to trust browser supplied type - //if (FileUtil.useRecognizedType(finalType, type)) { finalType = type; - //logger.info(" JC Step 4 type after useRecognized function : " + finalType ) ; - //} - logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); - } + } JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); fileJson = path.apply(fileJson); @@ -2492,7 +2480,6 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, //--------------------------------------- OptionalFileParams optionalFileParams = null; - msgt("(api) jsonData 2: " + fileJson.toString()); try { optionalFileParams = new OptionalFileParams(fileJson.toString()); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 0b4e8b43cd9..92026aef170 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -113,8 +113,6 @@ public S3AccessIO(String storageLocation, String driverId) { minPartSize = getMinPartSize(driverId); key = storageLocation.substring(storageLocation.indexOf('/')+1); } - - public static String S3_IDENTIFIER_PREFIX = "s3"; //Used for tests only public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client, String driverId) { diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index c0d5afb95cd..a3d86894251 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -98,7 +98,7 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - + public static String GLOBUSFILE_ADD_OPERATION = "GLOBUSFILE_ADD_OPERATION"; private String currentOperation; @@ -316,6 +316,7 @@ public boolean runAddFileByDataset(Dataset chosenDataset, } + // JC STEP 1 public boolean runAddFileByDataset(Dataset chosenDataset, String newFileName, String newFileContentType, @@ -328,8 +329,13 @@ public boolean runAddFileByDataset(Dataset chosenDataset, initErrorHandling(); - this.currentOperation = FILE_ADD_OPERATION; - + if(globustype) { + this.currentOperation = GLOBUSFILE_ADD_OPERATION; + } + else { + this.currentOperation = FILE_ADD_OPERATION; + } + if (!this.step_001_loadDataset(chosenDataset)){ return false; } @@ -455,7 +461,8 @@ private boolean runAddReplaceFile(Dataset owner, String newFileName, String newF InputStream newFileInputStream, OptionalFileParams optionalFileParams) { return runAddReplaceFile(owner,newFileName, newFileContentType, null, newFileInputStream, optionalFileParams); } - + + // JC STEP 4 private boolean runAddReplaceFile(Dataset owner, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, @@ -534,6 +541,7 @@ public boolean runReplaceFromUI_Phase1(Long oldFileId, * * @return */ + // JC STEP 5 private boolean runAddReplacePhase1(Dataset owner, String newFileName, String newFileContentType, @@ -703,11 +711,13 @@ private boolean runAddReplacePhase2(){ }else{ msgt("step_070_run_update_dataset_command"); + if (!this.isGlobusFileAddOperation()) { if (!this.step_070_run_update_dataset_command()){ return false; } } - + } + 
msgt("step_090_notifyUser"); if (!this.step_090_notifyUser()){ return false; @@ -766,10 +776,22 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } + /** + * Is this a file add operation via Globus? + * + * @return + */ + + public boolean isGlobusFileAddOperation(){ + + return this.currentOperation.equals(GLOBUSFILE_ADD_OPERATION); + } /** * Initialize error handling vars */ + + // JC STEP 2 private void initErrorHandling(){ this.errorFound = false; @@ -937,6 +959,8 @@ private String getBundleErr(String msgName){ /** * */ + + // JC STEP 3 private boolean step_001_loadDataset(Dataset selectedDataset){ if (this.hasError()){ @@ -1512,7 +1536,16 @@ private boolean step_060_addFilesViaIngestService(){ } int nFiles = finalFileList.size(); - finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace); + + if (!this.isGlobusFileAddOperation()) { + finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace); + } + else { + finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, isFileReplaceOperation()); + } + + + if (nFiles != finalFileList.size()) { if (nFiles == 1) { @@ -1908,9 +1941,10 @@ private boolean step_100_startIngestJobs(){ msg("pre ingest start"); // start the ingest! 
// - + if (!this.isGlobusFileAddOperation()) { ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); - + } + msg("post ingest start"); return true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index eec5504661a..035922f0724 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -154,7 +154,268 @@ public class IngestServiceBean { // attached to the Dataset via some cascade path (for example, via // DataFileCategory objects, if any were already assigned to the files). // It must be called before we attempt to permanently save the files in - // the database by calling the Save command on the dataset and/or version. + // the database by calling the Save command on the dataset and/or version. + + public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles, boolean isReplaceOperation) { + List ret = new ArrayList<>(); + + if (newFiles != null && newFiles.size() > 0) { + // ret = new ArrayList<>(); + // final check for duplicate file names; + // we tried to make the file names unique on upload, but then + // the user may have edited them on the "add files" page, and + // renamed FOOBAR-1.txt back to FOOBAR.txt... 
+ //Don't change the name if we're replacing a file - (the original hasn't yet been deleted but will be in a later step) + if(!isReplaceOperation) { + IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles); + } + Dataset dataset = version.getDataset(); + + for (DataFile dataFile : newFiles) { + boolean unattached = false; + boolean savedSuccess = false; + if (dataFile.getOwner() == null) { + unattached = true; + dataFile.setOwner(dataset); + } + + String[] storageInfo = DataAccess.getDriverIdAndStorageLocation(dataFile.getStorageIdentifier()); + String driverType = DataAccess.getDriverType(storageInfo[0]); + String storageLocation = storageInfo[1]; + String tempFileLocation = null; + Path tempLocationPath = null; + if (driverType.equals("tmp")) { //"tmp" is the default if no prefix or the "tmp://" driver + tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + storageLocation; + + // Try to save the file in its permanent location: + tempLocationPath = Paths.get(tempFileLocation); + WritableByteChannel writeChannel = null; + FileChannel readChannel = null; + + StorageIO dataAccess = null; + + try { + logger.fine("Attempting to create a new storageIO object for " + storageLocation); + dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); + + logger.fine("Successfully created a new storageIO object."); + /* + * This commented-out code demonstrates how to copy bytes from a local + * InputStream (or a readChannel) into the writable byte channel of a Dataverse + * DataAccessIO object: + */ + + /* + * storageIO.open(DataAccessOption.WRITE_ACCESS); + * + * writeChannel = storageIO.getWriteChannel(); readChannel = new + * FileInputStream(tempLocationPath.toFile()).getChannel(); + * + * long bytesPerIteration = 16 * 1024; // 16K bytes long start = 0; while ( + * start < readChannel.size() ) { readChannel.transferTo(start, + * bytesPerIteration, writeChannel); start += bytesPerIteration; } + */ + + /* + * But it's easier to use this convenience 
method from the DataAccessIO: + * + * (if the underlying storage method for this file is local filesystem, the + * DataAccessIO will simply copy the file using Files.copy, like this: + * + * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), + * StandardCopyOption.REPLACE_EXISTING); + */ + dataAccess.savePath(tempLocationPath); + + // Set filesize in bytes + // + dataFile.setFilesize(dataAccess.getSize()); + savedSuccess = true; + logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); + + } catch (IOException ioex) { + logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); + } finally { + if (readChannel != null) { + try { + readChannel.close(); + } catch (IOException e) { + } + } + if (writeChannel != null) { + try { + writeChannel.close(); + } catch (IOException e) { + } + } + } + + // Since we may have already spent some CPU cycles scaling down image thumbnails, + // we may as well save them, by moving these generated images to the permanent + // dataset directory. We should also remember to delete any such files in the + // temp directory: + List generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), + storageLocation); + if (generatedTempFiles != null) { + for (Path generated : generatedTempFiles) { + if (savedSuccess) { // no need to try to save this aux file permanently, if we've failed to + // save the main file! + logger.fine("(Will also try to permanently save generated thumbnail file " + + generated.toString() + ")"); + try { + // Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), + // generated.getFileName().toString())); + int i = generated.toString().lastIndexOf("thumb"); + if (i > 1) { + String extensionTag = generated.toString().substring(i); + dataAccess.savePathAsAux(generated, extensionTag); + logger.fine( + "Saved generated thumbnail as aux object. 
\"preview available\" status: " + + dataFile.isPreviewImageAvailable()); + } else { + logger.warning( + "Generated thumbnail file name does not match the expected pattern: " + + generated.toString()); + } + + } catch (IOException ioex) { + logger.warning("Failed to save generated file " + generated.toString()); + } + } + + // ... but we definitely want to delete it: + try { + Files.delete(generated); + } catch (IOException ioex) { + logger.warning("Failed to delete generated file " + generated.toString()); + } + } + } + + if (unattached) { + dataFile.setOwner(null); + } + // Any necessary post-processing: + // performPostProcessingTasks(dataFile); + } else { + try { + StorageIO dataAccess = DataAccess.getStorageIO(dataFile); + //Populate metadata + dataAccess.open(DataAccessOption.READ_ACCESS); + //set file size + dataFile.setFilesize(dataAccess.getSize()); + if(dataAccess instanceof S3AccessIO) { + ((S3AccessIO)dataAccess).removeTempTag(); + } + } catch (IOException ioex) { + logger.warning("Failed to get file size, storage id " + dataFile.getStorageIdentifier() + " (" + + ioex.getMessage() + ")"); + } + savedSuccess = true; + dataFile.setOwner(null); + } + + logger.fine("Done! Finished saving new files in permanent storage and adding them to the dataset."); + boolean belowLimit = false; + + try { + belowLimit = dataFile.getStorageIO().isBelowIngestSizeLimit(); + } catch (IOException e) { + logger.warning("Error getting ingest limit for file: " + dataFile.getIdentifier() + " : " + e.getMessage()); + } + + if (savedSuccess && belowLimit) { + // These are all brand new files, so they should all have + // one filemetadata total. -- L.A. 
+ FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); + String fileName = fileMetadata.getLabel(); + + boolean metadataExtracted = false; + if (FileUtil.canIngestAsTabular(dataFile)) { + /* + * Note that we don't try to ingest the file right away - instead we mark it as + * "scheduled for ingest", then at the end of the save process it will be queued + * for async. ingest in the background. In the meantime, the file will be + * ingested as a regular, non-tabular file, and appear as such to the user, + * until the ingest job is finished with the Ingest Service. + */ + dataFile.SetIngestScheduled(); + } else if (fileMetadataExtractable(dataFile)) { + + try { + // FITS is the only type supported for metadata + // extraction, as of now. -- L.A. 4.0 + dataFile.setContentType("application/fits"); + metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); + } catch (IOException mex) { + logger.severe("Caught exception trying to extract indexable metadata from file " + + fileName + ", " + mex.getMessage()); + } + if (metadataExtracted) { + logger.fine("Successfully extracted indexable metadata from file " + fileName); + } else { + logger.fine("Failed to extract indexable metadata from file " + fileName); + } + } else if (FileUtil.MIME_TYPE_INGESTED_FILE.equals(dataFile.getContentType())) { + // Make sure no *uningested* tab-delimited files are saved with the type "text/tab-separated-values"! + // "text/tsv" should be used instead: + dataFile.setContentType(FileUtil.MIME_TYPE_TSV); + } + } + // ... and let's delete the main temp file if it exists: + if(tempLocationPath!=null) { + try { + logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); + Files.delete(tempLocationPath); + } catch (IOException ex) { + // (non-fatal - it's just a temp file.) 
+ logger.warning("Failed to delete temp file " + tempLocationPath.toString()); + } + } + if (savedSuccess) { + // temp dbug line + // System.out.println("ADDING FILE: " + fileName + "; for dataset: " + + // dataset.getGlobalId()); + // Make sure the file is attached to the dataset and to the version, if this + // hasn't been done yet: + if (dataFile.getOwner() == null) { + dataFile.setOwner(dataset); + + version.getFileMetadatas().add(dataFile.getFileMetadata()); + dataFile.getFileMetadata().setDatasetVersion(version); + dataset.getFiles().add(dataFile); + + if (dataFile.getFileMetadata().getCategories() != null) { + ListIterator dfcIt = dataFile.getFileMetadata().getCategories() + .listIterator(); + + while (dfcIt.hasNext()) { + DataFileCategory dataFileCategory = dfcIt.next(); + + if (dataFileCategory.getDataset() == null) { + DataFileCategory newCategory = dataset + .getCategoryByName(dataFileCategory.getName()); + if (newCategory != null) { + newCategory.addFileMetadata(dataFile.getFileMetadata()); + // dataFileCategory = newCategory; + dfcIt.set(newCategory); + } else { + dfcIt.remove(); + } + } + } + } + } + } + + ret.add(dataFile); + } + } + + return ret; + } + + public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles, DataFile fileToReplace) { List ret = new ArrayList<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java index 13d4ed96815..fa199bd096c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java @@ -51,6 +51,23 @@ public class IngestUtil { private static final Logger logger = Logger.getLogger(IngestUtil.class.getCanonicalName()); + + public static void checkForDuplicateFileNamesFinal(DatasetVersion version, List newFiles) { + + // Step 1: create list of existing path names from all FileMetadata in the DatasetVersion + // unique path name: directoryLabel 
+ file separator + fileLabel + Set pathNamesExisting = existingPathNamesAsSet(version); + + // Step 2: check each new DataFile against the list of path names, if a duplicate create a new unique file name + for (Iterator dfIt = newFiles.iterator(); dfIt.hasNext();) { + + FileMetadata fm = dfIt.next().getFileMetadata(); + + fm.setLabel(duplicateFilenameCheck(fm, pathNamesExisting)); + } + } + + /** * Checks a list of new data files for duplicate names, renaming any * duplicates to ensure that they are unique. From b9689b3f53053896dff8170cac8d5afdbdcce3d9 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 16 Feb 2021 08:56:08 -0500 Subject: [PATCH 050/161] Resolved Globus API for multiple files input (dv version 5.3 ) --- .../harvard/iq/dataverse/api/Datasets.java | 202 ++++++++------ .../iq/dataverse/dataaccess/FileAccessIO.java | 5 - .../dataverse/dataaccess/InputStreamIO.java | 5 - .../iq/dataverse/dataaccess/S3AccessIO.java | 40 --- .../iq/dataverse/dataaccess/StorageIO.java | 1 - .../dataverse/dataaccess/SwiftAccessIO.java | 5 - .../datasetutility/AddReplaceFileHelper.java | 11 +- .../dataverse/ingest/IngestServiceBean.java | 260 ------------------ .../iq/dataverse/ingest/IngestUtil.java | 17 +- .../harvard/iq/dataverse/util/BundleUtil.java | 2 +- 10 files changed, 116 insertions(+), 432 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 7ad53638942..49dbd9bf257 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2314,8 +2314,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, try { authUser = findUserOrDie(); } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, - BundleUtil.getStringFromBundle("file.addreplace.error.auth") + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") ); } @@ 
-2349,8 +2348,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, for (DatasetVersion dv : dataset.getVersions()) { if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, - BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") ); } } @@ -2406,9 +2404,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, { StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - - } + List cachedObjectsTags = datasetSIO.listAuxObjects(); DataverseRequest dvRequest = createDataverseRequest(authUser); AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( @@ -2429,120 +2425,146 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, JsonArray filesJson = jsonObject.getJsonArray("files"); - // Start to add the files - if (filesJson != null) { - for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + int totalNumberofFiles = 0 ; + int successNumberofFiles = 0; + try { + // Start to add the files + if (filesJson != null) { + totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { - String storageIdentifier = fileJson.getString("storageIdentifier"); //"s3://176ce6992af-208dea3661bb50" - String suppliedContentType = fileJson.getString("contentType"); - String fileName = fileJson.getString("fileName"); + String storageIdentifier = fileJson.getString("storageIdentifier"); //"s3://176ce6992af-208dea3661bb50" + String suppliedContentType = fileJson.getString("contentType"); + String fileName = fileJson.getString("fileName"); - String fullPath = datasetSIO.getStorageLocation() + "/" + storageIdentifier.replace("s3://", ""); + String fullPath = datasetSIO.getStorageLocation() + "/" + 
storageIdentifier.replace("s3://", ""); - String bucketName = System.getProperty("dataverse.files." + storageIdentifier.split(":")[0] + ".bucket-name"); + String bucketName = System.getProperty("dataverse.files." + storageIdentifier.split(":")[0] + ".bucket-name"); - String dbstorageIdentifier = storageIdentifier.split(":")[0] + "://" + bucketName + ":" + storageIdentifier.replace("s3://", ""); + String dbstorageIdentifier = storageIdentifier.split(":")[0] + "://" + bucketName + ":" + storageIdentifier.replace("s3://", ""); - // the storageidentifier should be unique - Query query = em.createQuery("select object(o) from DvObject as o where o.storageIdentifier = :storageIdentifier"); - query.setParameter("storageIdentifier", dbstorageIdentifier); + // the storageidentifier should be unique + Query query = em.createQuery("select object(o) from DvObject as o where o.storageIdentifier = :storageIdentifier"); + query.setParameter("storageIdentifier", dbstorageIdentifier); - if (query.getResultList().size() > 0) { - JsonObjectBuilder fileoutput= Json.createObjectBuilder() - .add("storageIdentifier " , storageIdentifier) - .add("message " , " The datatable is not updated since the Storage Identifier already exists in dvObject. "); + if (query.getResultList().size() > 0) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", storageIdentifier) + .add("message", " The datatable is not updated since the Storage Identifier already exists in dvObject. "); - jarr.add(fileoutput); - } else { + jarr.add(fileoutput); + } else { - // calculate mimeType - String finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; + // calculate mimeType + String finalType = StringUtils.isBlank(suppliedContentType) ? 
FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; - String type = FileUtil.determineFileTypeByExtension(fileName); + String type = FileUtil.determineFileTypeByExtension(fileName); - if (!StringUtils.isBlank(type)) { - finalType = type; - } + if (!StringUtils.isBlank(type)) { + finalType = type; + } - JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); - fileJson = path.apply(fileJson); + JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); + fileJson = path.apply(fileJson); + + int count = 0; + // calculate md5 checksum + do { + try { + + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + InputStream in = dataFileStorageIO.getInputStream(); + String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + + path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); + fileJson = path.apply(fileJson); + count = 3; + } catch (Exception ex) { + count = count + 1; + ex.printStackTrace(); + logger.info(ex.getMessage()); + Thread.sleep(5000); + msgt(" ***** Try to calculate checksum again for " + fileName); + //error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to calculate checksum"); + } - // calculate md5 checksum - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - InputStream in = dataFileStorageIO.getInputStream(); - String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + } while (count < 3); - path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); - fileJson = path.apply(fileJson); + //--------------------------------------- + // Load up optional params via JSON + //--------------------------------------- - //--------------------------------------- - // Load up optional params via JSON - //--------------------------------------- + OptionalFileParams optionalFileParams = null; - OptionalFileParams optionalFileParams = null; + try { + optionalFileParams = new 
OptionalFileParams(fileJson.toString()); + } catch (DataFileTagException ex) { + return error(Response.Status.BAD_REQUEST, ex.getMessage()); + } - try { - optionalFileParams = new OptionalFileParams(fileJson.toString()); - } catch (DataFileTagException ex) { - return error( Response.Status.BAD_REQUEST, ex.getMessage()); - } + msg("ADD!"); - msg("ADD!"); + //------------------- + // Run "runAddFileByDatasetId" + //------------------- + addFileHelper.runAddFileByDataset(dataset, + fileName, + finalType, + storageIdentifier, + null, + optionalFileParams, + globustype); - //------------------- - // Run "runAddFileByDatasetId" - //------------------- - addFileHelper.runAddFileByDataset(dataset, - fileName, - finalType, - storageIdentifier, - null, - optionalFileParams, - globustype); + if (addFileHelper.hasError()) { - if (addFileHelper.hasError()){ + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier ", storageIdentifier) + .add("error Code: ", addFileHelper.getHttpErrorCode().toString()) + .add("message ", addFileHelper.getErrorMessagesAsString("\n")); - JsonObjectBuilder fileoutput= Json.createObjectBuilder() - .add("storageIdentifier " , storageIdentifier) - .add("error Code: " ,addFileHelper.getHttpErrorCode().toString()) - .add("message " , addFileHelper.getErrorMessagesAsString("\n")); + jarr.add(fileoutput); - jarr.add(fileoutput); + } else { + String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); - }else{ - String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); + JsonObject successresult = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); - JsonObject successresult = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); + try { + logger.fine("successMsg: " + successMsg); + String duplicateWarning = addFileHelper.getDuplicateFileWarning(); + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + // return 
ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier ", storageIdentifier) + .add("warning message: ", addFileHelper.getDuplicateFileWarning()) + .add("message ", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); - try { - logger.fine("successMsg: " + successMsg); - String duplicateWarning = addFileHelper.getDuplicateFileWarning(); - if (duplicateWarning != null && !duplicateWarning.isEmpty()) { - // return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); - JsonObjectBuilder fileoutput= Json.createObjectBuilder() - .add("storageIdentifier " , storageIdentifier) - .add("warning message: " ,addFileHelper.getDuplicateFileWarning()) - .add("message " , successresult.getJsonArray("files").getJsonObject(0)); - jarr.add(fileoutput); + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier ", storageIdentifier) + .add("message ", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } - } else { - JsonObjectBuilder fileoutput= Json.createObjectBuilder() - .add("storageIdentifier " , storageIdentifier) - .add("message " , successresult.getJsonArray("files").getJsonObject(0)); - jarr.add(fileoutput); + } catch (Exception ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); } - - } catch (Exception ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); - return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); } } + successNumberofFiles = successNumberofFiles + 1; } - } - }// End of adding files - + }// End of adding files + }catch (Exception e ) + { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, e); + return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); + } + logger.log(Level.INFO, "Total Number of Files " + totalNumberofFiles); + logger.log(Level.INFO, "Success Number of Files " + successNumberofFiles); DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); if (dcmLock == null) { logger.log(Level.WARNING, "Dataset not locked for Globus upload"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d11d55ede9f..fa26232f6cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -416,11 +416,6 @@ public void deleteAllAuxObjects() throws IOException { } } - - @Override - public List listAuxObjects(String s) throws IOException { - return null; - } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index 2befee82d0c..90a32d49487 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -150,11 +150,6 @@ public OutputStream getOutputStream() throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: there is no output stream associated with this object."); } - @Override - public List listAuxObjects(String s) throws IOException { - return null; - } - @Override public InputStream getAuxFileAsInputStream(String auxItemTag) { throw new UnsupportedOperationException("InputStreamIO: this method is not supported in this DataAccess 
driver."); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 92026aef170..1deda4f49d1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -638,46 +638,6 @@ public List listAuxObjects() throws IOException { return ret; } - @Override - public List listAuxObjects(String s ) throws IOException { - if (!this.canWrite()) { - open(); - } - String prefix = getDestinationKey(""); - - List ret = new ArrayList<>(); - - System.out.println("======= bucketname ===== "+ bucketName); - System.out.println("======= prefix ===== "+ prefix); - - ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix); - ObjectListing storedAuxFilesList = null; - try { - storedAuxFilesList = s3.listObjects(req); - } catch (SdkClientException sce) { - throw new IOException ("S3 listAuxObjects: failed to get a listing for "+prefix); - } - if (storedAuxFilesList == null) { - return ret; - } - List storedAuxFilesSummary = storedAuxFilesList.getObjectSummaries(); - try { - while (storedAuxFilesList.isTruncated()) { - logger.fine("S3 listAuxObjects: going to next page of list"); - storedAuxFilesList = s3.listNextBatchOfObjects(storedAuxFilesList); - if (storedAuxFilesList != null) { - storedAuxFilesSummary.addAll(storedAuxFilesList.getObjectSummaries()); - } - } - } catch (AmazonClientException ase) { - //logger.warning("Caught an AmazonServiceException in S3AccessIO.listAuxObjects(): " + ase.getMessage()); - throw new IOException("S3AccessIO: Failed to get aux objects for listing."); - } - - - return storedAuxFilesSummary; - } - @Override public void deleteAuxObject(String auxItemTag) throws IOException { if (!this.canWrite()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 9bfd9154323..6780984eb92 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -544,5 +544,4 @@ public boolean isBelowIngestSizeLimit() { } } - public abstract ListlistAuxObjects(String s) throws IOException; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index bee67f85a55..eaebc86e35a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -875,11 +875,6 @@ public String getSwiftContainerName() { } return null; } - - @Override - public List listAuxObjects(String s) throws IOException { - return null; - } //https://gist.github.com/ishikawa/88599 public static String toHexString(byte[] bytes) { diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index a3d86894251..c94b1a81d3a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1534,17 +1534,10 @@ private boolean step_060_addFilesViaIngestService(){ this.addErrorSevere(getBundleErr("final_file_list_empty")); return false; } - - int nFiles = finalFileList.size(); - - if (!this.isGlobusFileAddOperation()) { - finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace); - } - else { - finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, isFileReplaceOperation()); - } + int nFiles = finalFileList.size(); + finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace); if (nFiles != 
finalFileList.size()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 035922f0724..b58a34a79ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -156,266 +156,6 @@ public class IngestServiceBean { // It must be called before we attempt to permanently save the files in // the database by calling the Save command on the dataset and/or version. - public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles, boolean isReplaceOperation) { - List ret = new ArrayList<>(); - - if (newFiles != null && newFiles.size() > 0) { - // ret = new ArrayList<>(); - // final check for duplicate file names; - // we tried to make the file names unique on upload, but then - // the user may have edited them on the "add files" page, and - // renamed FOOBAR-1.txt back to FOOBAR.txt... 
- //Don't change the name if we're replacing a file - (the original hasn't yet been deleted but will be in a later step) - if(!isReplaceOperation) { - IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles); - } - Dataset dataset = version.getDataset(); - - for (DataFile dataFile : newFiles) { - boolean unattached = false; - boolean savedSuccess = false; - if (dataFile.getOwner() == null) { - unattached = true; - dataFile.setOwner(dataset); - } - - String[] storageInfo = DataAccess.getDriverIdAndStorageLocation(dataFile.getStorageIdentifier()); - String driverType = DataAccess.getDriverType(storageInfo[0]); - String storageLocation = storageInfo[1]; - String tempFileLocation = null; - Path tempLocationPath = null; - if (driverType.equals("tmp")) { //"tmp" is the default if no prefix or the "tmp://" driver - tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + storageLocation; - - // Try to save the file in its permanent location: - tempLocationPath = Paths.get(tempFileLocation); - WritableByteChannel writeChannel = null; - FileChannel readChannel = null; - - StorageIO dataAccess = null; - - try { - logger.fine("Attempting to create a new storageIO object for " + storageLocation); - dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); - - logger.fine("Successfully created a new storageIO object."); - /* - * This commented-out code demonstrates how to copy bytes from a local - * InputStream (or a readChannel) into the writable byte channel of a Dataverse - * DataAccessIO object: - */ - - /* - * storageIO.open(DataAccessOption.WRITE_ACCESS); - * - * writeChannel = storageIO.getWriteChannel(); readChannel = new - * FileInputStream(tempLocationPath.toFile()).getChannel(); - * - * long bytesPerIteration = 16 * 1024; // 16K bytes long start = 0; while ( - * start < readChannel.size() ) { readChannel.transferTo(start, - * bytesPerIteration, writeChannel); start += bytesPerIteration; } - */ - - /* - * But it's easier to use this convenience 
method from the DataAccessIO: - * - * (if the underlying storage method for this file is local filesystem, the - * DataAccessIO will simply copy the file using Files.copy, like this: - * - * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), - * StandardCopyOption.REPLACE_EXISTING); - */ - dataAccess.savePath(tempLocationPath); - - // Set filesize in bytes - // - dataFile.setFilesize(dataAccess.getSize()); - savedSuccess = true; - logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); - - } catch (IOException ioex) { - logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); - } finally { - if (readChannel != null) { - try { - readChannel.close(); - } catch (IOException e) { - } - } - if (writeChannel != null) { - try { - writeChannel.close(); - } catch (IOException e) { - } - } - } - - // Since we may have already spent some CPU cycles scaling down image thumbnails, - // we may as well save them, by moving these generated images to the permanent - // dataset directory. We should also remember to delete any such files in the - // temp directory: - List generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), - storageLocation); - if (generatedTempFiles != null) { - for (Path generated : generatedTempFiles) { - if (savedSuccess) { // no need to try to save this aux file permanently, if we've failed to - // save the main file! - logger.fine("(Will also try to permanently save generated thumbnail file " - + generated.toString() + ")"); - try { - // Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), - // generated.getFileName().toString())); - int i = generated.toString().lastIndexOf("thumb"); - if (i > 1) { - String extensionTag = generated.toString().substring(i); - dataAccess.savePathAsAux(generated, extensionTag); - logger.fine( - "Saved generated thumbnail as aux object. 
\"preview available\" status: " - + dataFile.isPreviewImageAvailable()); - } else { - logger.warning( - "Generated thumbnail file name does not match the expected pattern: " - + generated.toString()); - } - - } catch (IOException ioex) { - logger.warning("Failed to save generated file " + generated.toString()); - } - } - - // ... but we definitely want to delete it: - try { - Files.delete(generated); - } catch (IOException ioex) { - logger.warning("Failed to delete generated file " + generated.toString()); - } - } - } - - if (unattached) { - dataFile.setOwner(null); - } - // Any necessary post-processing: - // performPostProcessingTasks(dataFile); - } else { - try { - StorageIO dataAccess = DataAccess.getStorageIO(dataFile); - //Populate metadata - dataAccess.open(DataAccessOption.READ_ACCESS); - //set file size - dataFile.setFilesize(dataAccess.getSize()); - if(dataAccess instanceof S3AccessIO) { - ((S3AccessIO)dataAccess).removeTempTag(); - } - } catch (IOException ioex) { - logger.warning("Failed to get file size, storage id " + dataFile.getStorageIdentifier() + " (" - + ioex.getMessage() + ")"); - } - savedSuccess = true; - dataFile.setOwner(null); - } - - logger.fine("Done! Finished saving new files in permanent storage and adding them to the dataset."); - boolean belowLimit = false; - - try { - belowLimit = dataFile.getStorageIO().isBelowIngestSizeLimit(); - } catch (IOException e) { - logger.warning("Error getting ingest limit for file: " + dataFile.getIdentifier() + " : " + e.getMessage()); - } - - if (savedSuccess && belowLimit) { - // These are all brand new files, so they should all have - // one filemetadata total. -- L.A. 
- FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); - String fileName = fileMetadata.getLabel(); - - boolean metadataExtracted = false; - if (FileUtil.canIngestAsTabular(dataFile)) { - /* - * Note that we don't try to ingest the file right away - instead we mark it as - * "scheduled for ingest", then at the end of the save process it will be queued - * for async. ingest in the background. In the meantime, the file will be - * ingested as a regular, non-tabular file, and appear as such to the user, - * until the ingest job is finished with the Ingest Service. - */ - dataFile.SetIngestScheduled(); - } else if (fileMetadataExtractable(dataFile)) { - - try { - // FITS is the only type supported for metadata - // extraction, as of now. -- L.A. 4.0 - dataFile.setContentType("application/fits"); - metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); - } catch (IOException mex) { - logger.severe("Caught exception trying to extract indexable metadata from file " - + fileName + ", " + mex.getMessage()); - } - if (metadataExtracted) { - logger.fine("Successfully extracted indexable metadata from file " + fileName); - } else { - logger.fine("Failed to extract indexable metadata from file " + fileName); - } - } else if (FileUtil.MIME_TYPE_INGESTED_FILE.equals(dataFile.getContentType())) { - // Make sure no *uningested* tab-delimited files are saved with the type "text/tab-separated-values"! - // "text/tsv" should be used instead: - dataFile.setContentType(FileUtil.MIME_TYPE_TSV); - } - } - // ... and let's delete the main temp file if it exists: - if(tempLocationPath!=null) { - try { - logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); - Files.delete(tempLocationPath); - } catch (IOException ex) { - // (non-fatal - it's just a temp file.) 
- logger.warning("Failed to delete temp file " + tempLocationPath.toString()); - } - } - if (savedSuccess) { - // temp dbug line - // System.out.println("ADDING FILE: " + fileName + "; for dataset: " + - // dataset.getGlobalId()); - // Make sure the file is attached to the dataset and to the version, if this - // hasn't been done yet: - if (dataFile.getOwner() == null) { - dataFile.setOwner(dataset); - - version.getFileMetadatas().add(dataFile.getFileMetadata()); - dataFile.getFileMetadata().setDatasetVersion(version); - dataset.getFiles().add(dataFile); - - if (dataFile.getFileMetadata().getCategories() != null) { - ListIterator dfcIt = dataFile.getFileMetadata().getCategories() - .listIterator(); - - while (dfcIt.hasNext()) { - DataFileCategory dataFileCategory = dfcIt.next(); - - if (dataFileCategory.getDataset() == null) { - DataFileCategory newCategory = dataset - .getCategoryByName(dataFileCategory.getName()); - if (newCategory != null) { - newCategory.addFileMetadata(dataFile.getFileMetadata()); - // dataFileCategory = newCategory; - dfcIt.set(newCategory); - } else { - dfcIt.remove(); - } - } - } - } - } - } - - ret.add(dataFile); - } - } - - return ret; - } - - public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles, DataFile fileToReplace) { List ret = new ArrayList<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java index fa199bd096c..7363d9d9430 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java @@ -52,21 +52,6 @@ public class IngestUtil { private static final Logger logger = Logger.getLogger(IngestUtil.class.getCanonicalName()); - public static void checkForDuplicateFileNamesFinal(DatasetVersion version, List newFiles) { - - // Step 1: create list of existing path names from all FileMetadata in the DatasetVersion - // unique path name: directoryLabel + 
file separator + fileLabel - Set pathNamesExisting = existingPathNamesAsSet(version); - - // Step 2: check each new DataFile against the list of path names, if a duplicate create a new unique file name - for (Iterator dfIt = newFiles.iterator(); dfIt.hasNext();) { - - FileMetadata fm = dfIt.next().getFileMetadata(); - - fm.setLabel(duplicateFilenameCheck(fm, pathNamesExisting)); - } - } - /** * Checks a list of new data files for duplicate names, renaming any @@ -274,7 +259,7 @@ public static Set existingPathNamesAsSet(DatasetVersion version, FileMet // #6942 added proxy for existing files to a boolean set when dataset version copy is done for (Iterator fmIt = version.getFileMetadatas().iterator(); fmIt.hasNext();) { FileMetadata fm = fmIt.next(); - if((fm.isInPriorVersion() || fm.getId() != null) && (replacedFmd==null) || (!fm.getDataFile().equals(replacedFmd.getDataFile()))) { + if((fm.isInPriorVersion() || fm.getId() != null) && (replacedFmd==null || !fm.getDataFile().equals(replacedFmd.getDataFile()))) { String existingName = fm.getLabel(); String existingDir = fm.getDirectoryLabel(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java index ca12683de15..a9511c65730 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java @@ -56,7 +56,7 @@ private static String getStringFromBundleNoMissingCheck(String key, List String stringFromBundle = null; stringFromBundle = bundle.getString(key); - logger.fine("string found: " + stringFromBundle); + //logger.fine("string found: " + stringFromBundle); if (arguments != null) { Object[] argArray = new String[arguments.size()]; From f8b7c3e2a630595a2d553e542c32b89b171bb24b Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 16 Feb 2021 09:08:56 -0500 Subject: [PATCH 051/161] Removed unwanted statements --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 1 
- .../java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java | 1 - .../edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java | 1 - .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 3 --- .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 2 -- .../edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java | 1 - .../iq/dataverse/datasetutility/AddReplaceFileHelper.java | 2 -- .../edu/harvard/iq/dataverse/ingest/IngestServiceBean.java | 1 - src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java | 2 -- src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java | 2 +- 10 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 49dbd9bf257..4382e6ee588 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2386,7 +2386,6 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, do { try { String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - basicGlobusToken = "ODA0ODBhNzEtODA5ZC00ZTJhLWExNmQtY2JkMzA1NTk0ZDdhOmQvM3NFd1BVUGY0V20ra2hkSkF3NTZMWFJPaFZSTVhnRmR3TU5qM2Q3TjA9"; AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index fa26232f6cf..a92c6a5a5f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -35,7 +35,6 @@ // Dataverse imports: -import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import 
edu.harvard.iq.dataverse.Dataverse; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index 90a32d49487..c9796d24b27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -5,7 +5,6 @@ */ package edu.harvard.iq.dataverse.dataaccess; -import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.DataFile; import java.io.IOException; import java.io.InputStream; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 1deda4f49d1..eaa4de8d705 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -4,8 +4,6 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.auth.profile.ProfileCredentialsProvider; import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; @@ -878,7 +876,6 @@ public String generateTemporaryS3Url() throws IOException { return s.toString(); } - //throw new IOException("Failed to generate temporary S3 url for "+key); return null; } else if (dvObject instanceof Dataset) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 6780984eb92..2f66eec5f4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -37,7 +37,6 @@ import java.util.Iterator; import java.util.List; -import 
com.amazonaws.services.s3.model.S3ObjectSummary; //import org.apache.commons.httpclient.Header; //import org.apache.commons.httpclient.methods.GetMethod; @@ -543,5 +542,4 @@ public boolean isBelowIngestSizeLimit() { return true; } } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index eaebc86e35a..5bdee44f1e5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -1,5 +1,4 @@ package edu.harvard.iq.dataverse.dataaccess; -import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index c94b1a81d3a..afd513b244d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1536,10 +1536,8 @@ private boolean step_060_addFilesViaIngestService(){ } int nFiles = finalFileList.size(); - finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace); - if (nFiles != finalFileList.size()) { if (nFiles == 1) { addError("Failed to save the content of the uploaded file."); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b58a34a79ae..4d69464c91b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -155,7 +155,6 @@ public class IngestServiceBean { // DataFileCategory objects, if any were already assigned to 
the files). // It must be called before we attempt to permanently save the files in // the database by calling the Save command on the dataset and/or version. - public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles, DataFile fileToReplace) { List ret = new ArrayList<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java index 7363d9d9430..356ac4f30ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java @@ -51,8 +51,6 @@ public class IngestUtil { private static final Logger logger = Logger.getLogger(IngestUtil.class.getCanonicalName()); - - /** * Checks a list of new data files for duplicate names, renaming any * duplicates to ensure that they are unique. diff --git a/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java index a9511c65730..ca12683de15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java @@ -56,7 +56,7 @@ private static String getStringFromBundleNoMissingCheck(String key, List String stringFromBundle = null; stringFromBundle = bundle.getString(key); - //logger.fine("string found: " + stringFromBundle); + logger.fine("string found: " + stringFromBundle); if (arguments != null) { Object[] argArray = new String[arguments.size()]; From d6480aa7cc4f09fa73619af2cc08719b9a84b687 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 16 Feb 2021 09:25:30 -0500 Subject: [PATCH 052/161] mimeType is calculated only from file extension --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4382e6ee588..9b8c1deb90b 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2433,7 +2433,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { String storageIdentifier = fileJson.getString("storageIdentifier"); //"s3://176ce6992af-208dea3661bb50" - String suppliedContentType = fileJson.getString("contentType"); + //String suppliedContentType = fileJson.getString("contentType"); String fileName = fileJson.getString("fileName"); String fullPath = datasetSIO.getStorageLocation() + "/" + storageIdentifier.replace("s3://", ""); @@ -2455,7 +2455,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, } else { // calculate mimeType - String finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; + String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; String type = FileUtil.determineFileTypeByExtension(fileName); From 22134188bf9f24c931a4b29c5fc4b2603301e956 Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 18 Feb 2021 09:07:22 -0500 Subject: [PATCH 053/161] corrected compilation errors --- .../edu/harvard/iq/dataverse/DatasetLock.java | 3 - .../harvard/iq/dataverse/api/GlobusApi.java | 7 ++- .../dataverse/dataaccess/InputStreamIO.java | 5 -- .../iq/dataverse/dataaccess/StorageIO.java | 2 +- .../harvard/iq/dataverse/util/FileUtil.java | 15 +---- src/main/webapp/editFilesFragment.xhtml | 63 ++++++++++++++++++- .../file-download-button-fragment.xhtml | 11 ++++ 7 files changed, 80 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java index f3dc4922f6e..62eec80af17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetLock.java @@ -72,9 +72,6 @@ public enum Reason { /** DCM 
(rsync) upload in progress */ DcmUpload, - /** Globus upload in progress */ - GlobusUpload, - /** Globus upload in progress */ GlobusUpload, diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index 078da050f28..c26b1bec184 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -203,10 +203,12 @@ public Response globus(@PathParam("id") String datasetId, if (filesJson != null) { for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { - +/* for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { } + */ + String storageIdentifier = fileJson.getString("storageIdentifier"); String suppliedContentType = fileJson.getString("contentType"); @@ -238,7 +240,8 @@ public Response globus(@PathParam("id") String datasetId, String type = FileUtil.determineFileTypeByExtension(fileName); if (!StringUtils.isBlank(type)) { //Use rules for deciding when to trust browser supplied type - if (FileUtil.useRecognizedType(finalType, type)) { + //if (FileUtil.useRecognizedType(finalType, type)) + { finalType = type; } logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index e244b8a788a..52dff797e33 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -151,11 +151,6 @@ public OutputStream getOutputStream() throws IOException { throw new UnsupportedDataAccessOperationException("InputStreamIO: there is no output stream associated with this object."); } - @Override - public List listAuxObjects(String s) throws IOException { - return null; - } - @Override public InputStream getAuxFileAsInputStream(String auxItemTag) { 
throw new UnsupportedOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 9bfd9154323..b3877252bd4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -544,5 +544,5 @@ public boolean isBelowIngestSizeLimit() { } } - public abstract ListlistAuxObjects(String s) throws IOException; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index e588dd5659f..6d0c88e886d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -20,8 +20,6 @@ package edu.harvard.iq.dataverse.util; -import static edu.harvard.iq.dataverse.dataaccess.S3AccessIO.S3_IDENTIFIER_PREFIX; - import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; @@ -1146,7 +1144,7 @@ public static List createDataFiles(DatasetVersion version, InputStream } // end createDataFiles - public static boolean useRecognizedType(String suppliedContentType, String recognizedType) { + private static boolean useRecognizedType(String suppliedContentType, String recognizedType) { // is it any better than the type that was supplied to us, // if any? // This is not as trivial a task as one might expect... 
@@ -1378,17 +1376,6 @@ public static void generateS3PackageStorageIdentifier(DataFile dataFile) { String storageId = driverId + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); dataFile.setStorageIdentifier(storageId); } - - public static void generateS3PackageStorageIdentifierForGlobus(DataFile dataFile) { - String bucketName = System.getProperty("dataverse.files.s3-bucket-name"); - String storageId = null; - if ( dataFile.getFileMetadata().getDirectoryLabel() != null && !dataFile.getFileMetadata().getDirectoryLabel().equals("")) { - storageId = S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + dataFile.getFileMetadata().getDirectoryLabel() + "/" + dataFile.getFileMetadata().getLabel(); - } else { - storageId = S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + dataFile.getFileMetadata().getLabel(); - } - dataFile.setStorageIdentifier(storageId); - } public static void generateStorageIdentifier(DataFile dataFile) { //Is it true that this is only used for temp files and we could safely prepend "tmp://" to indicate that? diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index e5e12201fc8..d8d3081afef 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -276,7 +276,54 @@
- + +
+
Globus
+ + +
+
+ + +

+ #{bundle['file.createGlobusUploadDisabled']} +

+
+
+ + +

+ + BEFORE YOU START: You will need to set up a free account with Globus and + have Globus Connect Personal running on your computer to transfer files to and from the service. +
+ + +
+
+ Once Globus transfer has finished, you will get an email notification. Please come back here and press the following button: +
+ + +
+
+ +

+ +
+ Click here to view the dataset page: #{EditDatafilesPage.dataset.displayName} . +
+
+
+
+
@@ -985,6 +1032,20 @@ return true; } } + + function openGlobus(datasetId, client_id) { + var res = location.protocol+'//'+location.hostname+(location.port ? ':'+location.port: ''); + + var scope = encodeURI("openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all", "UTF-8"); + + var new_url = "https://auth.globus.org/v2/oauth2/authorize?client_id=" + client_id + "&response_type=code&" + + "scope=" + scope + "&state=" + datasetId; + new_url = new_url + "&redirect_uri=" + res + "%2Fglobus.xhtml" ; + + + var myWindows = window.open(new_url); + } + //]]> diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 85fe60863b4..cafe1875590 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -58,6 +58,17 @@ #{bundle.download} + + + + + + #{bundle['file.downloadFromGlobus']} + From b6f8f0fad123a67ef6e9d6af5628064110eab9e9 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 1 Mar 2021 08:58:21 -0500 Subject: [PATCH 054/161] sql scripts --- .../V4.11.0.1__5565-sanitize-directory-labels.sql | 9 +++++++++ .../V4.11__5513-database-variablemetadata.sql | 5 +++++ .../V4.12.0.1__4.13-re-sanitize-filemetadata.sql | 12 ++++++++++++ .../db/migration/V4.13.0.1__3575-usernames.sql | 1 + .../db/migration/V4.14.0.1__5822-export-var-meta.sql | 2 ++ .../db/migration/V4.15.0.1__2043-split-gbr-table.sql | 10 ++++++++++ .../V4.16.0.1__5303-addColumn-to-settingTable.sql | 10 ++++++++++ .../db/migration/V4.16.0.2__5028-dataset-explore.sql | 3 +++ .../V4.16.0.3__6156-FooterImageforSub-Dataverse.sql | 4 ++++ .../migration/V4.17.0.1__5991-update-scribejava.sql | 1 + .../migration/V4.17.0.2__3578-file-page-preview.sql | 5 +++++ .../V4.18.1.1__6459-contenttype-nullable.sql | 2 ++ .../db/migration/V4.19.0.1__6485_multistore.sql | 3 +++ .../V4.19.0.2__6644-update-editor-role-alias.sql | 2 ++ ...0.1__2734-alter-data-table-add-orig-file-name.sql | 2 ++ 
.../V4.20.0.2__6748-configure-dropdown-toolname.sql | 2 ++ .../db/migration/V4.20.0.3__6558-file-validation.sql | 4 ++++ .../migration/V4.20.0.4__6936-maildomain-groups.sql | 1 + .../migration/V4.20.0.5__6505-zipdownload-jobs.sql | 2 ++ ....0.1__6872-assign-storage-drivers-to-datasets.sql | 1 + 20 files changed, 81 insertions(+) create mode 100644 src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql create mode 100644 src/main/resources/db/migration/V4.11__5513-database-variablemetadata.sql create mode 100644 src/main/resources/db/migration/V4.12.0.1__4.13-re-sanitize-filemetadata.sql create mode 100644 src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql create mode 100644 src/main/resources/db/migration/V4.14.0.1__5822-export-var-meta.sql create mode 100644 src/main/resources/db/migration/V4.15.0.1__2043-split-gbr-table.sql create mode 100644 src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql create mode 100644 src/main/resources/db/migration/V4.16.0.2__5028-dataset-explore.sql create mode 100644 src/main/resources/db/migration/V4.16.0.3__6156-FooterImageforSub-Dataverse.sql create mode 100644 src/main/resources/db/migration/V4.17.0.1__5991-update-scribejava.sql create mode 100644 src/main/resources/db/migration/V4.17.0.2__3578-file-page-preview.sql create mode 100644 src/main/resources/db/migration/V4.18.1.1__6459-contenttype-nullable.sql create mode 100644 src/main/resources/db/migration/V4.19.0.1__6485_multistore.sql create mode 100644 src/main/resources/db/migration/V4.19.0.2__6644-update-editor-role-alias.sql create mode 100644 src/main/resources/db/migration/V4.20.0.1__2734-alter-data-table-add-orig-file-name.sql create mode 100644 src/main/resources/db/migration/V4.20.0.2__6748-configure-dropdown-toolname.sql create mode 100644 src/main/resources/db/migration/V4.20.0.3__6558-file-validation.sql create mode 100644 src/main/resources/db/migration/V4.20.0.4__6936-maildomain-groups.sql create mode 
100644 src/main/resources/db/migration/V4.20.0.5__6505-zipdownload-jobs.sql create mode 100644 src/main/resources/db/migration/V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql diff --git a/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql b/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql new file mode 100644 index 00000000000..3d3ed777c9f --- /dev/null +++ b/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql @@ -0,0 +1,9 @@ +-- replace any sequences of slashes and backslashes with a single slash: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/\\][/\\]+', '/', 'g'); +-- strip (and replace with a .) any characters that are no longer allowed in the directory labels: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '\.\.+', '.', 'g'); +-- now replace any sequences of .s with a single .: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '\.\.+', '.', 'g'); +-- get rid of any leading or trailing slashes, spaces, '-'s and '.'s: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '^[/ .\-]+', '', ''); +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/ \.\-]+$', '', ''); diff --git a/src/main/resources/db/migration/V4.11__5513-database-variablemetadata.sql b/src/main/resources/db/migration/V4.11__5513-database-variablemetadata.sql new file mode 100644 index 00000000000..3c29a974bae --- /dev/null +++ b/src/main/resources/db/migration/V4.11__5513-database-variablemetadata.sql @@ -0,0 +1,5 @@ +-- universe is dropped since it is empty in the dataverse +-- this column will be moved to variablemetadata table +-- issue 5513 +ALTER TABLE datavariable +DROP COLUMN if exists universe; diff --git a/src/main/resources/db/migration/V4.12.0.1__4.13-re-sanitize-filemetadata.sql b/src/main/resources/db/migration/V4.12.0.1__4.13-re-sanitize-filemetadata.sql new file mode 100644 
index 00000000000..8623ed97b70 --- /dev/null +++ b/src/main/resources/db/migration/V4.12.0.1__4.13-re-sanitize-filemetadata.sql @@ -0,0 +1,12 @@ +-- let's try again and fix the existing directoryLabels: +-- (the script shipped with 4.12 was missing the most important line; bad copy-and-paste) +-- replace any sequences of slashes and backslashes with a single slash: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/\\][/\\]+', '/', 'g'); +-- strip (and replace with a .) any characters that are no longer allowed in the directory labels: +-- (this line was missing from the script released with 4.12!!) +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[^A-Za-z0-9_ ./-]+', '.', 'g'); +-- now replace any sequences of .s with a single .: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '\.\.+', '.', 'g'); +-- get rid of any leading or trailing slashes, spaces, '-'s and '.'s: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '^[/ .\-]+', '', ''); +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/ \.\-]+$', '', ''); diff --git a/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql b/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql new file mode 100644 index 00000000000..9e35623c455 --- /dev/null +++ b/src/main/resources/db/migration/V4.13.0.1__3575-usernames.sql @@ -0,0 +1 @@ +CREATE UNIQUE INDEX IF NOT EXISTS index_authenticateduser_lower_useridentifier ON authenticateduser (lower(useridentifier)); diff --git a/src/main/resources/db/migration/V4.14.0.1__5822-export-var-meta.sql b/src/main/resources/db/migration/V4.14.0.1__5822-export-var-meta.sql new file mode 100644 index 00000000000..e65f52c7c91 --- /dev/null +++ b/src/main/resources/db/migration/V4.14.0.1__5822-export-var-meta.sql @@ -0,0 +1,2 @@ +ALTER TABLE variablemetadata +ADD COLUMN IF NOT EXISTS postquestion text; diff --git 
a/src/main/resources/db/migration/V4.15.0.1__2043-split-gbr-table.sql b/src/main/resources/db/migration/V4.15.0.1__2043-split-gbr-table.sql new file mode 100644 index 00000000000..adde91ee1b0 --- /dev/null +++ b/src/main/resources/db/migration/V4.15.0.1__2043-split-gbr-table.sql @@ -0,0 +1,10 @@ +DO $$ +BEGIN +IF EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name='guestbookresponse' AND column_name='downloadtype') THEN + INSERT INTO filedownload(guestbookresponse_id, downloadtype, downloadtimestamp, sessionid) SELECT id, downloadtype, responsetime, sessionid FROM guestbookresponse; + ALTER TABLE guestbookresponse DROP COLUMN downloadtype, DROP COLUMN sessionid; +END IF; +END +$$ + + diff --git a/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql b/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql new file mode 100644 index 00000000000..66bcb78601c --- /dev/null +++ b/src/main/resources/db/migration/V4.16.0.1__5303-addColumn-to-settingTable.sql @@ -0,0 +1,10 @@ +ALTER TABLE ONLY setting DROP CONSTRAINT setting_pkey ; + +ALTER TABLE setting ADD COLUMN IF NOT EXISTS ID SERIAL PRIMARY KEY; + +ALTER TABLE setting ADD COLUMN IF NOT EXISTS lang text; + + +CREATE UNIQUE INDEX IF NOT EXISTS unique_settings + ON setting + (name, coalesce(lang, '')); diff --git a/src/main/resources/db/migration/V4.16.0.2__5028-dataset-explore.sql b/src/main/resources/db/migration/V4.16.0.2__5028-dataset-explore.sql new file mode 100644 index 00000000000..d880b1bddb4 --- /dev/null +++ b/src/main/resources/db/migration/V4.16.0.2__5028-dataset-explore.sql @@ -0,0 +1,3 @@ +ALTER TABLE externaltool ADD COLUMN IF NOT EXISTS scope VARCHAR(255); +UPDATE externaltool SET scope = 'FILE'; +ALTER TABLE externaltool ALTER COLUMN scope SET NOT NULL; diff --git a/src/main/resources/db/migration/V4.16.0.3__6156-FooterImageforSub-Dataverse.sql b/src/main/resources/db/migration/V4.16.0.3__6156-FooterImageforSub-Dataverse.sql new file mode 
100644 index 00000000000..3951897279e --- /dev/null +++ b/src/main/resources/db/migration/V4.16.0.3__6156-FooterImageforSub-Dataverse.sql @@ -0,0 +1,4 @@ +ALTER TABLE dataversetheme +ADD COLUMN IF NOT EXISTS logofooter VARCHAR, +ADD COLUMN IF NOT EXISTS logoFooterBackgroundColor VARCHAR, +ADD COLUMN IF NOT EXISTS logofooteralignment VARCHAR; diff --git a/src/main/resources/db/migration/V4.17.0.1__5991-update-scribejava.sql b/src/main/resources/db/migration/V4.17.0.1__5991-update-scribejava.sql new file mode 100644 index 00000000000..6762e1fc076 --- /dev/null +++ b/src/main/resources/db/migration/V4.17.0.1__5991-update-scribejava.sql @@ -0,0 +1 @@ +ALTER TABLE OAuth2TokenData DROP COLUMN IF EXISTS scope; \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.17.0.2__3578-file-page-preview.sql b/src/main/resources/db/migration/V4.17.0.2__3578-file-page-preview.sql new file mode 100644 index 00000000000..152700ed96c --- /dev/null +++ b/src/main/resources/db/migration/V4.17.0.2__3578-file-page-preview.sql @@ -0,0 +1,5 @@ +ALTER TABLE externalTool +ADD COLUMN IF NOT EXISTS hasPreviewMode BOOLEAN; +UPDATE externaltool SET hasPreviewMode = false; +ALTER TABLE externaltool ALTER COLUMN hasPreviewMode SET NOT NULL; + diff --git a/src/main/resources/db/migration/V4.18.1.1__6459-contenttype-nullable.sql b/src/main/resources/db/migration/V4.18.1.1__6459-contenttype-nullable.sql new file mode 100644 index 00000000000..79eab8583f0 --- /dev/null +++ b/src/main/resources/db/migration/V4.18.1.1__6459-contenttype-nullable.sql @@ -0,0 +1,2 @@ +-- contenttype can be null because dataset tools do not require it +ALTER TABLE externaltool ALTER contenttype DROP NOT NULL; diff --git a/src/main/resources/db/migration/V4.19.0.1__6485_multistore.sql b/src/main/resources/db/migration/V4.19.0.1__6485_multistore.sql new file mode 100644 index 00000000000..84364169614 --- /dev/null +++ b/src/main/resources/db/migration/V4.19.0.1__6485_multistore.sql @@ -0,0 +1,3 @@ +ALTER 
TABLE dataverse +ADD COLUMN IF NOT EXISTS storagedriver TEXT; +UPDATE dvobject set storageidentifier=CONCAT('file://', storageidentifier) where storageidentifier not like '%://%' and dtype='DataFile'; diff --git a/src/main/resources/db/migration/V4.19.0.2__6644-update-editor-role-alias.sql b/src/main/resources/db/migration/V4.19.0.2__6644-update-editor-role-alias.sql new file mode 100644 index 00000000000..7eccdb5f3c4 --- /dev/null +++ b/src/main/resources/db/migration/V4.19.0.2__6644-update-editor-role-alias.sql @@ -0,0 +1,2 @@ + +update dataverserole set alias = 'contributor' where alias = 'editor'; \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.1__2734-alter-data-table-add-orig-file-name.sql b/src/main/resources/db/migration/V4.20.0.1__2734-alter-data-table-add-orig-file-name.sql new file mode 100644 index 00000000000..edde8821045 --- /dev/null +++ b/src/main/resources/db/migration/V4.20.0.1__2734-alter-data-table-add-orig-file-name.sql @@ -0,0 +1,2 @@ + +ALTER TABLE datatable ADD COLUMN IF NOT EXISTS originalfilename character varying(255); \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.2__6748-configure-dropdown-toolname.sql b/src/main/resources/db/migration/V4.20.0.2__6748-configure-dropdown-toolname.sql new file mode 100644 index 00000000000..e360b0adfb6 --- /dev/null +++ b/src/main/resources/db/migration/V4.20.0.2__6748-configure-dropdown-toolname.sql @@ -0,0 +1,2 @@ +ALTER TABLE externaltool +ADD COLUMN IF NOT EXISTS toolname VARCHAR(255); diff --git a/src/main/resources/db/migration/V4.20.0.3__6558-file-validation.sql b/src/main/resources/db/migration/V4.20.0.3__6558-file-validation.sql new file mode 100644 index 00000000000..3e5e742968c --- /dev/null +++ b/src/main/resources/db/migration/V4.20.0.3__6558-file-validation.sql @@ -0,0 +1,4 @@ +-- the lock type "pidRegister" has been removed in 4.20, replaced with "finalizePublication" type +-- (since this script is run as the application is 
being deployed, any background pid registration +-- job is definitely no longer running - so we do want to remove any such locks left behind) +DELETE FROM DatasetLock WHERE reason='pidRegister'; \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.4__6936-maildomain-groups.sql b/src/main/resources/db/migration/V4.20.0.4__6936-maildomain-groups.sql new file mode 100644 index 00000000000..8c89b66fdec --- /dev/null +++ b/src/main/resources/db/migration/V4.20.0.4__6936-maildomain-groups.sql @@ -0,0 +1 @@ +ALTER TABLE persistedglobalgroup ADD COLUMN IF NOT EXISTS emaildomains text; \ No newline at end of file diff --git a/src/main/resources/db/migration/V4.20.0.5__6505-zipdownload-jobs.sql b/src/main/resources/db/migration/V4.20.0.5__6505-zipdownload-jobs.sql new file mode 100644 index 00000000000..484d5dd0784 --- /dev/null +++ b/src/main/resources/db/migration/V4.20.0.5__6505-zipdownload-jobs.sql @@ -0,0 +1,2 @@ +-- maybe temporary? - work in progress +CREATE TABLE IF NOT EXISTS CUSTOMZIPSERVICEREQUEST (KEY VARCHAR(63), STORAGELOCATION VARCHAR(255), FILENAME VARCHAR(255), ISSUETIME TIMESTAMP); diff --git a/src/main/resources/db/migration/V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql b/src/main/resources/db/migration/V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql new file mode 100644 index 00000000000..453b2054c43 --- /dev/null +++ b/src/main/resources/db/migration/V5.0.0.1__6872-assign-storage-drivers-to-datasets.sql @@ -0,0 +1 @@ +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS storagedriver VARCHAR(255); \ No newline at end of file From 414721188bc591d8c0f0d137bae58847be0b3c69 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 1 Mar 2021 09:35:12 -0500 Subject: [PATCH 055/161] datasetlock for globusupload --- .../edu/harvard/iq/dataverse/PermissionServiceBean.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java 
b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index aaf38af1b36..6f05245bafd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -733,6 +733,9 @@ else if (dataset.isLockedFor(DatasetLock.Reason.Workflow)) { else if (dataset.isLockedFor(DatasetLock.Reason.DcmUpload)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); } + else if (dataset.isLockedFor(DatasetLock.Reason.GlobusUpload)) { + throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); + } else if (dataset.isLockedFor(DatasetLock.Reason.EditInProgress)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), command); } @@ -768,6 +771,9 @@ else if (dataset.isLockedFor(DatasetLock.Reason.Workflow)) { else if (dataset.isLockedFor(DatasetLock.Reason.DcmUpload)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.publishNotAllowed"), command); } + else if (dataset.isLockedFor(DatasetLock.Reason.GlobusUpload)) { + throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.downloadNotAllowed"), command); + } else if (dataset.isLockedFor(DatasetLock.Reason.EditInProgress)) { throw new IllegalCommandException(BundleUtil.getStringFromBundle("dataset.message.locked.publishNotAllowed"), command); } From 07516b29b196a30891e47458df1fdb5ed6bbda45 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 1 Mar 2021 10:58:07 -0500 Subject: [PATCH 056/161] datasetlock for globusupload --- src/main/webapp/editFilesFragment.xhtml | 61 ------------------------- 1 file changed, 61 deletions(-) diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index d8d3081afef..6deb2a7b33f 100644 --- a/src/main/webapp/editFilesFragment.xhtml 
+++ b/src/main/webapp/editFilesFragment.xhtml @@ -277,53 +277,6 @@ -
-
Globus
- - -
-
- - -

- #{bundle['file.createGlobusUploadDisabled']} -

-
-
- - -

- - BEFORE YOU START: You will need to set up a free account with Globus and - have Globus Connect Personal running on your computer to transfer files to and from the service. -
- - -
-
- Once Globus transfer has finished, you will get an email notification. Please come back here and press the following button: -
- - -
-
- -

- -
- Click here to view the dataset page: #{EditDatafilesPage.dataset.displayName} . -
-
-
-
-
@@ -1032,20 +985,6 @@ return true; } } - - function openGlobus(datasetId, client_id) { - var res = location.protocol+'//'+location.hostname+(location.port ? ':'+location.port: ''); - - var scope = encodeURI("openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all", "UTF-8"); - - var new_url = "https://auth.globus.org/v2/oauth2/authorize?client_id=" + client_id + "&response_type=code&" + - "scope=" + scope + "&state=" + datasetId; - new_url = new_url + "&redirect_uri=" + res + "%2Fglobus.xhtml" ; - - - var myWindows = window.open(new_url); - } - //]]> From 2fa243abe63c60b07a714070acd4a62d5c8d6e96 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 16 Mar 2021 10:16:31 -0400 Subject: [PATCH 057/161] Globus API upgrade --- .../iq/dataverse/DatasetServiceBean.java | 277 ++- .../harvard/iq/dataverse/api/Datasets.java | 1542 ++++++++++------- .../dataverse/globus/fileDetailsHolder.java | 31 + .../harvard/iq/dataverse/util/FileUtil.java | 3 +- .../iq/dataverse/util/json/JsonPrinter.java | 10 + 5 files changed, 1215 insertions(+), 648 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/fileDetailsHolder.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index c1efe119fd2..f7e37b3d929 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -16,25 +17,28 @@ import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; import 
edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.globus.AccessToken; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.fileDetailsHolder; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.workflows.WorkflowComment; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; + +import java.io.*; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; import java.util.logging.FileHandler; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import javax.ejb.Asynchronous; import javax.ejb.EJB; import javax.ejb.EJBException; @@ -42,6 +46,7 @@ import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.inject.Named; +import javax.json.*; import javax.persistence.EntityManager; import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; @@ -49,8 +54,14 @@ import javax.persistence.StoredProcedureQuery; import javax.persistence.TypedQuery; import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang.StringUtils; import org.ocpsoft.common.util.Strings; +import javax.servlet.http.HttpServletRequest; + +import static 
edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; + /** * * @author skraffmiller @@ -95,6 +106,10 @@ public class DatasetServiceBean implements java.io.Serializable { @EJB SystemConfig systemConfig; + @EJB + GlobusServiceBean globusServiceBean; + + private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @PersistenceContext(unitName = "VDCNet-ejbPU") @@ -1004,6 +1019,246 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo hdLogger.info("Successfully destroyed the dataset"); } catch (Exception ex) { hdLogger.warning("Failed to destroy the dataset"); - } + } + } + + @Asynchronous + public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, User authUser, String httpRequestUrl) throws ExecutionException, InterruptedException { + + logger.info(httpRequestUrl + " == globusAsyncCall == step 1 "+ dataset.getId()); + + Thread.sleep(5000); + String lockInfoMessage = "Globus Upload API is running "; + DatasetLock lock = addDatasetLock(dataset.getId(), DatasetLock.Reason.EditInProgress, + ((AuthenticatedUser) authUser).getId(), lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + + + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); + } + + String taskIdentifier = jsonObject.getString("taskIdentifier"); + String datasetIdentifier = jsonObject.getString("datasetId").replace("doi:",""); + + // globus task status check + globusStatusCheck(taskIdentifier); + + // calculate checksum, mimetype + try { + List inputList = new ArrayList(); + JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + + if (filesJsonArray != null) { + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from victoria + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String fileName = fileJsonObject.getString("fileName"); + String[] bits = storageIdentifier.split(":"); + String fileId = bits[bits.length-1]; + String bucketName = bits[1].replace("/", ""); + + // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = "s3://" + bucketName + "/" + datasetIdentifier +"/" +fileId ; + + inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); + } + + JsonObject newfilesJsonObject= calculateMissingMetadataFields(inputList); + JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); + + JsonArrayBuilder jsonSecondAPI = Json.createArrayBuilder() ; + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String[] bits = storageIdentifier.split(":"); + String fileId = bits[bits.length-1]; + + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size() ) + .mapToObj(index -> ((JsonObject)newfilesJsonArray.get(index)).getJsonObject(fileId)) + .filter(Objects::nonNull).collect(Collectors.toList()); + + if(newfileJsonObject != null) { + JsonPatch path = Json.createPatchBuilder().add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = 
Json.createPatchBuilder().add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + jsonSecondAPI.add(stringToJsonObjectBuilder(fileJsonObject.toString())); + } + } + + String newjsonData = jsonSecondAPI.build().toString(); + + ProcessBuilder processBuilder = new ProcessBuilder(); + + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST "+httpRequestUrl.split("/api")[0]+"/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; + System.out.println("*******====command ==== " + command); + + new Thread(new Runnable() { + public void run() { + try { + processBuilder.command("bash", "-c", command); + Process process = processBuilder.start(); + } catch (Exception ex) { + logger.log(Level.SEVERE, "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + } + } + }).start(); + + } + + } catch (Exception e) { + logger.info("Exception "); + e.printStackTrace(); + } + } + + public static JsonObjectBuilder stringToJsonObjectBuilder(String str) { + JsonReader jsonReader = Json.createReader(new StringReader(str)); + JsonObject jo = jsonReader.readObject(); + jsonReader.close(); + + JsonObjectBuilder job = Json.createObjectBuilder(); + + for (Map.Entry entry : jo.entrySet()) { + job.add(entry.getKey(), entry.getValue()); + } + + return job; } + + Executor executor = Executors.newFixedThreadPool(10); + + + private Boolean globusStatusCheck(String taskId) + { + boolean success = false; + do { + try { + logger.info(" sleep before globus transfer check"); + Thread.sleep(50000); + + String basicGlobusToken = settingsService.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); + + success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); + + } catch (Exception ex) { + ex.printStackTrace(); + } 
+ + } while (!success); + + logger.info(" globus transfer completed "); + + return success; + } + + + public JsonObject calculateMissingMetadataFields(List inputList) throws InterruptedException, ExecutionException, IOException { + + List> hashvalueCompletableFutures = + inputList.stream().map(iD -> calculateDetailsAsync(iD)).collect(Collectors.toList()); + + CompletableFuture allFutures = CompletableFuture + .allOf(hashvalueCompletableFutures.toArray(new CompletableFuture[hashvalueCompletableFutures.size()])); + + CompletableFuture> allCompletableFuture = allFutures.thenApply(future -> { + return hashvalueCompletableFutures.stream() + .map(completableFuture -> completableFuture.join()) + .collect(Collectors.toList()); + }); + + CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> { + return files.stream().map(d -> json(d)).collect(toJsonArray()); + }); + + JsonArrayBuilder filesObject = (JsonArrayBuilder) completableFuture.get(); + + JsonObject output = Json.createObjectBuilder().add("files", filesObject).build(); + + return output; + + } + + private CompletableFuture calculateDetailsAsync(String id) { + logger.info(" calcualte additional details for these globus id ==== " + id); + return CompletableFuture.supplyAsync( () -> { + try { + Thread.sleep(2000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + return ( calculateDetails(id) ); + } catch (InterruptedException | IOException e) { + e.printStackTrace(); + } + return null; + }, executor).exceptionally(ex -> { + return null; + }); + } + + + private fileDetailsHolder calculateDetails(String id) throws InterruptedException, IOException { + int count = 0; + String checksumVal = ""; + InputStream in = null; + String fileId = id.split("IDsplit")[0]; + String fullPath = id.split("IDsplit")[1]; + String fileName = id.split("IDsplit")[2]; + do { + try { + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + in = dataFileStorageIO.getInputStream(); + 
checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + count = 3; + } catch (Exception ex) { + count = count + 1; + ex.printStackTrace(); + logger.info(ex.getMessage()); + Thread.sleep(5000); + } + + } while (count < 3); + + + return new fileDetailsHolder(fileId, checksumVal, calculatemime(fileName)); + //getBytes(in)+"" ); + // calculatemime(fileName)); + } + + public long getBytes(InputStream is) throws IOException { + + FileInputStream fileStream = (FileInputStream)is; + return fileStream.getChannel().size(); + } + + public String calculatemime(String fileName) throws InterruptedException { + + String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; + String type = FileUtil.determineFileTypeByExtension(fileName); + + if (!StringUtils.isBlank(type)) { + if (FileUtil.useRecognizedType(finalType, type)) { + finalType = type; + } + } + + return finalType; + } + + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 7f50504ebc4..c2854b33e29 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.api; -import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; @@ -29,6 +28,7 @@ import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.RoleAssignee; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; @@ -77,10 +77,10 @@ import 
edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.globus.fileDetailsHolder; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.S3PackageImporter; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -117,16 +117,11 @@ import java.io.StringReader; import java.sql.Timestamp; import java.text.MessageFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; +import java.util.concurrent.*; import java.util.logging.Level; import java.util.logging.Logger; +import javax.ejb.Asynchronous; import javax.ejb.EJB; import javax.ejb.EJBException; import javax.inject.Inject; @@ -158,43 +153,45 @@ import org.glassfish.jersey.media.multipart.FormDataParam; import com.amazonaws.services.s3.model.PartETag; -import edu.harvard.iq.dataverse.FileMetadata; + import java.util.Map.Entry; +import java.util.stream.Collectors; +import java.util.stream.IntStream; @Path("datasets") public class Datasets extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Datasets.class.getCanonicalName()); - - @Inject DataverseSession session; + + @Inject DataverseSession session; @EJB DatasetServiceBean datasetService; @EJB DataverseServiceBean dataverseService; - + @EJB GlobusServiceBean globusServiceBean; @EJB UserNotificationServiceBean userNotificationService; - + @EJB PermissionServiceBean permissionService; - + @EJB AuthenticationServiceBean 
authenticationServiceBean; - + @EJB DDIExportServiceBean ddiExportService; - + @EJB DatasetFieldServiceBean datasetfieldService; @EJB MetadataBlockServiceBean metadataBlockService; - + @EJB DataFileServiceBean fileService; @@ -203,65 +200,72 @@ public class Datasets extends AbstractApiBean { @EJB EjbDataverseEngine commandEngine; - + @EJB IndexServiceBean indexService; @EJB S3PackageImporter s3PackageImporter; - + @EJB SettingsServiceBean settingsService; // TODO: Move to AbstractApiBean @EJB DatasetMetricsServiceBean datasetMetricsSvc; - + @EJB DatasetExternalCitationsServiceBean datasetExternalCitationsService; - + @Inject MakeDataCountLoggingServiceBean mdcLogService; - + @Inject DataverseRequestServiceBean dvRequestService; + @Context + protected HttpServletRequest httpRequest; + + /** * Used to consolidate the way we parse and handle dataset versions. - * @param + * @param */ public interface DsVersionHandler { T handleLatest(); + T handleDraft(); - T handleSpecific( long major, long minor ); + + T handleSpecific(long major, long minor); + T handleLatestPublished(); } - + @GET @Path("{id}") public Response getDataset(@PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { - return response( req -> { + return response(req -> { final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(id))); final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); final JsonObjectBuilder jsonbuilder = json(retrieved); //Report MDC if this is a released version (could be draft if user has access, or user may not have access at all and is not getting metadata beyond the minimum) - if((latest != null) && latest.isReleased()) { + if ((latest != null) && latest.isReleased()) { MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, retrieved); mdcLogService.logEntry(entry); } return 
ok(jsonbuilder.add("latestVersion", (latest != null) ? json(latest) : null)); }); } - - // TODO: - // This API call should, ideally, call findUserOrDie() and the GetDatasetCommand + + // TODO: + // This API call should, ideally, call findUserOrDie() and the GetDatasetCommand // to obtain the dataset that we are trying to export - which would handle - // Auth in the process... For now, Auth isn't necessary - since export ONLY + // Auth in the process... For now, Auth isn't necessary - since export ONLY // WORKS on published datasets, which are open to the world. -- L.A. 4.5 - + @GET @Path("/export") - @Produces({"application/xml", "application/json", "application/html" }) + @Produces({"application/xml", "application/json", "application/html"}) public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { try { @@ -269,20 +273,20 @@ public Response exportDataset(@QueryParam("persistentId") String persistentId, @ if (dataset == null) { return error(Response.Status.NOT_FOUND, "A dataset with the persistentId " + persistentId + " could not be found."); } - + ExportService instance = ExportService.getInstance(settingsSvc); - + InputStream is = instance.getExport(dataset, exporter); - + String mediaType = instance.getMediaType(exporter); - //Export is only possible for released (non-draft) dataset versions so we can log without checking to see if this is a request for a draft + //Export is only possible for released (non-draft) dataset versions so we can log without checking to see if this is a request for a draft MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, dataset); mdcLogService.logEntry(entry); - + return Response.ok() .entity(is) .type(mediaType). 
- build(); + build(); } catch (Exception wr) { return error(Response.Status.FORBIDDEN, "Export Failed"); } @@ -290,23 +294,23 @@ public Response exportDataset(@QueryParam("persistentId") String persistentId, @ @DELETE @Path("{id}") - public Response deleteDataset( @PathParam("id") String id) { + public Response deleteDataset(@PathParam("id") String id) { // Internally, "DeleteDatasetCommand" simply redirects to "DeleteDatasetVersionCommand" // (and there's a comment that says "TODO: remove this command") - // do we need an exposed API call for it? - // And DeleteDatasetVersionCommand further redirects to DestroyDatasetCommand, - // if the dataset only has 1 version... In other words, the functionality + // do we need an exposed API call for it? + // And DeleteDatasetVersionCommand further redirects to DestroyDatasetCommand, + // if the dataset only has 1 version... In other words, the functionality // currently provided by this API is covered between the "deleteDraftVersion" and - // "destroyDataset" API calls. - // (The logic below follows the current implementation of the underlying + // "destroyDataset" API calls. + // (The logic below follows the current implementation of the underlying // commands!) - - return response( req -> { + + return response(req -> { Dataset doomed = findDatasetOrDie(id); DatasetVersion doomedVersion = doomed.getLatestVersion(); User u = findUserOrDie(); boolean destroy = false; - + if (doomed.getVersions().size() == 1) { if (doomed.isReleased() && (!(u instanceof AuthenticatedUser) || !u.isSuperuser())) { throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "Only superusers can delete published datasets")); @@ -317,26 +321,26 @@ public Response deleteDataset( @PathParam("id") String id) { throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "This is a published dataset with multiple versions. 
This API can only delete the latest version if it is a DRAFT")); } } - - // Gather the locations of the physical files that will need to be + + // Gather the locations of the physical files that will need to be // deleted once the destroy command execution has been finalized: Map deleteStorageLocations = fileService.getPhysicalFilesToDelete(doomedVersion, destroy); - - execCommand( new DeleteDatasetCommand(req, findDatasetOrDie(id))); - - // If we have gotten this far, the destroy command has succeeded, + + execCommand(new DeleteDatasetCommand(req, findDatasetOrDie(id))); + + // If we have gotten this far, the destroy command has succeeded, // so we can finalize it by permanently deleting the physical files: - // (DataFileService will double-check that the datafiles no - // longer exist in the database, before attempting to delete + // (DataFileService will double-check that the datafiles no + // longer exist in the database, before attempting to delete // the physical files) if (!deleteStorageLocations.isEmpty()) { fileService.finalizeFileDeletes(deleteStorageLocations); } - + return ok("Dataset " + id + " deleted"); }); } - + @DELETE @Path("{id}/destroy") public Response destroyDataset(@PathParam("id") String id) { @@ -350,16 +354,16 @@ public Response destroyDataset(@PathParam("id") String id) { throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "Destroy can only be called by superusers.")); } - // Gather the locations of the physical files that will need to be + // Gather the locations of the physical files that will need to be // deleted once the destroy command execution has been finalized: Map deleteStorageLocations = fileService.getPhysicalFilesToDelete(doomed); execCommand(new DestroyDatasetCommand(doomed, req)); - // If we have gotten this far, the destroy command has succeeded, + // If we have gotten this far, the destroy command has succeeded, // so we can finalize permanently deleting the physical files: - // (DataFileService will 
double-check that the datafiles no - // longer exist in the database, before attempting to delete + // (DataFileService will double-check that the datafiles no + // longer exist in the database, before attempting to delete // the physical files) if (!deleteStorageLocations.isEmpty()) { fileService.finalizeFileDeletes(deleteStorageLocations); @@ -368,59 +372,59 @@ public Response destroyDataset(@PathParam("id") String id) { return ok("Dataset " + id + " destroyed"); }); } - + @DELETE @Path("{id}/versions/{versionId}") - public Response deleteDraftVersion( @PathParam("id") String id, @PathParam("versionId") String versionId ){ - if ( ! ":draft".equals(versionId) ) { + public Response deleteDraftVersion(@PathParam("id") String id, @PathParam("versionId") String versionId) { + if (!":draft".equals(versionId)) { return badRequest("Only the :draft version can be deleted"); } - return response( req -> { + return response(req -> { Dataset dataset = findDatasetOrDie(id); DatasetVersion doomed = dataset.getLatestVersion(); - + if (!doomed.isDraft()) { throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "This is NOT a DRAFT version")); } - - // Gather the locations of the physical files that will need to be + + // Gather the locations of the physical files that will need to be // deleted once the destroy command execution has been finalized: - + Map deleteStorageLocations = fileService.getPhysicalFilesToDelete(doomed); - - execCommand( new DeleteDatasetVersionCommand(req, dataset)); - - // If we have gotten this far, the delete command has succeeded - - // by either deleting the Draft version of a published dataset, - // or destroying an unpublished one. + + execCommand(new DeleteDatasetVersionCommand(req, dataset)); + + // If we have gotten this far, the delete command has succeeded - + // by either deleting the Draft version of a published dataset, + // or destroying an unpublished one. 
// This means we can finalize permanently deleting the physical files: - // (DataFileService will double-check that the datafiles no - // longer exist in the database, before attempting to delete + // (DataFileService will double-check that the datafiles no + // longer exist in the database, before attempting to delete // the physical files) if (!deleteStorageLocations.isEmpty()) { fileService.finalizeFileDeletes(deleteStorageLocations); } - + return ok("Draft version of dataset " + id + " deleted"); }); } - + @DELETE @Path("{datasetId}/deleteLink/{linkedDataverseId}") - public Response deleteDatasetLinkingDataverse( @PathParam("datasetId") String datasetId, @PathParam("linkedDataverseId") String linkedDataverseId) { - boolean index = true; + public Response deleteDatasetLinkingDataverse(@PathParam("datasetId") String datasetId, @PathParam("linkedDataverseId") String linkedDataverseId) { + boolean index = true; return response(req -> { execCommand(new DeleteDatasetLinkingDataverseCommand(req, findDatasetOrDie(datasetId), findDatasetLinkingDataverseOrDie(datasetId, linkedDataverseId), index)); return ok("Link from Dataset " + datasetId + " to linked Dataverse " + linkedDataverseId + " deleted"); }); } - + @PUT @Path("{id}/citationdate") - public Response setCitationDate( @PathParam("id") String id, String dsfTypeName) { - return response( req -> { - if ( dsfTypeName.trim().isEmpty() ){ + public Response setCitationDate(@PathParam("id") String id, String dsfTypeName) { + return response(req -> { + if (dsfTypeName.trim().isEmpty()) { return badRequest("Please provide a dataset field type in the requst body."); } DatasetFieldType dsfType = null; @@ -434,124 +438,124 @@ public Response setCitationDate( @PathParam("id") String id, String dsfTypeName) execCommand(new SetDatasetCitationDateCommand(req, findDatasetOrDie(id), dsfType)); return ok("Citation Date for dataset " + id + " set to: " + (dsfType != null ? 
dsfType.getDisplayName() : "default")); }); - } - + } + @DELETE @Path("{id}/citationdate") - public Response useDefaultCitationDate( @PathParam("id") String id) { - return response( req -> { + public Response useDefaultCitationDate(@PathParam("id") String id) { + return response(req -> { execCommand(new SetDatasetCitationDateCommand(req, findDatasetOrDie(id), null)); return ok("Citation Date for dataset " + id + " set to default"); }); - } - + } + @GET @Path("{id}/versions") - public Response listVersions( @PathParam("id") String id ) { - return response( req -> - ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id)) ) - .stream() - .map( d -> json(d) ) - .collect(toJsonArray()))); - } - + public Response listVersions(@PathParam("id") String id) { + return response(req -> + ok(execCommand(new ListVersionsCommand(req, findDatasetOrDie(id))) + .stream() + .map(d -> json(d)) + .collect(toJsonArray()))); + } + @GET @Path("{id}/versions/{versionId}") - public Response getVersion( @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> { - DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); + public Response getVersion(@PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response(req -> { + DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); return (dsv == null || dsv.getId() == null) ? 
notFound("Dataset version not found") - : ok(json(dsv)); + : ok(json(dsv)); }); } - + @GET @Path("{id}/versions/{versionId}/files") - public Response getVersionFiles( @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> ok( jsonFileMetadatas( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getFileMetadatas()))); + public Response getVersionFiles(@PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response(req -> ok(jsonFileMetadatas( + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getFileMetadatas()))); } - + @GET @Path("{id}/dirindex") @Produces("text/html") public Response getFileAccessFolderView(@PathParam("id") String datasetId, @QueryParam("version") String versionId, @QueryParam("folder") String folderName, @QueryParam("original") Boolean originals, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { folderName = folderName == null ? "" : folderName; - versionId = versionId == null ? ":latest-published" : versionId; - - DatasetVersion version; + versionId = versionId == null ? ":latest-published" : versionId; + + DatasetVersion version; try { DataverseRequest req = createDataverseRequest(findUserOrDie()); version = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); } catch (WrappedResponse wr) { return wr.getResponse(); } - + String output = FileUtil.formatFolderListingHtml(folderName, version, "", originals != null && originals); - + // return "NOT FOUND" if there is no such folder in the dataset version: - + if ("".equals(output)) { return notFound("Folder " + folderName + " does not exist"); } - - + + String indexFileName = folderName.equals("") ? 
".index.html" : ".index-" + folderName.replace('/', '_') + ".html"; response.setHeader("Content-disposition", "attachment; filename=\"" + indexFileName + "\""); - + return Response.ok() .entity(output) //.type("application/html"). .build(); } - + @GET @Path("{id}/versions/{versionId}/metadata") - public Response getVersionMetadata( @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> ok( - jsonByBlocks( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers ) + public Response getVersionMetadata(@PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response(req -> ok( + jsonByBlocks( + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers) .getDatasetFields()))); } - + @GET @Path("{id}/versions/{versionNumber}/metadata/{block}") - public Response getVersionMetadataBlock( @PathParam("id") String datasetId, - @PathParam("versionNumber") String versionNumber, - @PathParam("block") String blockName, - @Context UriInfo uriInfo, - @Context HttpHeaders headers ) { - - return response( req -> { - DatasetVersion dsv = getDatasetVersionOrDie(req, versionNumber, findDatasetOrDie(datasetId), uriInfo, headers ); - + public Response getVersionMetadataBlock(@PathParam("id") String datasetId, + @PathParam("versionNumber") String versionNumber, + @PathParam("block") String blockName, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + + return response(req -> { + DatasetVersion dsv = getDatasetVersionOrDie(req, versionNumber, findDatasetOrDie(datasetId), uriInfo, headers); + Map> fieldsByBlock = DatasetField.groupByBlock(dsv.getDatasetFields()); - for ( Map.Entry> p : fieldsByBlock.entrySet() ) { - if ( p.getKey().getName().equals(blockName) ) { + for (Map.Entry> p : fieldsByBlock.entrySet()) { + if 
(p.getKey().getName().equals(blockName)) { return ok(json(p.getKey(), p.getValue())); } } return notFound("metadata block named " + blockName + " not found"); }); } - + @GET @Path("{id}/modifyRegistration") - public Response updateDatasetTargetURL(@PathParam("id") String id ) { - return response( req -> { + public Response updateDatasetTargetURL(@PathParam("id") String id) { + return response(req -> { execCommand(new UpdateDatasetTargetURLCommand(findDatasetOrDie(id), req)); return ok("Dataset " + id + " target url updated"); }); } - + @POST @Path("/modifyRegistrationAll") public Response updateDatasetTargetURLAll() { - return response( req -> { - datasetService.findAll().forEach( ds -> { + return response(req -> { + datasetService.findAll().forEach(ds -> { try { execCommand(new UpdateDatasetTargetURLCommand(findDatasetOrDie(ds.getId().toString()), req)); } catch (WrappedResponse ex) { @@ -561,7 +565,7 @@ public Response updateDatasetTargetURLAll() { return ok("Update All Dataset target url completed"); }); } - + @POST @Path("{id}/modifyRegistrationMetadata") public Response updateDatasetPIDMetadata(@PathParam("id") String id) { @@ -581,36 +585,36 @@ public Response updateDatasetPIDMetadata(@PathParam("id") String id) { return ok(BundleUtil.getStringFromBundle("datasets.api.updatePIDMetadata.success.for.single.dataset", args)); }); } - + @GET @Path("/modifyRegistrationPIDMetadataAll") public Response updateDatasetPIDMetadataAll() { - return response( req -> { - datasetService.findAll().forEach( ds -> { + return response(req -> { + datasetService.findAll().forEach(ds -> { try { execCommand(new UpdateDvObjectPIDMetadataCommand(findDatasetOrDie(ds.getId().toString()), req)); } catch (WrappedResponse ex) { Logger.getLogger(Datasets.class.getName()).log(Level.SEVERE, null, ex); } - }); + }); return ok(BundleUtil.getStringFromBundle("datasets.api.updatePIDMetadata.success.for.update.all")); }); } - + @PUT @Path("{id}/versions/{versionId}") - public Response 
updateDraftVersion( String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId ){ - - if ( ! ":draft".equals(versionId) ) { - return error( Response.Status.BAD_REQUEST, "Only the :draft version can be updated"); + public Response updateDraftVersion(String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId) { + + if (!":draft".equals(versionId)) { + return error(Response.Status.BAD_REQUEST, "Only the :draft version can be updated"); } - - try ( StringReader rdr = new StringReader(jsonBody) ) { + + try (StringReader rdr = new StringReader(jsonBody)) { DataverseRequest req = createDataverseRequest(findUserOrDie()); Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); DatasetVersion incomingVersion = jsonParser().parseDatasetVersion(json); - + // clear possibly stale fields from the incoming dataset version. // creation and modification dates are updated by the commands. incomingVersion.setId(null); @@ -620,18 +624,18 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, incomingVersion.setDataset(ds); incomingVersion.setCreateTime(null); incomingVersion.setLastUpdateTime(null); - - if (!incomingVersion.getFileMetadatas().isEmpty()){ - return error( Response.Status.BAD_REQUEST, "You may not add files via this api."); + + if (!incomingVersion.getFileMetadatas().isEmpty()) { + return error(Response.Status.BAD_REQUEST, "You may not add files via this api."); } - + boolean updateDraft = ds.getLatestVersion().isDraft(); - + DatasetVersion managedVersion; - if ( updateDraft ) { + if (updateDraft) { final DatasetVersion editVersion = ds.getEditVersion(); editVersion.setDatasetFields(incomingVersion.getDatasetFields()); - editVersion.setTermsOfUseAndAccess( incomingVersion.getTermsOfUseAndAccess() ); + editVersion.setTermsOfUseAndAccess(incomingVersion.getTermsOfUseAndAccess()); Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); 
managedVersion = managedDataset.getEditVersion(); } else { @@ -640,18 +644,18 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, // DatasetVersion managedVersion = execCommand( updateDraft // ? new UpdateDatasetVersionCommand(req, incomingVersion) // : new CreateDatasetVersionCommand(req, ds, incomingVersion)); - return ok( json(managedVersion) ); - + return ok(json(managedVersion)); + } catch (JsonParseException ex) { logger.log(Level.SEVERE, "Semantic error parsing dataset version Json: " + ex.getMessage(), ex); - return error( Response.Status.BAD_REQUEST, "Error parsing dataset version: " + ex.getMessage() ); - + return error(Response.Status.BAD_REQUEST, "Error parsing dataset version: " + ex.getMessage()); + } catch (WrappedResponse ex) { return ex.getResponse(); - + } } - + @PUT @Path("{id}/deleteMetadata") public Response deleteVersionMetadata(String jsonBody, @PathParam("id") String id) throws WrappedResponse { @@ -689,7 +693,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav boolean found = false; for (DatasetField dsf : dsv.getDatasetFields()) { if (dsf.getDatasetFieldType().equals(updateField.getDatasetFieldType())) { - if (dsf.getDatasetFieldType().isAllowMultiples()) { + if (dsf.getDatasetFieldType().isAllowMultiples()) { if (updateField.getDatasetFieldType().isControlledVocabulary()) { if (dsf.getDatasetFieldType().isAllowMultiples()) { for (ControlledVocabularyValue cvv : updateField.getControlledVocabularyValues()) { @@ -754,7 +758,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav datasetFieldCompoundValueItemsToRemove.forEach((remove) -> { dsf.getDatasetFieldCompoundValues().remove(remove); }); - if (!found) { + if (!found) { logger.log(Level.SEVERE, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + deleteVal + " not found."); return error(Response.Status.BAD_REQUEST, "Delete metadata failed: " + 
updateField.getDatasetFieldType().getDisplayName() + ": " + deleteVal + " not found."); } @@ -769,17 +773,16 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav break; } } - if (!found){ + if (!found) { String displayValue = !updateField.getDisplayValue().isEmpty() ? updateField.getDisplayValue() : updateField.getCompoundDisplayValue(); - logger.log(Level.SEVERE, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found." ); - return error(Response.Status.BAD_REQUEST, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found." ); + logger.log(Level.SEVERE, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found."); + return error(Response.Status.BAD_REQUEST, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found."); } - } + } - boolean updateDraft = ds.getLatestVersion().isDraft(); - DatasetVersion managedVersion = updateDraft + DatasetVersion managedVersion = updateDraft ? execCommand(new UpdateDatasetVersionCommand(ds, req)).getEditVersion() : execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); return ok(json(managedVersion)); @@ -793,24 +796,24 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav return ex.getResponse(); } - + } - - private String getCompoundDisplayValue (DatasetFieldCompoundValue dscv){ + + private String getCompoundDisplayValue(DatasetFieldCompoundValue dscv) { String returnString = ""; - for (DatasetField dsf : dscv.getChildDatasetFields()) { - for (String value : dsf.getValues()) { - if (!(value == null)) { - returnString += (returnString.isEmpty() ? 
"" : "; ") + value.trim(); - } + for (DatasetField dsf : dscv.getChildDatasetFields()) { + for (String value : dsf.getValues()) { + if (!(value == null)) { + returnString += (returnString.isEmpty() ? "" : "; ") + value.trim(); } } + } return returnString; } - + @PUT @Path("{id}/editMetadata") - public Response editVersionMetadata(String jsonBody, @PathParam("id") String id, @QueryParam("replace") Boolean replace) throws WrappedResponse{ + public Response editVersionMetadata(String jsonBody, @PathParam("id") String id, @QueryParam("replace") Boolean replace) throws WrappedResponse { Boolean replaceData = replace != null; @@ -818,26 +821,26 @@ public Response editVersionMetadata(String jsonBody, @PathParam("id") String id, return processDatasetUpdate(jsonBody, id, req, replaceData); } - - - private Response processDatasetUpdate(String jsonBody, String id, DataverseRequest req, Boolean replaceData){ + + + private Response processDatasetUpdate(String jsonBody, String id, DataverseRequest req, Boolean replaceData) { try (StringReader rdr = new StringReader(jsonBody)) { - + Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); DatasetVersion dsv = ds.getEditVersion(); - + List fields = new LinkedList<>(); - DatasetField singleField = null; - + DatasetField singleField = null; + JsonArray fieldsJson = json.getJsonArray("fields"); - if( fieldsJson == null ){ - singleField = jsonParser().parseField(json, Boolean.FALSE); + if (fieldsJson == null) { + singleField = jsonParser().parseField(json, Boolean.FALSE); fields.add(singleField); - } else{ + } else { fields = jsonParser().parseMultipleFields(json); } - + String valdationErrors = validateDatasetFieldValues(fields); @@ -848,8 +851,8 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque dsv.setVersionState(DatasetVersion.VersionState.DRAFT); - //loop through the update fields - // and compare to the version fields + //loop through the update fields + // and 
compare to the version fields //if exist add/replace values //if not add entire dsf for (DatasetField updateField : fields) { @@ -947,7 +950,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque } } - + private String validateDatasetFieldValues(List fields) { StringBuilder error = new StringBuilder(); @@ -965,14 +968,14 @@ private String validateDatasetFieldValues(List fields) { } return ""; } - + /** * @deprecated This was shipped as a GET but should have been a POST, see https://github.com/IQSS/dataverse/issues/2431 */ @GET @Path("{id}/actions/:publish") @Deprecated - public Response publishDataseUsingGetDeprecated( @PathParam("id") String id, @QueryParam("type") String type ) { + public Response publishDataseUsingGetDeprecated(@PathParam("id") String id, @QueryParam("type") String type) { logger.info("publishDataseUsingGetDeprecated called on id " + id + ". Encourage use of POST rather than GET, which is deprecated."); return publishDataset(id, type); } @@ -984,10 +987,10 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S if (type == null) { return error(Response.Status.BAD_REQUEST, "Missing 'type' parameter (either 'major','minor', or 'updatecurrent')."); } - boolean updateCurrent=false; + boolean updateCurrent = false; AuthenticatedUser user = findAuthenticatedUserOrDie(); type = type.toLowerCase(); - boolean isMinor=false; + boolean isMinor = false; switch (type) { case "minor": isMinor = true; @@ -995,15 +998,15 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S case "major": isMinor = false; break; - case "updatecurrent": - if(user.isSuperuser()) { - updateCurrent=true; - } else { - return error(Response.Status.FORBIDDEN, "Only superusers can update the current version"); - } - break; + case "updatecurrent": + if (user.isSuperuser()) { + updateCurrent = true; + } else { + return error(Response.Status.FORBIDDEN, "Only superusers can update the current version"); 
+ } + break; default: - return error(Response.Status.BAD_REQUEST, "Illegal 'type' parameter value '" + type + "'. It needs to be either 'major', 'minor', or 'updatecurrent'."); + return error(Response.Status.BAD_REQUEST, "Illegal 'type' parameter value '" + type + "'. It needs to be either 'major', 'minor', or 'updatecurrent'."); } Dataset ds = findDatasetOrDie(id); @@ -1064,21 +1067,21 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S .build(); } } else { - PublishDatasetResult res = execCommand(new PublishDatasetCommand(ds, + PublishDatasetResult res = execCommand(new PublishDatasetCommand(ds, createDataverseRequest(user), - isMinor)); - return res.isWorkflow() ? accepted(json(res.getDataset())) : ok(json(res.getDataset())); + isMinor)); + return res.isWorkflow() ? accepted(json(res.getDataset())) : ok(json(res.getDataset())); } } catch (WrappedResponse ex) { return ex.getResponse(); } } - + @POST @Path("{id}/move/{targetDataverseAlias}") public Response moveDataset(@PathParam("id") String id, @PathParam("targetDataverseAlias") String targetDataverseAlias, @QueryParam("forceMove") Boolean force) { try { - User u = findUserOrDie(); + User u = findUserOrDie(); Dataset ds = findDatasetOrDie(id); Dataverse target = dataverseService.findByAlias(targetDataverseAlias); if (target == null) { @@ -1097,32 +1100,32 @@ public Response moveDataset(@PathParam("id") String id, @PathParam("targetDatave } } } - + @PUT - @Path("{linkedDatasetId}/link/{linkingDataverseAlias}") - public Response linkDataset(@PathParam("linkedDatasetId") String linkedDatasetId, @PathParam("linkingDataverseAlias") String linkingDataverseAlias) { - try{ - User u = findUserOrDie(); + @Path("{linkedDatasetId}/link/{linkingDataverseAlias}") + public Response linkDataset(@PathParam("linkedDatasetId") String linkedDatasetId, @PathParam("linkingDataverseAlias") String linkingDataverseAlias) { + try { + User u = findUserOrDie(); Dataset linked = 
findDatasetOrDie(linkedDatasetId); Dataverse linking = findDataverseOrDie(linkingDataverseAlias); - if (linked == null){ + if (linked == null) { return error(Response.Status.BAD_REQUEST, "Linked Dataset not found."); - } - if (linking == null){ + } + if (linking == null) { return error(Response.Status.BAD_REQUEST, "Linking Dataverse not found."); - } + } execCommand(new LinkDatasetCommand( createDataverseRequest(u), linking, linked - )); + )); return ok("Dataset " + linked.getId() + " linked successfully to " + linking.getAlias()); } catch (WrappedResponse ex) { return ex.getResponse(); } } - + @GET @Path("{id}/links") - public Response getLinks(@PathParam("id") String idSupplied ) { + public Response getLinks(@PathParam("id") String idSupplied) { try { User u = findUserOrDie(); if (!u.isSuperuser()) { @@ -1146,8 +1149,8 @@ public Response getLinks(@PathParam("id") String idSupplied ) { /** * Add a given assignment to a given user or group - * @param ra role assignment DTO - * @param id dataset id + * @param ra role assignment DTO + * @param id dataset id * @param apiKey */ @POST @@ -1155,12 +1158,12 @@ public Response getLinks(@PathParam("id") String idSupplied ) { public Response createAssignment(RoleAssignmentDTO ra, @PathParam("identifier") String id, @QueryParam("key") String apiKey) { try { Dataset dataset = findDatasetOrDie(id); - + RoleAssignee assignee = findAssignee(ra.getAssignee()); if (assignee == null) { return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("datasets.api.grant.role.assignee.not.found.error")); - } - + } + DataverseRole theRole; Dataverse dv = dataset.getOwner(); theRole = null; @@ -1188,7 +1191,7 @@ public Response createAssignment(RoleAssignmentDTO ra, @PathParam("identifier") } } - + @DELETE @Path("{identifier}/assignments/{id}") public Response deleteAssignment(@PathParam("id") long assignmentId, @PathParam("identifier") String dsId) { @@ -1211,26 +1214,26 @@ public Response deleteAssignment(@PathParam("id") long 
assignmentId, @PathParam( @GET @Path("{identifier}/assignments") public Response getAssignments(@PathParam("identifier") String id) { - return response( req -> - ok( execCommand( - new ListRoleAssignments(req, findDatasetOrDie(id))) - .stream().map(ra->json(ra)).collect(toJsonArray())) ); + return response(req -> + ok(execCommand( + new ListRoleAssignments(req, findDatasetOrDie(id))) + .stream().map(ra -> json(ra)).collect(toJsonArray()))); } @GET @Path("{id}/privateUrl") public Response getPrivateUrlData(@PathParam("id") String idSupplied) { - return response( req -> { + return response(req -> { PrivateUrl privateUrl = execCommand(new GetPrivateUrlCommand(req, findDatasetOrDie(idSupplied))); - return (privateUrl != null) ? ok(json(privateUrl)) - : error(Response.Status.NOT_FOUND, "Private URL not found."); + return (privateUrl != null) ? ok(json(privateUrl)) + : error(Response.Status.NOT_FOUND, "Private URL not found."); }); } @POST @Path("{id}/privateUrl") public Response createPrivateUrl(@PathParam("id") String idSupplied) { - return response( req -> + return response(req -> ok(json(execCommand( new CreatePrivateUrlCommand(req, findDatasetOrDie(idSupplied)))))); } @@ -1238,7 +1241,7 @@ public Response createPrivateUrl(@PathParam("id") String idSupplied) { @DELETE @Path("{id}/privateUrl") public Response deletePrivateUrl(@PathParam("id") String idSupplied) { - return response( req -> { + return response(req -> { Dataset dataset = findDatasetOrDie(idSupplied); PrivateUrl privateUrl = execCommand(new GetPrivateUrlCommand(req, dataset)); if (privateUrl != null) { @@ -1292,7 +1295,7 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) { try { Dataset dataset = findDatasetOrDie(idSupplied); InputStream is = DatasetUtil.getThumbnailAsInputStream(dataset, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - if(is == null) { + if (is == null) { return notFound("Thumbnail not available"); } return Response.ok(is).build(); @@ -1349,11 +1352,11 @@ public 
Response getRsync(@PathParam("identifier") String id) { dataset = findDatasetOrDie(id); AuthenticatedUser user = findAuthenticatedUserOrDie(); ScriptRequestResponse scriptRequestResponse = execCommand(new RequestRsyncScriptCommand(createDataverseRequest(user), dataset)); - + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.DcmUpload, user.getId(), "script downloaded"); if (lock == null) { logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - return error(Response.Status.FORBIDDEN, "Failed to lock the dataset (dataset id="+dataset.getId()+")"); + return error(Response.Status.FORBIDDEN, "Failed to lock the dataset (dataset id=" + dataset.getId() + ")"); } return ok(scriptRequestResponse.getScript(), MediaType.valueOf(MediaType.TEXT_PLAIN)); } catch (WrappedResponse wr) { @@ -1362,15 +1365,15 @@ public Response getRsync(@PathParam("identifier") String id) { return error(Response.Status.INTERNAL_SERVER_ERROR, "Something went wrong attempting to download rsync script: " + EjbUtil.ejbExceptionToString(ex)); } } - + /** - * This api endpoint triggers the creation of a "package" file in a dataset - * after that package has been moved onto the same filesystem via the Data Capture Module. + * This api endpoint triggers the creation of a "package" file in a dataset + * after that package has been moved onto the same filesystem via the Data Capture Module. * The package is really just a way that Dataverse interprets a folder created by DCM, seeing it as just one file. * The "package" can be downloaded over RSAL. - * + *

* This endpoint currently supports both posix file storage and AWS s3 storage in Dataverse, and depending on which one is active acts accordingly. - * + *

* The initial design of the DCM/Dataverse interaction was not to use packages, but to allow import of all individual files natively into Dataverse. * But due to the possibly immense number of files (millions) the package approach was taken. * This is relevant because the posix ("file") code contains many remnants of that development work. @@ -1394,13 +1397,13 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String try { Dataset dataset = findDatasetOrDie(id); if ("validation passed".equals(statusMessageFromDcm)) { - logger.log(Level.INFO, "Checksum Validation passed for DCM."); + logger.log(Level.INFO, "Checksum Validation passed for DCM."); String storageDriver = dataset.getDataverseContext().getEffectiveStorageDriverId(); String uploadFolder = jsonFromDcm.getString("uploadFolder"); int totalSize = jsonFromDcm.getInt("totalSize"); String storageDriverType = System.getProperty("dataverse.file." + storageDriver + ".type"); - + if (storageDriverType.equals("file")) { logger.log(Level.INFO, "File storage driver used for (dataset id={0})", dataset.getId()); @@ -1417,15 +1420,15 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String String message = wr.getMessage(); return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to put the files into Dataverse. 
Message was '" + message + "'."); } - } else if(storageDriverType.equals("s3")) { - + } else if (storageDriverType.equals("s3")) { + logger.log(Level.INFO, "S3 storage driver used for DCM (dataset id={0})", dataset.getId()); try { - + //Where the lifting is actually done, moving the s3 files over and having dataverse know of the existance of the package s3PackageImporter.copyFromS3(dataset, uploadFolder); DataFile packageFile = s3PackageImporter.createPackageDataFile(dataset, uploadFolder, new Long(totalSize)); - + if (packageFile == null) { logger.log(Level.SEVERE, "S3 File package import failed."); return error(Response.Status.INTERNAL_SERVER_ERROR, "S3 File package import failed."); @@ -1437,7 +1440,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.DcmUpload); dataset.removeLock(dcmLock); } - + // update version using the command engine to enforce user permissions and constraints if (dataset.getVersions().size() == 1 && dataset.getLatestVersion().getVersionState() == DatasetVersion.VersionState.DRAFT) { try { @@ -1455,11 +1458,11 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String JsonObjectBuilder job = Json.createObjectBuilder(); return ok(job); - - } catch (IOException e) { + + } catch (IOException e) { String message = e.getMessage(); return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. 
Message was '" + message + "'."); - } + } } else { return error(Response.Status.INTERNAL_SERVER_ERROR, "Invalid storage driver in Dataverse, not compatible with dcm"); } @@ -1482,7 +1485,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String return ex.getResponse(); } } - + @POST @Path("{id}/submitForReview") @@ -1490,9 +1493,9 @@ public Response submitForReview(@PathParam("id") String idSupplied) { try { Dataset updatedDataset = execCommand(new SubmitDatasetForReviewCommand(createDataverseRequest(findUserOrDie()), findDatasetOrDie(idSupplied))); JsonObjectBuilder result = Json.createObjectBuilder(); - + boolean inReview = updatedDataset.isLockedFor(DatasetLock.Reason.InReview); - + result.add("inReview", inReview); result.add("message", "Dataset id " + updatedDataset.getId() + " has been submitted for review."); return ok(result); @@ -1504,7 +1507,7 @@ public Response submitForReview(@PathParam("id") String idSupplied) { @POST @Path("{id}/returnToAuthor") public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBody) { - + if (jsonBody == null || jsonBody.isEmpty()) { return error(Response.Status.BAD_REQUEST, "You must supply JSON to this API endpoint and it must contain a reason for returning the dataset (field: reasonForReturn)."); } @@ -1512,14 +1515,14 @@ public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBo JsonObject json = Json.createReader(rdr).readObject(); try { Dataset dataset = findDatasetOrDie(idSupplied); - String reasonForReturn = null; + String reasonForReturn = null; reasonForReturn = json.getString("reasonForReturn"); // TODO: Once we add a box for the curator to type into, pass the reason for return to the ReturnDatasetToAuthorCommand and delete this check and call to setReturnReason on the API side. 
if (reasonForReturn == null || reasonForReturn.isEmpty()) { return error(Response.Status.BAD_REQUEST, "You must enter a reason for returning a dataset to the author(s)."); } AuthenticatedUser authenticatedUser = findAuthenticatedUserOrDie(); - Dataset updatedDataset = execCommand(new ReturnDatasetToAuthorCommand(createDataverseRequest(authenticatedUser), dataset, reasonForReturn )); + Dataset updatedDataset = execCommand(new ReturnDatasetToAuthorCommand(createDataverseRequest(authenticatedUser), dataset, reasonForReturn)); JsonObjectBuilder result = Json.createObjectBuilder(); result.add("inReview", false); @@ -1530,237 +1533,237 @@ public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBo } } -@GET -@Path("{id}/uploadsid") -@Deprecated -public Response getUploadUrl(@PathParam("id") String idSupplied) { - try { - Dataset dataset = findDatasetOrDie(idSupplied); - - boolean canUpdateDataset = false; - try { - canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); - } catch (WrappedResponse ex) { - logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - if (!canUpdateDataset) { - return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); - } - S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); - if(s3io == null) { - return error(Response.Status.NOT_FOUND,"Direct upload not supported for files in this dataset: " + dataset.getId()); - } - String url = null; - String storageIdentifier = null; - try { - url = s3io.generateTemporaryS3UploadUrl(); - storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); - } catch (IOException io) { - logger.warning(io.getMessage()); - throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not 
create process direct upload request")); - } - - JsonObjectBuilder response = Json.createObjectBuilder() - .add("url", url) - .add("storageIdentifier", storageIdentifier ); - return ok(response); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + @GET + @Path("{id}/uploadsid") + @Deprecated + public Response getUploadUrl(@PathParam("id") String idSupplied) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); -@GET -@Path("{id}/uploadurls") -public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { - try { - Dataset dataset = findDatasetOrDie(idSupplied); - - boolean canUpdateDataset = false; - try { - canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions while getting upload URLs for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - if (!canUpdateDataset) { - return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); - } - S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); - if (s3io == null) { - return error(Response.Status.NOT_FOUND, - "Direct upload not supported for files in this dataset: " + dataset.getId()); - } - JsonObjectBuilder response = null; - String storageIdentifier = null; - try { - storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); - response = s3io.generateTemporaryS3UploadUrls(dataset.getGlobalId().asString(), storageIdentifier, fileSize); - - } catch (IOException io) { - logger.warning(io.getMessage()); - throw new WrappedResponse(io, - error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); - } - - response.add("storageIdentifier", storageIdentifier); - return ok(response); - } catch (WrappedResponse wr) { - return 
wr.getResponse(); - } -} + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if (s3io == null) { + return error(Response.Status.NOT_FOUND, "Direct upload not supported for files in this dataset: " + dataset.getId()); + } + String url = null; + String storageIdentifier = null; + try { + url = s3io.generateTemporaryS3UploadUrl(); + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + } catch (IOException io) { + logger.warning(io.getMessage()); + throw new WrappedResponse(io, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); + } -@DELETE -@Path("mpupload") -public Response abortMPUpload(@QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { - try { - Dataset dataset = datasetSvc.findByGlobalId(idSupplied); - //Allow the API to be used within a session (e.g. 
for direct upload in the UI) - User user =session.getUser(); - if (!user.isAuthenticated()) { - try { - user = findAuthenticatedUserOrDie(); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions while getting aborting upload for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - } - boolean allowed = false; - if (dataset != null) { - allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } else { - /* - * The only legitimate case where a global id won't correspond to a dataset is - * for uploads during creation. Given that this call will still fail unless all - * three parameters correspond to an active multipart upload, it should be safe - * to allow the attempt for an authenticated user. If there are concerns about - * permissions, one could check with the current design that the user is allowed - * to create datasets in some dataverse that is configured to use the storage - * provider specified in the storageidentifier, but testing for the ability to - * create a dataset in a specific dataverse would requiring changing the design - * somehow (e.g. adding the ownerId to this call). 
- */ - allowed = true; - } - if (!allowed) { - return error(Response.Status.FORBIDDEN, - "You are not permitted to abort file uploads with the supplied parameters."); - } - try { - S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); - } catch (IOException io) { - logger.warning("Multipart upload abort failed for uploadId: " + uploadId + " storageidentifier=" - + storageidentifier + " dataset Id: " + dataset.getId()); - logger.warning(io.getMessage()); - throw new WrappedResponse(io, - error(Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); - } - return Response.noContent().build(); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + JsonObjectBuilder response = Json.createObjectBuilder() + .add("url", url) + .add("storageIdentifier", storageIdentifier); + return ok(response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } -@PUT -@Path("mpupload") -public Response completeMPUpload(String partETagBody, @QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { - try { - Dataset dataset = datasetSvc.findByGlobalId(idSupplied); - //Allow the API to be used within a session (e.g. for direct upload in the UI) - User user =session.getUser(); - if (!user.isAuthenticated()) { - try { - user=findAuthenticatedUserOrDie(); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions to complete mpupload for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - } - boolean allowed = false; - if (dataset != null) { - allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } else { - /* - * The only legitimate case where a global id won't correspond to a dataset is - * for uploads during creation. 
Given that this call will still fail unless all - * three parameters correspond to an active multipart upload, it should be safe - * to allow the attempt for an authenticated user. If there are concerns about - * permissions, one could check with the current design that the user is allowed - * to create datasets in some dataverse that is configured to use the storage - * provider specified in the storageidentifier, but testing for the ability to - * create a dataset in a specific dataverse would requiring changing the design - * somehow (e.g. adding the ownerId to this call). - */ - allowed = true; - } - if (!allowed) { - return error(Response.Status.FORBIDDEN, - "You are not permitted to complete file uploads with the supplied parameters."); - } - List eTagList = new ArrayList(); - logger.info("Etags: " + partETagBody); - try { - JsonReader jsonReader = Json.createReader(new StringReader(partETagBody)); - JsonObject object = jsonReader.readObject(); - jsonReader.close(); - for(String partNo : object.keySet()) { - eTagList.add(new PartETag(Integer.parseInt(partNo), object.getString(partNo))); - } - for(PartETag et: eTagList) { - logger.info("Part: " + et.getPartNumber() + " : " + et.getETag()); - } - } catch (JsonException je) { - logger.info("Unable to parse eTags from: " + partETagBody); - throw new WrappedResponse(je, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); - } - try { - S3AccessIO.completeMultipartUpload(idSupplied, storageidentifier, uploadId, eTagList); - } catch (IOException io) { - logger.warning("Multipart upload completion failed for uploadId: " + uploadId +" storageidentifier=" + storageidentifier + " globalId: " + idSupplied); - logger.warning(io.getMessage()); - try { - S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); - } catch (IOException e) { - logger.severe("Also unable to abort the upload (and release the space on S3 for uploadId: " + uploadId +" storageidentifier=" + 
storageidentifier + " globalId: " + idSupplied); - logger.severe(io.getMessage()); - } - - throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); - } - return ok("Multipart Upload completed"); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + @GET + @Path("{id}/uploadurls") + public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions while getting upload URLs for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if (s3io == null) { + return error(Response.Status.NOT_FOUND, + "Direct upload not supported for files in this dataset: " + dataset.getId()); + } + JsonObjectBuilder response = null; + String storageIdentifier = null; + try { + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + response = s3io.generateTemporaryS3UploadUrls(dataset.getGlobalId().asString(), storageIdentifier, fileSize); + + } catch (IOException io) { + logger.warning(io.getMessage()); + throw new WrappedResponse(io, + error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); + } + + response.add("storageIdentifier", storageIdentifier); + return ok(response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @DELETE + @Path("mpupload") + public Response abortMPUpload(@QueryParam("globalid") 
String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { + try { + Dataset dataset = datasetSvc.findByGlobalId(idSupplied); + //Allow the API to be used within a session (e.g. for direct upload in the UI) + User user = session.getUser(); + if (!user.isAuthenticated()) { + try { + user = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions while getting aborting upload for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + } + boolean allowed = false; + if (dataset != null) { + allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } else { + /* + * The only legitimate case where a global id won't correspond to a dataset is + * for uploads during creation. Given that this call will still fail unless all + * three parameters correspond to an active multipart upload, it should be safe + * to allow the attempt for an authenticated user. If there are concerns about + * permissions, one could check with the current design that the user is allowed + * to create datasets in some dataverse that is configured to use the storage + * provider specified in the storageidentifier, but testing for the ability to + * create a dataset in a specific dataverse would requiring changing the design + * somehow (e.g. adding the ownerId to this call). 
+ */ + allowed = true; + } + if (!allowed) { + return error(Response.Status.FORBIDDEN, + "You are not permitted to abort file uploads with the supplied parameters."); + } + try { + S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); + } catch (IOException io) { + logger.warning("Multipart upload abort failed for uploadId: " + uploadId + " storageidentifier=" + + storageidentifier + " dataset Id: " + dataset.getId()); + logger.warning(io.getMessage()); + throw new WrappedResponse(io, + error(Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); + } + return Response.noContent().build(); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @PUT + @Path("mpupload") + public Response completeMPUpload(String partETagBody, @QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { + try { + Dataset dataset = datasetSvc.findByGlobalId(idSupplied); + //Allow the API to be used within a session (e.g. for direct upload in the UI) + User user = session.getUser(); + if (!user.isAuthenticated()) { + try { + user = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions to complete mpupload for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + } + boolean allowed = false; + if (dataset != null) { + allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } else { + /* + * The only legitimate case where a global id won't correspond to a dataset is + * for uploads during creation. Given that this call will still fail unless all + * three parameters correspond to an active multipart upload, it should be safe + * to allow the attempt for an authenticated user. 
If there are concerns about + * permissions, one could check with the current design that the user is allowed + * to create datasets in some dataverse that is configured to use the storage + * provider specified in the storageidentifier, but testing for the ability to + * create a dataset in a specific dataverse would requiring changing the design + * somehow (e.g. adding the ownerId to this call). + */ + allowed = true; + } + if (!allowed) { + return error(Response.Status.FORBIDDEN, + "You are not permitted to complete file uploads with the supplied parameters."); + } + List eTagList = new ArrayList(); + logger.info("Etags: " + partETagBody); + try { + JsonReader jsonReader = Json.createReader(new StringReader(partETagBody)); + JsonObject object = jsonReader.readObject(); + jsonReader.close(); + for (String partNo : object.keySet()) { + eTagList.add(new PartETag(Integer.parseInt(partNo), object.getString(partNo))); + } + for (PartETag et : eTagList) { + logger.info("Part: " + et.getPartNumber() + " : " + et.getETag()); + } + } catch (JsonException je) { + logger.info("Unable to parse eTags from: " + partETagBody); + throw new WrappedResponse(je, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); + } + try { + S3AccessIO.completeMultipartUpload(idSupplied, storageidentifier, uploadId, eTagList); + } catch (IOException io) { + logger.warning("Multipart upload completion failed for uploadId: " + uploadId + " storageidentifier=" + storageidentifier + " globalId: " + idSupplied); + logger.warning(io.getMessage()); + try { + S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); + } catch (IOException e) { + logger.severe("Also unable to abort the upload (and release the space on S3 for uploadId: " + uploadId + " storageidentifier=" + storageidentifier + " globalId: " + idSupplied); + logger.severe(io.getMessage()); + } + + throw new WrappedResponse(io, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not complete 
multipart upload")); + } + return ok("Multipart Upload completed"); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } /** * Add a File to an existing Dataset - * + * * @param idSupplied * @param jsonData * @param fileInputStream * @param contentDispositionHeader * @param formDataBodyPart - * @return + * @return */ @POST @Path("{id}/add") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addFileToDataset(@PathParam("id") String idSupplied, - @FormDataParam("jsonData") String jsonData, - @FormDataParam("file") InputStream fileInputStream, - @FormDataParam("file") FormDataContentDisposition contentDispositionHeader, - @FormDataParam("file") final FormDataBodyPart formDataBodyPart - ){ + @FormDataParam("jsonData") String jsonData, + @FormDataParam("file") InputStream fileInputStream, + @FormDataParam("file") FormDataContentDisposition contentDispositionHeader, + @FormDataParam("file") final FormDataBodyPart formDataBodyPart + ) { if (!systemConfig.isHTTPUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); @@ -1775,27 +1778,27 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } catch (WrappedResponse ex) { return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); + ); } - - + + // ------------------------------------- // (2) Get the Dataset Id - // + // // ------------------------------------- Dataset dataset; - + try { dataset = findDatasetOrDie(idSupplied); } catch (WrappedResponse wr) { - return wr.getResponse(); + return wr.getResponse(); } - + //------------------------------------ // (2a) Make sure dataset does not have package file // // -------------------------------------- - + for (DatasetVersion dv : dataset.getVersions()) { if (dv.isHasPackageFile()) { return error(Response.Status.FORBIDDEN, @@ -1807,40 +1810,40 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, // (2a) Load up 
optional params via JSON //--------------------------------------- OptionalFileParams optionalFileParams = null; - msgt("(api) jsonData: " + jsonData); + msgt("(api) jsonData: " + jsonData); try { optionalFileParams = new OptionalFileParams(jsonData); } catch (DataFileTagException ex) { - return error( Response.Status.BAD_REQUEST, ex.getMessage()); + return error(Response.Status.BAD_REQUEST, ex.getMessage()); } - + // ------------------------------------- // (3) Get the file name and content type // ------------------------------------- String newFilename = null; String newFileContentType = null; String newStorageIdentifier = null; - if (null == contentDispositionHeader) { - if (optionalFileParams.hasStorageIdentifier()) { - newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid - if (optionalFileParams.hasFileName()) { - newFilename = optionalFileParams.getFileName(); - if (optionalFileParams.hasMimetype()) { - newFileContentType = optionalFileParams.getMimeType(); - } - } - } else { - return error(BAD_REQUEST, - "You must upload a file or provide a storageidentifier, filename, and mimetype."); - } - } else { - newFilename = contentDispositionHeader.getFileName(); - newFileContentType = formDataBodyPart.getMediaType().toString(); - } - - + if (null == contentDispositionHeader) { + if (optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + // ToDo - check that storageIdentifier is valid + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + } else { + return error(BAD_REQUEST, + "You must upload a file or provide a storageidentifier, filename, and mimetype."); + } + } else { + newFilename = contentDispositionHeader.getFileName(); + newFileContentType = formDataBodyPart.getMediaType().toString(); + } + + 
//------------------- // (3) Create the AddReplaceFileHelper object //------------------- @@ -1848,28 +1851,28 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, DataverseRequest dvRequest2 = createDataverseRequest(authUser); AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(dvRequest2, - ingestService, - datasetService, - fileService, - permissionSvc, - commandEngine, - systemConfig); + ingestService, + datasetService, + fileService, + permissionSvc, + commandEngine, + systemConfig); //------------------- // (4) Run "runAddFileByDatasetId" //------------------- addFileHelper.runAddFileByDataset(dataset, - newFilename, - newFileContentType, - newStorageIdentifier, - fileInputStream, - optionalFileParams); + newFilename, + newFileContentType, + newStorageIdentifier, + fileInputStream, + optionalFileParams); - if (addFileHelper.hasError()){ + if (addFileHelper.hasError()) { return error(addFileHelper.getHttpErrorCode(), addFileHelper.getErrorMessagesAsString("\n")); - }else{ + } else { String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); try { //msgt("as String: " + addFileHelper.getSuccessResult()); @@ -1887,7 +1890,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } else { return ok(addFileHelper.getSuccessResultAsJsonObjectBuilder()); } - + //"Look at that! You added a file! 
(hey hey, it may have worked)"); } catch (NoFilesException ex) { Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); @@ -1895,71 +1898,77 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } } - + } // end: addFileToDataset - - private void msg(String m){ + private void msg(String m) { //System.out.println(m); logger.fine(m); } - private void dashes(){ + + private void dashes() { msg("----------------"); } - private void msgt(String m){ - dashes(); msg(m); dashes(); + + private void msgt(String m) { + dashes(); + msg(m); + dashes(); } - - - public static T handleVersion( String versionId, DsVersionHandler hdl ) - throws WrappedResponse { + + + public static T handleVersion(String versionId, DsVersionHandler hdl) + throws WrappedResponse { switch (versionId) { - case ":latest": return hdl.handleLatest(); - case ":draft": return hdl.handleDraft(); - case ":latest-published": return hdl.handleLatestPublished(); + case ":latest": + return hdl.handleLatest(); + case ":draft": + return hdl.handleDraft(); + case ":latest-published": + return hdl.handleLatestPublished(); default: try { String[] versions = versionId.split("\\."); switch (versions.length) { case 1: - return hdl.handleSpecific(Long.parseLong(versions[0]), (long)0.0); + return hdl.handleSpecific(Long.parseLong(versions[0]), (long) 0.0); case 2: - return hdl.handleSpecific( Long.parseLong(versions[0]), Long.parseLong(versions[1]) ); + return hdl.handleSpecific(Long.parseLong(versions[0]), Long.parseLong(versions[1])); default: - throw new WrappedResponse(error( Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'")); + throw new WrappedResponse(error(Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'")); } - } catch ( NumberFormatException nfe ) { - throw new WrappedResponse( error( Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'") ); + } catch (NumberFormatException nfe) { + throw new 
WrappedResponse(error(Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'")); } } } - - private DatasetVersion getDatasetVersionOrDie( final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse { - DatasetVersion dsv = execCommand( handleVersion(versionNumber, new DsVersionHandler>(){ - @Override - public Command handleLatest() { - return new GetLatestAccessibleDatasetVersionCommand(req, ds); - } + private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse { + DatasetVersion dsv = execCommand(handleVersion(versionNumber, new DsVersionHandler>() { - @Override - public Command handleDraft() { - return new GetDraftDatasetVersionCommand(req, ds); - } - - @Override - public Command handleSpecific(long major, long minor) { - return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); - } + @Override + public Command handleLatest() { + return new GetLatestAccessibleDatasetVersionCommand(req, ds); + } - @Override - public Command handleLatestPublished() { - return new GetLatestPublishedDatasetVersionCommand(req, ds); - } - })); - if ( dsv == null || dsv.getId() == null ) { - throw new WrappedResponse( notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found") ); + @Override + public Command handleDraft() { + return new GetDraftDatasetVersionCommand(req, ds); + } + + @Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); + } + + @Override + public Command handleLatestPublished() { + return new GetLatestPublishedDatasetVersionCommand(req, ds); + } + })); + if (dsv == null || dsv.getId() == null) { + throw new WrappedResponse(notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found")); } if 
(dsv.isReleased()) { MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, ds); @@ -1967,7 +1976,7 @@ public Command handleLatestPublished() { } return dsv; } - + @GET @Path("{identifier}/locks") public Response getLocks(@PathParam("identifier") String id, @QueryParam("type") DatasetLock.Reason lockType) { @@ -1975,26 +1984,26 @@ public Response getLocks(@PathParam("identifier") String id, @QueryParam("type") Dataset dataset = null; try { dataset = findDatasetOrDie(id); - Set locks; + Set locks; if (lockType == null) { locks = dataset.getLocks(); } else { // request for a specific type lock: DatasetLock lock = dataset.getLockFor(lockType); - locks = new HashSet<>(); + locks = new HashSet<>(); if (lock != null) { locks.add(lock); } } - + return ok(locks.stream().map(lock -> json(lock)).collect(toJsonArray())); } catch (WrappedResponse wr) { return wr.getResponse(); - } - } - + } + } + @DELETE @Path("{identifier}/locks") public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("type") DatasetLock.Reason lockType) { @@ -2006,7 +2015,7 @@ public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("typ return error(Response.Status.FORBIDDEN, "This API end point can be used by superusers only."); } Dataset dataset = findDatasetOrDie(id); - + if (lockType == null) { Set locks = new HashSet<>(); for (DatasetLock lock : dataset.getLocks()) { @@ -2018,7 +2027,7 @@ public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("typ // refresh the dataset: dataset = findDatasetOrDie(id); } - // kick of dataset reindexing, in case the locks removed + // kick of dataset reindexing, in case the locks removed // affected the search card: try { indexService.indexDataset(dataset, true); @@ -2038,7 +2047,7 @@ public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("typ execCommand(new RemoveLockCommand(req, dataset, lock.getReason())); // refresh the 
dataset: dataset = findDatasetOrDie(id); - // ... and kick of dataset reindexing, in case the lock removed + // ... and kick of dataset reindexing, in case the lock removed // affected the search card: try { indexService.indexDataset(dataset, true); @@ -2058,7 +2067,7 @@ public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("typ }); } - + @POST @Path("{identifier}/lock/{type}") public Response lockDataset(@PathParam("identifier") String id, @PathParam("type") DatasetLock.Reason lockType) { @@ -2067,7 +2076,7 @@ public Response lockDataset(@PathParam("identifier") String id, @PathParam("type AuthenticatedUser user = findAuthenticatedUserOrDie(); if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "This API end point can be used by superusers only."); - } + } Dataset dataset = findDatasetOrDie(id); DatasetLock lock = dataset.getLockFor(lockType); if (lock != null) { @@ -2094,16 +2103,16 @@ public Response lockDataset(@PathParam("identifier") String id, @PathParam("type }); } - + @GET @Path("{id}/makeDataCount/citations") public Response getMakeDataCountCitations(@PathParam("id") String idSupplied) { - + try { Dataset dataset = findDatasetOrDie(idSupplied); JsonArrayBuilder datasetsCitations = Json.createArrayBuilder(); List externalCitations = datasetExternalCitationsService.getDatasetExternalCitationsByDataset(dataset); - for (DatasetExternalCitations citation : externalCitations ){ + for (DatasetExternalCitations citation : externalCitations) { JsonObjectBuilder candidateObj = Json.createObjectBuilder(); /** * In the future we can imagine storing and presenting more @@ -2114,9 +2123,9 @@ public Response getMakeDataCountCitations(@PathParam("id") String idSupplied) { */ candidateObj.add("citationUrl", citation.getCitedByUrl()); datasetsCitations.add(candidateObj); - } - return ok(datasetsCitations); - + } + return ok(datasetsCitations); + } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -2129,23 +2138,23 @@ public 
Response getMakeDataCountMetricCurrentMonth(@PathParam("id") String idSup String nullCurrentMonth = null; return getMakeDataCountMetric(idSupplied, metricSupplied, nullCurrentMonth, country); } - + @GET @Path("{identifier}/storagesize") - public Response getStorageSize(@PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + public Response getStorageSize(@PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.storage"), - execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached,GetDatasetStorageSizeCommand.Mode.STORAGE, null))))); + execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached, GetDatasetStorageSizeCommand.Mode.STORAGE, null))))); } - + @GET @Path("{identifier}/versions/{versionId}/downloadsize") - public Response getDownloadSize(@PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + public Response getDownloadSize(@PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), - execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, GetDatasetStorageSizeCommand.Mode.DOWNLOAD, getDatasetVersionOrDie(req, version , findDatasetOrDie(dvIdtf), uriInfo, headers)))))); + execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, GetDatasetStorageSizeCommand.Mode.DOWNLOAD, 
getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers)))))); } @GET @@ -2247,29 +2256,29 @@ public Response getMakeDataCountMetric(@PathParam("id") String idSupplied, @Path return wr.getResponse(); } } - + @GET @Path("{identifier}/storageDriver") public Response getFileStore(@PathParam("identifier") String dvIdtf, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - - Dataset dataset; - + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { return error(Response.Status.NOT_FOUND, "No such dataset"); } - + return response(req -> ok(dataset.getEffectiveStorageDriverId())); } - + @PUT @Path("{identifier}/storageDriver") public Response setFileStore(@PathParam("identifier") String dvIdtf, - String storageDriverLabel, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + String storageDriverLabel, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + // Superuser-only: AuthenticatedUser user; try { @@ -2279,17 +2288,17 @@ public Response setFileStore(@PathParam("identifier") String dvIdtf, } if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); - } - - Dataset dataset; - + } + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { return error(Response.Status.NOT_FOUND, "No such dataset"); } - - // We don't want to allow setting this to a store id that does not exist: + + // We don't want to allow setting this to a store id that does not exist: for (Entry store : DataAccess.getStorageDriverLabels().entrySet()) { if (store.getKey().equals(storageDriverLabel)) { dataset.setStorageDriverId(store.getValue()); @@ -2297,15 +2306,15 @@ public Response setFileStore(@PathParam("identifier") String dvIdtf, return ok("Storage driver set to: " + store.getKey() + "/" + 
store.getValue()); } } - return error(Response.Status.BAD_REQUEST, - "No Storage Driver found for : " + storageDriverLabel); + return error(Response.Status.BAD_REQUEST, + "No Storage Driver found for : " + storageDriverLabel); } - + @DELETE @Path("{identifier}/storageDriver") public Response resetFileStore(@PathParam("identifier") String dvIdtf, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + // Superuser-only: AuthenticatedUser user; try { @@ -2315,29 +2324,28 @@ public Response resetFileStore(@PathParam("identifier") String dvIdtf, } if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); - } - - Dataset dataset; - + } + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { return error(Response.Status.NOT_FOUND, "No such dataset"); } - + dataset.setStorageDriverId(null); datasetService.merge(dataset); - return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); } @POST - @Path("{id}/addglobusFiles") + @Path("{id}/addglobusFilesBkup") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addGlobusFileToDataset(@PathParam("id") String datasetId, @FormDataParam("jsonData") String jsonData - ) - { + ) { JsonArrayBuilder jarr = Json.createArrayBuilder(); if (!systemConfig.isHTTPUpload()) { @@ -2372,7 +2380,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, String lockInfoMessage = "Globus Upload API is running "; DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, - ((AuthenticatedUser) authUser).getId() , lockInfoMessage); + ((AuthenticatedUser) authUser).getId(), lockInfoMessage); if (lock != null) { dataset.addLock(lock); } else { @@ -2436,8 +2444,7 @@ public Response 
addGlobusFileToDataset(@PathParam("id") String datasetId, } while (!success); - try - { + try { StorageIO datasetSIO = DataAccess.getStorageIO(dataset); List cachedObjectsTags = datasetSIO.listAuxObjects(); @@ -2461,7 +2468,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, JsonArray filesJson = jsonObject.getJsonArray("files"); - int totalNumberofFiles = 0 ; + int totalNumberofFiles = 0; int successNumberofFiles = 0; try { // Start to add the files @@ -2549,7 +2556,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, storageIdentifier, null, optionalFileParams, - globustype); + true); if (addFileHelper.hasError()) { @@ -2593,8 +2600,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, successNumberofFiles = successNumberofFiles + 1; } }// End of adding files - }catch (Exception e ) - { + } catch (Exception e) { Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, e); return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); } @@ -2621,7 +2627,7 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, dataset = datasetService.find(dataset.getId()); - List s= dataset.getFiles(); + List s = dataset.getFiles(); for (DataFile dataFile : s) { logger.info(" ******** TEST the datafile id is = " + dataFile.getId() + " = " + dataFile.getDisplayName()); } @@ -2641,5 +2647,269 @@ public Response addGlobusFileToDataset(@PathParam("id") String datasetId, return ok(Json.createObjectBuilder().add("Files", jarr)); } + + + @POST + @Path("{id}/addglobusFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response addGlobusFileToDatasetTrial1(@PathParam("id") String datasetId, + @FormDataParam("jsonData") String jsonData + ) throws IOException, ExecutionException, InterruptedException { + + logger.info ( " ==== 1 (api) jsonData 1 ====== " + jsonData ); + + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + String requestUrl = httpRequest.getRequestURL().toString(); + + // Async Call + datasetService.globusAsyncCall( jsonData , token , dataset , authUser, requestUrl); + + return ok("Globus Task successfully completed "); + } + + + /** + * Add a File to an 
existing Dataset + * + * @param idSupplied + * @param jsonData + * @return + */ + @POST + @Path("{id}/addFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response addFilesToDataset(@PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { + + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + + //------------------------------------ + // (2b) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + + + msgt("******* (api) jsonData 1: " + jsonData.toString()); + + JsonArray filesJson = null; + try (StringReader rdr = new StringReader(jsonData)) { + //jsonObject = Json.createReader(rdr).readObject(); + filesJson = Json.createReader(rdr).readArray(); + } catch (Exception jpe) { + jpe.printStackTrace(); + logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); + } + + + try { + DataverseRequest dvRequest = createDataverseRequest(authUser); + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + ingestService, + datasetService, + fileService, + permissionSvc, + commandEngine, + systemConfig + ); + + // ------------------------------------- + // (6) Parse files information from jsondata + // calculate checksum + // determine mimetype + // ------------------------------------- + + int totalNumberofFiles = 0; + int successNumberofFiles = 0; + try { + // Start to add the files + if (filesJson != null) { + totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + + OptionalFileParams optionalFileParams = null; + + try { + optionalFileParams = new OptionalFileParams(fileJson.toString()); + } catch (DataFileTagException ex) { + return error(Response.Status.BAD_REQUEST, ex.getMessage()); + } + + // ------------------------------------- + // (3) Get the file name and content type + // ------------------------------------- + String newFilename = null; + String newFileContentType = null; + String newStorageIdentifier = null; + if (optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + // ToDo - check that storageIdentifier is valid + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + } else { + return error(BAD_REQUEST, + "You must upload a file or provide a storageidentifier, filename, and mimetype."); + } + + + msg("ADD!"); + + //------------------- + // Run "runAddFileByDatasetId" + //------------------- + + addFileHelper.runAddFileByDataset(dataset, + newFilename, + newFileContentType, + newStorageIdentifier, + null, + optionalFileParams,true); + + if (addFileHelper.hasError()) { + + JsonObjectBuilder 
fileoutput = Json.createObjectBuilder() + .add("storageIdentifier ", newStorageIdentifier) + .add("error Code: ", addFileHelper.getHttpErrorCode().toString()) + .add("message ", addFileHelper.getErrorMessagesAsString("\n")); + + jarr.add(fileoutput); + + } else { + String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); + + JsonObject successresult = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); + + try { + logger.fine("successMsg: " + successMsg); + String duplicateWarning = addFileHelper.getDuplicateFileWarning(); + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + // return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier ", newStorageIdentifier) + .add("warning message: ", addFileHelper.getDuplicateFileWarning()) + .add("message ", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier ", newStorageIdentifier) + .add("message ", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } + + } catch (Exception ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); + } + } + + successNumberofFiles = successNumberofFiles + 1; + } + }// End of adding files + } catch (Exception e) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, e); + return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); + } + + logger.log(Level.INFO, "Total Number of Files " + totalNumberofFiles); + logger.log(Level.INFO, "Success Number of Files " + successNumberofFiles); + DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); + if (dcmLock == null) { + logger.log(Level.WARNING, "Dataset not locked for Globus upload"); + } else { + logger.log(Level.INFO, "Dataset remove locked for Globus upload"); + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + //dataset.removeLock(dcmLock); + } + + try { + Command cmd; + cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "====== UpdateDatasetVersionCommand Exception : " + ex.getMessage()); + } + + //ingest job + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + } catch (Exception e) { + String message = e.getMessage(); + msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); + e.printStackTrace(); + } + + return ok(Json.createObjectBuilder().add("Files", jarr)); + + } // end: addFileToDataset + } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/fileDetailsHolder.java b/src/main/java/edu/harvard/iq/dataverse/globus/fileDetailsHolder.java new file mode 100644 index 00000000000..fac1192d054 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/fileDetailsHolder.java @@ -0,0 +1,31 @@ +package edu.harvard.iq.dataverse.globus; + + + +public class fileDetailsHolder { + + private String hash; + private String mime; + private String storageID; + + public fileDetailsHolder(String id, String hash, String mime) { + + this.storageID = id; + this.hash = hash ; + this.mime = mime ; + + } + + public String getStorageID() { + return this.storageID; + } + + public String 
getHash() { + return hash; + } + + public String getMime() { + return mime; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 3c7cd22644b..5c898be968c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -683,6 +683,7 @@ public static String calculateChecksum(InputStream in, ChecksumType checksumType return checksumDigestToString(md.digest()); } + public static String calculateChecksum(byte[] dataBytes, ChecksumType checksumType) { MessageDigest md = null; @@ -1156,7 +1157,7 @@ public static List createDataFiles(DatasetVersion version, InputStream } // end createDataFiles - private static boolean useRecognizedType(String suppliedContentType, String recognizedType) { + public static boolean useRecognizedType(String suppliedContentType, String recognizedType) { // is it any better than the type that was supplied to us, // if any? // This is not as trivial a task as one might expect... 
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index c37efc3178f..70515ca9b0f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -16,6 +16,7 @@ import edu.harvard.iq.dataverse.DataverseContact; import edu.harvard.iq.dataverse.DataverseFacet; import edu.harvard.iq.dataverse.DataverseTheme; +import edu.harvard.iq.dataverse.api.Datasets; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.groups.impl.maildomain.MailDomainGroup; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUser; @@ -36,6 +37,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.globus.fileDetailsHolder; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; @@ -324,6 +326,14 @@ public static JsonObjectBuilder json(Dataset ds) { .add("storageIdentifier", ds.getStorageIdentifier()); } + public static JsonObjectBuilder json(fileDetailsHolder ds) { + return Json.createObjectBuilder().add(ds.getStorageID() , + Json.createObjectBuilder() + .add("id", ds.getStorageID() ) + .add("hash", ds.getHash()) + .add("mime",ds.getMime())); + } + public static JsonObjectBuilder json(DatasetVersion dsv) { JsonObjectBuilder bld = jsonObjectBuilder() .add("id", dsv.getId()) From 282063ebb7b6615b71d2d4fa5f7ec34b510fe521 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 16 Mar 2021 16:44:08 -0400 Subject: [PATCH 058/161] corrected few variables --- .../harvard/iq/dataverse/DatasetServiceBean.java | 14 +++++++++----- .../edu/harvard/iq/dataverse/api/Datasets.java | 
5 +++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index f7e37b3d929..e2f3907e4aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -1025,7 +1025,11 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo @Asynchronous public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, User authUser, String httpRequestUrl) throws ExecutionException, InterruptedException { - logger.info(httpRequestUrl + " == globusAsyncCall == step 1 "+ dataset.getId()); + String datasetIdentifier = dataset.getStorageIdentifier(); + + String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") +3); + datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") +3); + Thread.sleep(5000); String lockInfoMessage = "Globus Upload API is running "; @@ -1047,12 +1051,11 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, Us } String taskIdentifier = jsonObject.getString("taskIdentifier"); - String datasetIdentifier = jsonObject.getString("datasetId").replace("doi:",""); // globus task status check globusStatusCheck(taskIdentifier); - // calculate checksum, mimetype + try { List inputList = new ArrayList(); JsonArray filesJsonArray = jsonObject.getJsonArray("files"); @@ -1069,12 +1072,13 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, Us String bucketName = bits[1].replace("/", ""); // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = "s3://" + bucketName + "/" + datasetIdentifier +"/" +fileId ; + String fullPath = storageType + bucketName + "/" + datasetIdentifier +"/" +fileId ; inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); } - JsonObject 
newfilesJsonObject= calculateMissingMetadataFields(inputList); + // calculate checksum, mimetype + JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); JsonArrayBuilder jsonSecondAPI = Json.createArrayBuilder() ; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 8836eb62e44..8797f3d26f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2998,6 +2998,11 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "====== UpdateDatasetVersionCommand Exception : " + ex.getMessage()); } + dataset = datasetService.find(dataset.getId()); + + List s = dataset.getFiles(); + for (DataFile dataFile : s) {} + //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); From a5413c85073967798ba099f45fff5f865fc5f19d Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 18 Mar 2021 13:28:58 -0400 Subject: [PATCH 059/161] hardcoded httpRequestUrl --- .../edu/harvard/iq/dataverse/DatasetServiceBean.java | 2 +- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index e2f3907e4aa..e41a440dd93 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -1106,7 +1106,7 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, Us ProcessBuilder processBuilder = new ProcessBuilder(); - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST 
"+httpRequestUrl.split("/api")[0]+"/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST "+httpRequestUrl+"/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; System.out.println("*******====command ==== " + command); new Thread(new Runnable() { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 8797f3d26f8..0ad96872c94 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2787,8 +2787,18 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, } catch (WrappedResponse wr) { return wr.getResponse(); } + /* + String requestUrl = httpRequest.getProtocol().toLowerCase().split("/")[0]+"://"+httpRequest.getServerName(); - String requestUrl = httpRequest.getRequestURL().toString(); + if( httpRequest.getServerPort() > 0 ) + { + requestUrl = requestUrl + ":"+ httpRequest.getServerPort(); + } + */ + + + String requestUrl = "https://dvdev.scholarsportal.info" ; + //String requestUrl = "http://localhost:8080" ; // Async Call datasetService.globusAsyncCall( jsonData , token , dataset , authUser, requestUrl); From f1433266987581e9ac3fc684b646a3923bd9288b Mon Sep 17 00:00:00 2001 From: chenganj Date: Fri, 19 Mar 2021 15:12:57 -0400 Subject: [PATCH 060/161] - tweak datasetlock, - skip checksum validation using dataset category --- .../iq/dataverse/DatasetServiceBean.java | 11 ++--------- .../harvard/iq/dataverse/api/Datasets.java | 19 ++++++++++++++++--- .../FinalizeDatasetPublicationCommand.java | 8 +++++++- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java 
b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index e41a440dd93..a0ec12a5d64 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -1023,7 +1023,7 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo } @Asynchronous - public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, User authUser, String httpRequestUrl) throws ExecutionException, InterruptedException { + public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl) throws ExecutionException, InterruptedException { String datasetIdentifier = dataset.getStorageIdentifier(); @@ -1032,14 +1032,7 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, Us Thread.sleep(5000); - String lockInfoMessage = "Globus Upload API is running "; - DatasetLock lock = addDatasetLock(dataset.getId(), DatasetLock.Reason.EditInProgress, - ((AuthenticatedUser) authUser).getId(), lockInfoMessage); - if (lock != null) { - dataset.addLock(lock); - } else { - logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - } + JsonObject jsonObject = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 0ad96872c94..7675d008ec0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2775,8 +2775,6 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, ); } - ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); - // ------------------------------------- // (2) Get the Dataset Id // ------------------------------------- @@ -2787,6 +2785,21 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, } catch (WrappedResponse wr) { return wr.getResponse(); } + 
+ + String lockInfoMessage = "Globus Upload API is started "; + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.EditInProgress, + ((AuthenticatedUser) authUser).getId(), lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + + + ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); + + /* String requestUrl = httpRequest.getProtocol().toLowerCase().split("/")[0]+"://"+httpRequest.getServerName(); @@ -2801,7 +2814,7 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, //String requestUrl = "http://localhost:8080" ; // Async Call - datasetService.globusAsyncCall( jsonData , token , dataset , authUser, requestUrl); + datasetService.globusAsyncCall( jsonData , token , dataset , requestUrl); return ok("Globus Task successfully completed "); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index c2f186f1e8c..04e9e09c6d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -80,7 +80,13 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // some imported datasets may already be released. 
// validate the physical files (verify checksums): - validateDataFiles(theDataset, ctxt); + if(theDataset.getCategoryByName("GLOBUS") != null) { + logger.info("skip validating checksum "+theDataset.getGlobalId().asString()); + } + else { + logger.info("run validating checksum "); + validateDataFiles(theDataset, ctxt); + } // (this will throw a CommandException if it fails) } From 6cd23a1b327f84fd649a0b802322532df92d345a Mon Sep 17 00:00:00 2001 From: chenganj Date: Wed, 24 Mar 2021 08:55:04 -0400 Subject: [PATCH 061/161] - tweak datasetlock, - skip checksum validation using dataset category --- .../iq/dataverse/DatasetServiceBean.java | 71 +++++++++++++++---- .../harvard/iq/dataverse/api/Datasets.java | 9 ++- 2 files changed, 63 insertions(+), 17 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index a0ec12a5d64..48b14f19971 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -1025,6 +1025,31 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo @Asynchronous public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl) throws ExecutionException, InterruptedException { + String logTimestamp = logFormatter.format(new Date()); + Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); + + //Logger.getLogger(DatasetServiceBean.class.getCanonicalName()); + //Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." 
+ "ExportAll" + logTimestamp); + String logFileName = "../logs" + File.separator + "globus_" + logTimestamp + ".log"; + FileHandler fileHandler; + boolean fileHandlerSuceeded; + try { + fileHandler = new FileHandler(logFileName); + globusLogger.setUseParentHandlers(false); + fileHandlerSuceeded = true; + } catch (IOException | SecurityException ex) { + Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); + return; + } + + if (fileHandlerSuceeded) { + globusLogger.addHandler(fileHandler); + } else { + globusLogger = logger; + } + + globusLogger.info("Starting an globusAsyncCall"); + String datasetIdentifier = dataset.getStorageIdentifier(); String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") +3); @@ -1033,8 +1058,6 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St Thread.sleep(5000); - - JsonObject jsonObject = null; try (StringReader rdr = new StringReader(jsonData)) { jsonObject = Json.createReader(rdr).readObject(); @@ -1046,7 +1069,7 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St String taskIdentifier = jsonObject.getString("taskIdentifier"); // globus task status check - globusStatusCheck(taskIdentifier); + globusStatusCheck(taskIdentifier,globusLogger); try { @@ -1071,7 +1094,7 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St } // calculate checksum, mimetype - JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList); + JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList,globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); JsonArrayBuilder jsonSecondAPI = Json.createArrayBuilder() ; @@ -1097,6 +1120,8 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St String newjsonData = jsonSecondAPI.build().toString(); + globusLogger.info("Generated new JsonData with calculated values"); + ProcessBuilder 
processBuilder = new ProcessBuilder(); String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST "+httpRequestUrl+"/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; @@ -1115,6 +1140,13 @@ public void run() { } + + globusLogger.info("Finished export-all job."); + + if (fileHandlerSuceeded) { + fileHandler.close(); + } + } catch (Exception e) { logger.info("Exception "); e.printStackTrace(); @@ -1138,12 +1170,13 @@ public static JsonObjectBuilder stringToJsonObjectBuilder(String str) { Executor executor = Executors.newFixedThreadPool(10); - private Boolean globusStatusCheck(String taskId) + private Boolean globusStatusCheck(String taskId, Logger globusLogger) { boolean success = false; do { try { - logger.info(" sleep before globus transfer check"); + + globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(50000); String basicGlobusToken = settingsService.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); @@ -1157,16 +1190,17 @@ private Boolean globusStatusCheck(String taskId) } while (!success); - logger.info(" globus transfer completed "); + + globusLogger.info("globus transfer task completed successfully"); return success; } - public JsonObject calculateMissingMetadataFields(List inputList) throws InterruptedException, ExecutionException, IOException { + public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) throws InterruptedException, ExecutionException, IOException { List> hashvalueCompletableFutures = - inputList.stream().map(iD -> calculateDetailsAsync(iD)).collect(Collectors.toList()); + inputList.stream().map(iD -> calculateDetailsAsync(iD,globusLogger)).collect(Collectors.toList()); CompletableFuture allFutures = CompletableFuture .allOf(hashvalueCompletableFutures.toArray(new CompletableFuture[hashvalueCompletableFutures.size()])); @@ -1189,8 +1223,9 @@ public JsonObject 
calculateMissingMetadataFields(List inputList) throws } - private CompletableFuture calculateDetailsAsync(String id) { - logger.info(" calcualte additional details for these globus id ==== " + id); + private CompletableFuture calculateDetailsAsync(String id, Logger globusLogger) { + //logger.info(" calcualte additional details for these globus id ==== " + id); + return CompletableFuture.supplyAsync( () -> { try { Thread.sleep(2000); @@ -1198,7 +1233,7 @@ private CompletableFuture calculateDetailsAsync(String id) { e.printStackTrace(); } try { - return ( calculateDetails(id) ); + return ( calculateDetails(id,globusLogger) ); } catch (InterruptedException | IOException e) { e.printStackTrace(); } @@ -1209,13 +1244,17 @@ private CompletableFuture calculateDetailsAsync(String id) { } - private fileDetailsHolder calculateDetails(String id) throws InterruptedException, IOException { + private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throws InterruptedException, IOException { int count = 0; String checksumVal = ""; InputStream in = null; String fileId = id.split("IDsplit")[0]; String fullPath = id.split("IDsplit")[1]; String fileName = id.split("IDsplit")[2]; + + // what if the file doesnot exists in s3 + // what if checksum calculation failed + do { try { StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); @@ -1232,8 +1271,10 @@ private fileDetailsHolder calculateDetails(String id) throws InterruptedExceptio } while (count < 3); - return new fileDetailsHolder(fileId, checksumVal, calculatemime(fileName)); - //getBytes(in)+"" ); + String mimeType = calculatemime(fileName); + globusLogger.info("File Details " + fileId + " checksum = "+ checksumVal + " mimeType = " + mimeType); + return new fileDetailsHolder(fileId, checksumVal,mimeType); + //getBytes(in)+"" ); // calculatemime(fileName)); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 
7675d008ec0..afeb10e304c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2801,6 +2801,8 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, /* + + x-forwarded-proto String requestUrl = httpRequest.getProtocol().toLowerCase().split("/")[0]+"://"+httpRequest.getServerName(); if( httpRequest.getServerPort() > 0 ) @@ -2810,12 +2812,15 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, */ - String requestUrl = "https://dvdev.scholarsportal.info" ; - //String requestUrl = "http://localhost:8080" ; + //String requestUrl = "https://dvdev.scholarsportal.info" ; + String requestUrl = "http://localhost:8080" ; // Async Call datasetService.globusAsyncCall( jsonData , token , dataset , requestUrl); + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.CHECKSUMFAIL, dataset.getId()); + + return ok("Globus Task successfully completed "); } From 491fe42c07944db5fc4686a4699ffb1399ca9051 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 29 Mar 2021 10:50:33 -0400 Subject: [PATCH 062/161] - delete globus permission --- .../iq/dataverse/DatasetServiceBean.java | 25 +++++++--- .../harvard/iq/dataverse/api/Datasets.java | 5 +- .../harvard/iq/dataverse/api/GlobusApi.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 46 ++++++++++++++++--- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 48b14f19971..007b1060aae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -28,6 +28,7 @@ import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.*; +import java.net.MalformedURLException; import 
java.text.SimpleDateFormat; import java.util.*; import java.util.concurrent.CompletableFuture; @@ -1022,8 +1023,11 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo } } + + + @Asynchronous - public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl) throws ExecutionException, InterruptedException { + public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl) throws ExecutionException, InterruptedException, MalformedURLException { String logTimestamp = logFormatter.format(new Date()); Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); @@ -1048,7 +1052,7 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St globusLogger = logger; } - globusLogger.info("Starting an globusAsyncCall"); + globusLogger.info("Starting an globusAsyncCall "); String datasetIdentifier = dataset.getStorageIdentifier(); @@ -1071,6 +1075,8 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St // globus task status check globusStatusCheck(taskIdentifier,globusLogger); + globusLogger.info("Start removing Globus permission for the client"); + try { List inputList = new ArrayList(); @@ -1170,8 +1176,7 @@ public static JsonObjectBuilder stringToJsonObjectBuilder(String str) { Executor executor = Executors.newFixedThreadPool(10); - private Boolean globusStatusCheck(String taskId, Logger globusLogger) - { + private Boolean globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { boolean success = false; do { try { @@ -1179,18 +1184,24 @@ private Boolean globusStatusCheck(String taskId, Logger globusLogger) globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(50000); - String basicGlobusToken = settingsService.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - AccessToken 
clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); + AccessToken clientTokenUser = globusServiceBean.getClientToken(); success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); + } catch (Exception ex) { ex.printStackTrace(); } } while (!success); +/* + AccessToken clientTokenUser = globusServiceBean.getClientToken(); + String directory = globusServiceBean.getDirectory( dataset.getId()+"" ); + globusServiceBean.updatePermision(clientTokenUser, directory, "identity", "r"); + globusLogger.info("Successfully removed Globus permission for the client"); +*/ globusLogger.info("globus transfer task completed successfully"); return success; @@ -1272,7 +1283,7 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw String mimeType = calculatemime(fileName); - globusLogger.info("File Details " + fileId + " checksum = "+ checksumVal + " mimeType = " + mimeType); + globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = "+ checksumVal + " mimeType = " + mimeType); return new fileDetailsHolder(fileId, checksumVal,mimeType); //getBytes(in)+"" ); // calculatemime(fileName)); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index afeb10e304c..be46a5fab31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2531,7 +2531,7 @@ public Response addGlobusFileToDatasetBkup(@PathParam("id") String datasetId, do { try { String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); + AccessToken clientTokenUser = globusServiceBean.getClientToken(); success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); @@ -2800,6 +2800,9 @@ public Response addGlobusFilesToDataset(@PathParam("id") String 
datasetId, ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); + //String xfp = httpRequest.getHeader("X-Forwarded-Proto"); + //String requestUrl = xfp +"://"+httpRequest.getServerName(); + /* x-forwarded-proto diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java index c26b1bec184..39c1a13842a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java @@ -166,7 +166,7 @@ public Response globus(@PathParam("id") String datasetId, String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); basicGlobusToken = "ODA0ODBhNzEtODA5ZC00ZTJhLWExNmQtY2JkMzA1NTk0ZDdhOmQvM3NFd1BVUGY0V20ra2hkSkF3NTZMWFJPaFZSTVhnRmR3TU5qM2Q3TjA9"; msgt("******* (api) basicGlobusToken: " + basicGlobusToken); - AccessToken clientTokenUser = globusServiceBean.getClientToken(basicGlobusToken); + AccessToken clientTokenUser = globusServiceBean.getClientToken(); success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); msgt("******* (api) success: " + success); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 5e314c4f47e..2bb3f6c694d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -145,7 +145,8 @@ public void onLoad() { } logger.info(accessTokenUser.getAccessToken()); logger.info(usr.getEmail()); - AccessToken clientTokenUser = getClientToken(basicGlobusToken); + //AccessToken clientTokenUser = getClientToken(basicGlobusToken); + AccessToken clientTokenUser = getClientToken(); if (clientTokenUser == null) { logger.severe("Cannot get client token "); JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); @@ 
-219,6 +220,16 @@ public void goGlobusDownload(String datasetId) { String httpString = "window.location.replace('" + "https://app.globus.org/file-manager?origin_id=" + globusEndpoint + "&origin_path=" + directory + "'" +")"; PrimeFaces.current().executeScript(httpString); } +/* + public void removeGlobusPermission() throws MalformedURLException { + //taskId and ruleId + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + AccessToken clientTokenUser = getClientToken(basicGlobusToken); + String directory = getDirectory( dataset.getId()+"" ); + updatePermision(clientTokenUser, directory, "identity", "r"); + } + + */ ArrayList checkPermisions( AccessToken clientTokenUser, String directory, String globusEndpoint, String principalType, String principal) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); @@ -234,6 +245,7 @@ ArrayList checkPermisions( AccessToken clientTokenUser, String director ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) ) { ids.add(pr.getId()); } else { + logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType()); continue; } } @@ -244,7 +256,7 @@ ArrayList checkPermisions( AccessToken clientTokenUser, String director public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { if (directory != null && !directory.equals("")) { - directory = "/" + directory + "/"; + directory = directory + "/"; } logger.info("Start updating permissions." 
+ " Directory is " + directory); String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); @@ -272,6 +284,24 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin } } + public void deletePermision(String ruleId) throws MalformedURLException { + + AccessToken clientTokenUser = getClientToken(); + logger.info("Start updating permissions." ); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); + logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(),"DELETE", null); + if (result.status != 200) { + logger.warning("Cannot update access rule " + ruleId); + } else { + logger.info("Access rule " + ruleId + " was updated"); + } + + } + public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, String directory, String globusEndpoint) throws MalformedURLException { ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, principal); @@ -347,7 +377,8 @@ public String getTaskList(String basicGlobusToken, String identifierForFileStora logger.info("1.getTaskList ====== timeWhenAsyncStarted = " + timeWhenAsyncStarted + " ====== identifierForFileStorage ====== " + identifierForFileStorage); String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - AccessToken clientTokenUser = getClientToken(basicGlobusToken); + //AccessToken clientTokenUser = getClientToken(basicGlobusToken); + AccessToken clientTokenUser = getClientToken( ); URL url = new 
URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task_list?filter_endpoint="+globusEndpoint+"&filter_status=SUCCEEDED&filter_completion_time="+timeWhenAsyncStarted); @@ -453,7 +484,8 @@ public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId - public AccessToken getClientToken(String basicGlobusToken) throws MalformedURLException { + public AccessToken getClientToken() throws MalformedURLException { + String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); URL url = new URL("https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); MakeRequestResponse result = makeRequest(url, "Basic", @@ -590,7 +622,7 @@ private T parseJson(String sb, Class jsonParserClass, boolean namingPolic } } - String getDirectory(String datasetId) { + public String getDirectory(String datasetId) { Dataset dataset = null; String directory = null; try { @@ -642,7 +674,8 @@ public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedE if (globusEndpoint.equals("") || basicGlobusToken.equals("")) { return false; } - AccessToken clientTokenUser = getClientToken(basicGlobusToken); + //AccessToken clientTokenUser = getClientToken(basicGlobusToken); + AccessToken clientTokenUser = getClientToken( ); if (clientTokenUser == null) { logger.severe("Cannot get client token "); return false; @@ -714,7 +747,6 @@ public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) th workingVersion.setCreateTime(new Timestamp(new Date().getTime())); } - directory = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); System.out.println("======= directory ==== " + directory + " ==== datasetId :" + dataset.getId()); From bc5edf0ad09ecf627cb936a5de50e19be4df34ba Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 30 Mar 2021 17:38:42 -0400 Subject: [PATCH 063/161] - added 
GLOBUSUPLOADSUCCESS notification type and user notification messages - added deleteRule api - --- .../iq/dataverse/DatasetServiceBean.java | 104 +- .../harvard/iq/dataverse/MailServiceBean.java | 11 + .../iq/dataverse/UserNotification.java | 2 +- .../harvard/iq/dataverse/api/Datasets.java | 1289 +++++++++-------- .../providers/builtin/DataverseUserPage.java | 4 + .../dataverse/globus/GlobusServiceBean.java | 10 +- .../harvard/iq/dataverse/util/MailUtil.java | 8 + src/main/java/propertyFiles/Bundle.properties | 3 + src/main/webapp/dataverseuser.xhtml | 7 + 9 files changed, 785 insertions(+), 653 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 007b1060aae..8f53aafc110 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -29,6 +29,7 @@ import java.io.*; import java.net.MalformedURLException; +import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.*; import java.util.concurrent.CompletableFuture; @@ -110,6 +111,8 @@ public class DatasetServiceBean implements java.io.Serializable { @EJB GlobusServiceBean globusServiceBean; + @EJB + UserNotificationServiceBean userNotificationService; private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -1027,7 +1030,7 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo @Asynchronous - public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl) throws ExecutionException, InterruptedException, MalformedURLException { + public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, User authUser) throws ExecutionException, InterruptedException, MalformedURLException { String logTimestamp = logFormatter.format(new Date()); Logger globusLogger = 
Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); @@ -1071,12 +1074,12 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St } String taskIdentifier = jsonObject.getString("taskIdentifier"); + String ruleId = jsonObject.getString("ruleId"); // globus task status check globusStatusCheck(taskIdentifier,globusLogger); - globusLogger.info("Start removing Globus permission for the client"); - + globusServiceBean.deletePermision(ruleId,globusLogger); try { List inputList = new ArrayList(); @@ -1128,27 +1131,23 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St globusLogger.info("Generated new JsonData with calculated values"); - ProcessBuilder processBuilder = new ProcessBuilder(); String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST "+httpRequestUrl+"/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; System.out.println("*******====command ==== " + command); - new Thread(new Runnable() { - public void run() { - try { - processBuilder.command("bash", "-c", command); - Process process = processBuilder.start(); - } catch (Exception ex) { - logger.log(Level.SEVERE, "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); - } - } - }).start(); + String output = addFilesAsync(command , globusLogger ) ; + if(output.equalsIgnoreCase("ok")) + { + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADSUCCESS, dataset.getId()); + globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); + } + else + { + globusLogger.log(Level.SEVERE, "******* Error while executing api/datasets/:persistentId/add call ", command); + } } - - globusLogger.info("Finished export-all job."); - if (fileHandlerSuceeded) { fileHandler.close(); 
} @@ -1180,28 +1179,16 @@ private Boolean globusStatusCheck(String taskId, Logger globusLogger) throws Mal boolean success = false; do { try { - globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(50000); - AccessToken clientTokenUser = globusServiceBean.getClientToken(); - success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); - - } catch (Exception ex) { ex.printStackTrace(); } } while (!success); -/* - AccessToken clientTokenUser = globusServiceBean.getClientToken(); - String directory = globusServiceBean.getDirectory( dataset.getId()+"" ); - globusServiceBean.updatePermision(clientTokenUser, directory, "identity", "r"); - - globusLogger.info("Successfully removed Globus permission for the client"); -*/ globusLogger.info("globus transfer task completed successfully"); return success; @@ -1309,5 +1296,64 @@ public String calculatemime(String fileName) throws InterruptedException { return finalType; } + public String addFilesAsync(String curlCommand, Logger globusLogger) throws ExecutionException, InterruptedException { + CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { + try { + Thread.sleep(2000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + return (addFiles(curlCommand, globusLogger)); + }, executor).exceptionally(ex -> { + globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); + ex.printStackTrace(); + return null; + }); + + String result = addFilesFuture.get(); + + return result ; + } + + + + + private String addFiles(String curlCommand, Logger globusLogger) + { + boolean success = false; + ProcessBuilder processBuilder = new ProcessBuilder(); + Process process = null; + String line; + String status = ""; + + try { + globusLogger.info("Call to : " + curlCommand); + processBuilder.command("bash", "-c", curlCommand); + process = processBuilder.start(); + process.waitFor(); + + BufferedReader br=new BufferedReader(new 
InputStreamReader(process.getInputStream())); + + StringBuilder sb = new StringBuilder(); + while((line=br.readLine())!=null) sb.append(line); + globusLogger.info(" API Output : " + sb.toString()); + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(sb.toString())) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + globusLogger.log(Level.SEVERE, "Error parsing dataset json."); + } + + status = jsonObject.getString("status"); + } catch (Exception ex) { + globusLogger.log(Level.SEVERE, "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + } + + + return status; + } + + } diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 13a92c9cd27..415e3ea1d89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -529,6 +529,15 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio logger.fine("fileImportMsg: " + fileImportMsg); return messageText += fileImportMsg; + case GLOBUSUPLOADSUCCESS: + dataset = (Dataset) targetObject; + String fileMsg = BundleUtil.getStringFromBundle("notification.mail.import.globus", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalIdString(), + dataset.getDisplayName() + )); + return messageText += fileMsg; + case CHECKSUMIMPORT: version = (DatasetVersion) targetObject; String checksumImportMsg = BundleUtil.getStringFromBundle("notification.import.checksum", Arrays.asList( @@ -601,6 +610,8 @@ private Object getObjectOfNotification (UserNotification userNotification){ return datasetService.find(userNotification.getObjectId()); case FILESYSTEMIMPORT: return versionService.find(userNotification.getObjectId()); + case GLOBUSUPLOADSUCCESS: + return datasetService.find(userNotification.getObjectId()); case 
CHECKSUMIMPORT: return versionService.find(userNotification.getObjectId()); case APIGENERATED: diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java index e44c5f6333e..82bf6393f86 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java @@ -30,7 +30,7 @@ public enum Type { ASSIGNROLE, REVOKEROLE, CREATEDV, CREATEDS, CREATEACC, SUBMITTEDDS, RETURNEDDS, PUBLISHEDDS, REQUESTFILEACCESS, GRANTFILEACCESS, REJECTFILEACCESS, FILESYSTEMIMPORT, CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS, - PUBLISHFAILED_PIDREG + PUBLISHFAILED_PIDREG,GLOBUSUPLOADSUCCESS; }; private static final long serialVersionUID = 1L; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index be46a5fab31..b328877e145 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -149,14 +149,11 @@ import javax.ws.rs.PathParam; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.HttpHeaders; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; +import javax.ws.rs.core.*; import javax.ws.rs.core.Response.Status; import static javax.ws.rs.core.Response.Status.BAD_REQUEST; -import javax.ws.rs.core.UriInfo; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.SolrServerException; import org.glassfish.jersey.media.multipart.FormDataBodyPart; @@ -173,36 +170,37 @@ public class Datasets extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Datasets.class.getCanonicalName()); - - @Inject DataverseSession session; + + @Inject + DataverseSession session; @EJB 
DatasetServiceBean datasetService; @EJB DataverseServiceBean dataverseService; - + @EJB GlobusServiceBean globusServiceBean; @EJB UserNotificationServiceBean userNotificationService; - + @EJB PermissionServiceBean permissionService; - + @EJB AuthenticationServiceBean authenticationServiceBean; - + @EJB DDIExportServiceBean ddiExportService; - + @EJB DatasetFieldServiceBean datasetfieldService; @EJB MetadataBlockServiceBean metadataBlockService; - + @EJB DataFileServiceBean fileService; @@ -211,26 +209,26 @@ public class Datasets extends AbstractApiBean { @EJB EjbDataverseEngine commandEngine; - + @EJB IndexServiceBean indexService; @EJB S3PackageImporter s3PackageImporter; - + @EJB SettingsServiceBean settingsService; // TODO: Move to AbstractApiBean @EJB DatasetMetricsServiceBean datasetMetricsSvc; - + @EJB DatasetExternalCitationsServiceBean datasetExternalCitationsService; - + @Inject MakeDataCountLoggingServiceBean mdcLogService; - + @Inject DataverseRequestServiceBean dvRequestService; @@ -240,40 +238,43 @@ public class Datasets extends AbstractApiBean { /** * Used to consolidate the way we parse and handle dataset versions. 
- * @param + * @param */ public interface DsVersionHandler { T handleLatest(); + T handleDraft(); - T handleSpecific( long major, long minor ); + + T handleSpecific(long major, long minor); + T handleLatestPublished(); } - + @GET @Path("{id}") public Response getDataset(@PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { - return response( req -> { + return response(req -> { final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(id))); final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); final JsonObjectBuilder jsonbuilder = json(retrieved); //Report MDC if this is a released version (could be draft if user has access, or user may not have access at all and is not getting metadata beyond the minimum) - if((latest != null) && latest.isReleased()) { + if ((latest != null) && latest.isReleased()) { MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, retrieved); mdcLogService.logEntry(entry); } return ok(jsonbuilder.add("latestVersion", (latest != null) ? json(latest) : null)); }); } - + // TODO: // This API call should, ideally, call findUserOrDie() and the GetDatasetCommand // to obtain the dataset that we are trying to export - which would handle // Auth in the process... For now, Auth isn't necessary - since export ONLY // WORKS on published datasets, which are open to the world. -- L.A. 
4.5 - + @GET @Path("/export") - @Produces({"application/xml", "application/json", "application/html" }) + @Produces({"application/xml", "application/json", "application/html"}) public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { try { @@ -281,20 +282,20 @@ public Response exportDataset(@QueryParam("persistentId") String persistentId, @ if (dataset == null) { return error(Response.Status.NOT_FOUND, "A dataset with the persistentId " + persistentId + " could not be found."); } - + ExportService instance = ExportService.getInstance(settingsSvc); - + InputStream is = instance.getExport(dataset, exporter); - + String mediaType = instance.getMediaType(exporter); //Export is only possible for released (non-draft) dataset versions so we can log without checking to see if this is a request for a draft MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, dataset); mdcLogService.logEntry(entry); - + return Response.ok() .entity(is) .type(mediaType). - build(); + build(); } catch (Exception wr) { return error(Response.Status.FORBIDDEN, "Export Failed"); } @@ -302,7 +303,7 @@ public Response exportDataset(@QueryParam("persistentId") String persistentId, @ @DELETE @Path("{id}") - public Response deleteDataset( @PathParam("id") String id) { + public Response deleteDataset(@PathParam("id") String id) { // Internally, "DeleteDatasetCommand" simply redirects to "DeleteDatasetVersionCommand" // (and there's a comment that says "TODO: remove this command") // do we need an exposed API call for it? @@ -312,13 +313,13 @@ public Response deleteDataset( @PathParam("id") String id) { // "destroyDataset" API calls. // (The logic below follows the current implementation of the underlying // commands!) 
- - return response( req -> { + + return response(req -> { Dataset doomed = findDatasetOrDie(id); DatasetVersion doomedVersion = doomed.getLatestVersion(); User u = findUserOrDie(); boolean destroy = false; - + if (doomed.getVersions().size() == 1) { if (doomed.isReleased() && (!(u instanceof AuthenticatedUser) || !u.isSuperuser())) { throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "Only superusers can delete published datasets")); @@ -329,13 +330,13 @@ public Response deleteDataset( @PathParam("id") String id) { throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "This is a published dataset with multiple versions. This API can only delete the latest version if it is a DRAFT")); } } - + // Gather the locations of the physical files that will need to be // deleted once the destroy command execution has been finalized: Map deleteStorageLocations = fileService.getPhysicalFilesToDelete(doomedVersion, destroy); - - execCommand( new DeleteDatasetCommand(req, findDatasetOrDie(id))); - + + execCommand(new DeleteDatasetCommand(req, findDatasetOrDie(id))); + // If we have gotten this far, the destroy command has succeeded, // so we can finalize it by permanently deleting the physical files: // (DataFileService will double-check that the datafiles no @@ -344,11 +345,11 @@ public Response deleteDataset( @PathParam("id") String id) { if (!deleteStorageLocations.isEmpty()) { fileService.finalizeFileDeletes(deleteStorageLocations); } - + return ok("Dataset " + id + " deleted"); }); } - + @DELETE @Path("{id}/destroy") public Response destroyDataset(@PathParam("id") String id) { @@ -380,29 +381,29 @@ public Response destroyDataset(@PathParam("id") String id) { return ok("Dataset " + id + " destroyed"); }); } - + @DELETE @Path("{id}/versions/{versionId}") - public Response deleteDraftVersion( @PathParam("id") String id, @PathParam("versionId") String versionId ){ - if ( ! 
":draft".equals(versionId) ) { + public Response deleteDraftVersion(@PathParam("id") String id, @PathParam("versionId") String versionId) { + if (!":draft".equals(versionId)) { return badRequest("Only the :draft version can be deleted"); } - return response( req -> { + return response(req -> { Dataset dataset = findDatasetOrDie(id); DatasetVersion doomed = dataset.getLatestVersion(); - + if (!doomed.isDraft()) { throw new WrappedResponse(error(Response.Status.UNAUTHORIZED, "This is NOT a DRAFT version")); } - + // Gather the locations of the physical files that will need to be // deleted once the destroy command execution has been finalized: - + Map deleteStorageLocations = fileService.getPhysicalFilesToDelete(doomed); - - execCommand( new DeleteDatasetVersionCommand(req, dataset)); - + + execCommand(new DeleteDatasetVersionCommand(req, dataset)); + // If we have gotten this far, the delete command has succeeded - // by either deleting the Draft version of a published dataset, // or destroying an unpublished one. 
@@ -413,26 +414,26 @@ public Response deleteDraftVersion( @PathParam("id") String id, @PathParam("ver if (!deleteStorageLocations.isEmpty()) { fileService.finalizeFileDeletes(deleteStorageLocations); } - + return ok("Draft version of dataset " + id + " deleted"); }); } - + @DELETE @Path("{datasetId}/deleteLink/{linkedDataverseId}") - public Response deleteDatasetLinkingDataverse( @PathParam("datasetId") String datasetId, @PathParam("linkedDataverseId") String linkedDataverseId) { - boolean index = true; + public Response deleteDatasetLinkingDataverse(@PathParam("datasetId") String datasetId, @PathParam("linkedDataverseId") String linkedDataverseId) { + boolean index = true; return response(req -> { execCommand(new DeleteDatasetLinkingDataverseCommand(req, findDatasetOrDie(datasetId), findDatasetLinkingDataverseOrDie(datasetId, linkedDataverseId), index)); return ok("Link from Dataset " + datasetId + " to linked Dataverse " + linkedDataverseId + " deleted"); }); } - + @PUT @Path("{id}/citationdate") - public Response setCitationDate( @PathParam("id") String id, String dsfTypeName) { - return response( req -> { - if ( dsfTypeName.trim().isEmpty() ){ + public Response setCitationDate(@PathParam("id") String id, String dsfTypeName) { + return response(req -> { + if (dsfTypeName.trim().isEmpty()) { return badRequest("Please provide a dataset field type in the requst body."); } DatasetFieldType dsfType = null; @@ -446,124 +447,124 @@ public Response setCitationDate( @PathParam("id") String id, String dsfTypeName) execCommand(new SetDatasetCitationDateCommand(req, findDatasetOrDie(id), dsfType)); return ok("Citation Date for dataset " + id + " set to: " + (dsfType != null ? 
dsfType.getDisplayName() : "default")); }); - } - + } + @DELETE @Path("{id}/citationdate") - public Response useDefaultCitationDate( @PathParam("id") String id) { - return response( req -> { + public Response useDefaultCitationDate(@PathParam("id") String id) { + return response(req -> { execCommand(new SetDatasetCitationDateCommand(req, findDatasetOrDie(id), null)); return ok("Citation Date for dataset " + id + " set to default"); }); - } - + } + @GET @Path("{id}/versions") - public Response listVersions( @PathParam("id") String id ) { - return response( req -> - ok( execCommand( new ListVersionsCommand(req, findDatasetOrDie(id)) ) - .stream() - .map( d -> json(d) ) - .collect(toJsonArray()))); - } - + public Response listVersions(@PathParam("id") String id) { + return response(req -> + ok(execCommand(new ListVersionsCommand(req, findDatasetOrDie(id))) + .stream() + .map(d -> json(d)) + .collect(toJsonArray()))); + } + @GET @Path("{id}/versions/{versionId}") - public Response getVersion( @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> { - DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); + public Response getVersion(@PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response(req -> { + DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); return (dsv == null || dsv.getId() == null) ? 
notFound("Dataset version not found") - : ok(json(dsv)); + : ok(json(dsv)); }); } - + @GET @Path("{id}/versions/{versionId}/files") - public Response getVersionFiles( @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> ok( jsonFileMetadatas( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getFileMetadatas()))); + public Response getVersionFiles(@PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response(req -> ok(jsonFileMetadatas( + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getFileMetadatas()))); } - + @GET @Path("{id}/dirindex") @Produces("text/html") public Response getFileAccessFolderView(@PathParam("id") String datasetId, @QueryParam("version") String versionId, @QueryParam("folder") String folderName, @QueryParam("original") Boolean originals, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { folderName = folderName == null ? "" : folderName; - versionId = versionId == null ? ":latest-published" : versionId; - - DatasetVersion version; + versionId = versionId == null ? ":latest-published" : versionId; + + DatasetVersion version; try { DataverseRequest req = createDataverseRequest(findUserOrDie()); version = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); } catch (WrappedResponse wr) { return wr.getResponse(); } - + String output = FileUtil.formatFolderListingHtml(folderName, version, "", originals != null && originals); - + // return "NOT FOUND" if there is no such folder in the dataset version: - + if ("".equals(output)) { return notFound("Folder " + folderName + " does not exist"); } - - + + String indexFileName = folderName.equals("") ? 
".index.html" : ".index-" + folderName.replace('/', '_') + ".html"; response.setHeader("Content-disposition", "attachment; filename=\"" + indexFileName + "\""); - + return Response.ok() .entity(output) //.type("application/html"). .build(); } - + @GET @Path("{id}/versions/{versionId}/metadata") - public Response getVersionMetadata( @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return response( req -> ok( - jsonByBlocks( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers ) + public Response getVersionMetadata(@PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response(req -> ok( + jsonByBlocks( + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers) .getDatasetFields()))); } - + @GET @Path("{id}/versions/{versionNumber}/metadata/{block}") - public Response getVersionMetadataBlock( @PathParam("id") String datasetId, - @PathParam("versionNumber") String versionNumber, - @PathParam("block") String blockName, - @Context UriInfo uriInfo, - @Context HttpHeaders headers ) { - - return response( req -> { - DatasetVersion dsv = getDatasetVersionOrDie(req, versionNumber, findDatasetOrDie(datasetId), uriInfo, headers ); - + public Response getVersionMetadataBlock(@PathParam("id") String datasetId, + @PathParam("versionNumber") String versionNumber, + @PathParam("block") String blockName, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + + return response(req -> { + DatasetVersion dsv = getDatasetVersionOrDie(req, versionNumber, findDatasetOrDie(datasetId), uriInfo, headers); + Map> fieldsByBlock = DatasetField.groupByBlock(dsv.getDatasetFields()); - for ( Map.Entry> p : fieldsByBlock.entrySet() ) { - if ( p.getKey().getName().equals(blockName) ) { + for (Map.Entry> p : fieldsByBlock.entrySet()) { + if 
(p.getKey().getName().equals(blockName)) { return ok(json(p.getKey(), p.getValue())); } } return notFound("metadata block named " + blockName + " not found"); }); } - + @GET @Path("{id}/modifyRegistration") - public Response updateDatasetTargetURL(@PathParam("id") String id ) { - return response( req -> { + public Response updateDatasetTargetURL(@PathParam("id") String id) { + return response(req -> { execCommand(new UpdateDatasetTargetURLCommand(findDatasetOrDie(id), req)); return ok("Dataset " + id + " target url updated"); }); } - + @POST @Path("/modifyRegistrationAll") public Response updateDatasetTargetURLAll() { - return response( req -> { - datasetService.findAll().forEach( ds -> { + return response(req -> { + datasetService.findAll().forEach(ds -> { try { execCommand(new UpdateDatasetTargetURLCommand(findDatasetOrDie(ds.getId().toString()), req)); } catch (WrappedResponse ex) { @@ -573,7 +574,7 @@ public Response updateDatasetTargetURLAll() { return ok("Update All Dataset target url completed"); }); } - + @POST @Path("{id}/modifyRegistrationMetadata") public Response updateDatasetPIDMetadata(@PathParam("id") String id) { @@ -593,36 +594,36 @@ public Response updateDatasetPIDMetadata(@PathParam("id") String id) { return ok(BundleUtil.getStringFromBundle("datasets.api.updatePIDMetadata.success.for.single.dataset", args)); }); } - + @GET @Path("/modifyRegistrationPIDMetadataAll") public Response updateDatasetPIDMetadataAll() { - return response( req -> { - datasetService.findAll().forEach( ds -> { + return response(req -> { + datasetService.findAll().forEach(ds -> { try { execCommand(new UpdateDvObjectPIDMetadataCommand(findDatasetOrDie(ds.getId().toString()), req)); } catch (WrappedResponse ex) { Logger.getLogger(Datasets.class.getName()).log(Level.SEVERE, null, ex); } - }); + }); return ok(BundleUtil.getStringFromBundle("datasets.api.updatePIDMetadata.success.for.update.all")); }); } - + @PUT @Path("{id}/versions/{versionId}") - public Response 
updateDraftVersion( String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId ){ - - if ( ! ":draft".equals(versionId) ) { - return error( Response.Status.BAD_REQUEST, "Only the :draft version can be updated"); + public Response updateDraftVersion(String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId) { + + if (!":draft".equals(versionId)) { + return error(Response.Status.BAD_REQUEST, "Only the :draft version can be updated"); } - - try ( StringReader rdr = new StringReader(jsonBody) ) { + + try (StringReader rdr = new StringReader(jsonBody)) { DataverseRequest req = createDataverseRequest(findUserOrDie()); Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); DatasetVersion incomingVersion = jsonParser().parseDatasetVersion(json); - + // clear possibly stale fields from the incoming dataset version. // creation and modification dates are updated by the commands. incomingVersion.setId(null); @@ -632,18 +633,18 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, incomingVersion.setDataset(ds); incomingVersion.setCreateTime(null); incomingVersion.setLastUpdateTime(null); - - if (!incomingVersion.getFileMetadatas().isEmpty()){ - return error( Response.Status.BAD_REQUEST, "You may not add files via this api."); + + if (!incomingVersion.getFileMetadatas().isEmpty()) { + return error(Response.Status.BAD_REQUEST, "You may not add files via this api."); } - + boolean updateDraft = ds.getLatestVersion().isDraft(); - + DatasetVersion managedVersion; - if ( updateDraft ) { + if (updateDraft) { final DatasetVersion editVersion = ds.getEditVersion(); editVersion.setDatasetFields(incomingVersion.getDatasetFields()); - editVersion.setTermsOfUseAndAccess( incomingVersion.getTermsOfUseAndAccess() ); + editVersion.setTermsOfUseAndAccess(incomingVersion.getTermsOfUseAndAccess()); Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); 
managedVersion = managedDataset.getEditVersion(); } else { @@ -652,18 +653,18 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, // DatasetVersion managedVersion = execCommand( updateDraft // ? new UpdateDatasetVersionCommand(req, incomingVersion) // : new CreateDatasetVersionCommand(req, ds, incomingVersion)); - return ok( json(managedVersion) ); - + return ok(json(managedVersion)); + } catch (JsonParseException ex) { logger.log(Level.SEVERE, "Semantic error parsing dataset version Json: " + ex.getMessage(), ex); - return error( Response.Status.BAD_REQUEST, "Error parsing dataset version: " + ex.getMessage() ); - + return error(Response.Status.BAD_REQUEST, "Error parsing dataset version: " + ex.getMessage()); + } catch (WrappedResponse ex) { return ex.getResponse(); - + } } - + @PUT @Path("{id}/deleteMetadata") public Response deleteVersionMetadata(String jsonBody, @PathParam("id") String id) throws WrappedResponse { @@ -701,7 +702,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav boolean found = false; for (DatasetField dsf : dsv.getDatasetFields()) { if (dsf.getDatasetFieldType().equals(updateField.getDatasetFieldType())) { - if (dsf.getDatasetFieldType().isAllowMultiples()) { + if (dsf.getDatasetFieldType().isAllowMultiples()) { if (updateField.getDatasetFieldType().isControlledVocabulary()) { if (dsf.getDatasetFieldType().isAllowMultiples()) { for (ControlledVocabularyValue cvv : updateField.getControlledVocabularyValues()) { @@ -766,7 +767,7 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav datasetFieldCompoundValueItemsToRemove.forEach((remove) -> { dsf.getDatasetFieldCompoundValues().remove(remove); }); - if (!found) { + if (!found) { logger.log(Level.SEVERE, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + deleteVal + " not found."); return error(Response.Status.BAD_REQUEST, "Delete metadata failed: " + 
updateField.getDatasetFieldType().getDisplayName() + ": " + deleteVal + " not found."); } @@ -781,17 +782,16 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav break; } } - if (!found){ + if (!found) { String displayValue = !updateField.getDisplayValue().isEmpty() ? updateField.getDisplayValue() : updateField.getCompoundDisplayValue(); - logger.log(Level.SEVERE, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found." ); - return error(Response.Status.BAD_REQUEST, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found." ); + logger.log(Level.SEVERE, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found."); + return error(Response.Status.BAD_REQUEST, "Delete metadata failed: " + updateField.getDatasetFieldType().getDisplayName() + ": " + displayValue + " not found."); } - } + } - boolean updateDraft = ds.getLatestVersion().isDraft(); - DatasetVersion managedVersion = updateDraft + DatasetVersion managedVersion = updateDraft ? execCommand(new UpdateDatasetVersionCommand(ds, req)).getEditVersion() : execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); return ok(json(managedVersion)); @@ -805,24 +805,24 @@ private Response processDatasetFieldDataDelete(String jsonBody, String id, Datav return ex.getResponse(); } - + } - - private String getCompoundDisplayValue (DatasetFieldCompoundValue dscv){ + + private String getCompoundDisplayValue(DatasetFieldCompoundValue dscv) { String returnString = ""; - for (DatasetField dsf : dscv.getChildDatasetFields()) { - for (String value : dsf.getValues()) { - if (!(value == null)) { - returnString += (returnString.isEmpty() ? 
"" : "; ") + value.trim(); - } + for (DatasetField dsf : dscv.getChildDatasetFields()) { + for (String value : dsf.getValues()) { + if (!(value == null)) { + returnString += (returnString.isEmpty() ? "" : "; ") + value.trim(); } } + } return returnString; } - + @PUT @Path("{id}/editMetadata") - public Response editVersionMetadata(String jsonBody, @PathParam("id") String id, @QueryParam("replace") Boolean replace) throws WrappedResponse{ + public Response editVersionMetadata(String jsonBody, @PathParam("id") String id, @QueryParam("replace") Boolean replace) throws WrappedResponse { Boolean replaceData = replace != null; @@ -830,26 +830,26 @@ public Response editVersionMetadata(String jsonBody, @PathParam("id") String id, return processDatasetUpdate(jsonBody, id, req, replaceData); } - - - private Response processDatasetUpdate(String jsonBody, String id, DataverseRequest req, Boolean replaceData){ + + + private Response processDatasetUpdate(String jsonBody, String id, DataverseRequest req, Boolean replaceData) { try (StringReader rdr = new StringReader(jsonBody)) { - + Dataset ds = findDatasetOrDie(id); JsonObject json = Json.createReader(rdr).readObject(); DatasetVersion dsv = ds.getEditVersion(); - + List fields = new LinkedList<>(); - DatasetField singleField = null; - + DatasetField singleField = null; + JsonArray fieldsJson = json.getJsonArray("fields"); - if( fieldsJson == null ){ - singleField = jsonParser().parseField(json, Boolean.FALSE); + if (fieldsJson == null) { + singleField = jsonParser().parseField(json, Boolean.FALSE); fields.add(singleField); - } else{ + } else { fields = jsonParser().parseMultipleFields(json); } - + String valdationErrors = validateDatasetFieldValues(fields); @@ -959,7 +959,7 @@ private Response processDatasetUpdate(String jsonBody, String id, DataverseReque } } - + private String validateDatasetFieldValues(List fields) { StringBuilder error = new StringBuilder(); @@ -977,14 +977,14 @@ private String 
validateDatasetFieldValues(List fields) { } return ""; } - + /** * @deprecated This was shipped as a GET but should have been a POST, see https://github.com/IQSS/dataverse/issues/2431 */ @GET @Path("{id}/actions/:publish") @Deprecated - public Response publishDataseUsingGetDeprecated( @PathParam("id") String id, @QueryParam("type") String type ) { + public Response publishDataseUsingGetDeprecated(@PathParam("id") String id, @QueryParam("type") String type) { logger.info("publishDataseUsingGetDeprecated called on id " + id + ". Encourage use of POST rather than GET, which is deprecated."); return publishDataset(id, type, false); } @@ -996,10 +996,10 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S if (type == null) { return error(Response.Status.BAD_REQUEST, "Missing 'type' parameter (either 'major','minor', or 'updatecurrent')."); } - boolean updateCurrent=false; + boolean updateCurrent = false; AuthenticatedUser user = findAuthenticatedUserOrDie(); type = type.toLowerCase(); - boolean isMinor=false; + boolean isMinor = false; switch (type) { case "minor": isMinor = true; @@ -1007,15 +1007,15 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S case "major": isMinor = false; break; - case "updatecurrent": - if(user.isSuperuser()) { - updateCurrent=true; - } else { - return error(Response.Status.FORBIDDEN, "Only superusers can update the current version"); - } - break; + case "updatecurrent": + if (user.isSuperuser()) { + updateCurrent = true; + } else { + return error(Response.Status.FORBIDDEN, "Only superusers can update the current version"); + } + break; default: - return error(Response.Status.BAD_REQUEST, "Illegal 'type' parameter value '" + type + "'. It needs to be either 'major', 'minor', or 'updatecurrent'."); + return error(Response.Status.BAD_REQUEST, "Illegal 'type' parameter value '" + type + "'. 
It needs to be either 'major', 'minor', or 'updatecurrent'."); } Dataset ds = findDatasetOrDie(id); @@ -1037,8 +1037,8 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S * error is returned. * */ - if ((ds.getModificationTime()!=null && (ds.getIndexTime() == null || (ds.getIndexTime().compareTo(ds.getModificationTime()) <= 0))) || - (ds.getPermissionModificationTime()!=null && (ds.getPermissionIndexTime() == null || (ds.getPermissionIndexTime().compareTo(ds.getPermissionModificationTime()) <= 0)))) { + if ((ds.getModificationTime() != null && (ds.getIndexTime() == null || (ds.getIndexTime().compareTo(ds.getModificationTime()) <= 0))) || + (ds.getPermissionModificationTime() != null && (ds.getPermissionIndexTime() == null || (ds.getPermissionIndexTime().compareTo(ds.getPermissionModificationTime()) <= 0)))) { return error(Response.Status.CONFLICT, "Dataset is awaiting indexing"); } } @@ -1099,21 +1099,21 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S .build(); } } else { - PublishDatasetResult res = execCommand(new PublishDatasetCommand(ds, + PublishDatasetResult res = execCommand(new PublishDatasetCommand(ds, createDataverseRequest(user), - isMinor)); - return res.isWorkflow() ? accepted(json(res.getDataset())) : ok(json(res.getDataset())); + isMinor)); + return res.isWorkflow() ? 
accepted(json(res.getDataset())) : ok(json(res.getDataset())); } } catch (WrappedResponse ex) { return ex.getResponse(); } } - + @POST @Path("{id}/move/{targetDataverseAlias}") public Response moveDataset(@PathParam("id") String id, @PathParam("targetDataverseAlias") String targetDataverseAlias, @QueryParam("forceMove") Boolean force) { try { - User u = findUserOrDie(); + User u = findUserOrDie(); Dataset ds = findDatasetOrDie(id); Dataverse target = dataverseService.findByAlias(targetDataverseAlias); if (target == null) { @@ -1132,32 +1132,32 @@ public Response moveDataset(@PathParam("id") String id, @PathParam("targetDatave } } } - + @PUT - @Path("{linkedDatasetId}/link/{linkingDataverseAlias}") - public Response linkDataset(@PathParam("linkedDatasetId") String linkedDatasetId, @PathParam("linkingDataverseAlias") String linkingDataverseAlias) { - try{ - User u = findUserOrDie(); + @Path("{linkedDatasetId}/link/{linkingDataverseAlias}") + public Response linkDataset(@PathParam("linkedDatasetId") String linkedDatasetId, @PathParam("linkingDataverseAlias") String linkingDataverseAlias) { + try { + User u = findUserOrDie(); Dataset linked = findDatasetOrDie(linkedDatasetId); Dataverse linking = findDataverseOrDie(linkingDataverseAlias); - if (linked == null){ + if (linked == null) { return error(Response.Status.BAD_REQUEST, "Linked Dataset not found."); - } - if (linking == null){ + } + if (linking == null) { return error(Response.Status.BAD_REQUEST, "Linking Dataverse not found."); - } + } execCommand(new LinkDatasetCommand( createDataverseRequest(u), linking, linked - )); + )); return ok("Dataset " + linked.getId() + " linked successfully to " + linking.getAlias()); } catch (WrappedResponse ex) { return ex.getResponse(); } } - + @GET @Path("{id}/links") - public Response getLinks(@PathParam("id") String idSupplied ) { + public Response getLinks(@PathParam("id") String idSupplied) { try { User u = findUserOrDie(); if (!u.isSuperuser()) { @@ -1181,8 +1181,8 @@ public 
Response getLinks(@PathParam("id") String idSupplied ) { /** * Add a given assignment to a given user or group - * @param ra role assignment DTO - * @param id dataset id + * @param ra role assignment DTO + * @param id dataset id * @param apiKey */ @POST @@ -1190,12 +1190,12 @@ public Response getLinks(@PathParam("id") String idSupplied ) { public Response createAssignment(RoleAssignmentDTO ra, @PathParam("identifier") String id, @QueryParam("key") String apiKey) { try { Dataset dataset = findDatasetOrDie(id); - + RoleAssignee assignee = findAssignee(ra.getAssignee()); if (assignee == null) { return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("datasets.api.grant.role.assignee.not.found.error")); - } - + } + DataverseRole theRole; Dataverse dv = dataset.getOwner(); theRole = null; @@ -1223,7 +1223,7 @@ public Response createAssignment(RoleAssignmentDTO ra, @PathParam("identifier") } } - + @DELETE @Path("{identifier}/assignments/{id}") public Response deleteAssignment(@PathParam("id") long assignmentId, @PathParam("identifier") String dsId) { @@ -1246,26 +1246,26 @@ public Response deleteAssignment(@PathParam("id") long assignmentId, @PathParam( @GET @Path("{identifier}/assignments") public Response getAssignments(@PathParam("identifier") String id) { - return response( req -> - ok( execCommand( - new ListRoleAssignments(req, findDatasetOrDie(id))) - .stream().map(ra->json(ra)).collect(toJsonArray())) ); + return response(req -> + ok(execCommand( + new ListRoleAssignments(req, findDatasetOrDie(id))) + .stream().map(ra -> json(ra)).collect(toJsonArray()))); } @GET @Path("{id}/privateUrl") public Response getPrivateUrlData(@PathParam("id") String idSupplied) { - return response( req -> { + return response(req -> { PrivateUrl privateUrl = execCommand(new GetPrivateUrlCommand(req, findDatasetOrDie(idSupplied))); - return (privateUrl != null) ? 
ok(json(privateUrl)) - : error(Response.Status.NOT_FOUND, "Private URL not found."); + return (privateUrl != null) ? ok(json(privateUrl)) + : error(Response.Status.NOT_FOUND, "Private URL not found."); }); } @POST @Path("{id}/privateUrl") public Response createPrivateUrl(@PathParam("id") String idSupplied) { - return response( req -> + return response(req -> ok(json(execCommand( new CreatePrivateUrlCommand(req, findDatasetOrDie(idSupplied)))))); } @@ -1273,7 +1273,7 @@ public Response createPrivateUrl(@PathParam("id") String idSupplied) { @DELETE @Path("{id}/privateUrl") public Response deletePrivateUrl(@PathParam("id") String idSupplied) { - return response( req -> { + return response(req -> { Dataset dataset = findDatasetOrDie(idSupplied); PrivateUrl privateUrl = execCommand(new GetPrivateUrlCommand(req, dataset)); if (privateUrl != null) { @@ -1327,7 +1327,7 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) { try { Dataset dataset = findDatasetOrDie(idSupplied); InputStream is = DatasetUtil.getThumbnailAsInputStream(dataset, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - if(is == null) { + if (is == null) { return notFound("Thumbnail not available"); } return Response.ok(is).build(); @@ -1384,11 +1384,11 @@ public Response getRsync(@PathParam("identifier") String id) { dataset = findDatasetOrDie(id); AuthenticatedUser user = findAuthenticatedUserOrDie(); ScriptRequestResponse scriptRequestResponse = execCommand(new RequestRsyncScriptCommand(createDataverseRequest(user), dataset)); - + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.DcmUpload, user.getId(), "script downloaded"); if (lock == null) { logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - return error(Response.Status.FORBIDDEN, "Failed to lock the dataset (dataset id="+dataset.getId()+")"); + return error(Response.Status.FORBIDDEN, "Failed to lock the dataset (dataset id=" + dataset.getId() + ")"); } 
return ok(scriptRequestResponse.getScript(), MediaType.valueOf(MediaType.TEXT_PLAIN)); } catch (WrappedResponse wr) { @@ -1397,15 +1397,15 @@ public Response getRsync(@PathParam("identifier") String id) { return error(Response.Status.INTERNAL_SERVER_ERROR, "Something went wrong attempting to download rsync script: " + EjbUtil.ejbExceptionToString(ex)); } } - + /** - * This api endpoint triggers the creation of a "package" file in a dataset - * after that package has been moved onto the same filesystem via the Data Capture Module. + * This api endpoint triggers the creation of a "package" file in a dataset + * after that package has been moved onto the same filesystem via the Data Capture Module. * The package is really just a way that Dataverse interprets a folder created by DCM, seeing it as just one file. * The "package" can be downloaded over RSAL. - * + * * This endpoint currently supports both posix file storage and AWS s3 storage in Dataverse, and depending on which one is active acts accordingly. - * + * * The initial design of the DCM/Dataverse interaction was not to use packages, but to allow import of all individual files natively into Dataverse. * But due to the possibly immense number of files (millions) the package approach was taken. * This is relevant because the posix ("file") code contains many remnants of that development work. @@ -1429,13 +1429,13 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String try { Dataset dataset = findDatasetOrDie(id); if ("validation passed".equals(statusMessageFromDcm)) { - logger.log(Level.INFO, "Checksum Validation passed for DCM."); + logger.log(Level.INFO, "Checksum Validation passed for DCM."); String storageDriver = dataset.getDataverseContext().getEffectiveStorageDriverId(); String uploadFolder = jsonFromDcm.getString("uploadFolder"); int totalSize = jsonFromDcm.getInt("totalSize"); String storageDriverType = System.getProperty("dataverse.file." 
+ storageDriver + ".type"); - + if (storageDriverType.equals("file")) { logger.log(Level.INFO, "File storage driver used for (dataset id={0})", dataset.getId()); @@ -1452,15 +1452,15 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String String message = wr.getMessage(); return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to put the files into Dataverse. Message was '" + message + "'."); } - } else if(storageDriverType.equals("s3")) { - + } else if (storageDriverType.equals("s3")) { + logger.log(Level.INFO, "S3 storage driver used for DCM (dataset id={0})", dataset.getId()); try { - + //Where the lifting is actually done, moving the s3 files over and having dataverse know of the existance of the package s3PackageImporter.copyFromS3(dataset, uploadFolder); DataFile packageFile = s3PackageImporter.createPackageDataFile(dataset, uploadFolder, new Long(totalSize)); - + if (packageFile == null) { logger.log(Level.SEVERE, "S3 File package import failed."); return error(Response.Status.INTERNAL_SERVER_ERROR, "S3 File package import failed."); @@ -1472,7 +1472,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.DcmUpload); dataset.removeLock(dcmLock); } - + // update version using the command engine to enforce user permissions and constraints if (dataset.getVersions().size() == 1 && dataset.getLatestVersion().getVersionState() == DatasetVersion.VersionState.DRAFT) { try { @@ -1490,11 +1490,11 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String JsonObjectBuilder job = Json.createObjectBuilder(); return ok(job); - - } catch (IOException e) { + + } catch (IOException e) { String message = e.getMessage(); return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while 
attempting to move the files into Dataverse. Message was '" + message + "'."); - } + } } else { return error(Response.Status.INTERNAL_SERVER_ERROR, "Invalid storage driver in Dataverse, not compatible with dcm"); } @@ -1517,7 +1517,7 @@ public Response receiveChecksumValidationResults(@PathParam("identifier") String return ex.getResponse(); } } - + @POST @Path("{id}/submitForReview") @@ -1525,9 +1525,9 @@ public Response submitForReview(@PathParam("id") String idSupplied) { try { Dataset updatedDataset = execCommand(new SubmitDatasetForReviewCommand(createDataverseRequest(findUserOrDie()), findDatasetOrDie(idSupplied))); JsonObjectBuilder result = Json.createObjectBuilder(); - + boolean inReview = updatedDataset.isLockedFor(DatasetLock.Reason.InReview); - + result.add("inReview", inReview); result.add("message", "Dataset id " + updatedDataset.getId() + " has been submitted for review."); return ok(result); @@ -1539,7 +1539,7 @@ public Response submitForReview(@PathParam("id") String idSupplied) { @POST @Path("{id}/returnToAuthor") public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBody) { - + if (jsonBody == null || jsonBody.isEmpty()) { return error(Response.Status.BAD_REQUEST, "You must supply JSON to this API endpoint and it must contain a reason for returning the dataset (field: reasonForReturn)."); } @@ -1547,14 +1547,14 @@ public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBo JsonObject json = Json.createReader(rdr).readObject(); try { Dataset dataset = findDatasetOrDie(idSupplied); - String reasonForReturn = null; + String reasonForReturn = null; reasonForReturn = json.getString("reasonForReturn"); // TODO: Once we add a box for the curator to type into, pass the reason for return to the ReturnDatasetToAuthorCommand and delete this check and call to setReturnReason on the API side. 
if (reasonForReturn == null || reasonForReturn.isEmpty()) { return error(Response.Status.BAD_REQUEST, "You must enter a reason for returning a dataset to the author(s)."); } AuthenticatedUser authenticatedUser = findAuthenticatedUserOrDie(); - Dataset updatedDataset = execCommand(new ReturnDatasetToAuthorCommand(createDataverseRequest(authenticatedUser), dataset, reasonForReturn )); + Dataset updatedDataset = execCommand(new ReturnDatasetToAuthorCommand(createDataverseRequest(authenticatedUser), dataset, reasonForReturn)); JsonObjectBuilder result = Json.createObjectBuilder(); result.add("inReview", false); @@ -1565,237 +1565,237 @@ public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBo } } -@GET -@Path("{id}/uploadsid") -@Deprecated -public Response getUploadUrl(@PathParam("id") String idSupplied) { - try { - Dataset dataset = findDatasetOrDie(idSupplied); - - boolean canUpdateDataset = false; - try { - canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); - } catch (WrappedResponse ex) { - logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - if (!canUpdateDataset) { - return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); - } - S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); - if(s3io == null) { - return error(Response.Status.NOT_FOUND,"Direct upload not supported for files in this dataset: " + dataset.getId()); - } - String url = null; - String storageIdentifier = null; - try { - url = s3io.generateTemporaryS3UploadUrl(); - storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); - } catch (IOException io) { - logger.warning(io.getMessage()); - throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not 
create process direct upload request")); - } - - JsonObjectBuilder response = Json.createObjectBuilder() - .add("url", url) - .add("storageIdentifier", storageIdentifier ); - return ok(response); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + @GET + @Path("{id}/uploadsid") + @Deprecated + public Response getUploadUrl(@PathParam("id") String idSupplied) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); -@GET -@Path("{id}/uploadurls") -public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { - try { - Dataset dataset = findDatasetOrDie(idSupplied); - - boolean canUpdateDataset = false; - try { - canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions while getting upload URLs for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - if (!canUpdateDataset) { - return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); - } - S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); - if (s3io == null) { - return error(Response.Status.NOT_FOUND, - "Direct upload not supported for files in this dataset: " + dataset.getId()); - } - JsonObjectBuilder response = null; - String storageIdentifier = null; - try { - storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); - response = s3io.generateTemporaryS3UploadUrls(dataset.getGlobalId().asString(), storageIdentifier, fileSize); - - } catch (IOException io) { - logger.warning(io.getMessage()); - throw new WrappedResponse(io, - error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); - } - - response.add("storageIdentifier", storageIdentifier); - return ok(response); - } catch (WrappedResponse wr) { - return 
wr.getResponse(); - } -} + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if (s3io == null) { + return error(Response.Status.NOT_FOUND, "Direct upload not supported for files in this dataset: " + dataset.getId()); + } + String url = null; + String storageIdentifier = null; + try { + url = s3io.generateTemporaryS3UploadUrl(); + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + } catch (IOException io) { + logger.warning(io.getMessage()); + throw new WrappedResponse(io, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); + } -@DELETE -@Path("mpupload") -public Response abortMPUpload(@QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { - try { - Dataset dataset = datasetSvc.findByGlobalId(idSupplied); - //Allow the API to be used within a session (e.g. 
for direct upload in the UI) - User user =session.getUser(); - if (!user.isAuthenticated()) { - try { - user = findAuthenticatedUserOrDie(); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions while getting aborting upload for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - } - boolean allowed = false; - if (dataset != null) { - allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } else { - /* - * The only legitimate case where a global id won't correspond to a dataset is - * for uploads during creation. Given that this call will still fail unless all - * three parameters correspond to an active multipart upload, it should be safe - * to allow the attempt for an authenticated user. If there are concerns about - * permissions, one could check with the current design that the user is allowed - * to create datasets in some dataverse that is configured to use the storage - * provider specified in the storageidentifier, but testing for the ability to - * create a dataset in a specific dataverse would requiring changing the design - * somehow (e.g. adding the ownerId to this call). 
- */ - allowed = true; - } - if (!allowed) { - return error(Response.Status.FORBIDDEN, - "You are not permitted to abort file uploads with the supplied parameters."); - } - try { - S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); - } catch (IOException io) { - logger.warning("Multipart upload abort failed for uploadId: " + uploadId + " storageidentifier=" - + storageidentifier + " dataset Id: " + dataset.getId()); - logger.warning(io.getMessage()); - throw new WrappedResponse(io, - error(Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); - } - return Response.noContent().build(); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + JsonObjectBuilder response = Json.createObjectBuilder() + .add("url", url) + .add("storageIdentifier", storageIdentifier); + return ok(response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } -@PUT -@Path("mpupload") -public Response completeMPUpload(String partETagBody, @QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { - try { - Dataset dataset = datasetSvc.findByGlobalId(idSupplied); - //Allow the API to be used within a session (e.g. for direct upload in the UI) - User user =session.getUser(); - if (!user.isAuthenticated()) { - try { - user=findAuthenticatedUserOrDie(); - } catch (WrappedResponse ex) { - logger.info( - "Exception thrown while trying to figure out permissions to complete mpupload for dataset id " - + dataset.getId() + ": " + ex.getLocalizedMessage()); - throw ex; - } - } - boolean allowed = false; - if (dataset != null) { - allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) - .canIssue(UpdateDatasetVersionCommand.class); - } else { - /* - * The only legitimate case where a global id won't correspond to a dataset is - * for uploads during creation. 
Given that this call will still fail unless all - * three parameters correspond to an active multipart upload, it should be safe - * to allow the attempt for an authenticated user. If there are concerns about - * permissions, one could check with the current design that the user is allowed - * to create datasets in some dataverse that is configured to use the storage - * provider specified in the storageidentifier, but testing for the ability to - * create a dataset in a specific dataverse would requiring changing the design - * somehow (e.g. adding the ownerId to this call). - */ - allowed = true; - } - if (!allowed) { - return error(Response.Status.FORBIDDEN, - "You are not permitted to complete file uploads with the supplied parameters."); - } - List eTagList = new ArrayList(); - logger.info("Etags: " + partETagBody); - try { - JsonReader jsonReader = Json.createReader(new StringReader(partETagBody)); - JsonObject object = jsonReader.readObject(); - jsonReader.close(); - for(String partNo : object.keySet()) { - eTagList.add(new PartETag(Integer.parseInt(partNo), object.getString(partNo))); - } - for(PartETag et: eTagList) { - logger.info("Part: " + et.getPartNumber() + " : " + et.getETag()); - } - } catch (JsonException je) { - logger.info("Unable to parse eTags from: " + partETagBody); - throw new WrappedResponse(je, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); - } - try { - S3AccessIO.completeMultipartUpload(idSupplied, storageidentifier, uploadId, eTagList); - } catch (IOException io) { - logger.warning("Multipart upload completion failed for uploadId: " + uploadId +" storageidentifier=" + storageidentifier + " globalId: " + idSupplied); - logger.warning(io.getMessage()); - try { - S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); - } catch (IOException e) { - logger.severe("Also unable to abort the upload (and release the space on S3 for uploadId: " + uploadId +" storageidentifier=" + 
storageidentifier + " globalId: " + idSupplied); - logger.severe(io.getMessage()); - } - - throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); - } - return ok("Multipart Upload completed"); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } -} + @GET + @Path("{id}/uploadurls") + public Response getMPUploadUrls(@PathParam("id") String idSupplied, @QueryParam("size") long fileSize) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions while getting upload URLs for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if (s3io == null) { + return error(Response.Status.NOT_FOUND, + "Direct upload not supported for files in this dataset: " + dataset.getId()); + } + JsonObjectBuilder response = null; + String storageIdentifier = null; + try { + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + response = s3io.generateTemporaryS3UploadUrls(dataset.getGlobalId().asString(), storageIdentifier, fileSize); + + } catch (IOException io) { + logger.warning(io.getMessage()); + throw new WrappedResponse(io, + error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); + } + + response.add("storageIdentifier", storageIdentifier); + return ok(response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @DELETE + @Path("mpupload") + public Response abortMPUpload(@QueryParam("globalid") 
String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { + try { + Dataset dataset = datasetSvc.findByGlobalId(idSupplied); + //Allow the API to be used within a session (e.g. for direct upload in the UI) + User user = session.getUser(); + if (!user.isAuthenticated()) { + try { + user = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions while getting aborting upload for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + } + boolean allowed = false; + if (dataset != null) { + allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } else { + /* + * The only legitimate case where a global id won't correspond to a dataset is + * for uploads during creation. Given that this call will still fail unless all + * three parameters correspond to an active multipart upload, it should be safe + * to allow the attempt for an authenticated user. If there are concerns about + * permissions, one could check with the current design that the user is allowed + * to create datasets in some dataverse that is configured to use the storage + * provider specified in the storageidentifier, but testing for the ability to + * create a dataset in a specific dataverse would requiring changing the design + * somehow (e.g. adding the ownerId to this call). 
+ */ + allowed = true; + } + if (!allowed) { + return error(Response.Status.FORBIDDEN, + "You are not permitted to abort file uploads with the supplied parameters."); + } + try { + S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); + } catch (IOException io) { + logger.warning("Multipart upload abort failed for uploadId: " + uploadId + " storageidentifier=" + + storageidentifier + " dataset Id: " + dataset.getId()); + logger.warning(io.getMessage()); + throw new WrappedResponse(io, + error(Response.Status.INTERNAL_SERVER_ERROR, "Could not abort multipart upload")); + } + return Response.noContent().build(); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + + @PUT + @Path("mpupload") + public Response completeMPUpload(String partETagBody, @QueryParam("globalid") String idSupplied, @QueryParam("storageidentifier") String storageidentifier, @QueryParam("uploadid") String uploadId) { + try { + Dataset dataset = datasetSvc.findByGlobalId(idSupplied); + //Allow the API to be used within a session (e.g. for direct upload in the UI) + User user = session.getUser(); + if (!user.isAuthenticated()) { + try { + user = findAuthenticatedUserOrDie(); + } catch (WrappedResponse ex) { + logger.info( + "Exception thrown while trying to figure out permissions to complete mpupload for dataset id " + + dataset.getId() + ": " + ex.getLocalizedMessage()); + throw ex; + } + } + boolean allowed = false; + if (dataset != null) { + allowed = permissionSvc.requestOn(createDataverseRequest(user), dataset) + .canIssue(UpdateDatasetVersionCommand.class); + } else { + /* + * The only legitimate case where a global id won't correspond to a dataset is + * for uploads during creation. Given that this call will still fail unless all + * three parameters correspond to an active multipart upload, it should be safe + * to allow the attempt for an authenticated user. 
If there are concerns about + * permissions, one could check with the current design that the user is allowed + * to create datasets in some dataverse that is configured to use the storage + * provider specified in the storageidentifier, but testing for the ability to + * create a dataset in a specific dataverse would requiring changing the design + * somehow (e.g. adding the ownerId to this call). + */ + allowed = true; + } + if (!allowed) { + return error(Response.Status.FORBIDDEN, + "You are not permitted to complete file uploads with the supplied parameters."); + } + List eTagList = new ArrayList(); + logger.info("Etags: " + partETagBody); + try { + JsonReader jsonReader = Json.createReader(new StringReader(partETagBody)); + JsonObject object = jsonReader.readObject(); + jsonReader.close(); + for (String partNo : object.keySet()) { + eTagList.add(new PartETag(Integer.parseInt(partNo), object.getString(partNo))); + } + for (PartETag et : eTagList) { + logger.info("Part: " + et.getPartNumber() + " : " + et.getETag()); + } + } catch (JsonException je) { + logger.info("Unable to parse eTags from: " + partETagBody); + throw new WrappedResponse(je, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not complete multipart upload")); + } + try { + S3AccessIO.completeMultipartUpload(idSupplied, storageidentifier, uploadId, eTagList); + } catch (IOException io) { + logger.warning("Multipart upload completion failed for uploadId: " + uploadId + " storageidentifier=" + storageidentifier + " globalId: " + idSupplied); + logger.warning(io.getMessage()); + try { + S3AccessIO.abortMultipartUpload(idSupplied, storageidentifier, uploadId); + } catch (IOException e) { + logger.severe("Also unable to abort the upload (and release the space on S3 for uploadId: " + uploadId + " storageidentifier=" + storageidentifier + " globalId: " + idSupplied); + logger.severe(io.getMessage()); + } + + throw new WrappedResponse(io, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not complete 
multipart upload")); + } + return ok("Multipart Upload completed"); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } /** * Add a File to an existing Dataset - * + * * @param idSupplied * @param jsonData * @param fileInputStream * @param contentDispositionHeader * @param formDataBodyPart - * @return + * @return */ @POST @Path("{id}/add") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addFileToDataset(@PathParam("id") String idSupplied, - @FormDataParam("jsonData") String jsonData, - @FormDataParam("file") InputStream fileInputStream, - @FormDataParam("file") FormDataContentDisposition contentDispositionHeader, - @FormDataParam("file") final FormDataBodyPart formDataBodyPart - ){ + @FormDataParam("jsonData") String jsonData, + @FormDataParam("file") InputStream fileInputStream, + @FormDataParam("file") FormDataContentDisposition contentDispositionHeader, + @FormDataParam("file") final FormDataBodyPart formDataBodyPart + ) { if (!systemConfig.isHTTPUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); @@ -1810,27 +1810,27 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } catch (WrappedResponse ex) { return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); + ); } - - + + // ------------------------------------- // (2) Get the Dataset Id // // ------------------------------------- Dataset dataset; - + try { dataset = findDatasetOrDie(idSupplied); } catch (WrappedResponse wr) { - return wr.getResponse(); + return wr.getResponse(); } - + //------------------------------------ // (2a) Make sure dataset does not have package file // // -------------------------------------- - + for (DatasetVersion dv : dataset.getVersions()) { if (dv.isHasPackageFile()) { return error(Response.Status.FORBIDDEN, @@ -1842,40 +1842,40 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, // (2a) Load up 
optional params via JSON //--------------------------------------- OptionalFileParams optionalFileParams = null; - msgt("(api) jsonData: " + jsonData); + msgt("(api) jsonData: " + jsonData); try { optionalFileParams = new OptionalFileParams(jsonData); } catch (DataFileTagException ex) { - return error( Response.Status.BAD_REQUEST, ex.getMessage()); + return error(Response.Status.BAD_REQUEST, ex.getMessage()); } - + // ------------------------------------- // (3) Get the file name and content type // ------------------------------------- String newFilename = null; String newFileContentType = null; String newStorageIdentifier = null; - if (null == contentDispositionHeader) { - if (optionalFileParams.hasStorageIdentifier()) { - newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid - if (optionalFileParams.hasFileName()) { - newFilename = optionalFileParams.getFileName(); - if (optionalFileParams.hasMimetype()) { - newFileContentType = optionalFileParams.getMimeType(); - } - } - } else { - return error(BAD_REQUEST, - "You must upload a file or provide a storageidentifier, filename, and mimetype."); - } - } else { - newFilename = contentDispositionHeader.getFileName(); - newFileContentType = formDataBodyPart.getMediaType().toString(); - } - - + if (null == contentDispositionHeader) { + if (optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + // ToDo - check that storageIdentifier is valid + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + } else { + return error(BAD_REQUEST, + "You must upload a file or provide a storageidentifier, filename, and mimetype."); + } + } else { + newFilename = contentDispositionHeader.getFileName(); + newFileContentType = formDataBodyPart.getMediaType().toString(); + } + + 
//------------------- // (3) Create the AddReplaceFileHelper object //------------------- @@ -1883,28 +1883,28 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, DataverseRequest dvRequest2 = createDataverseRequest(authUser); AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(dvRequest2, - ingestService, - datasetService, - fileService, - permissionSvc, - commandEngine, - systemConfig); + ingestService, + datasetService, + fileService, + permissionSvc, + commandEngine, + systemConfig); //------------------- // (4) Run "runAddFileByDatasetId" //------------------- addFileHelper.runAddFileByDataset(dataset, - newFilename, - newFileContentType, - newStorageIdentifier, - fileInputStream, - optionalFileParams); + newFilename, + newFileContentType, + newStorageIdentifier, + fileInputStream, + optionalFileParams); - if (addFileHelper.hasError()){ + if (addFileHelper.hasError()) { return error(addFileHelper.getHttpErrorCode(), addFileHelper.getErrorMessagesAsString("\n")); - }else{ + } else { String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); try { //msgt("as String: " + addFileHelper.getSuccessResult()); @@ -1922,7 +1922,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } else { return ok(addFileHelper.getSuccessResultAsJsonObjectBuilder()); } - + //"Look at that! You added a file! 
(hey hey, it may have worked)"); } catch (NoFilesException ex) { Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); @@ -1930,71 +1930,77 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } } - + } // end: addFileToDataset - - private void msg(String m){ + private void msg(String m) { //System.out.println(m); logger.fine(m); } - private void dashes(){ + + private void dashes() { msg("----------------"); } - private void msgt(String m){ - dashes(); msg(m); dashes(); + + private void msgt(String m) { + dashes(); + msg(m); + dashes(); } - - - public static T handleVersion( String versionId, DsVersionHandler hdl ) - throws WrappedResponse { + + + public static T handleVersion(String versionId, DsVersionHandler hdl) + throws WrappedResponse { switch (versionId) { - case ":latest": return hdl.handleLatest(); - case ":draft": return hdl.handleDraft(); - case ":latest-published": return hdl.handleLatestPublished(); + case ":latest": + return hdl.handleLatest(); + case ":draft": + return hdl.handleDraft(); + case ":latest-published": + return hdl.handleLatestPublished(); default: try { String[] versions = versionId.split("\\."); switch (versions.length) { case 1: - return hdl.handleSpecific(Long.parseLong(versions[0]), (long)0.0); + return hdl.handleSpecific(Long.parseLong(versions[0]), (long) 0.0); case 2: - return hdl.handleSpecific( Long.parseLong(versions[0]), Long.parseLong(versions[1]) ); + return hdl.handleSpecific(Long.parseLong(versions[0]), Long.parseLong(versions[1])); default: - throw new WrappedResponse(error( Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'")); + throw new WrappedResponse(error(Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'")); } - } catch ( NumberFormatException nfe ) { - throw new WrappedResponse( error( Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'") ); + } catch (NumberFormatException nfe) { + throw new 
WrappedResponse(error(Response.Status.BAD_REQUEST, "Illegal version identifier '" + versionId + "'")); } } } - - private DatasetVersion getDatasetVersionOrDie( final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse { - DatasetVersion dsv = execCommand( handleVersion(versionNumber, new DsVersionHandler>(){ - @Override - public Command handleLatest() { - return new GetLatestAccessibleDatasetVersionCommand(req, ds); - } + private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse { + DatasetVersion dsv = execCommand(handleVersion(versionNumber, new DsVersionHandler>() { - @Override - public Command handleDraft() { - return new GetDraftDatasetVersionCommand(req, ds); - } - - @Override - public Command handleSpecific(long major, long minor) { - return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); - } + @Override + public Command handleLatest() { + return new GetLatestAccessibleDatasetVersionCommand(req, ds); + } - @Override - public Command handleLatestPublished() { - return new GetLatestPublishedDatasetVersionCommand(req, ds); - } - })); - if ( dsv == null || dsv.getId() == null ) { - throw new WrappedResponse( notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found") ); + @Override + public Command handleDraft() { + return new GetDraftDatasetVersionCommand(req, ds); + } + + @Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); + } + + @Override + public Command handleLatestPublished() { + return new GetLatestPublishedDatasetVersionCommand(req, ds); + } + })); + if (dsv == null || dsv.getId() == null) { + throw new WrappedResponse(notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found")); } if 
(dsv.isReleased()) { MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, ds); @@ -2002,7 +2008,7 @@ public Command handleLatestPublished() { } return dsv; } - + @GET @Path("{identifier}/locks") public Response getLocks(@PathParam("identifier") String id, @QueryParam("type") DatasetLock.Reason lockType) { @@ -2010,26 +2016,26 @@ public Response getLocks(@PathParam("identifier") String id, @QueryParam("type") Dataset dataset = null; try { dataset = findDatasetOrDie(id); - Set locks; + Set locks; if (lockType == null) { locks = dataset.getLocks(); } else { // request for a specific type lock: DatasetLock lock = dataset.getLockFor(lockType); - locks = new HashSet<>(); + locks = new HashSet<>(); if (lock != null) { locks.add(lock); } } - + return ok(locks.stream().map(lock -> json(lock)).collect(toJsonArray())); } catch (WrappedResponse wr) { return wr.getResponse(); - } - } - + } + } + @DELETE @Path("{identifier}/locks") public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("type") DatasetLock.Reason lockType) { @@ -2041,7 +2047,7 @@ public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("typ return error(Response.Status.FORBIDDEN, "This API end point can be used by superusers only."); } Dataset dataset = findDatasetOrDie(id); - + if (lockType == null) { Set locks = new HashSet<>(); for (DatasetLock lock : dataset.getLocks()) { @@ -2093,7 +2099,7 @@ public Response deleteLocks(@PathParam("identifier") String id, @QueryParam("typ }); } - + @POST @Path("{identifier}/lock/{type}") public Response lockDataset(@PathParam("identifier") String id, @PathParam("type") DatasetLock.Reason lockType) { @@ -2102,7 +2108,7 @@ public Response lockDataset(@PathParam("identifier") String id, @PathParam("type AuthenticatedUser user = findAuthenticatedUserOrDie(); if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "This API end point can be used by superusers 
only."); - } + } Dataset dataset = findDatasetOrDie(id); DatasetLock lock = dataset.getLockFor(lockType); if (lock != null) { @@ -2129,16 +2135,16 @@ public Response lockDataset(@PathParam("identifier") String id, @PathParam("type }); } - + @GET @Path("{id}/makeDataCount/citations") public Response getMakeDataCountCitations(@PathParam("id") String idSupplied) { - + try { Dataset dataset = findDatasetOrDie(idSupplied); JsonArrayBuilder datasetsCitations = Json.createArrayBuilder(); List externalCitations = datasetExternalCitationsService.getDatasetExternalCitationsByDataset(dataset); - for (DatasetExternalCitations citation : externalCitations ){ + for (DatasetExternalCitations citation : externalCitations) { JsonObjectBuilder candidateObj = Json.createObjectBuilder(); /** * In the future we can imagine storing and presenting more @@ -2149,9 +2155,9 @@ public Response getMakeDataCountCitations(@PathParam("id") String idSupplied) { */ candidateObj.add("citationUrl", citation.getCitedByUrl()); datasetsCitations.add(candidateObj); - } - return ok(datasetsCitations); - + } + return ok(datasetsCitations); + } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -2164,23 +2170,23 @@ public Response getMakeDataCountMetricCurrentMonth(@PathParam("id") String idSup String nullCurrentMonth = null; return getMakeDataCountMetric(idSupplied, metricSupplied, nullCurrentMonth, country); } - + @GET @Path("{identifier}/storagesize") - public Response getStorageSize(@PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + public Response getStorageSize(@PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.storage"), - execCommand(new 
GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached,GetDatasetStorageSizeCommand.Mode.STORAGE, null))))); + execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached, GetDatasetStorageSizeCommand.Mode.STORAGE, null))))); } - + @GET @Path("{identifier}/versions/{versionId}/downloadsize") - public Response getDownloadSize(@PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + public Response getDownloadSize(@PathParam("identifier") String dvIdtf, @PathParam("versionId") String version, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize.download"), - execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, GetDatasetStorageSizeCommand.Mode.DOWNLOAD, getDatasetVersionOrDie(req, version , findDatasetOrDie(dvIdtf), uriInfo, headers)))))); + execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, GetDatasetStorageSizeCommand.Mode.DOWNLOAD, getDatasetVersionOrDie(req, version, findDatasetOrDie(dvIdtf), uriInfo, headers)))))); } @GET @@ -2282,29 +2288,29 @@ public Response getMakeDataCountMetric(@PathParam("id") String idSupplied, @Path return wr.getResponse(); } } - + @GET @Path("{identifier}/storageDriver") public Response getFileStore(@PathParam("identifier") String dvIdtf, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - - Dataset dataset; - + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { return error(Response.Status.NOT_FOUND, "No such dataset"); } - + return response(req -> ok(dataset.getEffectiveStorageDriverId())); } - + @PUT 
@Path("{identifier}/storageDriver") public Response setFileStore(@PathParam("identifier") String dvIdtf, - String storageDriverLabel, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + String storageDriverLabel, + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + // Superuser-only: AuthenticatedUser user; try { @@ -2314,16 +2320,16 @@ public Response setFileStore(@PathParam("identifier") String dvIdtf, } if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); - } - - Dataset dataset; - + } + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { return error(Response.Status.NOT_FOUND, "No such dataset"); } - + // We don't want to allow setting this to a store id that does not exist: for (Entry store : DataAccess.getStorageDriverLabels().entrySet()) { if (store.getKey().equals(storageDriverLabel)) { @@ -2332,15 +2338,15 @@ public Response setFileStore(@PathParam("identifier") String dvIdtf, return ok("Storage driver set to: " + store.getKey() + "/" + store.getValue()); } } - return error(Response.Status.BAD_REQUEST, - "No Storage Driver found for : " + storageDriverLabel); + return error(Response.Status.BAD_REQUEST, + "No Storage Driver found for : " + storageDriverLabel); } - + @DELETE @Path("{identifier}/storageDriver") public Response resetFileStore(@PathParam("identifier") String dvIdtf, - @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { - + @Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse { + // Superuser-only: AuthenticatedUser user; try { @@ -2350,19 +2356,19 @@ public Response resetFileStore(@PathParam("identifier") String dvIdtf, } if (!user.isSuperuser()) { return error(Response.Status.FORBIDDEN, "Superusers only."); - } - - Dataset dataset; - + } + + Dataset dataset; + try { dataset = findDatasetOrDie(dvIdtf); } catch (WrappedResponse ex) { return 
error(Response.Status.NOT_FOUND, "No such dataset"); } - + dataset.setStorageDriverId(null); datasetService.merge(dataset); - return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + return ok("Storage reset to default: " + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); } @GET @@ -2406,11 +2412,11 @@ public Response getTimestamps(@PathParam("identifier") String id) { timestamps.add("hasStaleIndex", (dataset.getModificationTime() != null && (dataset.getIndexTime() == null || (dataset.getIndexTime().compareTo(dataset.getModificationTime()) <= 0))) ? true - : false); + : false); timestamps.add("hasStalePermissionIndex", (dataset.getPermissionModificationTime() != null && (dataset.getIndexTime() == null || (dataset.getIndexTime().compareTo(dataset.getModificationTime()) <= 0))) ? true - : false); + : false); } // More detail if you can see a draft if (canSeeDraft) { @@ -2439,12 +2445,11 @@ public Response getTimestamps(@PathParam("identifier") String id) { } - @POST @Path("{id}/addglobusFilesBkup") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addGlobusFileToDatasetBkup(@PathParam("id") String datasetId, - @FormDataParam("jsonData") String jsonData + @FormDataParam("jsonData") String jsonData ) { JsonArrayBuilder jarr = Json.createArrayBuilder(); @@ -2753,12 +2758,32 @@ public Response addGlobusFileToDatasetBkup(@PathParam("id") String datasetId, @Path("{id}/addglobusFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, - @FormDataParam("jsonData") String jsonData + @FormDataParam("jsonData") String jsonData, + @Context UriInfo uriInfo, + @Context HttpHeaders headers ) throws IOException, ExecutionException, InterruptedException { - logger.info ( " ==== 1 (api) jsonData 1 ====== " + jsonData ); + logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); + + if(uriInfo != null) { + logger.info(" ==== (api uriInfo.getRequestUri()) 
jsonData ====== " + uriInfo.getRequestUri().toString()); + } + + //logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + headers.getRequestHeaders() + + MultivaluedMap multivaluedMap = headers.getRequestHeaders(); + + Map result = new HashMap<>(); + multivaluedMap.forEach((name, values) -> { + if (!CollectionUtils.isEmpty(values)) { + result.put(name, (values.size() != 1) ? values : values.get(0)); + logger.info(" headers ==== " + name + " ==== "+ values ); + } + }); + + logger.info(" ==== headers.getRequestHeader(origin) ====== " + headers.getRequestHeader("origin") ); + logger.info(" ==== headers.getRequestHeader(referer) ====== " + headers.getRequestHeader("referer") ); - JsonArrayBuilder jarr = Json.createArrayBuilder(); if (!systemConfig.isHTTPUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); @@ -2786,8 +2811,19 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, return wr.getResponse(); } + //------------------------------------ + // (2b) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + - String lockInfoMessage = "Globus Upload API is started "; + String lockInfoMessage = "Globus Upload API started "; DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.EditInProgress, ((AuthenticatedUser) authUser).getId(), lockInfoMessage); if (lock != null) { @@ -2800,11 +2836,12 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); - //String xfp = httpRequest.getHeader("X-Forwarded-Proto"); - //String requestUrl = xfp +"://"+httpRequest.getServerName(); /* + String xfp = 
httpRequest.getHeader("X-Forwarded-Proto"); + //String requestUrl = xfp +"://"+httpRequest.getServerName(); + x-forwarded-proto String requestUrl = httpRequest.getProtocol().toLowerCase().split("/")[0]+"://"+httpRequest.getServerName(); @@ -2812,16 +2849,14 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, { requestUrl = requestUrl + ":"+ httpRequest.getServerPort(); } - */ + */ - //String requestUrl = "https://dvdev.scholarsportal.info" ; - String requestUrl = "http://localhost:8080" ; + //String requestUrl = "http://localhost:8080"; + String requestUrl = "https://dvdev.scholarsportal.info" ; // Async Call - datasetService.globusAsyncCall( jsonData , token , dataset , requestUrl); - - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.CHECKSUMFAIL, dataset.getId()); + datasetService.globusAsyncCall(jsonData, token, dataset, requestUrl, authUser); return ok("Globus Task successfully completed "); @@ -2881,9 +2916,7 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, } } - - - msgt("******* (api) jsonData 1: " + jsonData.toString()); + msgt("******* (addFilesToDataset api) jsonData 1: " + jsonData.toString()); JsonArray filesJson = null; try (StringReader rdr = new StringReader(jsonData)) { @@ -2909,8 +2942,6 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, // ------------------------------------- // (6) Parse files information from jsondata - // calculate checksum - // determine mimetype // ------------------------------------- int totalNumberofFiles = 0; @@ -2949,8 +2980,7 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, "You must upload a file or provide a storageidentifier, filename, and mimetype."); } - - msg("ADD!"); + msg("ADD! 
= " + newFilename); //------------------- // Run "runAddFileByDatasetId" @@ -2961,7 +2991,7 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, newFileContentType, newStorageIdentifier, null, - optionalFileParams,true); + optionalFileParams, true); if (addFileHelper.hasError()) { @@ -3032,8 +3062,8 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, dataset = datasetService.find(dataset.getId()); List s = dataset.getFiles(); - for (DataFile dataFile : s) {} - + for (DataFile dataFile : s) { + } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); @@ -3046,4 +3076,27 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, return ok(Json.createObjectBuilder().add("Files", jarr)); } // end: addFileToDataset + + + @POST + @Path("/deleteglobusRule") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response deleteglobusRule(@FormDataParam("jsonData") String jsonData + ) throws IOException, ExecutionException, InterruptedException { + + msgt("******* (api deleteglobusRule) jsonData : " + jsonData.toString()); + + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); + } + + String ruleId = jsonObject.getString("ruleId"); + + globusServiceBean.deletePermision(ruleId,logger); + return ok("Globus Rule deleted successfully "); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index 204d93b5b8f..1be16f97045 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -500,6 +500,10 @@ public void displayNotification() { userNotification.setTheObject(datasetVersionService.find(userNotification.getObjectId())); break; + case GLOBUSUPLOADSUCCESS: + userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); + break; + case CHECKSUMIMPORT: userNotification.setTheObject(datasetVersionService.find(userNotification.getObjectId())); break; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 2bb3f6c694d..b2f6f424722 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -284,20 +284,20 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin } } - public void deletePermision(String ruleId) throws MalformedURLException { + public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { AccessToken clientTokenUser = getClientToken(); - logger.info("Start updating permissions." ); + globusLogger.info("Start deleting permissions." 
); String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); - logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); + //logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); MakeRequestResponse result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(),"DELETE", null); if (result.status != 200) { - logger.warning("Cannot update access rule " + ruleId); + globusLogger.warning("Cannot delete access rule " + ruleId); } else { - logger.info("Access rule " + ruleId + " was updated"); + globusLogger.info("Access rule " + ruleId + " was deleted successfully"); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index 37667d16b55..c4645409f87 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -66,6 +66,14 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti } catch (Exception e) { return BundleUtil.getStringFromBundle("notification.email.import.filesystem.subject", rootDvNameAsList); } + case GLOBUSUPLOADSUCCESS: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.import.globus.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.import.globus.subject", rootDvNameAsList); + } case CHECKSUMIMPORT: return BundleUtil.getStringFromBundle("notification.email.import.checksum.subject", rootDvNameAsList); diff --git a/src/main/java/propertyFiles/Bundle.properties 
b/src/main/java/propertyFiles/Bundle.properties index 0927117ff86..f7c4def1943 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -217,7 +217,9 @@ notification.checksumfail=One or more files in your upload failed checksum valid notification.ingest.completed=Dataset {2} ingest process has successfully finished.

Ingested files:{3}
notification.ingest.completedwitherrors=Dataset {2} ingest process has finished with errors.

Ingested files:{3}
notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded and verified. +notification.mail.import.globus=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded via Globus and verified. notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. +notification.import.globus=Dataset {1} has been successfully uploaded via Globus and verified. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification groupAndRoles.manageTips=Here is where you can access and manage all the groups you belong to, and the roles you have been assigned. @@ -696,6 +698,7 @@ contact.delegation={0} on behalf of {1} notification.email.info.unavailable=Unavailable notification.email.apiTokenGenerated=Hello {0} {1},\n\nAPI Token has been generated. Please keep it secure as you would do with a password. notification.email.apiTokenGenerated.subject=API Token was generated +notification.email.import.globus.subject=Dataset {0} has been successfully uploaded via Globus and verified # dataverse.xhtml dataverse.name=Dataverse Name diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index 5de0154f49c..8d8baceb6d2 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -286,6 +286,13 @@ + + + + + + + From 14352024df292342d644af182543e6bcb3d0690d Mon Sep 17 00:00:00 2001 From: chenganj Date: Wed, 31 Mar 2021 11:11:05 -0400 Subject: [PATCH 064/161] corrected error --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 9 --------- src/main/webapp/file-download-button-fragment.xhtml | 11 ----------- 2 files changed, 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index a63a86a2586..78d7627a657 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -77,11 +77,9 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; -import edu.harvard.iq.dataverse.globus.fileDetailsHolder; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.S3PackageImporter; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -132,7 +130,6 @@ import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; -import javax.ejb.Asynchronous; import javax.ejb.EJB; import javax.ejb.EJBException; import javax.inject.Inject; @@ -161,10 +158,7 @@ import org.glassfish.jersey.media.multipart.FormDataParam; import com.amazonaws.services.s3.model.PartETag; -import edu.harvard.iq.dataverse.FileMetadata; import java.util.Map.Entry; -import java.util.stream.Collectors; -import java.util.stream.IntStream; @Path("datasets") public class Datasets extends AbstractApiBean { @@ -232,9 +226,6 @@ public class Datasets extends AbstractApiBean { @Inject DataverseRequestServiceBean dvRequestService; - @Context - protected HttpServletRequest httpRequest; - /** * Used to consolidate the way we parse and handle dataset versions. 
diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index cafe1875590..85fe60863b4 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -58,17 +58,6 @@ #{bundle.download} - - - - - - #{bundle['file.downloadFromGlobus']} - From c3ff22927bf27e7716b9cd5f43fb0640752303ba Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 1 Apr 2021 11:40:15 -0400 Subject: [PATCH 065/161] api to delete globus rule and added notification --- .../iq/dataverse/DatasetServiceBean.java | 53 ++++++++++- .../harvard/iq/dataverse/MailServiceBean.java | 11 +++ .../iq/dataverse/UserNotification.java | 2 +- .../harvard/iq/dataverse/api/Datasets.java | 89 ++++++++----------- .../providers/builtin/DataverseUserPage.java | 4 + src/main/java/propertyFiles/Bundle.properties | 3 + src/main/webapp/dataverseuser.xhtml | 7 ++ 7 files changed, 112 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 51bef2f6f49..6a51e68ddbb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -1044,14 +1044,14 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo @Asynchronous - public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, User authUser) throws ExecutionException, InterruptedException, MalformedURLException { + public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, User authUser) throws ExecutionException, InterruptedException, MalformedURLException { String logTimestamp = logFormatter.format(new Date()); Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." 
+ "GlobusUpload" + logTimestamp); //Logger.getLogger(DatasetServiceBean.class.getCanonicalName()); //Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." + "ExportAll" + logTimestamp); - String logFileName = "../logs" + File.separator + "globus_" + logTimestamp + ".log"; + String logFileName = "../logs" + File.separator + "globusUpload" + dataset.getId()+"_"+authUser.getIdentifier()+"_"+ logTimestamp + ".log"; FileHandler fileHandler; boolean fileHandlerSuceeded; try { @@ -1069,7 +1069,7 @@ public void globusAsyncCall(String jsonData, ApiToken token, Dataset dataset, St globusLogger = logger; } - globusLogger.info("Starting an globusAsyncCall "); + globusLogger.info("Starting an globusUpload "); String datasetIdentifier = dataset.getStorageIdentifier(); @@ -1368,6 +1368,53 @@ private String addFiles(String curlCommand, Logger globusLogger) return status; } + @Asynchronous + public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { + + String logTimestamp = logFormatter.format(new Date()); + Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." 
+ "GlobusDownload" + logTimestamp); + + String logFileName = "../logs" + File.separator + "globusDownload_" + dataset.getId()+"_"+authUser.getIdentifier()+"_"+logTimestamp + ".log"; + FileHandler fileHandler; + boolean fileHandlerSuceeded; + try { + fileHandler = new FileHandler(logFileName); + globusLogger.setUseParentHandlers(false); + fileHandlerSuceeded = true; + } catch (IOException | SecurityException ex) { + Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); + return; + } + + if (fileHandlerSuceeded) { + globusLogger.addHandler(fileHandler); + } else { + globusLogger = logger; + } + + globusLogger.info("Starting an globusDownload "); + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + globusLogger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); + } + String taskIdentifier = jsonObject.getString("taskIdentifier"); + String ruleId = jsonObject.getString("ruleId"); + + // globus task status check + globusStatusCheck(taskIdentifier,globusLogger); + + // what if some files failed during download? 
+ + if(ruleId.length() > 0) { + globusServiceBean.deletePermision(ruleId, globusLogger); + } + + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADSUCCESS, dataset.getId()); + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index bfe88ac50fd..e476a4e55b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -555,6 +555,15 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio )); return messageText += fileMsg; + case GLOBUSDOWNLOADSUCCESS: + dataset = (Dataset) targetObject; + String fileDownloadMsg = BundleUtil.getStringFromBundle("notification.mail.download.globus", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalIdString(), + dataset.getDisplayName() + )); + return messageText += fileDownloadMsg; + case CHECKSUMIMPORT: version = (DatasetVersion) targetObject; String checksumImportMsg = BundleUtil.getStringFromBundle("notification.import.checksum", Arrays.asList( @@ -631,6 +640,8 @@ private Object getObjectOfNotification (UserNotification userNotification){ return versionService.find(userNotification.getObjectId()); case GLOBUSUPLOADSUCCESS: return datasetService.find(userNotification.getObjectId()); + case GLOBUSDOWNLOADSUCCESS: + return datasetService.find(userNotification.getObjectId()); case CHECKSUMIMPORT: return versionService.find(userNotification.getObjectId()); case APIGENERATED: diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java index e23c2a72b6c..78ef2bb6783 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java @@ -30,7 +30,7 @@ public enum Type { ASSIGNROLE, 
REVOKEROLE, CREATEDV, CREATEDS, CREATEACC, SUBMITTEDDS, RETURNEDDS, PUBLISHEDDS, REQUESTFILEACCESS, GRANTFILEACCESS, REJECTFILEACCESS, FILESYSTEMIMPORT, CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS, - PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, GLOBUSUPLOADSUCCESS; + PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, GLOBUSUPLOADSUCCESS,GLOBUSDOWNLOADSUCCESS; }; private static final long serialVersionUID = 1L; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 78d7627a657..e0477c49aee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2764,26 +2764,6 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); - if(uriInfo != null) { - logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); - } - - //logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + headers.getRequestHeaders() - - MultivaluedMap multivaluedMap = headers.getRequestHeaders(); - - Map result = new HashMap<>(); - multivaluedMap.forEach((name, values) -> { - if (!CollectionUtils.isEmpty(values)) { - result.put(name, (values.size() != 1) ? 
values : values.get(0)); - logger.info(" headers ==== " + name + " ==== "+ values ); - } - }); - - logger.info(" ==== headers.getRequestHeader(origin) ====== " + headers.getRequestHeader("origin") ); - logger.info(" ==== headers.getRequestHeader(referer) ====== " + headers.getRequestHeader("referer") ); - - if (!systemConfig.isHTTPUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); } @@ -2834,31 +2814,13 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); - - - /* - - String xfp = httpRequest.getHeader("X-Forwarded-Proto"); - //String requestUrl = xfp +"://"+httpRequest.getServerName(); - - x-forwarded-proto - String requestUrl = httpRequest.getProtocol().toLowerCase().split("/")[0]+"://"+httpRequest.getServerName(); - - if( httpRequest.getServerPort() > 0 ) - { - requestUrl = requestUrl + ":"+ httpRequest.getServerPort(); - } - - */ - - //String requestUrl = "http://localhost:8080"; - String requestUrl = "https://dvdev.scholarsportal.info" ; + String requestUrl = headers.getRequestHeader("origin").get(0); // Async Call - datasetService.globusAsyncCall(jsonData, token, dataset, requestUrl, authUser); + datasetService.globusUpload(jsonData, token, dataset, requestUrl, authUser); + return ok("Async call to Globus Upload started "); - return ok("Globus Task successfully completed "); } @@ -3078,24 +3040,45 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, @POST - @Path("/deleteglobusRule") + @Path("{id}/deleteglobusRule") @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response deleteglobusRule(@FormDataParam("jsonData") String jsonData + public Response deleteglobusRule(@PathParam("id") String datasetId,@FormDataParam("jsonData") String jsonData ) throws IOException, ExecutionException, InterruptedException { - msgt("******* (api deleteglobusRule) jsonData : " + 
jsonData.toString()); - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); + logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData); + + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); } - String ruleId = jsonObject.getString("ruleId"); + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // Async Call + datasetService.globusDownload(jsonData, dataset, authUser); + + return ok("Async call to Globus Download started"); - globusServiceBean.deletePermision(ruleId,logger); - return ok("Globus Rule deleted successfully "); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index bf1713ec1d4..4596ac8b3cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -511,6 +511,10 @@ public void displayNotification() { userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); break; + case GLOBUSDOWNLOADSUCCESS: 
+ userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); + break; + case CHECKSUMIMPORT: userNotification.setTheObject(datasetVersionService.find(userNotification.getObjectId())); break; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 3af54b84ce3..0908ae7ecd0 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -220,8 +220,10 @@ notification.ingest.completed=Dataset {2} ingest process has finished with errors.

Ingested files:{3}
notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded and verified. notification.mail.import.globus=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded via Globus and verified. +notification.mail.download.globus=Files from the dataset {2} ({0}/dataset.xhtml?persistentId={1}) have been successfully downloaded via Globus and verified. notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. notification.import.globus=Dataset {1} has been successfully uploaded via Globus and verified. +notification.download.globus=Files from the dataset {1} have been successfully downloaded via Globus and verified. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification groupAndRoles.manageTips=Here is where you can access and manage all the groups you belong to, and the roles you have been assigned. @@ -711,6 +713,7 @@ notification.email.info.unavailable=Unavailable notification.email.apiTokenGenerated=Hello {0} {1},\n\nAPI Token has been generated. Please keep it secure as you would do with a password. notification.email.apiTokenGenerated.subject=API Token was generated notification.email.import.globus.subject=Dataset {0} has been successfully uploaded via Globus and verified +notification.email.download.globus.subject=Files from the dataset {0} have been successfully downloaded via Globus and verified # dataverse.xhtml dataverse.name=Dataverse Name diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index abaeba46ee3..05ebf5f3b7a 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -293,6 +293,13 @@
+ + + + + + + From 12e2e6eb1de0e2223c895b1a7fbfb6b29b3d5f14 Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 8 Apr 2021 11:29:14 -0400 Subject: [PATCH 066/161] correction to verify ruleID existence, added ChecksumDatasetSizeLimit and ChecksumFileSizeLimit settings --- .../iq/dataverse/DatasetServiceBean.java | 18 +++++- .../harvard/iq/dataverse/api/Datasets.java | 24 ++++++++ .../iq/dataverse/dataset/DatasetUtil.java | 12 +++- .../FinalizeDatasetPublicationCommand.java | 55 ++++++++++++------- .../dataverse/globus/GlobusServiceBean.java | 23 ++++---- .../settings/SettingsServiceBean.java | 4 ++ .../iq/dataverse/util/SystemConfig.java | 32 ++++++++++- 7 files changed, 130 insertions(+), 38 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 6a51e68ddbb..ec59972efe1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -59,7 +59,6 @@ import org.apache.commons.lang.StringUtils; import org.ocpsoft.common.util.Strings; -import javax.servlet.http.HttpServletRequest; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -1049,6 +1048,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String logTimestamp = logFormatter.format(new Date()); Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); + //Logger.getLogger(DatasetServiceBean.class.getCanonicalName()); //Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." 
+ "ExportAll" + logTimestamp); String logFileName = "../logs" + File.separator + "globusUpload" + dataset.getId()+"_"+authUser.getIdentifier()+"_"+ logTimestamp + ".log"; @@ -1088,7 +1088,13 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin } String taskIdentifier = jsonObject.getString("taskIdentifier"); - String ruleId = jsonObject.getString("ruleId"); + + String ruleId = "" ; + try { + jsonObject.getString("ruleId"); + }catch (NullPointerException npe){ + + } // globus task status check globusStatusCheck(taskIdentifier,globusLogger); @@ -1403,7 +1409,13 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } String taskIdentifier = jsonObject.getString("taskIdentifier"); - String ruleId = jsonObject.getString("ruleId"); + String ruleId = ""; + + try { + jsonObject.getString("ruleId"); + }catch (NullPointerException npe){ + + } // globus task status check globusStatusCheck(taskIdentifier,globusLogger); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index e0477c49aee..ca6425fc732 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2814,8 +2814,32 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); + if(uriInfo != null) { + logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); + } + + //logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + headers.getRequestHeaders() + + MultivaluedMap multivaluedMap = headers.getRequestHeaders(); + + Map result = new HashMap<>(); + multivaluedMap.forEach((name, values) -> { + if (!CollectionUtils.isEmpty(values)) { + result.put(name, (values.size() != 1) ? 
values : values.get(0)); + logger.info(" headers ==== " + name + " ==== "+ values ); + } + }); + + logger.info(" ==== headers.getRequestHeader(origin) ====== " + headers.getRequestHeader("origin") ); + logger.info(" ==== headers.getRequestHeader(referer) ====== " + headers.getRequestHeader("referer") ); + + String requestUrl = headers.getRequestHeader("origin").get(0); + if(requestUrl.contains("localhost")){ + requestUrl = "http://localhost:8080"; + } + // Async Call datasetService.globusUpload(jsonData, token, dataset, requestUrl, authUser); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 12a2cf58feb..d7f0d412d9f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -466,15 +466,23 @@ public static boolean isAppropriateStorageDriver(Dataset dataset){ * size for tabular files. */ public static String getDownloadSize(DatasetVersion dsv, boolean original) { + long bytes = 0l; + bytes = getDatasetDownloadSize( dsv, original); + return FileSizeChecker.bytesToHumanReadable(bytes); + } + + public static long getDatasetDownloadSize(DatasetVersion dsv, boolean original) { long bytes = 0l; for (FileMetadata fileMetadata : dsv.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); - if (original && dataFile.isTabularData()) { + if (original && dataFile.isTabularData()) { bytes += dataFile.getOriginalFileSize() == null ? 
0 : dataFile.getOriginalFileSize(); } else { bytes += dataFile.getFilesize(); } } - return FileSizeChecker.bytesToHumanReadable(bytes); + return (bytes); } + + } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index bab4a719aa0..066813978d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -1,18 +1,12 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.ControlledVocabularyValue; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetLock; +import edu.harvard.iq.dataverse.*; + import static edu.harvard.iq.dataverse.DatasetVersion.VersionState.*; -import edu.harvard.iq.dataverse.DatasetVersionUser; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.UserNotification; + import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; @@ -28,7 +22,7 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; + import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.util.FileUtil; @@ -36,6 +30,9 
@@ import java.util.concurrent.Future; import org.apache.solr.client.solrj.SolrServerException; +import javax.ejb.EJB; +import javax.inject.Inject; + /** * @@ -47,7 +44,9 @@ public class FinalizeDatasetPublicationCommand extends AbstractPublishDatasetCommand { private static final Logger logger = Logger.getLogger(FinalizeDatasetPublicationCommand.class.getName()); - + + + /** * mirror field from {@link PublishDatasetCommand} of same name */ @@ -70,7 +69,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { Dataset theDataset = getDataset(); logger.info("Finalizing publication of the dataset "+theDataset.getGlobalId().asString()); - + // validate the physical files before we do anything else: // (unless specifically disabled; or a minor version) if (theDataset.getLatestVersion().getVersionState() != RELEASED @@ -309,14 +308,28 @@ private void updateParentDataversesSubjectsField(Dataset savedDataset, CommandCo private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws CommandException { try { - for (DataFile dataFile : dataset.getFiles()) { - // TODO: Should we validate all the files in the dataset, or only - // the files that haven't been published previously? - // (the decision was made to validate all the files on every - // major release; we can revisit the decision if there's any - // indication that this makes publishing take significantly longer. 
- logger.log(Level.FINE, "validating DataFile {0}", dataFile.getId()); - FileUtil.validateDataFileChecksum(dataFile); + long maxDatasetSize = 0l; + long maxFileSize = 0l; + maxDatasetSize = ctxt.systemConfig().getChecksumDatasetSizeLimit(); + maxFileSize = ctxt.systemConfig().getChecksumFileSizeLimit(); + + long datasetSize = DatasetUtil.getDatasetDownloadSize(dataset.getLatestVersion(), false); + if (maxDatasetSize == -1 || datasetSize < maxDatasetSize) { + for (DataFile dataFile : dataset.getFiles()) { + // TODO: Should we validate all the files in the dataset, or only + // the files that haven't been published previously? + // (the decision was made to validate all the files on every + // major release; we can revisit the decision if there's any + // indication that this makes publishing take significantly longer. + logger.log(Level.FINE, "validating DataFile {0}", dataFile.getId()); + if (maxFileSize == -1 || dataFile.getOriginalFileSize() < maxFileSize) { + FileUtil.validateDataFileChecksum(dataFile); + } + } + } + else { + String message = "Skipping to validate File Checksum of the dataset " + dataset.getDisplayName() + ", because of the size of the dataset limit (set to " + maxDatasetSize + " ); "; + logger.info(message); } } catch (Throwable e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index b2f6f424722..2230d5bfcaf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -286,18 +286,19 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { - AccessToken clientTokenUser = getClientToken(); - globusLogger.info("Start deleting permissions." 
); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); + if(ruleId.length() > 0 ) { + AccessToken clientTokenUser = getClientToken(); + globusLogger.info("Start deleting permissions."); + String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); - //logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(),"DELETE", null); - if (result.status != 200) { - globusLogger.warning("Cannot delete access rule " + ruleId); - } else { - globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null); + if (result.status != 200) { + globusLogger.warning("Cannot delete access rule " + ruleId); + } else { + globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 7b1d7355649..dcd5b09149a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -228,6 +228,10 @@ public enum Key { SPSS/sav format, "RData" for R, etc. 
for example: :TabularIngestSizeLimit:RData */ TabularIngestSizeLimit, + /* dataset size limit for checksum validation */ + ChecksumDatasetSizeLimit, + /* file size limit for checksum validation */ + ChecksumFileSizeLimit, /** The message added to a popup upon dataset publish * diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index e9364669c7f..af7cf091c51 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -555,7 +555,37 @@ public Integer getSearchHighlightFragmentSize() { } return null; } - + + public long getChecksumDatasetSizeLimit() { + String limitEntry = settingsService.getValueForKey(SettingsServiceBean.Key.ChecksumDatasetSizeLimit); + + if (limitEntry != null) { + try { + Long sizeOption = new Long(limitEntry); + return sizeOption; + } catch (NumberFormatException nfe) { + logger.warning("Invalid value for TabularIngestSizeLimit option? - " + limitEntry); + } + } + // -1 means no limit is set; + return -1; + } + + public long getChecksumFileSizeLimit() { + String limitEntry = settingsService.getValueForKey(SettingsServiceBean.Key.ChecksumFileSizeLimit); + + if (limitEntry != null) { + try { + Long sizeOption = new Long(limitEntry); + return sizeOption; + } catch (NumberFormatException nfe) { + logger.warning("Invalid value for TabularIngestSizeLimit option? - " + limitEntry); + } + } + // -1 means no limit is set; + return -1; + } + public long getTabularIngestSizeLimit() { // This method will return the blanket ingestable size limit, if // set on the system. 
I.e., the universal limit that applies to all From ad48ad711049f99d17b3494fab3d923016bfc799 Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 19 Apr 2021 16:58:39 -0400 Subject: [PATCH 067/161] cleanup : removed redundant code from Phase 1 --- .../edu/harvard/iq/dataverse/DatasetPage.java | 4 - .../iq/dataverse/DatasetServiceBean.java | 55 ++- .../harvard/iq/dataverse/api/Datasets.java | 326 +----------- .../harvard/iq/dataverse/api/GlobusApi.java | 464 ------------------ .../datasetutility/AddReplaceFileHelper.java | 18 +- .../harvard/iq/dataverse/globus/FileG.java | 67 --- .../iq/dataverse/globus/FilesList.java | 60 --- .../dataverse/globus/GlobusServiceBean.java | 264 ---------- .../iq/dataverse/globus/Identities.java | 16 - .../harvard/iq/dataverse/globus/Identity.java | 67 --- .../harvard/iq/dataverse/globus/MkDir.java | 22 - .../iq/dataverse/globus/MkDirResponse.java | 50 -- .../dataverse/globus/PermissionsResponse.java | 58 --- .../dataverse/globus/SuccessfulTransfer.java | 35 -- .../edu/harvard/iq/dataverse/globus/Task.java | 69 --- .../harvard/iq/dataverse/globus/Tasklist.java | 17 - .../iq/dataverse/globus/Transferlist.java | 18 - .../harvard/iq/dataverse/globus/UserInfo.java | 68 --- 18 files changed, 46 insertions(+), 1632 deletions(-) delete mode 100644 src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/FileG.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Identities.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Identity.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java delete mode 100644 
src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Task.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 522fe65cea8..5030f4ffeca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.provenance.ProvPopupFragmentBean; import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; @@ -234,9 +233,6 @@ public enum DisplayMode { @Inject MakeDataCountLoggingServiceBean mdcLogService; @Inject DataverseHeaderFragment dataverseHeaderFragment; - @Inject - protected GlobusServiceBean globusService; - private Dataset dataset = new Dataset(); private Long id = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 319e6ac1c10..8b715788172 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -1067,13 +1067,12 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo @Asynchronous public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, User authUser) throws ExecutionException, InterruptedException, MalformedURLException { + Integer countAll = 0; + Integer countSuccess = 0; + Integer countError = 0; String logTimestamp = 
logFormatter.format(new Date()); Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); - - - //Logger.getLogger(DatasetServiceBean.class.getCanonicalName()); - //Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." + "ExportAll" + logTimestamp); - String logFileName = "../logs" + File.separator + "globusUpload" + dataset.getId()+"_"+authUser.getIdentifier()+"_"+ logTimestamp + ".log"; + String logFileName = "../logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; boolean fileHandlerSuceeded; try { @@ -1131,28 +1130,31 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from victoria + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from externalTool String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String fileName = fileJsonObject.getString("fileName"); String[] bits = storageIdentifier.split(":"); - String fileId = bits[bits.length-1]; String bucketName = bits[1].replace("/", ""); + String fileId = bits[bits.length-1]; // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 String fullPath = storageType + bucketName + "/" + datasetIdentifier +"/" +fileId ; + String fileName = fileJsonObject.getString("fileName"); inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); } - // calculate checksum, mimetype + // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList,globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - JsonArrayBuilder jsonSecondAPI = Json.createArrayBuilder() ; + JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder() ; for 
(JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + countAll++; String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String fileName = fileJsonObject.getString("fileName"); + String directoryLabel = fileJsonObject.getString("directoryLabel"); String[] bits = storageIdentifier.split(":"); String fileId = bits[bits.length-1]; @@ -1165,13 +1167,18 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin fileJsonObject = path.apply(fileJsonObject); path = Json.createPatchBuilder().add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); fileJsonObject = path.apply(fileJsonObject); - jsonSecondAPI.add(stringToJsonObjectBuilder(fileJsonObject.toString())); + jsonDataSecondAPI.add(stringToJsonObjectBuilder(fileJsonObject.toString())); + countSuccess++; + } + else { + globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); + countError++; } } - String newjsonData = jsonSecondAPI.build().toString(); + String newjsonData = jsonDataSecondAPI.build().toString(); - globusLogger.info("Generated new JsonData with calculated values"); + globusLogger.info("Successfully generated new JsonData for Second API call"); String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST "+httpRequestUrl+"/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; @@ -1180,7 +1187,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String output = addFilesAsync(command , globusLogger ) ; if(output.equalsIgnoreCase("ok")) { - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADSUCCESS, dataset.getId()); + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADSUCCESS, 
dataset.getId(),""); + globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); } else @@ -1190,6 +1198,11 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin } + globusLogger.info("Files processed: " + countAll.toString()); + globusLogger.info("Files added successfully: " + countSuccess.toString()); + globusLogger.info("Files failures: " + countError.toString()); + globusLogger.info("Finished upload via Globus job."); + if (fileHandlerSuceeded) { fileHandler.close(); } @@ -1310,10 +1323,14 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw } while (count < 3); - - String mimeType = calculatemime(fileName); - globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = "+ checksumVal + " mimeType = " + mimeType); - return new fileDetailsHolder(fileId, checksumVal,mimeType); + if(checksumVal.length() > 0 ) { + String mimeType = calculatemime(fileName); + globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = " + checksumVal + " mimeType = " + mimeType); + return new fileDetailsHolder(fileId, checksumVal, mimeType); + } + else { + return null; + } //getBytes(in)+"" ); // calculatemime(fileName)); } @@ -1402,7 +1419,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro String logTimestamp = logFormatter.format(new Date()); Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." 
+ "GlobusDownload" + logTimestamp); - String logFileName = "../logs" + File.separator + "globusDownload_" + dataset.getId()+"_"+authUser.getIdentifier()+"_"+logTimestamp + ".log"; + String logFileName = "../logs" + File.separator + "globusDownload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; boolean fileHandlerSuceeded; try { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ca6425fc732..f56674cb351 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2444,315 +2444,6 @@ public Response getTimestamps(@PathParam("identifier") String id) { } - @POST - @Path("{id}/addglobusFilesBkup") - @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response addGlobusFileToDatasetBkup(@PathParam("id") String datasetId, - @FormDataParam("jsonData") String jsonData - ) { - JsonArrayBuilder jarr = Json.createArrayBuilder(); - - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - - // ------------------------------------- - // (1) Get the user from the API key - // ------------------------------------- - User authUser; - try { - authUser = findUserOrDie(); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } - - // ------------------------------------- - // (2) Get the Dataset Id - // ------------------------------------- - Dataset dataset; - - try { - dataset = findDatasetOrDie(datasetId); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } - - //------------------------------------ - // (2a) Add lock to the dataset page - // -------------------------------------- - - String lockInfoMessage = "Globus Upload API is running "; - DatasetLock lock = 
datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, - ((AuthenticatedUser) authUser).getId(), lockInfoMessage); - if (lock != null) { - dataset.addLock(lock); - } else { - logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - } - - //------------------------------------ - // (2b) Make sure dataset does not have package file - // -------------------------------------- - - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") - ); - } - } - - - // ------------------------------------- - // (3) Parse JsonData - // ------------------------------------- - - String taskIdentifier = null; - - msgt("******* (api) jsonData 1: " + jsonData.toString()); - - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); - } - - // ------------------------------------- - // (4) Get taskIdentifier - // ------------------------------------- - - taskIdentifier = jsonObject.getString("taskIdentifier"); - - // ------------------------------------- - // (5) Wait until task completion - // ------------------------------------- - - boolean success = false; - boolean globustype = true; - - do { - try { - String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - AccessToken clientTokenUser = globusServiceBean.getClientToken(); - - success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); - - } catch (Exception ex) { - ex.printStackTrace(); - logger.info(ex.getMessage()); - return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id"); - } - - } while (!success); - - - try { - StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - - List cachedObjectsTags = datasetSIO.listAuxObjects(); - - DataverseRequest dvRequest = createDataverseRequest(authUser); - AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( - dvRequest, - ingestService, - datasetService, - fileService, - permissionSvc, - commandEngine, - systemConfig - ); - - // ------------------------------------- - // (6) Parse files information from jsondata - // calculate checksum - // determine mimetype - // ------------------------------------- - - JsonArray filesJson = jsonObject.getJsonArray("files"); - - int totalNumberofFiles = 0; - int successNumberofFiles = 0; - try { - // Start to add the files - if (filesJson != null) { - totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); - for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { - - String storageIdentifier = fileJson.getString("storageIdentifier"); //"s3://176ce6992af-208dea3661bb50" - //String suppliedContentType = fileJson.getString("contentType"); - String fileName = fileJson.getString("fileName"); - - String fullPath = 
datasetSIO.getStorageLocation() + "/" + storageIdentifier.replace("s3://", ""); - - String bucketName = System.getProperty("dataverse.files." + storageIdentifier.split(":")[0] + ".bucket-name"); - - String dbstorageIdentifier = storageIdentifier.split(":")[0] + "://" + bucketName + ":" + storageIdentifier.replace("s3://", ""); - - // the storageidentifier should be unique - Query query = em.createQuery("select object(o) from DvObject as o where o.storageIdentifier = :storageIdentifier"); - query.setParameter("storageIdentifier", dbstorageIdentifier); - - if (query.getResultList().size() > 0) { - JsonObjectBuilder fileoutput = Json.createObjectBuilder() - .add("storageIdentifier", storageIdentifier) - .add("message", " The datatable is not updated since the Storage Identifier already exists in dvObject. "); - - jarr.add(fileoutput); - } else { - - // calculate mimeType - String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; - - String type = FileUtil.determineFileTypeByExtension(fileName); - - if (!StringUtils.isBlank(type)) { - finalType = type; - } - - JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); - fileJson = path.apply(fileJson); - - int count = 0; - // calculate md5 checksum - do { - try { - - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - InputStream in = dataFileStorageIO.getInputStream(); - String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - - path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); - fileJson = path.apply(fileJson); - count = 3; - } catch (Exception ex) { - count = count + 1; - ex.printStackTrace(); - logger.info(ex.getMessage()); - Thread.sleep(5000); - msgt(" ***** Try to calculate checksum again for " + fileName); - //error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to calculate checksum"); - } - - } while (count < 3); - - //--------------------------------------- - // Load up optional params via JSON - 
//--------------------------------------- - - OptionalFileParams optionalFileParams = null; - - try { - optionalFileParams = new OptionalFileParams(fileJson.toString()); - } catch (DataFileTagException ex) { - return error(Response.Status.BAD_REQUEST, ex.getMessage()); - } - - msg("ADD!"); - - //------------------- - // Run "runAddFileByDatasetId" - //------------------- - addFileHelper.runAddFileByDataset(dataset, - fileName, - finalType, - storageIdentifier, - null, - optionalFileParams, - true); - - - if (addFileHelper.hasError()) { - - JsonObjectBuilder fileoutput = Json.createObjectBuilder() - .add("storageIdentifier ", storageIdentifier) - .add("error Code: ", addFileHelper.getHttpErrorCode().toString()) - .add("message ", addFileHelper.getErrorMessagesAsString("\n")); - - jarr.add(fileoutput); - - } else { - String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); - - JsonObject successresult = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); - - try { - logger.fine("successMsg: " + successMsg); - String duplicateWarning = addFileHelper.getDuplicateFileWarning(); - if (duplicateWarning != null && !duplicateWarning.isEmpty()) { - // return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); - JsonObjectBuilder fileoutput = Json.createObjectBuilder() - .add("storageIdentifier ", storageIdentifier) - .add("warning message: ", addFileHelper.getDuplicateFileWarning()) - .add("message ", successresult.getJsonArray("files").getJsonObject(0)); - jarr.add(fileoutput); - - } else { - JsonObjectBuilder fileoutput = Json.createObjectBuilder() - .add("storageIdentifier ", storageIdentifier) - .add("message ", successresult.getJsonArray("files").getJsonObject(0)); - jarr.add(fileoutput); - } - - } catch (Exception ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); - return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); - } - } - } - successNumberofFiles = successNumberofFiles + 1; - } - }// End of adding files - } catch (Exception e) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, e); - return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); - } - - logger.log(Level.INFO, "Total Number of Files " + totalNumberofFiles); - logger.log(Level.INFO, "Success Number of Files " + successNumberofFiles); - DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); - if (dcmLock == null) { - logger.log(Level.WARNING, "Dataset not locked for Globus upload"); - } else { - logger.log(Level.INFO, "Dataset remove locked for Globus upload"); - datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); - //dataset.removeLock(dcmLock); - } - - try { - Command cmd; - cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); - ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); - commandEngine.submit(cmd); - } catch (CommandException ex) { - logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "====== UpdateDatasetVersionCommand Exception : " + ex.getMessage()); - } - - dataset = datasetService.find(dataset.getId()); - - List s = dataset.getFiles(); - for (DataFile dataFile : s) { - logger.info(" ******** TEST the datafile id is = " + dataFile.getId() + " = " + dataFile.getDisplayName()); - } - - msg("******* pre ingest start in globus API"); - - ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); - - msg("******* post ingest start in globus API"); - - } catch (Exception e) { - String message = e.getMessage(); - msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); - e.printStackTrace(); - } - - return ok(Json.createObjectBuilder().add("Files", jarr)); - - } - - @POST @Path("{id}/addglobusFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) @@ -2818,21 +2509,6 @@ public Response 
addGlobusFilesToDataset(@PathParam("id") String datasetId, logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); } - //logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + headers.getRequestHeaders() - - MultivaluedMap multivaluedMap = headers.getRequestHeaders(); - - Map result = new HashMap<>(); - multivaluedMap.forEach((name, values) -> { - if (!CollectionUtils.isEmpty(values)) { - result.put(name, (values.size() != 1) ? values : values.get(0)); - logger.info(" headers ==== " + name + " ==== "+ values ); - } - }); - - logger.info(" ==== headers.getRequestHeader(origin) ====== " + headers.getRequestHeader("origin") ); - logger.info(" ==== headers.getRequestHeader(referer) ====== " + headers.getRequestHeader("referer") ); - String requestUrl = headers.getRequestHeader("origin").get(0); @@ -3054,7 +2730,7 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, } catch (Exception e) { String message = e.getMessage(); - msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); + msgt("******* datasetId :" + dataset.getId() + " ======= addFilesToDataset CALL Exception ============== " + message); e.printStackTrace(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java b/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java deleted file mode 100644 index 39c1a13842a..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/api/GlobusApi.java +++ /dev/null @@ -1,464 +0,0 @@ -package edu.harvard.iq.dataverse.api; - -import com.amazonaws.services.s3.model.S3ObjectSummary; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DataverseRequestServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.PermissionServiceBean; -import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.authorization.users.ApiToken; 
-import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.authorization.users.User; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.*; - -import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; -import edu.harvard.iq.dataverse.datasetutility.DataFileTagException; -import edu.harvard.iq.dataverse.datasetutility.NoFilesException; -import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; -import edu.harvard.iq.dataverse.engine.command.Command; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; -import edu.harvard.iq.dataverse.globus.AccessToken; -import edu.harvard.iq.dataverse.globus.GlobusServiceBean; -import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.json.JsonParseException; -import edu.harvard.iq.dataverse.util.json.JsonPrinter; -import org.apache.commons.lang.StringUtils; -import org.apache.http.HttpEntity; -import org.apache.http.HttpResponse; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.entity.StringEntity; -import org.apache.http.entity.mime.MultipartEntityBuilder; -import org.apache.http.entity.mime.content.ContentBody; -import org.apache.http.util.EntityUtils; -import org.glassfish.jersey.media.multipart.FormDataBodyPart; -import org.glassfish.jersey.media.multipart.FormDataContentDisposition; -import org.glassfish.jersey.media.multipart.FormDataParam; -import org.json.JSONObject; - - -import javax.ejb.EJB; -import 
javax.ejb.EJBException; -import javax.ejb.Stateless; -import javax.inject.Inject; -import javax.json.*; -import javax.json.stream.JsonParsingException; -import javax.persistence.NoResultException; -import javax.persistence.Query; -import javax.servlet.http.HttpServletRequest; -import javax.ws.rs.*; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringReader; -import java.sql.Timestamp; -import java.text.SimpleDateFormat; -import java.util.*; -import java.util.concurrent.TimeUnit; -import java.util.logging.Level; -import java.util.logging.Logger; - - -import edu.harvard.iq.dataverse.api.Datasets; - -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; - -@Stateless -@Path("globus") -public class GlobusApi extends AbstractApiBean { - private static final Logger logger = Logger.getLogger(Access.class.getCanonicalName()); - - @EJB - DatasetServiceBean datasetService; - - @EJB - GlobusServiceBean globusServiceBean; - - @EJB - EjbDataverseEngine commandEngine; - - @EJB - PermissionServiceBean permissionService; - - @EJB - IngestServiceBean ingestService; - - - @Inject - DataverseRequestServiceBean dvRequestService; - - - @POST - @Path("{id}/add") - @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response globus(@PathParam("id") String datasetId, - @FormDataParam("jsonData") String jsonData - ) - { - JsonArrayBuilder jarr = Json.createArrayBuilder(); - - // ------------------------------------- - // (1) Get the user from the API key - // ------------------------------------- - User authUser; - try { - authUser = findUserOrDie(); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, - BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } - - // ------------------------------------- - // (2) Get the Dataset Id - // 
------------------------------------- - Dataset dataset; - - try { - dataset = findDatasetOrDie(datasetId); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } - - - // ------------------------------------- - // (3) Parse JsonData - // ------------------------------------- - - String taskIdentifier = null; - - msgt("******* (api) jsonData 1: " + jsonData); - - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); - } - - // ------------------------------------- - // (4) Get taskIdentifier - // ------------------------------------- - - - taskIdentifier = jsonObject.getString("taskIdentifier"); - msgt("******* (api) newTaskIdentifier: " + taskIdentifier); - - // ------------------------------------- - // (5) Wait until task completion - // ------------------------------------- - - boolean success = false; - - do { - try { - String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - basicGlobusToken = "ODA0ODBhNzEtODA5ZC00ZTJhLWExNmQtY2JkMzA1NTk0ZDdhOmQvM3NFd1BVUGY0V20ra2hkSkF3NTZMWFJPaFZSTVhnRmR3TU5qM2Q3TjA9"; - msgt("******* (api) basicGlobusToken: " + basicGlobusToken); - AccessToken clientTokenUser = globusServiceBean.getClientToken(); - - success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskIdentifier); - msgt("******* (api) success: " + success); - - } catch (Exception ex) { - ex.printStackTrace(); - logger.info(ex.getMessage()); - return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to get task id"); - } - - } while (!success); - - - try - { - StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - - DataverseRequest dvRequest2 = createDataverseRequest(authUser); - AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(dvRequest2, - ingestService, - datasetService, - 
fileService, - permissionSvc, - commandEngine, - systemConfig); - - // ------------------------------------- - // (6) Parse files information from jsondata - // calculate checksum - // determine mimetype - // ------------------------------------- - - JsonArray filesJson = jsonObject.getJsonArray("files"); - - if (filesJson != null) { - for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { -/* - for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - - } - */ - - - String storageIdentifier = fileJson.getString("storageIdentifier"); - String suppliedContentType = fileJson.getString("contentType"); - String fileName = fileJson.getString("fileName"); - - String fullPath = datasetSIO.getStorageLocation() + "/" + storageIdentifier.replace("s3://", ""); - - String bucketName = System.getProperty("dataverse.files." + storageIdentifier.split(":")[0] + ".bucket-name"); - - String dbstorageIdentifier = storageIdentifier.split(":")[0] + "://" + bucketName + ":" + storageIdentifier.replace("s3://", ""); - - Query query = em.createQuery("select object(o) from DvObject as o where o.storageIdentifier = :storageIdentifier"); - query.setParameter("storageIdentifier", dbstorageIdentifier); - - msgt("******* dbstorageIdentifier :" + dbstorageIdentifier + " ======= query.getResultList().size()============== " + query.getResultList().size()); - - - if (query.getResultList().size() > 0) { - - JsonObjectBuilder fileoutput= Json.createObjectBuilder() - .add("storageIdentifier " , storageIdentifier) - .add("message " , " The datatable is not updated since the Storage Identifier already exists in dvObject. "); - - jarr.add(fileoutput); - } else { - - // Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied - String finalType = StringUtils.isBlank(suppliedContentType) ? 
FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; - String type = FileUtil.determineFileTypeByExtension(fileName); - if (!StringUtils.isBlank(type)) { - //Use rules for deciding when to trust browser supplied type - //if (FileUtil.useRecognizedType(finalType, type)) - { - finalType = type; - } - logger.info("Supplied type: " + suppliedContentType + ", finalType: " + finalType); - } - - JsonPatch path = Json.createPatchBuilder().add("/mimeType", finalType).build(); - fileJson = path.apply(fileJson); - - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - InputStream in = dataFileStorageIO.getInputStream(); - String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - - path = Json.createPatchBuilder().add("/md5Hash", checksumVal).build(); - fileJson = path.apply(fileJson); - - //addGlobusFileToDataset(dataset, fileJson.toString(), addFileHelper, fileName, finalType, storageIdentifier); - - - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - - - //------------------------------------ - // (1) Make sure dataset does not have package file - // -------------------------------------- - - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, - BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") - ); - } - } - - //--------------------------------------- - // (2) Load up optional params via JSON - //--------------------------------------- - - OptionalFileParams optionalFileParams = null; - msgt("(api) jsonData 2: " + fileJson.toString()); - - try { - optionalFileParams = new OptionalFileParams(fileJson.toString()); - } catch (DataFileTagException ex) { - return error( Response.Status.BAD_REQUEST, ex.getMessage()); - } - - - //------------------- - // (3) Create the AddReplaceFileHelper object - //------------------- - msg("ADD!"); - - 
//------------------- - // (4) Run "runAddFileByDatasetId" - //------------------- - addFileHelper.runAddFileByDataset(dataset, - fileName, - finalType, - storageIdentifier, - null, - optionalFileParams); - - - if (addFileHelper.hasError()){ - - JsonObjectBuilder fileoutput= Json.createObjectBuilder() - .add("storageIdentifier " , storageIdentifier) - .add("error Code: " ,addFileHelper.getHttpErrorCode().toString()) - .add("message " , addFileHelper.getErrorMessagesAsString("\n")); - - jarr.add(fileoutput); - - }else{ - String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); - - JsonObject a1 = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); - - JsonArray f1 = a1.getJsonArray("files"); - JsonObject file1 = f1.getJsonObject(0); - - try { - //msgt("as String: " + addFileHelper.getSuccessResult()); - - logger.fine("successMsg: " + successMsg); - String duplicateWarning = addFileHelper.getDuplicateFileWarning(); - if (duplicateWarning != null && !duplicateWarning.isEmpty()) { - // return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); - JsonObjectBuilder fileoutput= Json.createObjectBuilder() - .add("storageIdentifier " , storageIdentifier) - .add("warning message: " ,addFileHelper.getDuplicateFileWarning()) - .add("message " , file1); - jarr.add(fileoutput); - - } else { - JsonObjectBuilder fileoutput= Json.createObjectBuilder() - .add("storageIdentifier " , storageIdentifier) - .add("message " , file1); - jarr.add(fileoutput); - } - - //"Look at that! You added a file! (hey hey, it may have worked)"); - } catch (Exception ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); - return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! 
See administrator!"); - } - } - } - } - } - } catch (Exception e) { - String message = e.getMessage(); - msgt("******* Exception from globus API call " + message); - msgt("******* datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); - e.printStackTrace(); - } - return ok(Json.createObjectBuilder().add("Files", jarr)); - - } - - - - private void msg(String m) { - //System.out.println(m); - logger.info(m); - } - - private void dashes() { - msg("----------------"); - } - - private void msgt(String m) { - //dashes(); - msg(m); - //dashes(); - } - - public Response addGlobusFileToDataset( Dataset dataset, - String jsonData, AddReplaceFileHelper addFileHelper,String fileName, - String finalType, - String storageIdentifier - ){ - - - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - - - //------------------------------------ - // (1) Make sure dataset does not have package file - // -------------------------------------- - - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, - BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") - ); - } - } - - //--------------------------------------- - // (2) Load up optional params via JSON - //--------------------------------------- - - OptionalFileParams optionalFileParams = null; - msgt("(api) jsonData 2: " + jsonData); - - try { - optionalFileParams = new OptionalFileParams(jsonData); - } catch (DataFileTagException ex) { - return error( Response.Status.BAD_REQUEST, ex.getMessage()); - } - - - //------------------- - // (3) Create the AddReplaceFileHelper object - //------------------- - msg("ADD!"); - - //------------------- - // (4) Run "runAddFileByDatasetId" - //------------------- - addFileHelper.runAddFileByDataset(dataset, - fileName, - finalType, - storageIdentifier, - null, - optionalFileParams); - 
- - if (addFileHelper.hasError()){ - return error(addFileHelper.getHttpErrorCode(), addFileHelper.getErrorMessagesAsString("\n")); - }else{ - String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); - try { - //msgt("as String: " + addFileHelper.getSuccessResult()); - - logger.fine("successMsg: " + successMsg); - String duplicateWarning = addFileHelper.getDuplicateFileWarning(); - if (duplicateWarning != null && !duplicateWarning.isEmpty()) { - return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); - } else { - return ok(addFileHelper.getSuccessResultAsJsonObjectBuilder()); - } - - //"Look at that! You added a file! (hey hey, it may have worked)"); - } catch (NoFilesException ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); - return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); - - } - } - - - - } // end: addFileToDataset - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index f668d8a9a81..6747427d18e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -100,7 +100,7 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - public static String GLOBUSFILE_ADD_OPERATION = "GLOBUSFILE_ADD_OPERATION"; + public static String MULTIPLEFILES_ADD_OPERATION = "MULTIPLEFILES_ADD_OPERATION"; private String currentOperation; @@ -326,14 +326,14 @@ public boolean runAddFileByDataset(Dataset chosenDataset, String newStorageIdentifier, InputStream newFileInputStream, 
OptionalFileParams optionalFileParams, - boolean globustype) { + boolean multipleFiles) { msgt(">> runAddFileByDatasetId"); initErrorHandling(); - if(globustype) { - this.currentOperation = GLOBUSFILE_ADD_OPERATION; + if(multipleFiles) { + this.currentOperation = MULTIPLEFILES_ADD_OPERATION; } else { this.currentOperation = FILE_ADD_OPERATION; @@ -747,7 +747,7 @@ private boolean runAddReplacePhase2(){ }else{ msgt("step_070_run_update_dataset_command"); - if (!this.isGlobusFileAddOperation()) { + if (!this.isMultipleFilesAddOperation()) { if (!this.step_070_run_update_dataset_command()){ return false; } @@ -813,14 +813,14 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } /** - * Is this a file add operation via Globus? + * Is this a multiple files add operation ? * * @return */ - public boolean isGlobusFileAddOperation(){ + public boolean isMultipleFilesAddOperation(){ - return this.currentOperation.equals(GLOBUSFILE_ADD_OPERATION); + return this.currentOperation.equals(MULTIPLEFILES_ADD_OPERATION); } /** @@ -1902,7 +1902,7 @@ private boolean step_100_startIngestJobs(){ msg("pre ingest start"); // start the ingest! 
// - if (!this.isGlobusFileAddOperation()) { + if (!this.isMultipleFilesAddOperation()) { ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java b/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java deleted file mode 100644 index bd6a4b3b881..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/FileG.java +++ /dev/null @@ -1,67 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -public class FileG { - private String DATA_TYPE; - private String group; - private String name; - private String permissions; - private String size; - private String type; - private String user; - - public String getDATA_TYPE() { - return DATA_TYPE; - } - - public String getGroup() { - return group; - } - - public String getName() { - return name; - } - - public String getPermissions() { - return permissions; - } - - public String getSize() { - return size; - } - - public String getType() { - return type; - } - - public String getUser() { - return user; - } - - public void setDATA_TYPE(String DATA_TYPE) { - this.DATA_TYPE = DATA_TYPE; - } - - public void setGroup(String group) { - this.group = group; - } - - public void setName(String name) { - this.name = name; - } - - public void setPermissions(String permissions) { - this.permissions = permissions; - } - - public void setSize(String size) { - this.size = size; - } - - public void setType(String type) { - this.type = type; - } - - public void setUser(String user) { - this.user = user; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java b/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java deleted file mode 100644 index 777e37f9b80..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/FilesList.java +++ /dev/null @@ -1,60 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -import java.util.ArrayList; - -public class FilesList { - private ArrayList DATA; - private String 
DATA_TYPE; - private String absolute_path; - private String endpoint; - private String length; - private String path; - - public String getEndpoint() { - return endpoint; - } - - public ArrayList getDATA() { - return DATA; - } - - public String getAbsolute_path() { - return absolute_path; - } - - public String getDATA_TYPE() { - return DATA_TYPE; - } - - public String getLength() { - return length; - } - - public String getPath() { - return path; - } - - public void setLength(String length) { - this.length = length; - } - - public void setEndpoint(String endpoint) { - this.endpoint = endpoint; - } - - public void setDATA(ArrayList DATA) { - this.DATA = DATA; - } - - public void setAbsolute_path(String absolute_path) { - this.absolute_path = absolute_path; - } - - public void setDATA_TYPE(String DATA_TYPE) { - this.DATA_TYPE = DATA_TYPE; - } - - public void setPath(String path) { - this.path = path; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 2230d5bfcaf..a59a2ca77c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -101,136 +101,6 @@ public void setUserTransferToken(String userTransferToken) { this.userTransferToken = userTransferToken; } - public void onLoad() { - logger.info("Start Globus " + code); - logger.info("State " + state); - - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - if (globusEndpoint.equals("") || basicGlobusToken.equals("")) { - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - return; - } - String datasetId = state; - logger.info("DatasetId = " + datasetId); - - String directory = getDirectory(datasetId); - if (directory == 
null) { - logger.severe("Cannot find directory"); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - return; - } - HttpServletRequest origRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); - - logger.info(origRequest.getScheme()); - logger.info(origRequest.getServerName()); - - if (code != null ) { - - try { - AccessToken accessTokenUser = getAccessToken(origRequest, basicGlobusToken); - if (accessTokenUser == null) { - logger.severe("Cannot get access user token for code " + code); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - return; - } else { - setUserTransferToken(accessTokenUser.getOtherTokens().get(0).getAccessToken()); - } - - UserInfo usr = getUserInfo(accessTokenUser); - if (usr == null) { - logger.severe("Cannot get user info for " + accessTokenUser.getAccessToken()); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - return; - } - logger.info(accessTokenUser.getAccessToken()); - logger.info(usr.getEmail()); - //AccessToken clientTokenUser = getClientToken(basicGlobusToken); - AccessToken clientTokenUser = getClientToken(); - if (clientTokenUser == null) { - logger.severe("Cannot get client token "); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - return; - } - logger.info(clientTokenUser.getAccessToken()); - - int status = createDirectory(clientTokenUser, directory, globusEndpoint); - if (status == 202) { - int perStatus = givePermission("identity", usr.getSub(), "rw", clientTokenUser, directory, globusEndpoint); - if (perStatus != 201 && perStatus != 200) { - logger.severe("Cannot get permissions "); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - return; - } - } else if (status == 502) { //directory already exists - int perStatus = givePermission("identity", usr.getSub(), "rw", 
clientTokenUser, directory, globusEndpoint); - if (perStatus == 409) { - logger.info("permissions already exist"); - } else if (perStatus != 201 && perStatus != 200) { - logger.severe("Cannot get permissions "); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - return; - } - } else { - logger.severe("Cannot create directory, status code " + status); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - return; - } - // ProcessBuilder processBuilder = new ProcessBuilder(); - // AuthenticatedUser user = (AuthenticatedUser) session.getUser(); - // ApiToken token = authSvc.findApiTokenByUser(user); - // String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST https://" + origRequest.getServerName() + "/api/globus/" + datasetId; - // logger.info("====command ==== " + command); - // processBuilder.command("bash", "-c", command); - // logger.info("=== Start process"); - // Process process = processBuilder.start(); - // logger.info("=== Going globus"); - goGlobusUpload(directory, globusEndpoint); - logger.info("=== Finished globus"); - - - } catch (MalformedURLException ex) { - logger.severe(ex.getMessage()); - logger.severe(ex.getCause().toString()); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - } catch (UnsupportedEncodingException ex) { - logger.severe(ex.getMessage()); - logger.severe(ex.getCause().toString()); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - } catch (IOException ex) { - logger.severe(ex.getMessage()); - logger.severe(ex.getCause().toString()); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.GlobusError")); - } - - } - - } - - private void goGlobusUpload(String directory, String globusEndpoint ) { - - String httpString = "window.location.replace('" + "https://app.globus.org/file-manager?destination_id=" + globusEndpoint + 
"&destination_path=" + directory + "'" +")"; - PrimeFaces.current().executeScript(httpString); - } - - public void goGlobusDownload(String datasetId) { - - String directory = getDirectory(datasetId); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - String httpString = "window.location.replace('" + "https://app.globus.org/file-manager?origin_id=" + globusEndpoint + "&origin_path=" + directory + "'" +")"; - PrimeFaces.current().executeScript(httpString); - } -/* - public void removeGlobusPermission() throws MalformedURLException { - //taskId and ruleId - String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - AccessToken clientTokenUser = getClientToken(basicGlobusToken); - String directory = getDirectory( dataset.getId()+"" ); - updatePermision(clientTokenUser, directory, "identity", "r"); - } - - */ - ArrayList checkPermisions( AccessToken clientTokenUser, String directory, String globusEndpoint, String principalType, String principal) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", @@ -348,125 +218,6 @@ public int givePermission(String principalType, String principal, String perm, A return result.status; } - private int createDirectory(AccessToken clientTokenUser, String directory, String globusEndpoint) throws MalformedURLException { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + globusEndpoint + "/mkdir"); - - MkDir mkDir = new MkDir(); - mkDir.setDataType("mkdir"); - mkDir.setPath(directory); - Gson gson = new GsonBuilder().create(); - - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(),"POST", gson.toJson(mkDir)); - logger.info(result.toString()); - - if (result.status == 502) { - logger.warning("Cannot create 
directory " + mkDir.getPath() + ", it already exists"); - } else if (result.status == 403) { - logger.severe("Cannot create directory " + mkDir.getPath() + ", permission denied"); - } else if (result.status == 202) { - logger.info("Directory created " + mkDir.getPath()); - } - - return result.status; - - } - - public String getTaskList(String basicGlobusToken, String identifierForFileStorage, String timeWhenAsyncStarted) throws MalformedURLException { - try - { - logger.info("1.getTaskList ====== timeWhenAsyncStarted = " + timeWhenAsyncStarted + " ====== identifierForFileStorage ====== " + identifierForFileStorage); - - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - //AccessToken clientTokenUser = getClientToken(basicGlobusToken); - AccessToken clientTokenUser = getClientToken( ); - - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task_list?filter_endpoint="+globusEndpoint+"&filter_status=SUCCEEDED&filter_completion_time="+timeWhenAsyncStarted); - - //AccessToken accessTokenUser - //accessTokenUser.getOtherTokens().get(0).getAccessToken() - MakeRequestResponse result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); - //logger.info("==TEST ==" + result.toString()); - - - - //2019-12-01 18:34:37+00:00 - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - //SimpleDateFormat task_sdf = new SimpleDateFormat("yyyy-MM-ddTHH:mm:ss"); - - Calendar cal1 = Calendar.getInstance(); - cal1.setTime(sdf.parse(timeWhenAsyncStarted)); - - Calendar cal2 = Calendar.getInstance(); - - Tasklist tasklist = null; - //2019-12-01 18:34:37+00:00 - - if (result.status == 200) { - tasklist = parseJson(result.jsonResponse, Tasklist.class, false); - for (int i = 0; i< tasklist.getDATA().size(); i++) { - Task task = tasklist.getDATA().get(i); - Date tastTime = sdf.parse(task.getRequest_time().replace("T" , " ")); - cal2.setTime(tastTime); - - - 
if ( cal1.before(cal2)) { - - // get /task//successful_transfers - // verify datasetid in "destination_path": "/~/test_godata_copy/file1.txt", - // go to aws and get files and write to database tables - - logger.info("====== timeWhenAsyncStarted = " + timeWhenAsyncStarted + " ====== task.getRequest_time().toString() ====== " + task.getRequest_time()); - - boolean success = getSuccessfulTransfers(clientTokenUser, task.getTask_id() , identifierForFileStorage) ; - - if(success) - { - logger.info("SUCCESS ====== " + timeWhenAsyncStarted + " timeWhenAsyncStarted is before tastTime = TASK time = " + task.getTask_id()); - return task.getTask_id(); - } - } - else - { - //logger.info("====== " + timeWhenAsyncStarted + " timeWhenAsyncStarted is after tastTime = TASK time = " + task.getTask_id()); - //return task.getTask_id(); - } - } - } - } catch (MalformedURLException ex) { - logger.severe(ex.getMessage()); - logger.severe(ex.getCause().toString()); - } catch (Exception e) { - e.printStackTrace(); - } - return null; - } - - public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId, String identifierForFileStorage) throws MalformedURLException { - - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId+"/successful_transfers"); - - MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), - "GET", null); - - Transferlist transferlist = null; - - if (result.status == 200) { - transferlist = parseJson(result.jsonResponse, Transferlist.class, false); - for (int i = 0; i < transferlist.getDATA().size(); i++) { - SuccessfulTransfer successfulTransfer = transferlist.getDATA().get(i); - String pathToVerify = successfulTransfer.getDestination_path(); - logger.info("getSuccessfulTransfers : ======pathToVerify === " + pathToVerify + " ====identifierForFileStorage === " + identifierForFileStorage); - if(pathToVerify.contains(identifierForFileStorage)) - { - 
logger.info(" SUCCESS ====== " + pathToVerify + " ==== " + identifierForFileStorage); - return true; - } - } - } - return false; - } - public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId ) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId+"/successful_transfers"); @@ -474,8 +225,6 @@ public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - Transferlist transferlist = null; - if (result.status == 200) { logger.info(" SUCCESS ====== " ); return true; @@ -483,8 +232,6 @@ public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId return false; } - - public AccessToken getClientToken() throws MalformedURLException { String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); URL url = new URL("https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); @@ -525,17 +272,6 @@ public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGl } - public UserInfo getUserInfo(AccessToken accessTokenUser) throws MalformedURLException { - - URL url = new URL("https://auth.globus.org/v2/oauth2/userinfo"); - MakeRequestResponse result = makeRequest(url, "Bearer" , accessTokenUser.getAccessToken() , "GET", null); - UserInfo usr = null; - if (result.status == 200) { - usr = parseJson(result.jsonResponse, UserInfo.class, true); - } - - return usr; - } public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java b/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java deleted file mode 100644 index 
6411262b5c9..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/Identities.java +++ /dev/null @@ -1,16 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -import java.util.ArrayList; - - -public class Identities { - ArrayList identities; - - public void setIdentities(ArrayList identities) { - this.identities = identities; - } - - public ArrayList getIdentities() { - return identities; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java b/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java deleted file mode 100644 index 265bd55217a..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/Identity.java +++ /dev/null @@ -1,67 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -public class Identity { - private String id; - private String username; - private String status; - private String name; - private String email; - private String identityProvider; - private String organization; - - public void setOrganization(String organization) { - this.organization = organization; - } - - public void setIdentityProvider(String identityProvider) { - this.identityProvider = identityProvider; - } - - public void setName(String name) { - this.name = name; - } - - public void setEmail(String email) { - this.email = email; - } - - public void setId(String id) { - this.id = id; - } - - public void setStatus(String status) { - this.status = status; - } - - public void setUsername(String username) { - this.username = username; - } - - public String getOrganization() { - return organization; - } - - public String getIdentityProvider() { - return identityProvider; - } - - public String getName() { - return name; - } - - public String getEmail() { - return email; - } - - public String getId() { - return id; - } - - public String getStatus() { - return status; - } - - public String getUsername() { - return username; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java 
b/src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java deleted file mode 100644 index 2c906f1f31d..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/MkDir.java +++ /dev/null @@ -1,22 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -public class MkDir { - private String DATA_TYPE; - private String path; - - public void setDataType(String DATA_TYPE) { - this.DATA_TYPE = DATA_TYPE; - } - - public void setPath(String path) { - this.path = path; - } - - public String getDataType() { - return DATA_TYPE; - } - - public String getPath() { - return path; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java b/src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java deleted file mode 100644 index d31b34b8e70..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/MkDirResponse.java +++ /dev/null @@ -1,50 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -public class MkDirResponse { - private String DATA_TYPE; - private String code; - private String message; - private String request_id; - private String resource; - - public void setCode(String code) { - this.code = code; - } - - public void setDataType(String dataType) { - this.DATA_TYPE = dataType; - } - - public void setMessage(String message) { - this.message = message; - } - - public void setRequestId(String requestId) { - this.request_id = requestId; - } - - public void setResource(String resource) { - this.resource = resource; - } - - public String getCode() { - return code; - } - - public String getDataType() { - return DATA_TYPE; - } - - public String getMessage() { - return message; - } - - public String getRequestId() { - return request_id; - } - - public String getResource() { - return resource; - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java b/src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java deleted file mode 100644 index a30b1ecdc04..00000000000 --- 
a/src/main/java/edu/harvard/iq/dataverse/globus/PermissionsResponse.java +++ /dev/null @@ -1,58 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -public class PermissionsResponse { - private String code; - private String resource; - private String DATA_TYPE; - private String request_id; - private String access_id; - private String message; - - public String getDATA_TYPE() { - return DATA_TYPE; - } - - public String getResource() { - return resource; - } - - public String getRequestId() { - return request_id; - } - - public String getMessage() { - return message; - } - - public String getCode() { - return code; - } - - public String getAccessId() { - return access_id; - } - - public void setDATA_TYPE(String DATA_TYPE) { - this.DATA_TYPE = DATA_TYPE; - } - - public void setResource(String resource) { - this.resource = resource; - } - - public void setRequestId(String requestId) { - this.request_id = requestId; - } - - public void setMessage(String message) { - this.message = message; - } - - public void setCode(String code) { - this.code = code; - } - - public void setAccessId(String accessId) { - this.access_id = accessId; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java b/src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java deleted file mode 100644 index 6e2e5810a0a..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/SuccessfulTransfer.java +++ /dev/null @@ -1,35 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -public class SuccessfulTransfer { - - private String DATA_TYPE; - private String destination_path; - - public String getDATA_TYPE() { - return DATA_TYPE; - } - - public void setDATA_TYPE(String DATA_TYPE) { - this.DATA_TYPE = DATA_TYPE; - } - - public String getDestination_path() { - return destination_path; - } - - public void setDestination_path(String destination_path) { - this.destination_path = destination_path; - } - - public String getSource_path() { - return source_path; - 
} - - public void setSource_path(String source_path) { - this.source_path = source_path; - } - - private String source_path; - - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java deleted file mode 100644 index 8d9f13f8ddf..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java +++ /dev/null @@ -1,69 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -public class Task { - - private String DATA_TYPE; - private String type; - private String status; - private String owner_id; - private String request_time; - private String task_id; - private String destination_endpoint_display_name; - - public String getDestination_endpoint_display_name() { - return destination_endpoint_display_name; - } - - public void setDestination_endpoint_display_name(String destination_endpoint_display_name) { - this.destination_endpoint_display_name = destination_endpoint_display_name; - } - - public void setRequest_time(String request_time) { - this.request_time = request_time; - } - - public String getRequest_time() { - return request_time; - } - - public String getTask_id() { - return task_id; - } - - public void setTask_id(String task_id) { - this.task_id = task_id; - } - - public String getDATA_TYPE() { - return DATA_TYPE; - } - - public void setDATA_TYPE(String DATA_TYPE) { - this.DATA_TYPE = DATA_TYPE; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getStatus() { - return status; - } - - public void setStatus(String status) { - this.status = status; - } - - public String getOwner_id() { - return owner_id; - } - - public void setOwner_id(String owner_id) { - this.owner_id = owner_id; - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java b/src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java deleted file mode 100644 index 34e8c6c528e..00000000000 --- 
a/src/main/java/edu/harvard/iq/dataverse/globus/Tasklist.java +++ /dev/null @@ -1,17 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -import java.util.ArrayList; - -public class Tasklist { - - private ArrayList DATA; - - public void setDATA(ArrayList DATA) { - this.DATA = DATA; - } - - public ArrayList getDATA() { - return DATA; - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java b/src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java deleted file mode 100644 index 0a1bd607ee2..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/Transferlist.java +++ /dev/null @@ -1,18 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -import java.util.ArrayList; - -public class Transferlist { - - - private ArrayList DATA; - - public void setDATA(ArrayList DATA) { - this.DATA = DATA; - } - - public ArrayList getDATA() { - return DATA; - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java b/src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java deleted file mode 100644 index a195486dd0b..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/globus/UserInfo.java +++ /dev/null @@ -1,68 +0,0 @@ -package edu.harvard.iq.dataverse.globus; - -public class UserInfo implements java.io.Serializable{ - - private String identityProviderDisplayName; - private String identityProvider; - private String organization; - private String sub; - private String preferredUsername; - private String name; - private String email; - - public void setEmail(String email) { - this.email = email; - } - - public void setName(String name) { - this.name = name; - } - - public void setPreferredUsername(String preferredUsername) { - this.preferredUsername = preferredUsername; - } - - public void setSub(String sub) { - this.sub = sub; - } - - public void setIdentityProvider(String identityProvider) { - this.identityProvider = identityProvider; - } - - public void setIdentityProviderDisplayName(String 
identityProviderDisplayName) { - this.identityProviderDisplayName = identityProviderDisplayName; - } - - public void setOrganization(String organization) { - this.organization = organization; - } - - public String getEmail() { - return email; - } - - public String getPreferredUsername() { - return preferredUsername; - } - - public String getSub() { - return sub; - } - - public String getName() { - return name; - } - - public String getIdentityProvider() { - return identityProvider; - } - - public String getIdentityProviderDisplayName() { - return identityProviderDisplayName; - } - - public String getOrganization() { - return organization; - } -} From a4531f54ab2565c8015493a3bcaa1043bed6137f Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 20 Apr 2021 16:55:47 -0400 Subject: [PATCH 068/161] update --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index f56674cb351..42f17d53183 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2620,7 +2620,9 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, } catch (DataFileTagException ex) { return error(Response.Status.BAD_REQUEST, ex.getMessage()); } - + catch (ClassCastException | com.google.gson.JsonParseException ex) { + return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); + } // ------------------------------------- // (3) Get the file name and content type // ------------------------------------- @@ -2704,10 +2706,10 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, logger.log(Level.INFO, "Success Number of Files " + successNumberofFiles); DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); if (dcmLock == null) { - 
logger.log(Level.WARNING, "Dataset not locked for Globus upload"); + logger.log(Level.WARNING, "No lock found for dataset"); } else { - logger.log(Level.INFO, "Dataset remove locked for Globus upload"); datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + logger.log(Level.INFO, "Removed EditInProgress lock "); //dataset.removeLock(dcmLock); } From dc9b9711d2883f6ea8308dea54b6e23713479ace Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 26 Apr 2021 12:52:07 -0400 Subject: [PATCH 069/161] added errormessages in the email notification after the globus transfer --- .../iq/dataverse/DatasetServiceBean.java | 252 +++++++++++------- .../harvard/iq/dataverse/MailServiceBean.java | 52 +++- .../iq/dataverse/UserNotification.java | 3 +- .../providers/builtin/DataverseUserPage.java | 9 +- .../dataverse/globus/GlobusServiceBean.java | 39 +++ .../edu/harvard/iq/dataverse/globus/Task.java | 92 +++++++ .../harvard/iq/dataverse/util/MailUtil.java | 30 ++- src/main/java/propertyFiles/Bundle.properties | 19 +- src/main/webapp/dataverseuser.xhtml | 22 +- 9 files changed, 396 insertions(+), 122 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/Task.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 8b715788172..823d52814b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -19,6 +19,7 @@ import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.Task; import edu.harvard.iq.dataverse.globus.fileDetailsHolder; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; @@ -1094,8 +1095,8 @@ public void globusUpload(String 
jsonData, ApiToken token, Dataset dataset, Strin String datasetIdentifier = dataset.getStorageIdentifier(); - String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") +3); - datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") +3); + String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); + datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); Thread.sleep(5000); @@ -1110,106 +1111,123 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String taskIdentifier = jsonObject.getString("taskIdentifier"); - String ruleId = "" ; + String ruleId = ""; try { jsonObject.getString("ruleId"); - }catch (NullPointerException npe){ + } catch (NullPointerException npe) { } // globus task status check - globusStatusCheck(taskIdentifier,globusLogger); - - globusServiceBean.deletePermision(ruleId,globusLogger); - - try { - List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + String taskStatus = globusStatusCheck(taskIdentifier, globusLogger); + Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); - if (filesJsonArray != null) { + if(ruleId.length() > 0) { + globusServiceBean.deletePermision(ruleId, globusLogger); + } - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { + String comment = "Reason : " + taskStatus.split("#") [1] + "
Short Description " + taskStatus.split("#")[2]; + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); + globusLogger.info("Globus task failed "); + } + else { + try { + List inputList = new ArrayList(); + JsonArray filesJsonArray = jsonObject.getJsonArray("files"); - // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from externalTool - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] bits = storageIdentifier.split(":"); - String bucketName = bits[1].replace("/", ""); - String fileId = bits[bits.length-1]; + if (filesJsonArray != null) { - // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storageType + bucketName + "/" + datasetIdentifier +"/" +fileId ; - String fileName = fileJsonObject.getString("fileName"); + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); - } + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from externalTool + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String[] bits = storageIdentifier.split(":"); + String bucketName = bits[1].replace("/", ""); + String fileId = bits[bits.length - 1]; - // calculateMissingMetadataFields: checksum, mimetype - JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList,globusLogger); - JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); + // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; + String fileName = fileJsonObject.getString("fileName"); - JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder() ; + inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); + } - for 
(JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + // calculateMissingMetadataFields: checksum, mimetype + JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); + JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - countAll++; - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String fileName = fileJsonObject.getString("fileName"); - String directoryLabel = fileJsonObject.getString("directoryLabel"); - String[] bits = storageIdentifier.split(":"); - String fileId = bits[bits.length-1]; + JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); - List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size() ) - .mapToObj(index -> ((JsonObject)newfilesJsonArray.get(index)).getJsonObject(fileId)) - .filter(Objects::nonNull).collect(Collectors.toList()); + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - if(newfileJsonObject != null) { - JsonPatch path = Json.createPatchBuilder().add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); - fileJsonObject = path.apply(fileJsonObject); - path = Json.createPatchBuilder().add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); - fileJsonObject = path.apply(fileJsonObject); - jsonDataSecondAPI.add(stringToJsonObjectBuilder(fileJsonObject.toString())); - countSuccess++; - } - else { - globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); - countError++; + countAll++; + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String fileName = fileJsonObject.getString("fileName"); + String directoryLabel = fileJsonObject.getString("directoryLabel"); + String[] bits = storageIdentifier.split(":"); + String fileId = bits[bits.length - 1]; + + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) + .mapToObj(index -> ((JsonObject) 
newfilesJsonArray.get(index)).getJsonObject(fileId)) + .filter(Objects::nonNull).collect(Collectors.toList()); + + if (newfileJsonObject != null) { + if ( !newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder().add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = Json.createPatchBuilder().add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + jsonDataSecondAPI.add(stringToJsonObjectBuilder(fileJsonObject.toString())); + countSuccess++; + } else { + globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); + countError++; + } + } else { + globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); + countError++; + } } - } - String newjsonData = jsonDataSecondAPI.build().toString(); + String newjsonData = jsonDataSecondAPI.build().toString(); - globusLogger.info("Successfully generated new JsonData for Second API call"); + globusLogger.info("Successfully generated new JsonData for Second API call"); - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST "+httpRequestUrl+"/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; - System.out.println("*******====command ==== " + command); + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; + System.out.println("*******====command ==== " + command); - String output = addFilesAsync(command , globusLogger ) ; - if(output.equalsIgnoreCase("ok")) - { - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), 
UserNotification.Type.GLOBUSUPLOADSUCCESS, dataset.getId(),""); + String output = addFilesAsync(command, globusLogger); + if (output.equalsIgnoreCase("ok")) { + //if(!taskSkippedFiles) + if (countError == 0 ){ + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, dataset.getId(), countSuccess + " files added out of "+ countAll , true); + } + else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), countSuccess + " files added out of "+ countAll , true); + } + globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); + } else { + globusLogger.log(Level.SEVERE, "******* Error while executing api/datasets/:persistentId/add call ", command); + } - globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); - } - else - { - globusLogger.log(Level.SEVERE, "******* Error while executing api/datasets/:persistentId/add call ", command); } - } + globusLogger.info("Files processed: " + countAll.toString()); + globusLogger.info("Files added successfully: " + countSuccess.toString()); + globusLogger.info("Files failures: " + countError.toString()); + globusLogger.info("Finished upload via Globus job."); - globusLogger.info("Files processed: " + countAll.toString()); - globusLogger.info("Files added successfully: " + countSuccess.toString()); - globusLogger.info("Files failures: " + countError.toString()); - globusLogger.info("Finished upload via Globus job."); + if (fileHandlerSuceeded) { + fileHandler.close(); + } - if (fileHandlerSuceeded) { - fileHandler.close(); + } catch (Exception e) { + logger.info("Exception from globusUpload call "); + e.printStackTrace(); + globusLogger.info("Exception from globusUpload call " + e.getMessage()); } - - } catch (Exception e) { - logger.info("Exception 
"); - e.printStackTrace(); } } @@ -1230,23 +1248,62 @@ public static JsonObjectBuilder stringToJsonObjectBuilder(String str) { Executor executor = Executors.newFixedThreadPool(10); - private Boolean globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { - boolean success = false; + private String globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { + boolean taskCompletion = false; + String status = ""; do { try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(50000); AccessToken clientTokenUser = globusServiceBean.getClientToken(); - success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); + //success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); + Task task = globusServiceBean.getTask(clientTokenUser,taskId, globusLogger); + status = task.getStatus(); + if(status != null) { + //The task is in progress. + if (status.equalsIgnoreCase("ACTIVE")) { + if(task.getNice_status().equalsIgnoreCase("ok") || task.getNice_status().equalsIgnoreCase("queued")) { + taskCompletion = false; + } + else { + taskCompletion = true; + status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } else { + //The task is either succeeded, failed or inactive. 
+ taskCompletion = true; + status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } + else { + status = "FAILED"; + taskCompletion = true; + } } catch (Exception ex) { ex.printStackTrace(); } - } while (!success); + } while (!taskCompletion); globusLogger.info("globus transfer task completed successfully"); - return success; + return status; + } + + private Boolean taskSkippedFiles(String taskId, Logger globusLogger) throws MalformedURLException { + + try { + globusLogger.info("checking globus transfer task " + taskId); + Thread.sleep(50000); + AccessToken clientTokenUser = globusServiceBean.getClientToken(); + return globusServiceBean.getTaskSkippedErrors(clientTokenUser,taskId, globusLogger); + + } catch (Exception ex) { + ex.printStackTrace(); + } + + return false; + } @@ -1314,7 +1371,11 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw in = dataFileStorageIO.getInputStream(); checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); count = 3; - } catch (Exception ex) { + }catch (IOException ioex) { + count = 3; + logger.info(ioex.getMessage()); + globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath + ") does not appear to be an S3 object associated with driver: " ); + }catch (Exception ex) { count = count + 1; ex.printStackTrace(); logger.info(ex.getMessage()); @@ -1323,14 +1384,13 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw } while (count < 3); - if(checksumVal.length() > 0 ) { - String mimeType = calculatemime(fileName); - globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = " + checksumVal + " mimeType = " + mimeType); - return new fileDetailsHolder(fileId, checksumVal, mimeType); - } - else { - return null; + if(checksumVal.length() == 0 ) { + checksumVal = "NULL"; } + + String mimeType = calculatemime(fileName); + globusLogger.info(" File Name " + fileName + " File Details " + 
fileId + " checksum = " + checksumVal + " mimeType = " + mimeType); + return new fileDetailsHolder(fileId, checksumVal, mimeType); //getBytes(in)+"" ); // calculatemime(fileName)); } @@ -1457,15 +1517,27 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } // globus task status check - globusStatusCheck(taskIdentifier,globusLogger); - - // what if some files failed during download? + String taskStatus = globusStatusCheck(taskIdentifier,globusLogger); + Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); if(ruleId.length() > 0) { globusServiceBean.deletePermision(ruleId, globusLogger); } - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADSUCCESS, dataset.getId()); + + if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { + String comment = "Reason : " + taskStatus.split("#") [1] + "
Short Description : " + taskStatus.split("#")[2]; + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); + globusLogger.info("Globus task failed during download process"); + } + else { + if(!taskSkippedFiles) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETED, dataset.getId()); + } + else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(), ""); + } + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index e476a4e55b0..329058aa7a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -546,23 +546,48 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio logger.fine("fileImportMsg: " + fileImportMsg); return messageText += fileImportMsg; - case GLOBUSUPLOADSUCCESS: + case GLOBUSUPLOADCOMPLETED: dataset = (Dataset) targetObject; - String fileMsg = BundleUtil.getStringFromBundle("notification.mail.import.globus", Arrays.asList( + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadCompletedMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.completed", Arrays.asList( systemConfig.getDataverseSiteUrl(), dataset.getGlobalIdString(), - dataset.getDisplayName() - )); - return messageText += fileMsg; + dataset.getDisplayName(), + comment + )) ; + return uploadCompletedMessage; - case GLOBUSDOWNLOADSUCCESS: + case GLOBUSDOWNLOADCOMPLETED: dataset = (Dataset) targetObject; - String fileDownloadMsg = 
BundleUtil.getStringFromBundle("notification.mail.download.globus", Arrays.asList( + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String downloadCompletedMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completed", Arrays.asList( systemConfig.getDataverseSiteUrl(), dataset.getGlobalIdString(), - dataset.getDisplayName() - )); - return messageText += fileDownloadMsg; + dataset.getDisplayName(), + comment + )) ; + return downloadCompletedMessage; + case GLOBUSUPLOADCOMPLETEDWITHERRORS: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.completedWithErrors", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalIdString(), + dataset.getDisplayName(), + comment + )) ; + return uploadCompletedWithErrorsMessage; + + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String downloadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completedWithErrors", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalIdString(), + dataset.getDisplayName(), + comment + )) ; + return downloadCompletedWithErrorsMessage; case CHECKSUMIMPORT: version = (DatasetVersion) targetObject; @@ -638,9 +663,10 @@ private Object getObjectOfNotification (UserNotification userNotification){ return datasetService.find(userNotification.getObjectId()); case FILESYSTEMIMPORT: return versionService.find(userNotification.getObjectId()); - case GLOBUSUPLOADSUCCESS: - return datasetService.find(userNotification.getObjectId()); - case GLOBUSDOWNLOADSUCCESS: + case GLOBUSUPLOADCOMPLETED: + case GLOBUSUPLOADCOMPLETEDWITHERRORS: + case 
GLOBUSDOWNLOADCOMPLETED: + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: return datasetService.find(userNotification.getObjectId()); case CHECKSUMIMPORT: return versionService.find(userNotification.getObjectId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java index 78ef2bb6783..8a8f3d7d620 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java @@ -30,7 +30,8 @@ public enum Type { ASSIGNROLE, REVOKEROLE, CREATEDV, CREATEDS, CREATEACC, SUBMITTEDDS, RETURNEDDS, PUBLISHEDDS, REQUESTFILEACCESS, GRANTFILEACCESS, REJECTFILEACCESS, FILESYSTEMIMPORT, CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS, - PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, GLOBUSUPLOADSUCCESS,GLOBUSDOWNLOADSUCCESS; + PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, GLOBUSUPLOADCOMPLETED, GLOBUSUPLOADCOMPLETEDWITHERRORS, + GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS; }; private static final long serialVersionUID = 1L; diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index 4596ac8b3cc..4c7c35bfc73 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -507,11 +507,10 @@ public void displayNotification() { userNotification.setTheObject(datasetVersionService.find(userNotification.getObjectId())); break; - case GLOBUSUPLOADSUCCESS: - userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); - break; - - case GLOBUSDOWNLOADSUCCESS: + case GLOBUSUPLOADCOMPLETED: + case GLOBUSUPLOADCOMPLETEDWITHERRORS: + case 
GLOBUSDOWNLOADCOMPLETED: + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); break; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index a59a2ca77c1..9cfbf432790 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -232,6 +232,45 @@ public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId return false; } + public Task getTask(AccessToken clientTokenUser, String taskId , Logger globusLogger) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId ); + + MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), + "GET", null); + + Task task = null; + String status = null; + //2019-12-01 18:34:37+00:00 + + if (result.status == 200) { + task = parseJson(result.jsonResponse, Task.class, false); + status = task.getStatus(); + } + if (result.status != 200) { + globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + result.jsonResponse.toString()); + } + + return task; + } + + public Boolean getTaskSkippedErrors(AccessToken clientTokenUser, String taskId , Logger globusLogger) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId ); + + MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), + "GET", null); + + Task task = null; + + if (result.status == 200) { + task = parseJson(result.jsonResponse, Task.class, false); + return task.getSkip_source_errors(); + } + + return false; + } + public AccessToken getClientToken() throws MalformedURLException { String basicGlobusToken = 
settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); URL url = new URL("https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java new file mode 100644 index 00000000000..911c84c0d34 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java @@ -0,0 +1,92 @@ +package edu.harvard.iq.dataverse.globus; + +import org.apache.xpath.operations.Bool; + +public class Task { + + private String DATA_TYPE; + private String type; + private String status; + private String owner_id; + private String request_time; + private String task_id; + private String destination_endpoint_display_name; + private boolean skip_source_errors; + private String nice_status; + private String nice_status_short_description; + + public String getDestination_endpoint_display_name() { + return destination_endpoint_display_name; + } + + public void setDestination_endpoint_display_name(String destination_endpoint_display_name) { + this.destination_endpoint_display_name = destination_endpoint_display_name; + } + + public void setRequest_time(String request_time) { + this.request_time = request_time; + } + + public String getRequest_time() { + return request_time; + } + + public String getTask_id() { + return task_id; + } + + public void setTask_id(String task_id) { + this.task_id = task_id; + } + + public String getDATA_TYPE() { + return DATA_TYPE; + } + + public void setDATA_TYPE(String DATA_TYPE) { + this.DATA_TYPE = DATA_TYPE; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getOwner_id() { + return owner_id; + } + + public void setOwner_id(String 
owner_id) { + this.owner_id = owner_id; + } + + public Boolean getSkip_source_errors() { + return skip_source_errors; + } + + public void setSkip_source_errors(Boolean skip_source_errors) { + this.skip_source_errors = skip_source_errors; + } + + public String getNice_status() { + return nice_status; + } + + public void setNice_status(String nice_status) { + this.nice_status = nice_status; + } + + public String getNice_status_short_description() { return nice_status_short_description; } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index ec665561860..94a2da72b8a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -70,13 +70,37 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti } catch (Exception e) { return BundleUtil.getStringFromBundle("notification.email.import.filesystem.subject", rootDvNameAsList); } - case GLOBUSUPLOADSUCCESS: + case GLOBUSUPLOADCOMPLETED: try { DatasetVersion version = (DatasetVersion)objectOfNotification; List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); - return BundleUtil.getStringFromBundle("notification.email.import.globus.subject", dsNameAsList); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompleted.subject", dsNameAsList); } catch (Exception e) { - return BundleUtil.getStringFromBundle("notification.email.import.globus.subject", rootDvNameAsList); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompleted.subject", rootDvNameAsList); + } + case GLOBUSDOWNLOADCOMPLETED: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.downloadCompleted.subject", dsNameAsList); + } 
catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.downloadCompleted.subject", rootDvNameAsList); + } + case GLOBUSUPLOADCOMPLETEDWITHERRORS: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompletedWithErrors.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompletedWithErrors.subject", rootDvNameAsList); + } + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.downloadCompletedWithErrors.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.downloadCompletedWithErrors.subject", rootDvNameAsList); } case CHECKSUMIMPORT: diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index c4adba1a94e..35487d74cf7 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -219,11 +219,15 @@ notification.checksumfail=One or more files in your upload failed checksum valid notification.ingest.completed=Dataset {2} ingest process has successfully finished.

Ingested files:{3}
notification.ingest.completedwitherrors=Dataset {2} ingest process has finished with errors.

Ingested files:{3}
notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded and verified. -notification.mail.import.globus=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded via Globus and verified. -notification.mail.download.globus=Files from the dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully downloaded via Globus and verified. +notification.mail.globus.upload.completed=Dataset {2} has been successfully uploaded via Globus and verified.

{3}
+notification.mail.globus.download.completed=Files from the dataset {2} has been successfully downloaded via Globus.

{3}
+notification.mail.globus.upload.completedWithErrors=Dataset {2} : uploading files via Globus has been completed with errors.

{3}
+notification.mail.globus.download.completedWithErrors=Files from the dataset {2} : downloading files via Globus has been completed with errors.

{3}
notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. -notification.import.globus=Dataset {1} has been successfully uploaded via Globus and verified. -notification.download.globus=Files from the dataset {1} has been successfully downloaded via Globus and verified. +notification.globus.upload.completed=Dataset {1} has been successfully uploaded via Globus and verified. +notification.globus.download.completed=Files from the dataset {1} has been successfully downloaded via Globus. +notification.globus.upload.completedWithErrors=Dataset {1} : uploading files via Globus has been completed with errors. +notification.globus.download.completedWithErrors=Files from the dataset {1} : downloading files via Globus has been completed with errors. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification groupAndRoles.manageTips=Here is where you can access and manage all the groups you belong to, and the roles you have been assigned. @@ -712,8 +716,11 @@ contact.delegation={0} on behalf of {1} notification.email.info.unavailable=Unavailable notification.email.apiTokenGenerated=Hello {0} {1},\n\nAPI Token has been generated. Please keep it secure as you would do with a password. 
notification.email.apiTokenGenerated.subject=API Token was generated -notification.email.import.globus.subject=Dataset {0} has been successfully uploaded via Globus and verified -notification.email.download.globus.subject=Files from the dataset {0} has been successfully downloaded via Globus and verified +notification.email.globus.uploadCompleted.subject={0}: Files uploaded successfully via Globus and verified +notification.email.globus.downloadCompleted.subject={0}: Files downloaded successfully via Globus +notification.email.globus.uploadCompletedWithErrors.subject={0}: Uploaded files via Globus with errors +notification.email.globus.downloadCompletedWithErrors.subject={0}: Downloaded files via Globus with errors + # dataverse.xhtml dataverse.name=Dataverse Name diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index 05ebf5f3b7a..2bb65578517 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -286,16 +286,30 @@
- + - + - + - + + + + + + + + + + + + + + + From 9dfdb2f2d4e2d0c45f1bf8f56e346847ba0a9f5b Mon Sep 17 00:00:00 2001 From: chenganj Date: Mon, 26 Apr 2021 14:36:42 -0400 Subject: [PATCH 070/161] remove lock, if globus transfer failed due to GC not connected --- .../edu/harvard/iq/dataverse/DatasetServiceBean.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 823d52814b1..1ed64ee69cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -1127,9 +1127,18 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { - String comment = "Reason : " + taskStatus.split("#") [1] + "
Short Description " + taskStatus.split("#")[2]; + String comment = "Reason : " + taskStatus.split("#") [1] + "
Short Description : " + taskStatus.split("#")[2]; userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); globusLogger.info("Globus task failed "); + + DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); + if (dcmLock == null) { + logger.log(Level.WARNING, "No lock found for dataset"); + } else { + removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + logger.log(Level.INFO, "Removed EditInProgress lock "); + //dataset.removeLock(dcmLock); + } } else { try { From 04ac3994216471875878e8345d7df3391baf5bd7 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 24 Aug 2021 16:09:48 -0400 Subject: [PATCH 071/161] - --- .../edu/harvard/iq/dataverse/globus/Task.java | 1 - .../iq/dataverse/util/SystemConfig.java | 30 ------------------- 2 files changed, 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java index 911c84c0d34..4b2a56a110d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.globus; -import org.apache.xpath.operations.Bool; public class Task { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index dfb289c75b3..1d1a4cc4e6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -571,36 +571,6 @@ public Integer getSearchHighlightFragmentSize() { return null; } - public long getChecksumDatasetSizeLimit() { - String limitEntry = settingsService.getValueForKey(SettingsServiceBean.Key.ChecksumDatasetSizeLimit); - - if (limitEntry != null) { - try { - Long sizeOption = new Long(limitEntry); - return 
sizeOption; - } catch (NumberFormatException nfe) { - logger.warning("Invalid value for TabularIngestSizeLimit option? - " + limitEntry); - } - } - // -1 means no limit is set; - return -1; - } - - public long getChecksumFileSizeLimit() { - String limitEntry = settingsService.getValueForKey(SettingsServiceBean.Key.ChecksumFileSizeLimit); - - if (limitEntry != null) { - try { - Long sizeOption = new Long(limitEntry); - return sizeOption; - } catch (NumberFormatException nfe) { - logger.warning("Invalid value for TabularIngestSizeLimit option? - " + limitEntry); - } - } - // -1 means no limit is set; - return -1; - } - public long getTabularIngestSizeLimit() { // This method will return the blanket ingestable size limit, if // set on the system. I.e., the universal limit that applies to all From f9c34d2c021cf5eae666a885623246f50a70428a Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 24 Aug 2021 16:46:02 -0400 Subject: [PATCH 072/161] - --- .../harvard/iq/dataverse/util/FileUtil.java | 163 ++++++++---------- 1 file changed, 71 insertions(+), 92 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index bfaa7fcfc2f..ea45922c67d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -20,7 +20,7 @@ package edu.harvard.iq.dataverse.util; -import com.amazonaws.services.s3.model.S3ObjectSummary; + import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.DataFileServiceBean; @@ -685,7 +685,6 @@ public static String calculateChecksum(InputStream in, ChecksumType checksumType return checksumDigestToString(md.digest()); } - public static String calculateChecksum(byte[] dataBytes, ChecksumType checksumType) { MessageDigest md = null; @@ -1764,113 +1763,93 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { } 
public static void validateDataFileChecksum(DataFile dataFile) throws IOException { - String recalculatedChecksum = null; - /* if (dataFile.getContentType().equals(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE)) { - for (S3ObjectSummary s3ObjectSummary : dataFile.getStorageIO().listAuxObjects("")) { - recalculatedChecksum = s3ObjectSummary.getETag(); - if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); - } - } - } else {*/ - DataFile.ChecksumType checksumType = dataFile.getChecksumType(); - - logger.info(checksumType.toString()); - if (checksumType == null) { - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); - } + DataFile.ChecksumType checksumType = dataFile.getChecksumType(); + if (checksumType == null) { + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.noChecksumType", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); + } - StorageIO storage = dataFile.getStorageIO(); - InputStream in = null; + StorageIO storage = dataFile.getStorageIO(); + InputStream in = null; - try { - storage.open(DataAccessOption.READ_ACCESS); + try { + storage.open(DataAccessOption.READ_ACCESS); - if (!dataFile.isTabularData()) { - logger.info("It is not tabular"); - in = storage.getInputStream(); - } else { - // if this is a tabular file, read the preserved original "auxiliary file" - // instead: - in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } - } catch (IOException ioex) { - in = null; + if (!dataFile.isTabularData()) { + in = storage.getInputStream(); + } else { + // if this 
is a tabular file, read the preserved original "auxiliary file" + // instead: + in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); } + } catch (IOException ioex) { + in = null; + } - if (in == null) { - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); - } + if (in == null) { + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); + } - try { - logger.info("Before calculating checksum"); - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); - logger.info("Checksum:" + recalculatedChecksum); - } catch (RuntimeException rte) { - recalculatedChecksum = null; - } finally { - IOUtils.closeQuietly(in); - } + String recalculatedChecksum = null; + try { + recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); + } catch (RuntimeException rte) { + recalculatedChecksum = null; + } finally { + IOUtils.closeQuietly(in); + } - if (recalculatedChecksum == null) { - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); - } + if (recalculatedChecksum == null) { + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failCalculateChecksum", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); + } - // TODO? What should we do if the datafile does not have a non-null checksum? - // Should we fail, or should we assume that the recalculated checksum - // is correct, and populate the checksumValue field with it? 
- if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { - // There's one possible condition that is 100% recoverable and can - // be automatically fixed (issue #6660): - logger.info(dataFile.getChecksumValue()); - logger.info(recalculatedChecksum); - logger.info("Checksums are not equal"); - boolean fixed = false; - if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { - // try again, see if the .orig file happens to be there: + // TODO? What should we do if the datafile does not have a non-null checksum? + // Should we fail, or should we assume that the recalculated checksum + // is correct, and populate the checksumValue field with it? + if (!recalculatedChecksum.equals(dataFile.getChecksumValue())) { + // There's one possible condition that is 100% recoverable and can + // be automatically fixed (issue #6660): + boolean fixed = false; + if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { + // try again, see if the .orig file happens to be there: + try { + in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); + } catch (IOException ioex) { + in = null; + } + if (in != null) { try { - in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } catch (IOException ioex) { - in = null; + recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); + } catch (RuntimeException rte) { + recalculatedChecksum = null; + } finally { + IOUtils.closeQuietly(in); } - if (in != null) { + // try again: + if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { + fixed = true; try { - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); - } catch (RuntimeException rte) { - recalculatedChecksum = null; - } finally { - IOUtils.closeQuietly(in); - } - // try again: - if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { - fixed = true; - try { - storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } catch (IOException ioex) 
{ - fixed = false; - } + storage.revertBackupAsAux(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); + } catch (IOException ioex) { + fixed = false; } } } + } - if (!fixed) { - logger.info("checksum cannot be fixed"); - String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); - logger.log(Level.INFO, info); - throw new IOException(info); - } + if (!fixed) { + String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); + logger.log(Level.INFO, info); + throw new IOException(info); } - //} - logger.log(Level.INFO, "successfully validated DataFile {0}; checksum {1}", new Object[]{dataFile.getId(), recalculatedChecksum}); + } + logger.log(Level.INFO, "successfully validated DataFile {0}; checksum {1}", new Object[]{dataFile.getId(), recalculatedChecksum}); } public static String getStorageIdentifierFromLocation(String location) { From af8cced996bf73be569af1f89c9b1e474a817a22 Mon Sep 17 00:00:00 2001 From: chenganj Date: Tue, 24 Aug 2021 21:27:32 -0400 Subject: [PATCH 073/161] - --- .../java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index 52dff797e33..c9796d24b27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -16,8 +16,6 @@ import java.util.List; import java.util.logging.Logger; -import com.amazonaws.services.s3.model.S3ObjectSummary; - /** * * @author Leonid Andreev From a7ec3bf2a34196ca18bd265fee3f6960b0ac45d3 Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 3 Feb 2022 13:23:21 -0500 Subject: [PATCH 074/161] Merge branch 'develop' into develop-globus-phase2.1 # Conflicts: # 
src/main/java/edu/harvard/iq/dataverse/DatasetPage.java # src/main/java/edu/harvard/iq/dataverse/UserNotification.java # src/main/java/edu/harvard/iq/dataverse/api/Datasets.java # src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 5a932fce71a..f1b0d22a131 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3279,7 +3279,8 @@ public Response addFilesToDatasetold(@PathParam("id") String idSupplied, fileService, permissionSvc, commandEngine, - systemConfig + systemConfig, + licenseSvc ); // ------------------------------------- From 5feb2c178c55aaaf4af6bc0bdc5e5bb519e822d7 Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 3 Feb 2022 16:11:12 -0500 Subject: [PATCH 075/161] - removed old method --- .../harvard/iq/dataverse/api/Datasets.java | 219 ------------------ 1 file changed, 219 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index f1b0d22a131..e5bd60fe20e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3204,225 +3204,6 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, } - - /** - * Add a File to an existing Dataset - * - * @param idSupplied - * @param jsonData - * @return - */ - @POST - @Path("{id}/addFiles") - @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response addFilesToDatasetold(@PathParam("id") String idSupplied, - @FormDataParam("jsonData") String jsonData) { - - JsonArrayBuilder jarr = Json.createArrayBuilder(); - - if (!systemConfig.isHTTPUpload()) { - return 
error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - - // ------------------------------------- - // (1) Get the user from the API key - // ------------------------------------- - User authUser; - try { - authUser = findUserOrDie(); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } - - // ------------------------------------- - // (2) Get the Dataset Id - // ------------------------------------- - Dataset dataset; - - try { - dataset = findDatasetOrDie(idSupplied); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } - - - //------------------------------------ - // (2b) Make sure dataset does not have package file - // -------------------------------------- - - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") - ); - } - } - - msgt("******* (addFilesToDataset api) jsonData 1: " + jsonData.toString()); - - JsonArray filesJson = null; - try (StringReader rdr = new StringReader(jsonData)) { - //jsonObject = Json.createReader(rdr).readObject(); - filesJson = Json.createReader(rdr).readArray(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); - } - - - try { - DataverseRequest dvRequest = createDataverseRequest(authUser); - AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( - dvRequest, - ingestService, - datasetService, - fileService, - permissionSvc, - commandEngine, - systemConfig, - licenseSvc - ); - - // ------------------------------------- - // (6) Parse files information from jsondata - // ------------------------------------- - - int totalNumberofFiles = 0; - int successNumberofFiles = 0; - try { - // Start to add the files - if (filesJson != null) { - totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); - for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { - - OptionalFileParams optionalFileParams = null; - - try { - optionalFileParams = new OptionalFileParams(fileJson.toString()); - } catch (DataFileTagException ex) { - return error(Response.Status.BAD_REQUEST, ex.getMessage()); - } - catch (ClassCastException | com.google.gson.JsonParseException ex) { - return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); - } - // ------------------------------------- - // (3) Get the file name and content type - // ------------------------------------- - String newFilename = null; - String newFileContentType = null; - String newStorageIdentifier = null; - if (optionalFileParams.hasStorageIdentifier()) { - newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid - if (optionalFileParams.hasFileName()) { - newFilename = optionalFileParams.getFileName(); - if (optionalFileParams.hasMimetype()) { - newFileContentType = optionalFileParams.getMimeType(); - } - } - } else { - return error(BAD_REQUEST, - "You must upload a file or provide a storageidentifier, filename, and mimetype."); - } - - msg("ADD! 
= " + newFilename); - - //------------------- - // Run "runAddFileByDatasetId" - //------------------- - - addFileHelper.runAddFileByDataset(dataset, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - - if (addFileHelper.hasError()) { - - JsonObjectBuilder fileoutput = Json.createObjectBuilder() - .add("storageIdentifier ", newStorageIdentifier) - .add("error Code: ", addFileHelper.getHttpErrorCode().toString()) - .add("message ", addFileHelper.getErrorMessagesAsString("\n")); - - jarr.add(fileoutput); - - } else { - String successMsg = BundleUtil.getStringFromBundle("file.addreplace.success.add"); - - JsonObject successresult = addFileHelper.getSuccessResultAsJsonObjectBuilder().build(); - - try { - logger.fine("successMsg: " + successMsg); - String duplicateWarning = addFileHelper.getDuplicateFileWarning(); - if (duplicateWarning != null && !duplicateWarning.isEmpty()) { - // return ok(addFileHelper.getDuplicateFileWarning(), addFileHelper.getSuccessResultAsJsonObjectBuilder()); - JsonObjectBuilder fileoutput = Json.createObjectBuilder() - .add("storageIdentifier ", newStorageIdentifier) - .add("warning message: ", addFileHelper.getDuplicateFileWarning()) - .add("message ", successresult.getJsonArray("files").getJsonObject(0)); - jarr.add(fileoutput); - - } else { - JsonObjectBuilder fileoutput = Json.createObjectBuilder() - .add("storageIdentifier ", newStorageIdentifier) - .add("message ", successresult.getJsonArray("files").getJsonObject(0)); - jarr.add(fileoutput); - } - - } catch (Exception ex) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); - return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); - } - } - - successNumberofFiles = successNumberofFiles + 1; - } - }// End of adding files - } catch (Exception e) { - Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, e); - return error(Response.Status.BAD_REQUEST, "NoFileException! 
Serious Error! See administrator!"); - } - - logger.log(Level.INFO, "Total Number of Files " + totalNumberofFiles); - logger.log(Level.INFO, "Success Number of Files " + successNumberofFiles); - DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); - if (dcmLock == null) { - logger.log(Level.WARNING, "No lock found for dataset"); - } else { - datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.log(Level.INFO, "Removed EditInProgress lock "); - //dataset.removeLock(dcmLock); - } - - try { - Command cmd; - cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); - ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); - commandEngine.submit(cmd); - } catch (CommandException ex) { - logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "====== UpdateDatasetVersionCommand Exception : " + ex.getMessage()); - } - - dataset = datasetService.find(dataset.getId()); - - List s = dataset.getFiles(); - for (DataFile dataFile : s) { - } - //ingest job - ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); - - } catch (Exception e) { - String message = e.getMessage(); - msgt("******* datasetId :" + dataset.getId() + " ======= addFilesToDataset CALL Exception ============== " + message); - e.printStackTrace(); - } - - return ok(Json.createObjectBuilder().add("Files", jarr)); - - } // end: addFileToDataset - - @POST @Path("{id}/deleteglobusRule") @Consumes(MediaType.MULTIPART_FORM_DATA) From 231c68d3331c878dcb47ab1a602752376bb89f0f Mon Sep 17 00:00:00 2001 From: chenganj Date: Wed, 9 Feb 2022 10:07:35 -0500 Subject: [PATCH 076/161] - --- .../edu/harvard/iq/dataverse/settings/SettingsServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 3201b2953b4..dd7dd23bfd7 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -448,7 +448,7 @@ Whether Harvesting (OAI) service is enabled /**Client id for Globus application * */ - GlobusClientId, + //GlobusClientId, /** * Optional external executables to run on the metadata for dataverses * and datasets being published; as an extra validation step, to From 6fd22b1723d6d475b9f290148cee48fb1a43d727 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 10 May 2022 13:09:48 -0400 Subject: [PATCH 077/161] typos --- src/main/webapp/filesFragment.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index 7e05df63aae..13b36e1e23e 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -485,8 +485,8 @@ #{bundle['file.accessRequested']} 

- #{fileMetadata.dataFile.storageIO.remoteStoreName} + title="#{bundle['file.remotelyStored']}"> + #{fileMetadata.dataFile.storageIO.remoteStoreName} #{fileMetadata.dataFile.storageIO.remoteStoreName}
From 99fc91b18b26e3e5a0ad9f806f9e5f349fdb2ff6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 May 2022 13:52:22 -0400 Subject: [PATCH 078/161] logging, todos --- .../edu/harvard/iq/dataverse/DatasetServiceBean.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 4f412140891..8801a1a2c89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -27,6 +27,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.*; @@ -1191,6 +1192,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin jpe.printStackTrace(); logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); } + logger.fine("json: " + JsonUtil.prettyPrint(jsonObject)); String taskIdentifier = jsonObject.getString("taskIdentifier"); @@ -1205,6 +1207,9 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String taskStatus = globusStatusCheck(taskIdentifier, globusLogger); Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); + + + //ToDo - always "" from 1199 if(ruleId.length() > 0) { globusServiceBean.deletePermision(ruleId, globusLogger); } @@ -1454,8 +1459,8 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw String fullPath = id.split("IDsplit")[1]; String fileName = id.split("IDsplit")[2]; - // what if the file doesnot exists in s3 - // what if checksum calculation failed + //ToDo: what if the file doesnot exists in s3 + //ToDo: what if checksum calculation failed do { try { From 1b5f5ae840c40089d43aca6e542c6385b38ccc6d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jun 2022 14:05:54 -0400 Subject: [PATCH 079/161] refactor to allow getting driverId directly --- .../iq/dataverse/dataaccess/DataAccess.java | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 14ead925445..7b844cd125f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -62,18 +62,27 @@ public static StorageIO getStorageIO(T dvObject) throws return getStorageIO(dvObject, null); } + + + public static String getStorgageDriverFromIdentifier(String storageIdentifier) { + + int separatorIndex = storageIdentifier.indexOf(SEPARATOR); + String driverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; // default + if (separatorIndex > 0) { + driverId = storageIdentifier.substring(0, separatorIndex); + } + return driverId; + } + //passing DVObject instead 
of a datafile to accomodate for use of datafiles as well as datasets public static StorageIO getStorageIO(T dvObject, DataAccessRequest req) throws IOException { if (dvObject == null || dvObject.getStorageIdentifier() == null || dvObject.getStorageIdentifier().isEmpty()) { throw new IOException("getDataAccessObject: null or invalid datafile."); } - String storageIdentifier = dvObject.getStorageIdentifier(); - int separatorIndex = storageIdentifier.indexOf(SEPARATOR); - String storageDriverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; // default - if (separatorIndex > 0) { - storageDriverId = storageIdentifier.substring(0, separatorIndex); - } + + String storageDriverId = getStorgageDriverFromIdentifier(dvObject.getStorageIdentifier()); + return getStorageIO(dvObject, req, storageDriverId); } From 9759eb87761c95c8aa223f9a5a04591cfe9b853c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jun 2022 14:06:38 -0400 Subject: [PATCH 080/161] provide access to new dataverse.files..public store flag --- .../iq/dataverse/dataaccess/StorageIO.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 2918c19e32b..0e16c78e4b3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -35,10 +35,10 @@ import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.Path; +import java.util.HashMap; import java.util.Iterator; import java.util.List; - -import com.amazonaws.services.s3.model.S3ObjectSummary; +import java.util.Map; //import org.apache.commons.httpclient.Header; //import org.apache.commons.httpclient.methods.GetMethod; @@ -80,6 +80,18 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { protected boolean isReadAccess = false; protected boolean 
isWriteAccess = false; + + //A public store is one in which files may be accessible outside Dataverse and therefore accessible without regard to Dataverse's access controls related to restriction and embargoes. + //Currently, this is just used to warn users at upload time rather than disable restriction/embargo. + static protected Map driverPublicAccessMap = new HashMap(); + + public static boolean isPublicStore(String driverId) { + //Read once and cache + if(!driverPublicAccessMap.containsKey(driverId)) { + driverPublicAccessMap.put(driverId, Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); + } + return driverPublicAccessMap.get(driverId); + } public boolean canRead() { return isReadAccess; From f29c6b910f77bad014c23d0c26d95101065ea313 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jun 2022 14:07:18 -0400 Subject: [PATCH 081/161] use store flag as default if PublicInstall isn't set --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 10 ++++++++-- src/main/java/edu/harvard/iq/dataverse/FilePage.java | 12 +++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 59936b80c41..fbdcadb954c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1185,7 +1185,7 @@ public String getComputeUrl(FileMetadata metadata) { } catch (IOException e) { logger.info("DatasetPage: Failed to get storageIO"); } - if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, false)) { + if (isHasPublicStore()) { return settingsWrapper.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" 
+ this.getPersistentId() + "=" + swiftObject.getSwiftFileName(); } @@ -2056,7 +2056,7 @@ private String init(boolean initFull) { updateDatasetFieldInputLevels(); } - if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, false)){ + if (isHasPublicStore()){ JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.message.label.fileAccess"), BundleUtil.getStringFromBundle("dataset.message.publicInstall")); } @@ -5951,4 +5951,10 @@ public boolean downloadingRestrictedFiles() { } return false; } + + + //Determines whether this Dataset uses a public store and therefore doesn't support embargoed or restricted files + public boolean isHasPublicStore() { + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getStorageDriverId())); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 3fa6d4fdfff..6a84037f1ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -13,6 +13,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; @@ -843,7 +844,7 @@ public String getComputeUrl() throws IOException { if (swiftObject != null) { swiftObject.open(); //generate a temp url for a file - if (settingsService.isTrueForKey(SettingsServiceBean.Key.PublicInstall, false)) { + if (isHasPublicStore()) { return settingsService.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" 
+ this.getFile().getOwner().getGlobalIdString() + "=" + swiftObject.getSwiftFileName(); } return settingsService.getValueForKey(SettingsServiceBean.Key.ComputeBaseUrl) + "?" + this.getFile().getOwner().getGlobalIdString() + "=" + swiftObject.getSwiftFileName() + "&temp_url_sig=" + swiftObject.getTempUrlSignature() + "&temp_url_expires=" + swiftObject.getTempUrlExpiry(); @@ -935,8 +936,8 @@ public String getPublicDownloadUrl() { try { SwiftAccessIO swiftIO = (SwiftAccessIO) storageIO; swiftIO.open(); - //if its a public install, lets just give users the permanent URL! - if (systemConfig.isPublicInstall()){ + //if its a public store, lets just give users the permanent URL! + if (isHasPublicStore()){ fileDownloadUrl = swiftIO.getRemoteUrl(); } else { //TODO: if a user has access to this file, they should be given the swift url @@ -1165,5 +1166,10 @@ public String getEmbargoPhrase() { public String getIngestMessage() { return BundleUtil.getStringFromBundle("file.ingestFailed.message", Arrays.asList(settingsWrapper.getGuidesBaseUrl(), settingsWrapper.getGuidesVersion())); } + + //Determines whether this File uses a public store and therefore doesn't support embargoed or restricted files + public boolean isHasPublicStore() { + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(DataAccess.getStorgageDriverFromIdentifier(file.getStorageIdentifier()))); + } } From d7b0e43bab26aca15dc556d6b9166d3a57ae246e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jun 2022 14:07:31 -0400 Subject: [PATCH 082/161] cleanup --- .../edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 817136f8735..b71c192e117 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -933,9 +933,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary // them for some servers, we check whether the protocol is in the url and then // normalizing to use the part without the protocol String endpointServer = endpoint; - int protocolEnd = endpoint.indexOf("://"); + int protocolEnd = endpoint.indexOf(DataAccess.SEPARATOR); if (protocolEnd >=0 ) { - endpointServer = endpoint.substring(protocolEnd + 3); + endpointServer = endpoint.substring(protocolEnd + DataAccess.SEPARATOR.length()); } logger.fine("Endpoint: " + endpointServer); // We're then replacing @@ -994,9 +994,9 @@ private String generateTemporaryS3UploadUrl(String key, Date expiration) throws // them for some servers, we check whether the protocol is in the url and then // normalizing to use the part without the protocol String endpointServer = endpoint; - int protocolEnd = endpoint.indexOf("://"); + int protocolEnd = endpoint.indexOf(DataAccess.SEPARATOR); if (protocolEnd >=0 ) { - endpointServer = endpoint.substring(protocolEnd + 3); + endpointServer = endpoint.substring(protocolEnd + DataAccess.SEPARATOR.length()); } logger.fine("Endpoint: " + endpointServer); // We're then replacing From 407bfdad74108e59add6de07764a8847d1817e45 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 7 Jun 2022 14:40:23 -0400 Subject: [PATCH 083/161] use new public flag for stores in UI --- src/main/webapp/dataset-license-terms.xhtml | 8 ++++---- src/main/webapp/dataset.xhtml | 7 ++++--- src/main/webapp/manage-templates.xhtml | 3 ++- src/main/webapp/template.xhtml | 6 ++++-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 9d381f54dd5..46166b632bd 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -242,13 +242,13 @@ or !empty 
termsOfUseAndAccess.contactForAccess or !empty termsOfUseAndAccess.sizeOfCollection or !empty termsOfUseAndAccess.studyCompletion)}">
- +  
-
+
-
+
diff --git a/src/main/webapp/editdatafiles.xhtml b/src/main/webapp/editdatafiles.xhtml index 540d79d29a1..6c4f07f51da 100644 --- a/src/main/webapp/editdatafiles.xhtml +++ b/src/main/webapp/editdatafiles.xhtml @@ -62,7 +62,8 @@ - + +
From cdaa28b4104c285d905441a9c97a9f6a53ff8446 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 10:06:04 -0400 Subject: [PATCH 097/161] move globus calls to globus bean, doc Lock issue, cleaunup Added note w.r.t. the GlobusUpload lock only being removed from the db prior to the completion of the globusUpload method because we're calling the removeDatasetLock method in another bean. --- .../iq/dataverse/DatasetServiceBean.java | 491 +---------------- .../harvard/iq/dataverse/api/Datasets.java | 17 +- .../dataverse/globus/GlobusServiceBean.java | 517 +++++++++++++++++- .../iq/dataverse/util/json/JsonUtil.java | 7 + 4 files changed, 533 insertions(+), 499 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 697deb95fbd..01b5d7fc187 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -540,6 +540,7 @@ public void removeDatasetLocks(Dataset dataset, DatasetLock.Reason aReason) { em.remove(lock); }); } + logger.info("RL: haslock: " + checkDatasetLock(dataset.getId())); } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) @@ -1156,494 +1157,4 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo } } - - - - @Asynchronous - public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, User authUser) throws ExecutionException, InterruptedException, MalformedURLException { - - Integer countAll = 0; - Integer countSuccess = 0; - Integer countError = 0; - String logTimestamp = logFormatter.format(new Date()); - Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." 
+ "GlobusUpload" + logTimestamp); - String logFileName = "../logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; - FileHandler fileHandler; - boolean fileHandlerSuceeded; - try { - fileHandler = new FileHandler(logFileName); - globusLogger.setUseParentHandlers(false); - fileHandlerSuceeded = true; - } catch (IOException | SecurityException ex) { - Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; - } - - if (fileHandlerSuceeded) { - globusLogger.addHandler(fileHandler); - } else { - globusLogger = logger; - } - - globusLogger.info("Starting an globusUpload "); - - String datasetIdentifier = dataset.getStorageIdentifier(); - - String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); - - - Thread.sleep(5000); - - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); - } - logger.fine("json: " + JsonUtil.prettyPrint(jsonObject)); - - String taskIdentifier = jsonObject.getString("taskIdentifier"); - - String ruleId = ""; - try { - jsonObject.getString("ruleId"); - } catch (NullPointerException npe) { - - } - - // globus task status check - String taskStatus = globusStatusCheck(taskIdentifier, globusLogger); - Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); - - - - //ToDo - always "" from 1199 - if(ruleId.length() > 0) { - globusServiceBean.deletePermision(ruleId, globusLogger); - } - - if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { - String comment = "Reason : " + taskStatus.split("#") [1] + "
Short Description : " + taskStatus.split("#")[2]; - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); - globusLogger.info("Globus task failed "); - - DatasetLock dcmLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); - if (dcmLock == null) { - logger.log(Level.WARNING, "No lock found for dataset"); - } else { - removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.log(Level.INFO, "Removed EditInProgress lock "); - //dataset.removeLock(dcmLock); - } - } - else { - try { - List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonObject.getJsonArray("files"); - - if (filesJsonArray != null) { - - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - - // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from externalTool - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] bits = storageIdentifier.split(":"); - String bucketName = bits[1].replace("/", ""); - String fileId = bits[bits.length - 1]; - - // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; - String fileName = fileJsonObject.getString("fileName"); - - inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); - } - - // calculateMissingMetadataFields: checksum, mimetype - JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); - JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - - JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); - - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - - countAll++; - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String fileName = fileJsonObject.getString("fileName"); - String 
directoryLabel = fileJsonObject.getString("directoryLabel"); - String[] bits = storageIdentifier.split(":"); - String fileId = bits[bits.length - 1]; - - List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) - .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) - .filter(Objects::nonNull).collect(Collectors.toList()); - - if (newfileJsonObject != null) { - if ( !newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { - JsonPatch path = Json.createPatchBuilder().add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); - fileJsonObject = path.apply(fileJsonObject); - path = Json.createPatchBuilder().add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); - fileJsonObject = path.apply(fileJsonObject); - jsonDataSecondAPI.add(stringToJsonObjectBuilder(fileJsonObject.toString())); - countSuccess++; - } else { - globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); - countError++; - } - } else { - globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); - countError++; - } - } - - String newjsonData = jsonDataSecondAPI.build().toString(); - - globusLogger.info("Successfully generated new JsonData for Second API call"); - - - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; - System.out.println("*******====command ==== " + command); - - String output = addFilesAsync(command, globusLogger); - if (output.equalsIgnoreCase("ok")) { - //if(!taskSkippedFiles) - if (countError == 0 ){ - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, dataset.getId(), countSuccess + " files added out of "+ countAll , true); - 
} - else { - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), countSuccess + " files added out of "+ countAll , true); - } - globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); - } else { - globusLogger.log(Level.SEVERE, "******* Error while executing api/datasets/:persistentId/add call ", command); - } - - } - - globusLogger.info("Files processed: " + countAll.toString()); - globusLogger.info("Files added successfully: " + countSuccess.toString()); - globusLogger.info("Files failures: " + countError.toString()); - globusLogger.info("Finished upload via Globus job."); - - if (fileHandlerSuceeded) { - fileHandler.close(); - } - - } catch (Exception e) { - logger.info("Exception from globusUpload call "); - e.printStackTrace(); - globusLogger.info("Exception from globusUpload call " + e.getMessage()); - } - } - } - - public static JsonObjectBuilder stringToJsonObjectBuilder(String str) { - JsonReader jsonReader = Json.createReader(new StringReader(str)); - JsonObject jo = jsonReader.readObject(); - jsonReader.close(); - - JsonObjectBuilder job = Json.createObjectBuilder(); - - for (Map.Entry entry : jo.entrySet()) { - job.add(entry.getKey(), entry.getValue()); - } - - return job; - } - - Executor executor = Executors.newFixedThreadPool(10); - - - private String globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { - boolean taskCompletion = false; - String status = ""; - do { - try { - globusLogger.info("checking globus transfer task " + taskId); - Thread.sleep(50000); - AccessToken clientTokenUser = globusServiceBean.getClientToken(); - //success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); - Task task = globusServiceBean.getTask(clientTokenUser,taskId, globusLogger); - status = task.getStatus(); - if(status != null) { - //The task is in progress. 
- if (status.equalsIgnoreCase("ACTIVE")) { - if(task.getNice_status().equalsIgnoreCase("ok") || task.getNice_status().equalsIgnoreCase("queued")) { - taskCompletion = false; - } - else { - taskCompletion = true; - status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); - } - } else { - //The task is either succeeded, failed or inactive. - taskCompletion = true; - status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); - } - } - else { - status = "FAILED"; - taskCompletion = true; - } - } catch (Exception ex) { - ex.printStackTrace(); - } - - } while (!taskCompletion); - - globusLogger.info("globus transfer task completed successfully"); - - return status; - } - - private Boolean taskSkippedFiles(String taskId, Logger globusLogger) throws MalformedURLException { - - try { - globusLogger.info("checking globus transfer task " + taskId); - Thread.sleep(50000); - AccessToken clientTokenUser = globusServiceBean.getClientToken(); - return globusServiceBean.getTaskSkippedErrors(clientTokenUser,taskId, globusLogger); - - } catch (Exception ex) { - ex.printStackTrace(); - } - - return false; - - } - - - public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) throws InterruptedException, ExecutionException, IOException { - - List> hashvalueCompletableFutures = - inputList.stream().map(iD -> calculateDetailsAsync(iD,globusLogger)).collect(Collectors.toList()); - - CompletableFuture allFutures = CompletableFuture - .allOf(hashvalueCompletableFutures.toArray(new CompletableFuture[hashvalueCompletableFutures.size()])); - - CompletableFuture> allCompletableFuture = allFutures.thenApply(future -> { - return hashvalueCompletableFutures.stream() - .map(completableFuture -> completableFuture.join()) - .collect(Collectors.toList()); - }); - - CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> { - return files.stream().map(d -> 
json(d)).collect(toJsonArray()); - }); - - JsonArrayBuilder filesObject = (JsonArrayBuilder) completableFuture.get(); - - JsonObject output = Json.createObjectBuilder().add("files", filesObject).build(); - - return output; - - } - - private CompletableFuture calculateDetailsAsync(String id, Logger globusLogger) { - //logger.info(" calcualte additional details for these globus id ==== " + id); - - return CompletableFuture.supplyAsync( () -> { - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - try { - return ( calculateDetails(id,globusLogger) ); - } catch (InterruptedException | IOException e) { - e.printStackTrace(); - } - return null; - }, executor).exceptionally(ex -> { - return null; - }); - } - - - private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throws InterruptedException, IOException { - int count = 0; - String checksumVal = ""; - InputStream in = null; - String fileId = id.split("IDsplit")[0]; - String fullPath = id.split("IDsplit")[1]; - String fileName = id.split("IDsplit")[2]; - - //ToDo: what if the file doesnot exists in s3 - //ToDo: what if checksum calculation failed - - do { - try { - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - in = dataFileStorageIO.getInputStream(); - checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - count = 3; - }catch (IOException ioex) { - count = 3; - logger.info(ioex.getMessage()); - globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath + ") does not appear to be an S3 object associated with driver: " ); - }catch (Exception ex) { - count = count + 1; - ex.printStackTrace(); - logger.info(ex.getMessage()); - Thread.sleep(5000); - } - - } while (count < 3); - - if(checksumVal.length() == 0 ) { - checksumVal = "NULL"; - } - - String mimeType = calculatemime(fileName); - globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = " + checksumVal + " mimeType = " + 
mimeType); - return new fileDetailsHolder(fileId, checksumVal, mimeType); - //getBytes(in)+"" ); - // calculatemime(fileName)); - } - - public long getBytes(InputStream is) throws IOException { - - FileInputStream fileStream = (FileInputStream)is; - return fileStream.getChannel().size(); - } - - public String calculatemime(String fileName) throws InterruptedException { - - String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; - String type = FileUtil.determineFileTypeByExtension(fileName); - - if (!StringUtils.isBlank(type)) { - if (FileUtil.useRecognizedType(finalType, type)) { - finalType = type; - } - } - - return finalType; - } - - public String addFilesAsync(String curlCommand, Logger globusLogger) throws ExecutionException, InterruptedException { - CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - return (addFiles(curlCommand, globusLogger)); - }, executor).exceptionally(ex -> { - globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); - ex.printStackTrace(); - return null; - }); - - String result = addFilesFuture.get(); - - return result ; - } - - - - - private String addFiles(String curlCommand, Logger globusLogger) - { - boolean success = false; - ProcessBuilder processBuilder = new ProcessBuilder(); - Process process = null; - String line; - String status = ""; - - try { - globusLogger.info("Call to : " + curlCommand); - processBuilder.command("bash", "-c", curlCommand); - process = processBuilder.start(); - process.waitFor(); - - BufferedReader br=new BufferedReader(new InputStreamReader(process.getInputStream())); - - StringBuilder sb = new StringBuilder(); - while((line=br.readLine())!=null) sb.append(line); - globusLogger.info(" API Output : " + sb.toString()); - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(sb.toString())) { - jsonObject = Json.createReader(rdr).readObject(); - } 
catch (Exception jpe) { - jpe.printStackTrace(); - globusLogger.log(Level.SEVERE, "Error parsing dataset json."); - } - - status = jsonObject.getString("status"); - } catch (Exception ex) { - globusLogger.log(Level.SEVERE, "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); - } - - - return status; - } - - @Asynchronous - public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { - - String logTimestamp = logFormatter.format(new Date()); - Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusDownload" + logTimestamp); - - String logFileName = "../logs" + File.separator + "globusDownload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; - FileHandler fileHandler; - boolean fileHandlerSuceeded; - try { - fileHandler = new FileHandler(logFileName); - globusLogger.setUseParentHandlers(false); - fileHandlerSuceeded = true; - } catch (IOException | SecurityException ex) { - Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; - } - - if (fileHandlerSuceeded) { - globusLogger.addHandler(fileHandler); - } else { - globusLogger = logger; - } - - globusLogger.info("Starting an globusDownload "); - - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - globusLogger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); - } - - String taskIdentifier = jsonObject.getString("taskIdentifier"); - String ruleId = ""; - - try { - jsonObject.getString("ruleId"); - }catch (NullPointerException npe){ - - } - - // globus task status check - String taskStatus = globusStatusCheck(taskIdentifier,globusLogger); - Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); - - if(ruleId.length() > 0) { - globusServiceBean.deletePermision(ruleId, globusLogger); - } - - - if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { - String comment = "Reason : " + taskStatus.split("#") [1] + "
Short Description : " + taskStatus.split("#")[2]; - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); - globusLogger.info("Globus task failed during download process"); - } - else { - if(!taskSkippedFiles) { - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETED, dataset.getId()); - } - else { - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(), ""); - } - } - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 626fd041430..e259b4ebf9f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -164,7 +164,7 @@ public class Datasets extends AbstractApiBean { DataverseServiceBean dataverseService; @EJB - GlobusServiceBean globusServiceBean; + GlobusServiceBean globusService; @EJB UserNotificationServiceBean userNotificationService; @@ -3182,9 +3182,9 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, // ------------------------------------- // (1) Get the user from the API key // ------------------------------------- - User authUser; + AuthenticatedUser authUser; try { - authUser = findUserOrDie(); + authUser = findAuthenticatedUserOrDie(); } catch (WrappedResponse ex) { return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") ); @@ -3214,8 +3214,8 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, String lockInfoMessage = "Globus Upload API started "; - DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), 
DatasetLock.Reason.EditInProgress, - ((AuthenticatedUser) authUser).getId(), lockInfoMessage); + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, + (authUser).getId(), lockInfoMessage); if (lock != null) { dataset.addLock(lock); } else { @@ -3223,7 +3223,7 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, } - ApiToken token = authSvc.findApiTokenByUser((AuthenticatedUser) authUser); + ApiToken token = authSvc.findApiTokenByUser(authUser); if(uriInfo != null) { logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); @@ -3237,7 +3237,7 @@ public Response addGlobusFilesToDataset(@PathParam("id") String datasetId, } // Async Call - datasetService.globusUpload(jsonData, token, dataset, requestUrl, authUser); + globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); return ok("Async call to Globus Upload started "); @@ -3280,7 +3280,7 @@ public Response deleteglobusRule(@PathParam("id") String datasetId,@FormDataPara } // Async Call - datasetService.globusDownload(jsonData, dataset, authUser); + globusService.globusDownload(jsonData, dataset, authUser); return ok("Async call to Globus Download started"); @@ -3326,6 +3326,7 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, return wr.getResponse(); } + dataset.getLocks().forEach(dl -> {logger.info(dl.toString());}); //------------------------------------ // (2a) Make sure dataset does not have package file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 143a342c4e0..534fdc4a144 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -4,31 +4,57 @@ import com.google.gson.GsonBuilder; import edu.harvard.iq.dataverse.*; +import 
javax.ejb.Asynchronous; import javax.ejb.EJB; import javax.ejb.Stateless; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.inject.Inject; import javax.inject.Named; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonArrayBuilder; +import javax.json.JsonObject; +import javax.json.JsonPatch; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.servlet.http.HttpServletRequest; + +import org.apache.commons.lang.StringUtils; + +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; + import java.io.*; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLEncoder; - +import java.sql.Timestamp; +import java.text.SimpleDateFormat; import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.logging.FileHandler; +import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; @Stateless @@ -49,8 +75,12 @@ public class GlobusServiceBean implements java.io.Serializable{ @EJB 
EjbDataverseEngine commandEngine; + + @EJB + UserNotificationServiceBean userNotificationService; private static final Logger logger = Logger.getLogger(FeaturedDataverseServiceBean.class.getCanonicalName()); + private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); private String code; private String userTransferToken; @@ -503,7 +533,492 @@ public String getGlobusAppUrlForDataset(Dataset d) { } + + @Asynchronous + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { + + Integer countAll = 0; + Integer countSuccess = 0; + Integer countError = 0; + String logTimestamp = logFormatter.format(new Date()); + Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); + String logFileName = "../logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; + FileHandler fileHandler; + boolean fileHandlerSuceeded; + try { + fileHandler = new FileHandler(logFileName); + globusLogger.setUseParentHandlers(false); + fileHandlerSuceeded = true; + } catch (IOException | SecurityException ex) { + Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); + return; + } + + if (fileHandlerSuceeded) { + globusLogger.addHandler(fileHandler); + } else { + globusLogger = logger; + } + + globusLogger.info("Starting an globusUpload "); + + String datasetIdentifier = dataset.getStorageIdentifier(); + + + //ToDo - use DataAccess methods? 
+ String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); + datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); + + + Thread.sleep(5000); + + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}"); + } + logger.fine("json: " + JsonUtil.prettyPrint(jsonObject)); + + String taskIdentifier = jsonObject.getString("taskIdentifier"); + + String ruleId = ""; + try { + jsonObject.getString("ruleId"); + } catch (NullPointerException npe) { + + } + + // globus task status check + String taskStatus = globusStatusCheck(taskIdentifier, globusLogger); + Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); + + + + //ToDo - always "" from 1199 + if(ruleId.length() > 0) { + deletePermision(ruleId, globusLogger); + } + + DatasetLock gLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); + if (gLock == null) { + logger.log(Level.WARNING, "No lock found for dataset"); + } else { + logger.log(Level.FINE, "Removing GlobusUpload lock " + gLock.getId()); + /* + * Note: This call to remove a lock only works immediately because it is in + * another service bean. Despite the removeDatasetLocks method having the + * REQUIRES_NEW transaction annotation, when the globusUpload method and that + * method were in the same bean (globusUpload was in the DatasetServiceBean to + * start), the globus lock was still seen in the API call initiated in the + * addFilesAsync method called within the globusUpload method. I.e. it appeared + * that the lock removal was not committed/visible outside this method until + * globusUpload itself ended. 
+ */ + datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); + } + + if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { + String comment = "Reason : " + taskStatus.split("#") [1] + "
Short Description : " + taskStatus.split("#")[2]; + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); + globusLogger.info("Globus task failed "); + + + } + else { + try { + datasetSvc.addDatasetLock(dataset, new DatasetLock(DatasetLock.Reason.EditInProgress, authUser, "Completing Globus Upload")); + List inputList = new ArrayList(); + JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + + if (filesJsonArray != null) { + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from externalTool + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String[] bits = storageIdentifier.split(":"); + String bucketName = bits[1].replace("/", ""); + String fileId = bits[bits.length - 1]; + + // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; + String fileName = fileJsonObject.getString("fileName"); + + inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); + } + + // calculateMissingMetadataFields: checksum, mimetype + JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); + JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); + + JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + countAll++; + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String fileName = fileJsonObject.getString("fileName"); + String directoryLabel = fileJsonObject.getString("directoryLabel"); + String[] bits = storageIdentifier.split(":"); + String fileId = bits[bits.length - 1]; + + List newfileJsonObject = IntStream.range(0, 
newfilesJsonArray.size()) + .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) + .filter(Objects::nonNull).collect(Collectors.toList()); + + if (newfileJsonObject != null) { + if ( !newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder().add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = Json.createPatchBuilder().add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + jsonDataSecondAPI.add(JsonUtil.getJsonObject(fileJsonObject.toString())); + countSuccess++; + } else { + globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); + countError++; + } + } else { + globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); + countError++; + } + } + + String newjsonData = jsonDataSecondAPI.build().toString(); + + globusLogger.info("Successfully generated new JsonData for Second API call"); + + + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; + System.out.println("*******====command ==== " + command); + + String output = addFilesAsync(command, globusLogger); + if (output.equalsIgnoreCase("ok")) { + //if(!taskSkippedFiles) + if (countError == 0 ){ + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, dataset.getId(), countSuccess + " files added out of "+ countAll , true); + } + else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), 
countSuccess + " files added out of "+ countAll , true); + } + globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); + } else { + globusLogger.log(Level.SEVERE, "******* Error while executing api/datasets/:persistentId/add call ", command); + } + + } + + globusLogger.info("Files processed: " + countAll.toString()); + globusLogger.info("Files added successfully: " + countSuccess.toString()); + globusLogger.info("Files failures: " + countError.toString()); + globusLogger.info("Finished upload via Globus job."); + + if (fileHandlerSuceeded) { + fileHandler.close(); + } + + } catch (Exception e) { + logger.info("Exception from globusUpload call "); + e.printStackTrace(); + globusLogger.info("Exception from globusUpload call " + e.getMessage()); + datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + } + } + } + + + public String addFilesAsync(String curlCommand, Logger globusLogger) throws ExecutionException, InterruptedException { + CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { + try { + Thread.sleep(2000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + return (addFiles(curlCommand, globusLogger)); + }, executor).exceptionally(ex -> { + globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); + ex.printStackTrace(); + return null; + }); + + String result = addFilesFuture.get(); + + return result ; + } + + + + + private String addFiles(String curlCommand, Logger globusLogger) + { + boolean success = false; + ProcessBuilder processBuilder = new ProcessBuilder(); + Process process = null; + String line; + String status = ""; + + try { + globusLogger.info("Call to : " + curlCommand); + processBuilder.command("bash", "-c", curlCommand); + process = processBuilder.start(); + process.waitFor(); + + BufferedReader br=new BufferedReader(new InputStreamReader(process.getInputStream())); + + StringBuilder sb = new StringBuilder(); + 
while((line=br.readLine())!=null) sb.append(line); + globusLogger.info(" API Output : " + sb.toString()); + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(sb.toString())) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + globusLogger.log(Level.SEVERE, "Error parsing dataset json."); + } + + status = jsonObject.getString("status"); + } catch (Exception ex) { + globusLogger.log(Level.SEVERE, "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + } + + + return status; + } + + @Asynchronous + public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { + + String logTimestamp = logFormatter.format(new Date()); + Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusDownload" + logTimestamp); + + String logFileName = "../logs" + File.separator + "globusDownload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; + FileHandler fileHandler; + boolean fileHandlerSuceeded; + try { + fileHandler = new FileHandler(logFileName); + globusLogger.setUseParentHandlers(false); + fileHandlerSuceeded = true; + } catch (IOException | SecurityException ex) { + Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); + return; + } + + if (fileHandlerSuceeded) { + globusLogger.addHandler(fileHandler); + } else { + globusLogger = logger; + } + + globusLogger.info("Starting an globusDownload "); + + JsonObject jsonObject = null; + try (StringReader rdr = new StringReader(jsonData)) { + jsonObject = Json.createReader(rdr).readObject(); + } catch (Exception jpe) { + jpe.printStackTrace(); + globusLogger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); + } + + String taskIdentifier = jsonObject.getString("taskIdentifier"); + String ruleId = ""; + + try { + jsonObject.getString("ruleId"); + }catch (NullPointerException npe){ + + } + + // globus task status check + String taskStatus = globusStatusCheck(taskIdentifier,globusLogger); + Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); + + if(ruleId.length() > 0) { + deletePermision(ruleId, globusLogger); + } + + + if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { + String comment = "Reason : " + taskStatus.split("#") [1] + "
Short Description : " + taskStatus.split("#")[2]; + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); + globusLogger.info("Globus task failed during download process"); + } + else { + if(!taskSkippedFiles) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETED, dataset.getId()); + } + else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(), ""); + } + } + } + + Executor executor = Executors.newFixedThreadPool(10); + + + private String globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { + boolean taskCompletion = false; + String status = ""; + do { + try { + globusLogger.info("checking globus transfer task " + taskId); + Thread.sleep(50000); + AccessToken clientTokenUser = getClientToken(); + //success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); + Task task = getTask(clientTokenUser,taskId, globusLogger); + status = task.getStatus(); + if(status != null) { + //The task is in progress. + if (status.equalsIgnoreCase("ACTIVE")) { + if(task.getNice_status().equalsIgnoreCase("ok") || task.getNice_status().equalsIgnoreCase("queued")) { + taskCompletion = false; + } + else { + taskCompletion = true; + status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } else { + //The task is either succeeded, failed or inactive. 
+ taskCompletion = true; + status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } + else { + status = "FAILED"; + taskCompletion = true; + } + } catch (Exception ex) { + ex.printStackTrace(); + } + + } while (!taskCompletion); + + globusLogger.info("globus transfer task completed successfully"); + + return status; + } + + private Boolean taskSkippedFiles(String taskId, Logger globusLogger) throws MalformedURLException { + + try { + globusLogger.info("checking globus transfer task " + taskId); + Thread.sleep(50000); + AccessToken clientTokenUser = getClientToken(); + return getTaskSkippedErrors(clientTokenUser,taskId, globusLogger); + + } catch (Exception ex) { + ex.printStackTrace(); + } + + return false; + + } + + + public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) throws InterruptedException, ExecutionException, IOException { + + List> hashvalueCompletableFutures = + inputList.stream().map(iD -> calculateDetailsAsync(iD,globusLogger)).collect(Collectors.toList()); + + CompletableFuture allFutures = CompletableFuture + .allOf(hashvalueCompletableFutures.toArray(new CompletableFuture[hashvalueCompletableFutures.size()])); + + CompletableFuture> allCompletableFuture = allFutures.thenApply(future -> { + return hashvalueCompletableFutures.stream() + .map(completableFuture -> completableFuture.join()) + .collect(Collectors.toList()); + }); + + CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> { + return files.stream().map(d -> json(d)).collect(toJsonArray()); + }); + + JsonArrayBuilder filesObject = (JsonArrayBuilder) completableFuture.get(); + + JsonObject output = Json.createObjectBuilder().add("files", filesObject).build(); + + return output; + + } + + private CompletableFuture calculateDetailsAsync(String id, Logger globusLogger) { + //logger.info(" calcualte additional details for these globus id ==== " + id); + + return CompletableFuture.supplyAsync( () 
-> { + try { + Thread.sleep(2000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + return ( calculateDetails(id,globusLogger) ); + } catch (InterruptedException | IOException e) { + e.printStackTrace(); + } + return null; + }, executor).exceptionally(ex -> { + return null; + }); + } + + + private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throws InterruptedException, IOException { + int count = 0; + String checksumVal = ""; + InputStream in = null; + String fileId = id.split("IDsplit")[0]; + String fullPath = id.split("IDsplit")[1]; + String fileName = id.split("IDsplit")[2]; + + //ToDo: what if the file doesnot exists in s3 + //ToDo: what if checksum calculation failed + + do { + try { + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + in = dataFileStorageIO.getInputStream(); + checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + count = 3; + }catch (IOException ioex) { + count = 3; + logger.info(ioex.getMessage()); + globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath + ") does not appear to be an S3 object associated with driver: " ); + }catch (Exception ex) { + count = count + 1; + ex.printStackTrace(); + logger.info(ex.getMessage()); + Thread.sleep(5000); + } + + } while (count < 3); + + if(checksumVal.length() == 0 ) { + checksumVal = "NULL"; + } + + String mimeType = calculatemime(fileName); + globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = " + checksumVal + " mimeType = " + mimeType); + return new fileDetailsHolder(fileId, checksumVal, mimeType); + //getBytes(in)+"" ); + // calculatemime(fileName)); + } + + public String calculatemime(String fileName) throws InterruptedException { + + String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; + String type = FileUtil.determineFileTypeByExtension(fileName); + + if (!StringUtils.isBlank(type)) { + if (FileUtil.useRecognizedType(finalType, type)) { + finalType = 
type; + } + } + + return finalType; + } /* public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) throws MalformedURLException { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index ae6935945e8..f4a3c635f8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -3,6 +3,8 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; + +import java.io.StringReader; import java.io.StringWriter; import java.util.HashMap; import java.util.Map; @@ -56,4 +58,9 @@ public static String prettyPrint(javax.json.JsonObject jsonObject) { return stringWriter.toString(); } + public static javax.json.JsonObject getJsonObject(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readObject(); + } + } } From 4a1d15b6826dda51e90fa43b08a55d79f254a83e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 11:10:24 -0400 Subject: [PATCH 098/161] move add lock before removing old one --- .../iq/dataverse/globus/GlobusServiceBean.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 534fdc4a144..ce841525159 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -601,7 +601,14 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin if(ruleId.length() > 0) { deletePermision(ruleId, globusLogger); } - + + //If success, switch to an EditInProgress lock - do this before removing the GlobusUpload lock + //Keeping a lock through the add datafiles API call avoids a conflicting 
edit and keeps any open dataset page refreshing until the datafile appears + if (!(taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE"))) { + datasetSvc.addDatasetLock(dataset, + new DatasetLock(DatasetLock.Reason.EditInProgress, authUser, "Completing Globus Upload")); + } + DatasetLock gLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); if (gLock == null) { logger.log(Level.WARNING, "No lock found for dataset"); @@ -629,7 +636,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin } else { try { - datasetSvc.addDatasetLock(dataset, new DatasetLock(DatasetLock.Reason.EditInProgress, authUser, "Completing Globus Upload")); + // + List inputList = new ArrayList(); JsonArray filesJsonArray = jsonObject.getJsonArray("files"); From 5a3e22f1197b2b71ac1ae0637301b2cdc88f8457 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 12:19:07 -0400 Subject: [PATCH 099/161] refactor to avoid second get task call to Globus --- .../dataverse/globus/GlobusServiceBean.java | 128 ++++++++---------- 1 file changed, 58 insertions(+), 70 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index ce841525159..108cd3f614b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -16,8 +16,6 @@ import javax.json.JsonArrayBuilder; import javax.json.JsonObject; import javax.json.JsonPatch; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; import javax.servlet.http.HttpServletRequest; import org.apache.commons.lang.StringUtils; @@ -184,7 +182,7 @@ public void deletePermision(String ruleId, Logger globusLogger) throws Malformed public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, String directory, String globusEndpoint) throws 
MalformedURLException { - ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, principal); + ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, principal); @@ -249,12 +247,9 @@ public Task getTask(AccessToken clientTokenUser, String taskId , Logger globusLo "GET", null); Task task = null; - String status = null; - //2019-12-01 18:34:37+00:00 if (result.status == 200) { task = parseJson(result.jsonResponse, Task.class, false); - status = task.getStatus(); } if (result.status != 200) { globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + result.jsonResponse.toString()); @@ -263,23 +258,6 @@ public Task getTask(AccessToken clientTokenUser, String taskId , Logger globusLo return task; } - public Boolean getTaskSkippedErrors(AccessToken clientTokenUser, String taskId , Logger globusLogger) throws MalformedURLException { - - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId ); - - MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), - "GET", null); - - Task task = null; - - if (result.status == 200) { - task = parseJson(result.jsonResponse, Task.class, false); - return task.getSkip_source_errors(); - } - - return false; - } - public AccessToken getClientToken() throws MalformedURLException { String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); URL url = new URL("https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); @@ -580,22 +558,20 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin jpe.printStackTrace(); logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); } - logger.fine("json: " + JsonUtil.prettyPrint(jsonObject)); + logger.info("json: " + JsonUtil.prettyPrint(jsonObject)); String taskIdentifier = jsonObject.getString("taskIdentifier"); String ruleId = ""; try { - jsonObject.getString("ruleId"); + ruleId = jsonObject.getString("ruleId"); } catch (NullPointerException npe) { - + logger.warning("NPE for jsonData object" ); } // globus task status check - String taskStatus = globusStatusCheck(taskIdentifier, globusLogger); - Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); - - + Task task = globusStatusCheck(taskIdentifier, globusLogger); + String taskStatus = getTaskStatus(task); //ToDo - always "" from 1199 if(ruleId.length() > 0) { @@ -683,7 +659,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin fileJsonObject = path.apply(fileJsonObject); path = Json.createPatchBuilder().add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); fileJsonObject = path.apply(fileJsonObject); - jsonDataSecondAPI.add(JsonUtil.getJsonObject(fileJsonObject.toString())); + jsonDataSecondAPI.add(fileJsonObject); countSuccess++; } else { globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); @@ -762,7 +738,6 @@ public String addFilesAsync(String curlCommand, Logger globusLogger) throws Exec private String addFiles(String curlCommand, Logger globusLogger) { - boolean success = false; ProcessBuilder processBuilder = new ProcessBuilder(); Process process = null; String line; @@ -840,9 +815,9 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } // globus task status check - String taskStatus = globusStatusCheck(taskIdentifier,globusLogger); - Boolean taskSkippedFiles = taskSkippedFiles(taskIdentifier, globusLogger); - + Task task = globusStatusCheck(taskIdentifier,globusLogger); + String taskStatus = getTaskStatus(task); + if(ruleId.length() > 0) { deletePermision(ruleId, 
globusLogger); } @@ -854,6 +829,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro globusLogger.info("Globus task failed during download process"); } else { + boolean taskSkippedFiles = (task.getSkip_source_errors()==null) ? false : task.getSkip_source_errors(); if(!taskSkippedFiles) { userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETED, dataset.getId()); } @@ -867,35 +843,42 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro Executor executor = Executors.newFixedThreadPool(10); - private String globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { + private Task globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { boolean taskCompletion = false; String status = ""; + Task task = null; do { try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(50000); - AccessToken clientTokenUser = getClientToken(); - //success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); - Task task = getTask(clientTokenUser,taskId, globusLogger); - status = task.getStatus(); - if(status != null) { - //The task is in progress. - if (status.equalsIgnoreCase("ACTIVE")) { - if(task.getNice_status().equalsIgnoreCase("ok") || task.getNice_status().equalsIgnoreCase("queued")) { - taskCompletion = false; - } - else { + AccessToken clientTokenUser = getClientToken(); + // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); + task = getTask(clientTokenUser, taskId, globusLogger); + if (task != null) { + status = task.getStatus(); + if (status != null) { + // The task is in progress. 
+ if (status.equalsIgnoreCase("ACTIVE")) { + if (task.getNice_status().equalsIgnoreCase("ok") + || task.getNice_status().equalsIgnoreCase("queued")) { + taskCompletion = false; + } else { + taskCompletion = true; + // status = "FAILED" + "#" + task.getNice_status() + "#" + + // task.getNice_status_short_description(); + } + } else { + // The task is either succeeded, failed or inactive. taskCompletion = true; - status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + // status = status + "#" + task.getNice_status() + "#" + + // task.getNice_status_short_description(); } } else { - //The task is either succeeded, failed or inactive. + // status = "FAILED"; taskCompletion = true; - status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); } - } - else { - status = "FAILED"; + } else { + // status = "FAILED"; taskCompletion = true; } } catch (Exception ex) { @@ -905,26 +888,31 @@ private String globusStatusCheck(String taskId, Logger globusLogger) throws Malf } while (!taskCompletion); globusLogger.info("globus transfer task completed successfully"); - - return status; + return task; } - - private Boolean taskSkippedFiles(String taskId, Logger globusLogger) throws MalformedURLException { - - try { - globusLogger.info("checking globus transfer task " + taskId); - Thread.sleep(50000); - AccessToken clientTokenUser = getClientToken(); - return getTaskSkippedErrors(clientTokenUser,taskId, globusLogger); - - } catch (Exception ex) { - ex.printStackTrace(); + + private String getTaskStatus(Task task) { + String status = null; + if (task != null) { + status = task.getStatus(); + if (status != null) { + // The task is in progress. + if (status.equalsIgnoreCase("ACTIVE")) { + status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } else { + // The task is either succeeded, failed or inactive. 
+ status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } else { + status = "FAILED"; + } + } else { + status = "FAILED"; } - - return false; - + return status; } - + + public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) throws InterruptedException, ExecutionException, IOException { From e3ad262574baeb8e98022070b153303792c923ab Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 12:19:57 -0400 Subject: [PATCH 100/161] cleanup --- src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 01b5d7fc187..c0fe8440ac2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -540,7 +540,6 @@ public void removeDatasetLocks(Dataset dataset, DatasetLock.Reason aReason) { em.remove(lock); }); } - logger.info("RL: haslock: " + checkDatasetLock(dataset.getId())); } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) From 73fc847f044b55840f240254e6844f8d76d26ae5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 24 Jun 2022 14:32:41 -0400 Subject: [PATCH 101/161] provide access to storage prefix --- .../iq/dataverse/dataaccess/DataAccess.java | 21 +++++++++++++++++++ .../iq/dataverse/dataaccess/S3AccessIO.java | 4 ++++ .../iq/dataverse/dataaccess/StorageIO.java | 4 ++++ .../dataverse/globus/GlobusServiceBean.java | 17 ++++++++++----- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 7b844cd125f..d355fbb805f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ 
-160,6 +160,27 @@ public static String getDriverType(String driverId) { } return System.getProperty("dataverse.files." + driverId + ".type", "Undefined"); } + + //This + public static String getDriverPrefix(String driverId) throws IOException { + if(driverId.isEmpty() || driverId.equals("tmp")) { + return "tmp" + SEPARATOR; + } + String storageType = System.getProperty("dataverse.files." + driverId + ".type", "Undefined"); + switch(storageType) { + case FILE: + return FileAccessIO.getDriverPrefix(driverId); + case S3: + return S3AccessIO.getDriverPrefix(driverId); + case SWIFT: + return SwiftAccessIO.getDriverPrefix(driverId); + default: + logger.warning("Could not find storage driver for id: " + driverId); + throw new IOException("getDriverPrefix: Unsupported storage method."); + } + + + } // createDataAccessObject() methods create a *new*, empty DataAccess objects, // for saving new, not yet saved datafiles. diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index b71c192e117..fc9111b4b81 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1270,5 +1270,9 @@ public boolean isMainDriver() { public void setMainDriver(boolean mainDriver) { this.mainDriver = mainDriver; } + + public static String getDriverPrefix(String driverId) { + return driverId+ DataAccess.SEPARATOR + getBucketName(driverId) + ":"; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 0e16c78e4b3..b92066d1f13 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -596,4 +596,8 @@ public boolean downloadRedirectEnabled() { public String generateTemporaryDownloadUrl() throws IOException { throw new 
UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } + + public static String getDriverPrefix(String driverId) { + return driverId+ DataAccess.SEPARATOR; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 108cd3f614b..e75200d6c38 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -491,7 +491,8 @@ public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedE return true; } - + + //Generates the URL to launch the Globus app public String getGlobusAppUrlForDataset(Dataset d) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; @@ -504,13 +505,19 @@ public String getGlobusAppUrlForDataset(Dataset d) { logger.fine("Created apiToken for user: " + user.getIdentifier()); apiToken = authSvc.generateApiTokenForUser(( AuthenticatedUser) user); } + String storePrefix =""; + String driverId = d.getEffectiveStorageDriverId(); + try { + storePrefix = DataAccess.getDriverPrefix(driverId); + } catch(Exception e) { + logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); + } URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}"; - return tokenUtil.replaceTokensWithValues(appUrl); + + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl)+"&storeprefix=" + storePrefix; } - @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) @@ -573,7 +580,7 @@ 
public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin Task task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - //ToDo - always "" from 1199 + if(ruleId.length() > 0) { deletePermision(ruleId, globusLogger); } From 4e8599361f837cee7f357cf3dd2c9fbb27e572de Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 29 Jun 2022 12:30:54 -0400 Subject: [PATCH 102/161] fix logger name --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index e75200d6c38..2747956c2f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -77,7 +77,7 @@ public class GlobusServiceBean implements java.io.Serializable{ @EJB UserNotificationServiceBean userNotificationService; - private static final Logger logger = Logger.getLogger(FeaturedDataverseServiceBean.class.getCanonicalName()); + private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); private String code; @@ -515,7 +515,7 @@ public String getGlobusAppUrlForDataset(Dataset d) { URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; - return tokenUtil.replaceTokensWithValues(appUrl)+"&storeprefix=" + storePrefix; + return tokenUtil.replaceTokensWithValues(appUrl)+"&storePrefix=" + storePrefix; } From 011881bf997d4ceeb04efc6f1c00212812ed518c Mon Sep 17 
00:00:00 2001 From: qqmyers Date: Wed, 29 Jun 2022 18:16:11 -0400 Subject: [PATCH 103/161] refactor getDownloadURL, add GlobusTransfer --- .../harvard/iq/dataverse/util/FileUtil.java | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 93a6a52947a..8b40ee5e006 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1615,32 +1615,33 @@ public static String getPublicDownloadUrl(String dataverseSiteUrl, String persis */ public static String getFileDownloadUrlPath(String downloadType, Long fileId, boolean gbRecordsWritten, Long fileMetadataId) { String fileDownloadUrl = "/api/access/datafile/" + fileId; - if (downloadType != null && downloadType.equals("bundle")) { - if (fileMetadataId == null) { - fileDownloadUrl = "/api/access/datafile/bundle/" + fileId; - } else { - fileDownloadUrl = "/api/access/datafile/bundle/" + fileId + "?fileMetadataId=" + fileMetadataId; - } - } - if (downloadType != null && downloadType.equals("original")) { - fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=original"; - } - if (downloadType != null && downloadType.equals("RData")) { - fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=RData"; - } - if (downloadType != null && downloadType.equals("var")) { - if (fileMetadataId == null) { - fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata"; - } else { - fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata?fileMetadataId=" + fileMetadataId; + if (downloadType != null) { + switch(downloadType) { + case "original": + case"RData": + case "tab": + case "GlobusTransfer": + fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=" + downloadType; + break; + case "bundle": + if (fileMetadataId == null) { + fileDownloadUrl = "/api/access/datafile/bundle/" + 
fileId; + } else { + fileDownloadUrl = "/api/access/datafile/bundle/" + fileId + "?fileMetadataId=" + fileMetadataId; + } + break; + case "var": + if (fileMetadataId == null) { + fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata"; + } else { + fileDownloadUrl = "/api/access/datafile/" + fileId + "/metadata?fileMetadataId=" + fileMetadataId; + } + break; + } + } - } - if (downloadType != null && downloadType.equals("tab")) { - fileDownloadUrl = "/api/access/datafile/" + fileId + "?format=tab"; - } if (gbRecordsWritten) { - if (downloadType != null && ((downloadType.equals("original") || downloadType.equals("RData") || downloadType.equals("tab")) || - ((downloadType.equals("var") || downloadType.equals("bundle") ) && fileMetadataId != null))) { + if (fileDownloadUrl.contains("?")) { fileDownloadUrl += "&gbrecs=true"; } else { fileDownloadUrl += "?gbrecs=true"; From d7d411d399d97abeecdef78c4bb579887f6085dc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 29 Jun 2022 18:16:28 -0400 Subject: [PATCH 104/161] initial test to add GT option --- .../file-download-button-fragment.xhtml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 53dc1bc11b1..cec44eb750b 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -56,6 +56,33 @@ + + + +
  • + + + + GT: #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} + + + + + GT: #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} + +
  • + +
  • clazz, Type type, Annotation[] throw new NotFoundException("Datafile " + dataFile.getId() + ": Failed to locate and/or open physical file."); } + + boolean redirectSupported = false; + String auxiliaryTag = null; + String auxiliaryType = null; + String auxiliaryFileName = null; // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): if (storageIO instanceof S3AccessIO && ((S3AccessIO) storageIO).downloadRedirectEnabled()) { @@ -101,10 +113,8 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // for a saved original; but CANNOT if it is a column subsetting // request (must be streamed in real time locally); or a format // conversion that hasn't been cached and saved on S3 yet. - boolean redirectSupported = true; - String auxiliaryTag = null; - String auxiliaryType = null; - String auxiliaryFileName = null; + redirectSupported = true; + if ("imageThumb".equals(di.getConversionParam())) { @@ -112,7 +122,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] int requestedSize = 0; if (!"".equals(di.getConversionParamValue())) { try { - requestedSize = new Integer(di.getConversionParamValue()); + requestedSize = Integer.parseInt(di.getConversionParamValue()); } catch (java.lang.NumberFormatException ex) { // it's ok, the default size will be used. } @@ -177,40 +187,54 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirectSupported = false; } } + } + String redirect_url_str=null; + + if (redirectSupported) { + // definitely close the (potentially still open) input stream, + // since we are not going to use it. The S3 documentation in particular + // emphasizes that it is very important not to leave these + // lying around un-closed, since they are going to fill + // up the S3 connection pool! 
+ storageIO.closeInputStream(); + // [attempt to] redirect: + try { + redirect_url_str = ((S3AccessIO) storageIO).generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); + } catch (IOException ioex) { + redirect_url_str = null; + } - if (redirectSupported) { - // definitely close the (potentially still open) input stream, - // since we are not going to use it. The S3 documentation in particular - // emphasizes that it is very important not to leave these - // lying around un-closed, since they are going to fill - // up the S3 connection pool! - storageIO.closeInputStream(); - // [attempt to] redirect: - String redirect_url_str; - try { - redirect_url_str = ((S3AccessIO) storageIO).generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); - } catch (IOException ioex) { - redirect_url_str = null; - } - - if (redirect_url_str == null) { - throw new ServiceUnavailableException(); + if (redirect_url_str == null) { + throw new ServiceUnavailableException(); + } + } + + if (systemConfig.isGlobusDownload() && systemConfig.getGlobusStoresList() + .contains(DataAccess.getStorgageDriverFromIdentifier(dataFile.getStorageIdentifier()))) { + if (di.getConversionParam() != null) { + if (di.getConversionParam().equals("format")) { + + if ("GlobusTransfer".equals(di.getConversionParamValue())) { + redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, dataFile); + } } + } + if (redirect_url_str!=null) { - logger.fine("Data Access API: direct S3 url: " + redirect_url_str); + logger.fine("Data Access API: redirect url: " + redirect_url_str); URI redirect_uri; try { redirect_uri = new URI(redirect_url_str); } catch (URISyntaxException ex) { - logger.info("Data Access API: failed to create S3 redirect url (" + redirect_url_str + ")"); + logger.info("Data Access API: failed to create redirect url (" + redirect_url_str + ")"); redirect_uri = null; } if (redirect_uri != null) { // increment the download count, if 
necessary: if (di.getGbr() != null && !(isThumbnailDownload(di) || isPreprocessedMetadataDownload(di))) { try { - logger.fine("writing guestbook response, for an S3 download redirect."); + logger.fine("writing guestbook response, for a download redirect."); Command cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner()); di.getCommand().submit(cmd); MakeDataCountEntry entry = new MakeDataCountEntry(di.getRequestUriInfo(), di.getRequestHttpHeaders(), di.getDataverseRequestService(), di.getGbr().getDataFile()); @@ -221,7 +245,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // finally, issue the redirect: Response response = Response.seeOther(redirect_uri).build(); - logger.fine("Issuing redirect to the file location on S3."); + logger.fine("Issuing redirect to the file location."); throw new RedirectionException(response); } throw new ServiceUnavailableException(); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 2747956c2f6..d8ccd0892d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -494,6 +494,10 @@ public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedE //Generates the URL to launch the Globus app public String getGlobusAppUrlForDataset(Dataset d) { + return getGlobusAppUrlForDataset(d, true, null); + } + + public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; User user = session.getUser(); @@ -514,7 +518,7 @@ public String getGlobusAppUrlForDataset(Dataset d) { } URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); String appUrl = 
settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + + "/" + (upload ? "upload":"download") + "?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"+ (df!=null ? "&fileId={fileId}":""); return tokenUtil.replaceTokensWithValues(appUrl)+"&storePrefix=" + storePrefix; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index d69ca405a90..8cabc0de44d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -993,19 +993,19 @@ public boolean isPublicInstall(){ } public boolean isRsyncUpload(){ - return getUploadMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString()); + return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), true); } public boolean isGlobusUpload(){ - return getUploadMethodAvailable(FileUploadMethods.GLOBUS.toString()); + return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), true); } // Controls if HTTP upload is enabled for both GUI and API. 
public boolean isHTTPUpload(){ - return getUploadMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString()); + return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), true); } - public boolean isRsyncOnly(){ + public boolean isRsyncOnly(){ String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); if(downloadMethods == null){ return false; @@ -1018,31 +1018,33 @@ public boolean isRsyncOnly(){ return false; } else { return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).size() == 1 && uploadMethods.toLowerCase().equals(SystemConfig.FileUploadMethods.RSYNC.toString()); - } + } } public boolean isRsyncDownload() { - String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); - return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.RSYNC.toString()); + return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), false); } public boolean isHTTPDownload() { - String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); - logger.warning("Download Methods:" + downloadMethods); - return downloadMethods !=null && downloadMethods.toLowerCase().contains(SystemConfig.FileDownloadMethods.NATIVE.toString()); + return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), false); } public boolean isGlobusDownload() { - String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); - return downloadMethods !=null && downloadMethods.toLowerCase().contains(FileDownloadMethods.GLOBUS.toString()); + return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), false); } - - private Boolean getUploadMethodAvailable(String method){ - String uploadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.UploadMethods); - if (uploadMethods==null){ + + public List getGlobusStoresList() { + String globusStores = 
settingsService.getValueForKey(SettingsServiceBean.Key.GlobusStores, ""); + return Arrays.asList(globusStores.split("\\s*,\\s*")); + } + + private Boolean getMethodAvailable(String method, boolean upload) { + String methods = settingsService.getValueForKey( + upload ? SettingsServiceBean.Key.UploadMethods : SettingsServiceBean.Key.DownloadMethods); + if (methods == null) { return false; } else { - return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).contains(method); + return Arrays.asList(methods.toLowerCase().split("\\s*,\\s*")).contains(method); } } From 31c16c5788e1116f3fac8fabe4b9904d00972736 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Jul 2022 15:50:19 -0400 Subject: [PATCH 106/161] swap to test globusDownload allowed --- src/main/webapp/file-download-button-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index cec44eb750b..8d90193af3b 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -59,7 +59,7 @@ -
  • +
  • Date: Fri, 1 Jul 2022 15:50:26 -0400 Subject: [PATCH 107/161] add logging --- .../java/edu/harvard/iq/dataverse/api/DownloadInstance.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java index 07215cb919e..c9eb3638b90 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstance.java @@ -11,6 +11,8 @@ import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.GuestbookResponse; import java.util.List; +import java.util.logging.Logger; + import edu.harvard.iq.dataverse.dataaccess.OptionalAccessService; import javax.faces.context.FacesContext; import javax.ws.rs.core.HttpHeaders; @@ -22,6 +24,7 @@ */ public class DownloadInstance { + private static final Logger logger = Logger.getLogger(DownloadInstance.class.getCanonicalName()); /* private ByteArrayOutputStream outStream = null; @@ -122,6 +125,7 @@ public Boolean checkIfServiceSupportedAndSetConverter(String serviceArg, String for (OptionalAccessService dataService : servicesAvailable) { if (dataService != null) { + logger.fine("Checking service: " + dataService.getServiceName()); if (serviceArg.equals("variables")) { // Special case for the subsetting parameter (variables=): if ("subset".equals(dataService.getServiceName())) { @@ -149,6 +153,7 @@ public Boolean checkIfServiceSupportedAndSetConverter(String serviceArg, String return true; } String argValuePair = serviceArg + "=" + serviceArgValue; + logger.fine("Comparing: " + argValuePair + " and " + dataService.getServiceArguments()); if (argValuePair.startsWith(dataService.getServiceArguments())) { conversionParam = serviceArg; conversionParamValue = serviceArgValue; From c645aea1208cfbd82b5ea10bbf034d06ed1d12ad Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Jul 2022 16:07:42 -0400 Subject: [PATCH 108/161] i18n 
download, use icon --- src/main/java/propertyFiles/Bundle.properties | 2 ++ src/main/webapp/file-download-button-fragment.xhtml | 2 +- src/main/webapp/resources/css/structure.css | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index cc6ee97d09d..311517279f3 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1666,6 +1666,8 @@ file.fromGlobusAfterCreate.tip=File upload via Globus transfer will be enabled a file.fromGlobus=Upload with Globus file.finishGlobus=Globus Transfer has finished file.downloadFromGlobus=Download through Globus +file.globus.transfer=Globus Transfer +file.globus.of=of: file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. file.replace.original=Original File diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 8d90193af3b..dd57a0371ba 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -68,7 +68,7 @@ actionListener="#{fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse, fileMetadata, 'GlobusTransfer')}"> - GT: #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} + #{bundle['file.globus.of']} #{fileMetadata.dataFile.friendlyType == 'Unknown' ? 
bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} Date: Tue, 5 Jul 2022 15:27:07 -0400 Subject: [PATCH 109/161] force lowercase for hash values - that's what is generated internally --- .../iq/dataverse/datasetutility/OptionalFileParams.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 35687151090..25240349bfb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -396,7 +396,7 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // ------------------------------- if ((jsonObj.has(LEGACY_CHECKSUM_ATTR_NAME)) && (!jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).isJsonNull())){ - this.checkSumValue = jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).getAsString(); + this.checkSumValue = jsonObj.get(LEGACY_CHECKSUM_ATTR_NAME).getAsString().toLowerCase(); this.checkSumType= ChecksumType.MD5; } // ------------------------------- @@ -404,7 +404,7 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // ------------------------------- else if ((jsonObj.has(CHECKSUM_OBJECT_NAME)) && (!jsonObj.get(CHECKSUM_OBJECT_NAME).isJsonNull())){ - this.checkSumValue = ((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_VALUE).getAsString(); + this.checkSumValue = ((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_VALUE).getAsString().toLowerCase(); this.checkSumType = ChecksumType.fromString(((JsonObject) jsonObj.get(CHECKSUM_OBJECT_NAME)).get(CHECKSUM_OBJECT_TYPE).getAsString()); } From 0f7fc76d6172458d028b2a8fa89311012dcd3ee3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Jul 2022 15:40:07 -0400 Subject: [PATCH 110/161] log mismatched checksum values --- 
src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 8b40ee5e006..1e156ff667c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1852,6 +1852,7 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio if (!fixed) { String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.wrongChecksumValue", Arrays.asList(dataFile.getId().toString())); logger.log(Level.INFO, info); + logger.fine("Expected: " + dataFile.getChecksumValue() +", calculated: " + recalculatedChecksum); throw new IOException(info); } } From 0a8dcb763934e190fe3d73185ac00faad71edd50 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Jul 2022 16:32:16 -0400 Subject: [PATCH 111/161] refactor for download redirect in remoteoverlaystore --- .../dataverse/api/DownloadInstanceWriter.java | 10 ++++------ .../dataaccess/RemoteOverlayAccessIO.java | 19 ++++++++++++++----- .../iq/dataverse/dataaccess/StorageIO.java | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index af1e430dec0..3ca34748525 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -105,7 +105,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] String auxiliaryFileName = null; // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): - if (storageIO instanceof S3AccessIO && ((S3AccessIO) storageIO).downloadRedirectEnabled()) { + if 
(storageIO.downloadRedirectEnabled()) { // Even if the above is true, there are a few cases where a // redirect is not applicable. @@ -199,14 +199,12 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] storageIO.closeInputStream(); // [attempt to] redirect: try { - redirect_url_str = ((S3AccessIO) storageIO).generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); + redirect_url_str = storageIO.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); } catch (IOException ioex) { + logger.warning("Unable to generate downloadURL for " + dataFile.getId() + ": " + auxiliaryTag); + //Setting null will let us try to get the file/aux file w/o redirecting redirect_url_str = null; } - - if (redirect_url_str == null) { - throw new ServiceUnavailableException(); - } } if (systemConfig.isGlobusDownload() && systemConfig.getGlobusStoresList() diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 2f6a2f80259..ec730e770d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -391,12 +391,21 @@ public boolean downloadRedirectEnabled() { return false; } - public String generateTemporaryDownloadUrl() throws IOException { - String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secretkey"); - if (secretKey == null) { - return baseUrl + "/" + urlPath; + @Override + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { + + // ToDo - support remote auxiliary Files + if (auxiliaryTag == null) { + String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secretkey"); + if (secretKey == null) { + return baseUrl + "/" + urlPath; + } else { + return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", + secretKey); + } } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", secretKey); + return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index b92066d1f13..6888bc7d1fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -593,7 +593,7 @@ public boolean downloadRedirectEnabled() { return false; } - public String generateTemporaryDownloadUrl() throws IOException { + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } From c376a47bd9a2bcd22a58005c72a881017c42b241 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 26 Jul 2022 14:49:24 -0400 Subject: [PATCH 112/161] merge issue --- .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d8ccd0892d9..c9a2b622eef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1016,7 +1016,7 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw public String calculatemime(String fileName) throws InterruptedException { String finalType = 
FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; - String type = FileUtil.determineFileTypeByExtension(fileName); + String type = FileUtil.determineFileTypeByNameAndExtension(fileName); if (!StringUtils.isBlank(type)) { if (FileUtil.useRecognizedType(finalType, type)) { From b46e0fdbb6a470f1e598e54b8f3d9efb483de7ff Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 26 Jul 2022 14:49:47 -0400 Subject: [PATCH 113/161] support passthrough for uploading files --- .../iq/dataverse/dataaccess/DataAccess.java | 3 +++ .../dataaccess/RemoteOverlayAccessIO.java | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index d355fbb805f..e4851be4ad5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -233,6 +233,9 @@ public static StorageIO createNewStorageIO(T dvObject, S case S3: storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); break; + case REMOTE: + storageIO = createNewStorageIO(dvObject, storageTag, RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; + break; default: logger.warning("Could not find storage driver for: " + storageTag); throw new IOException("createDataAccessObject: Unsupported storage method " + storageDriverId); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index ec730e770d2..7c70c6b867f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -339,8 +339,14 @@ public void deleteAllAuxObjects() throws IOException { public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); 
logger.fine("storageidentifier: " + fullStorageLocation); - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStorageLocation.indexOf("//"); + if(suffixIndex >=0) { + fullStorageLocation = fullStorageLocation.substring(0, fullStorageLocation.indexOf("//")); + } if (this.getDvObject() instanceof Dataset) { fullStorageLocation = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStorageLocation; @@ -429,7 +435,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor baseUrl = System.getProperty("dataverse.files." + this.driverId + ".baseUrl"); if (baseStore == null) { - String baseDriverId = System.getProperty("dataverse.files." + driverId + ".baseStore"); + String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type"); if(dvObject instanceof Dataset) { @@ -548,4 +554,8 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } + public static String getBaseStoreIdFor(String driverId) { + return System.getProperty("dataverse.files." 
+ driverId + ".baseStore"); + } + } From a7f001c1272d2766cc2e5ea36ea69f6d0b3256fe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 21 Jun 2022 11:24:05 -0400 Subject: [PATCH 114/161] refactor to allow URL token substitution outside tools framework --- src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 53fabf05852..71d9377b282 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -112,6 +112,7 @@ public String replaceTokensWithValues(String url) { String token = matcher.group(1); ReservedWord reservedWord = ReservedWord.fromString(token); String tValue = getTokenValue(token); + logger.fine("Replacing " + reservedWord.toString() + " with " + tValue + " in " + newUrl); newUrl = newUrl.replace(reservedWord.toString(), tValue); } return newUrl; From 425b3bde06092fe676b300a5e212bcbcad9de5fe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 16:28:22 -0400 Subject: [PATCH 115/161] Apply suggestions from code review Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a43411a3934..223ff48c92f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,14 +238,14 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). 
Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. -File Storage: Using a Local Filesystem and/or Swift and/or object stores and/or trusted remote services +File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Services ------------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara5/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. -Dataverse may also be configured to reference some files (e.g. 
large and/or sensitive data) stored in a trusted remote web-accessible system. +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a trusted remote web-accessible system. The following sections describe how to set up various types of stores and how to configure for multiple stores. @@ -672,7 +672,7 @@ In addition to having the type "remote" and requiring a label, Trusted Remote St These and other available options are described in the table below. Remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity -and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in Dataverse. +and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. .. table:: :align: left From 0599ea5884d63cada4fc8306d10246e3b44c113c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 16:51:44 -0400 Subject: [PATCH 116/161] switch to hyphens per review --- doc/sphinx-guides/source/installation/config.rst | 8 ++++---- .../iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 223ff48c92f..05019ea5230 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -681,13 +681,13 @@ and/or managing access to a secure enclave. For specific remote stores, consult JVM Option Value Description Default value =========================================== ================== ========================================================================== ============= dataverse.files..type ``remote`` **Required** to mark this storage as remote. 
(none) - dataverse.files..label **Required** label to be shown in the UI for this storage (none) - dataverse.files..baseUrl **Required** All files must have URLs of the form /* (none) - dataverse.files..baseStore **Required** The id of a base store (of type file, s3, or swift) (none) + dataverse.files..label **Required** label to be shown in the UI for this storage. (none) + dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (none) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` dataverse.files..secretKey A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 - dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional (none) + dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) =========================================== ================== ========================================================================== ============= diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 7c70c6b867f..633237cc5d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -432,7 +432,7 @@ int getUrlExpirationMinutes() { } private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - baseUrl = System.getProperty("dataverse.files." 
+ this.driverId + ".baseUrl"); + baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); @@ -555,7 +555,7 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } public static String getBaseStoreIdFor(String driverId) { - return System.getProperty("dataverse.files." + driverId + ".baseStore"); + return System.getProperty("dataverse.files." + driverId + ".base-store"); } } From d63650908a62beaf66669bbd75ca24441d9da4f9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 16:57:01 -0400 Subject: [PATCH 117/161] reduce variations on trusted remote store --- doc/sphinx-guides/source/installation/config.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 05019ea5230..59267b77465 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,14 +238,14 @@ As for the "Remote only" authentication mode, it means that: - ``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. 
Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. -File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Services -------------------------------------------------------------------------------------------------------- +File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores +----------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara5/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. -A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a trusted remote web-accessible system. +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web-accessible trusted remote store. 
The following sections describe how to set up various types of stores and how to configure for multiple stores. @@ -671,7 +671,7 @@ Trusted Remote Storage In addition to having the type "remote" and requiring a label, Trusted Remote Stores are defined in terms of a baseURL - all files managed by this store must be at a path starting with this URL, and a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). These and other available options are described in the table below. -Remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity +Trusted remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. .. table:: From adc16f73390e2ee0da8067da63db758a28e70eee Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 4 Aug 2022 18:20:37 -0400 Subject: [PATCH 118/161] add signer tests, flip param order so sign/validate match, fix val bug --- .../iq/dataverse/util/UrlSignerUtil.java | 11 ++-- .../iq/dataverse/util/UrlSignerUtilTest.java | 50 +++++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java index 8f53799cb98..b11334520e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/UrlSignerUtil.java @@ -53,14 +53,15 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin } if (method != null) { signedUrl.append(firstParam ? "?" 
: "&").append("method=").append(method); + firstParam=false; } - signedUrl.append("&token="); + signedUrl.append(firstParam ? "?" : "&").append("token="); logger.fine("String to sign: " + signedUrl.toString() + ""); signedUrl.append(DigestUtils.sha512Hex(signedUrl.toString() + key)); logger.fine("Generated Signed URL: " + signedUrl.toString()); if (logger.isLoggable(Level.FINE)) { logger.fine( - "URL signature is " + (isValidUrl(signedUrl.toString(), method, user, key) ? "valid" : "invalid")); + "URL signature is " + (isValidUrl(signedUrl.toString(), user, method, key) ? "valid" : "invalid")); } return signedUrl.toString(); } @@ -86,7 +87,7 @@ public static String signUrl(String baseUrl, Integer timeout, String user, Strin * the URL is only for user B) the url has expired (was used after the * until timestamp) */ - public static boolean isValidUrl(String signedUrl, String method, String user, String key) { + public static boolean isValidUrl(String signedUrl, String user, String method, String key) { boolean valid = true; try { URL url = new URL(signedUrl); @@ -114,7 +115,7 @@ public static boolean isValidUrl(String signedUrl, String method, String user, S } } - int index = signedUrl.indexOf("&token="); + int index = signedUrl.indexOf(((dateString==null && allowedMethod==null && allowedUser==null) ? 
"?":"&") + "token="); // Assuming the token is last - doesn't have to be, but no reason for the URL // params to be rearranged either, and this should only cause false negatives if // it does happen @@ -134,7 +135,7 @@ public static boolean isValidUrl(String signedUrl, String method, String user, S logger.fine("Method doesn't match"); valid = false; } - if (user != null && user.equals(allowedUser)) { + if (user != null && !user.equals(allowedUser)) { logger.fine("User doesn't match"); valid = false; } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java new file mode 100644 index 00000000000..2b9d507758f --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlSignerUtilTest.java @@ -0,0 +1,50 @@ +package edu.harvard.iq.dataverse.util; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.junit.Test; + +public class UrlSignerUtilTest { + + @Test + public void testSignAndValidate() { + + final String url = "http://localhost:8080/api/test1"; + final String get = "GET"; + final String post = "POST"; + + final String user1 = "Alice"; + final String user2 = "Bob"; + final int tooQuickTimeout = -1; + final int longTimeout = 1000; + final String key = "abracadabara open sesame"; + final String badkey = "abracadabara open says me"; + + Logger.getLogger(UrlSignerUtil.class.getName()).setLevel(Level.FINE); + + String signedUrl1 = UrlSignerUtil.signUrl(url, longTimeout, user1, get, key); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, user1, get, key)); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, user1, null, key)); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl1, null, get, key)); + + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, null, get, badkey)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, user2, get, key)); + 
assertFalse(UrlSignerUtil.isValidUrl(signedUrl1, user1, post, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), user1, get, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), user2, get, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl1.replace(user1, user2), null, get, key)); + + String signedUrl2 = UrlSignerUtil.signUrl(url, null, null, null, key); + assertTrue(UrlSignerUtil.isValidUrl(signedUrl2, null, null, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl2, null, post, key)); + assertFalse(UrlSignerUtil.isValidUrl(signedUrl2, user1, null, key)); + + String signedUrl3 = UrlSignerUtil.signUrl(url, tooQuickTimeout, user1, get, key); + + assertFalse(UrlSignerUtil.isValidUrl(signedUrl3, user1, get, key)); + } +} From 9397a583c6b6b72fc8037d7c305a083093f68703 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 13:13:42 -0400 Subject: [PATCH 119/161] update secret-key, cleanup --- doc/sphinx-guides/source/installation/config.rst | 2 +- .../iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 59267b77465..1bbc601da4b 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -685,7 +685,7 @@ and/or managing access to a secure enclave. For specific remote stores, consult dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (none) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` - dataverse.files..secretKey A key used to sign download requests sent to the remote store. Optional. 
(none) + dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 633237cc5d2..13bb718dc6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -188,7 +188,7 @@ private long getSizeFromHttpHeader() { } finally { EntityUtils.consume(response.getEntity()); } - } catch (Exception e) { + } catch (IOException e) { logger.warning(e.getMessage()); } return size; @@ -403,7 +403,7 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary // ToDo - support remote auxiliary Files if (auxiliaryTag == null) { - String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secretkey"); + String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key"); if (secretKey == null) { return baseUrl + "/" + urlPath; } else { @@ -494,7 +494,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor try { remoteStoreUrl = new URL(System.getProperty("dataverse.files." 
+ this.driverId + ".remote-store-url")); } catch(MalformedURLException mfue) { - logger.warning("Unable to read remoteStoreUrl for driver: " + this.driverId); + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); } } From 5410b46664435facd8fcfe0e2a624795fbe176c2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 15:49:46 -0400 Subject: [PATCH 120/161] Add tests/add support for local file base store tests --- .../iq/dataverse/dataaccess/FileAccessIO.java | 8 +- .../dataaccess/RemoteOverlayAccessIO.java | 4 +- .../dataaccess/RemoteOverlayAccessIOTest.java | 104 ++++++++++++++++++ 3 files changed, 108 insertions(+), 8 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 4eee72b95d9..44722157804 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -584,12 +584,8 @@ private String getDatasetDirectory() throws IOException { } - private String getFilesRootDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory"); - - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + protected String getFilesRootDirectory() { + String filesRootDirectory = System.getProperty("dataverse.files." 
+ this.driverId + ".directory", "/tmp/files"); return filesRootDirectory; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 13bb718dc6d..ebe9ec99c90 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -453,7 +453,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + fullStorageLocation; break; default: @@ -475,7 +475,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory") + "/" + + System.getProperty("dataverse.files." 
+ baseDriverId + ".directory", "/tmp/files") + "/" + fullStorageLocation; break; default: diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java new file mode 100644 index 00000000000..c85a7e6adae --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -0,0 +1,104 @@ +/* + * Copyright 2018 Forschungszentrum Jülich GmbH + * SPDX-License-Identifier: Apache 2.0 + */ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import static org.junit.jupiter.api.Assertions.*; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import java.io.IOException; +import java.nio.file.Paths; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.STRICT_STUBS) +public class RemoteOverlayAccessIOTest { + + @Mock + + private Dataset dataset; + private DataFile datafile; + private String logoPath = "resources/images/dataverse_project_logo.svg"; + private String pid = "10.5072/F2/ABCDEF"; + + @BeforeEach + public void setUp() { + System.setProperty("dataverse.files.test.type", "remote"); + System.setProperty("dataverse.files.test.label", "testOverlay"); + System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org"); + System.setProperty("dataverse.files.test.base-store", "file"); + System.setProperty("dataverse.files.test.download-redirect", "true"); + 
System.setProperty("dataverse.files.test.remote-store-name", "DemoDataCorp"); + System.setProperty("dataverse.files.test.secret-key", "12345"); // Real keys should be much longer, more random + System.setProperty("dataverse.files.file.type", "file"); + System.setProperty("dataverse.files.file.label", "default"); + datafile = MocksFactory.makeDataFile(); + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(GlobalId.parse("doi:" + pid).get()); + datafile.setOwner(dataset); + datafile.setStorageIdentifier("test://" + logoPath); + + } + + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.test.type"); + System.clearProperty("dataverse.files.test.label"); + System.clearProperty("dataverse.files.test.base-url"); + System.clearProperty("dataverse.files.test.base-store"); + System.clearProperty("dataverse.files.test.download-redirect"); + System.clearProperty("dataverse.files.test.label"); + System.clearProperty("dataverse.files.test.remote-store-name"); + System.clearProperty("dataverse.files.test.secret-key"); + System.clearProperty("dataverse.files.file.type"); + System.clearProperty("dataverse.files.file.label"); + } + + @Test + void testRemoteOverlayFile() throws IOException { + // We can read the storageIdentifier and get the driver + assertTrue(datafile.getStorageIdentifier() + .startsWith(DataAccess.getStorgageDriverFromIdentifier(datafile.getStorageIdentifier()))); + // We can get the driver type from it's ID + assertTrue(DataAccess.getDriverType("test").equals(System.getProperty("dataverse.files.test.type"))); + // When we get a StorageIO for the file, it is the right type + StorageIO storageIO = DataAccess.getStorageIO(datafile); + assertTrue(storageIO instanceof RemoteOverlayAccessIO); + // When we use it, we can get properties like the remote store name + RemoteOverlayAccessIO remoteIO = (RemoteOverlayAccessIO) storageIO; + 
assertTrue(remoteIO.getRemoteStoreName().equals(System.getProperty("dataverse.files.test.remote-store-name"))); + // And can get a temporary download URL for the main file + String signedURL = remoteIO.generateTemporaryDownloadUrl(null, null, null); + // And the URL starts with the right stuff + assertTrue(signedURL.startsWith(System.getProperty("dataverse.files.test.base-url") + "/" + logoPath)); + // And the signature is valid + assertTrue( + UrlSignerUtil.isValidUrl(signedURL, null, null, System.getProperty("dataverse.files.test.secret-key"))); + // And we get an unsigned URL with the right stuff with no key + System.clearProperty("dataverse.files.test.secret-key"); + String unsignedURL = remoteIO.generateTemporaryDownloadUrl(null, null, null); + assertTrue(unsignedURL.equals(System.getProperty("dataverse.files.test.base-url") + "/" + logoPath)); + // Once we've opened, we can get the file size (only works if the HEAD call to + // the file URL works + remoteIO.open(DataAccessOption.READ_ACCESS); + assertTrue(remoteIO.getSize() > 0); + // If we ask for the path for an aux file, it is correct + assertTrue(Paths + .get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), pid, logoPath + ".auxobject") + .equals(remoteIO.getAuxObjectAsPath("auxobject"))); + + } + +} From 626dbf46aec8e996ba0c2a7344c16c820f57cb4e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 16:10:34 -0400 Subject: [PATCH 121/161] sign even for internal access --- .../harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index ebe9ec99c90..0e18c46243f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -198,7 +198,7 @@ private 
long getSizeFromHttpHeader() { public InputStream getInputStream() throws IOException { if (super.getInputStream() == null) { try { - HttpGet get = new HttpGet(baseUrl + "/" + urlPath); + HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); int code = response.getStatusLine().getStatusCode(); From 01ad650ede8b6e081893c7b88a4272ca39cbfb30 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 5 Aug 2022 16:08:07 -0400 Subject: [PATCH 122/161] add an API test for local dev/testing #7324 --- .../iq/dataverse/api/RemoteStoreIT.java | 76 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 10 +++ 2 files changed, 86 insertions(+) create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java diff --git a/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java b/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java new file mode 100644 index 00000000000..45c6462dab0 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/RemoteStoreIT.java @@ -0,0 +1,76 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.response.Response; +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.OK; +import org.junit.BeforeClass; +import org.junit.Test; + +public class RemoteStoreIT { + + @BeforeClass + public static void setUp() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testRemoteStore() { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + 
UtilIT.makeSuperUser(username).then().assertThat().statusCode(OK.getStatusCode()); + + Response createUserNoPrivs = UtilIT.createRandomUser(); + createUserNoPrivs.then().assertThat().statusCode(OK.getStatusCode()); + String apiTokenNoPrivs = UtilIT.getApiTokenFromResponse(createUserNoPrivs); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + /** + * Note that you must configure various JVM options for this to work: + * + * -Ddataverse.files.trsa.type=remote + * -Ddataverse.files.trsa.label=trsa + * -Ddataverse.files.trsa.base-url=https://qdr.syr.edu + * -Ddataverse.files.trsa.base-store=file + * -Ddataverse.files.trsa.secretkey=12345 + * -Ddataverse.files.trsa.url-expiration-minutes=120 + * + * (and probably download-redirect) + */ + JsonObjectBuilder remoteFileJson = Json.createObjectBuilder() + .add("description", "A remote image.") + .add("storageIdentifier", "trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png") + .add("checksumType", "MD5") + .add("md5Hash", "509ef88afa907eaf2c17c1c8d8fde77e") + .add("label", "testlogo.png") + .add("fileName", "testlogo.png") + .add("mimeType", "image/png"); + + Response addRemoteFile = UtilIT.addRemoteFile(datasetId.toString(), remoteFileJson.build().toString(), apiToken); + System.setProperty(apiToken, username); + addRemoteFile.prettyPrint(); + addRemoteFile.then().assertThat() + .statusCode(OK.getStatusCode()); + + } + +} 
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index c791ce72f41..5b8048a391f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -680,6 +680,16 @@ static Response uploadFileViaNative(String datasetId, String pathToFile, String return requestSpecification.post("/api/datasets/" + datasetId + "/add"); } + static Response addRemoteFile(String datasetId, String jsonAsString, String apiToken) { + RequestSpecification requestSpecification = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .multiPart("datasetId", datasetId); + if (jsonAsString != null) { + requestSpecification.multiPart("jsonData", jsonAsString); + } + return requestSpecification.post("/api/datasets/" + datasetId + "/add"); + } + static Response uploadAuxFile(Long fileId, String pathToFile, String formatTag, String formatVersion, String mimeType, boolean isPublic, String type, String apiToken) { String nullOrigin = null; return uploadAuxFile(fileId, pathToFile, formatTag, formatVersion, mimeType, isPublic, type, nullOrigin, apiToken); From 2ab62464d7f29b98ba6c412e099e0fb1f9c41ce2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 16:19:50 -0400 Subject: [PATCH 123/161] Capitalize FileDetailsHolder, reformat for reviewDog --- .../iq/dataverse/DatasetServiceBean.java | 2 +- .../iq/dataverse/globus/AccessToken.java | 37 +- ...ailsHolder.java => FileDetailsHolder.java} | 10 +- .../dataverse/globus/GlobusServiceBean.java | 787 +++++++++--------- .../edu/harvard/iq/dataverse/globus/Task.java | 5 +- .../iq/dataverse/util/json/JsonPrinter.java | 4 +- 6 files changed, 432 insertions(+), 413 deletions(-) rename src/main/java/edu/harvard/iq/dataverse/globus/{fileDetailsHolder.java => FileDetailsHolder.java} (69%) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java 
b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 34cc637ae14..2aa81ff4bdb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -20,7 +20,7 @@ import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.globus.Task; -import edu.harvard.iq.dataverse.globus.fileDetailsHolder; +import edu.harvard.iq.dataverse.globus.FileDetailsHolder; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java index 2d68c5c8839..877fc68e4a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -2,7 +2,6 @@ import java.util.ArrayList; - public class AccessToken implements java.io.Serializable { private String accessToken; @@ -15,23 +14,41 @@ public class AccessToken implements java.io.Serializable { private String refreshToken; private ArrayList otherTokens; - public String getAccessToken() { return accessToken; } + public String getAccessToken() { + return accessToken; + } - String getIdToken() { return idToken; } + String getIdToken() { + return idToken; + } - Long getExpiresIn() { return expiresIn; } + Long getExpiresIn() { + return expiresIn; + } - String getResourceServer() { return resourceServer; } + String getResourceServer() { + return resourceServer; + } - String getTokenType() { return tokenType; } + String getTokenType() { + return tokenType; + } - String getState() { return state; } + String getState() { + return state; + } - String getScope() {return scope; } + String getScope() { + return scope; + } - String 
getRefreshToken() { return refreshToken; } + String getRefreshToken() { + return refreshToken; + } - ArrayList getOtherTokens() { return otherTokens; } + ArrayList getOtherTokens() { + return otherTokens; + } public void setAccessToken(String accessToken) { this.accessToken = accessToken; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/fileDetailsHolder.java b/src/main/java/edu/harvard/iq/dataverse/globus/FileDetailsHolder.java similarity index 69% rename from src/main/java/edu/harvard/iq/dataverse/globus/fileDetailsHolder.java rename to src/main/java/edu/harvard/iq/dataverse/globus/FileDetailsHolder.java index fac1192d054..0b8373cba09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/fileDetailsHolder.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/FileDetailsHolder.java @@ -1,18 +1,16 @@ package edu.harvard.iq.dataverse.globus; - - -public class fileDetailsHolder { +public class FileDetailsHolder { private String hash; private String mime; private String storageID; - public fileDetailsHolder(String id, String hash, String mime) { + public FileDetailsHolder(String id, String hash, String mime) { this.storageID = id; - this.hash = hash ; - this.mime = mime ; + this.hash = hash; + this.mime = mime; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index c9a2b622eef..74be123027e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -54,10 +54,9 @@ import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; - @Stateless @Named("GlobusServiceBean") -public class GlobusServiceBean implements java.io.Serializable{ +public class GlobusServiceBean implements java.io.Serializable { @EJB protected DatasetServiceBean datasetSvc; @@ -73,7 +72,7 @@ public class GlobusServiceBean implements 
java.io.Serializable{ @EJB EjbDataverseEngine commandEngine; - + @EJB UserNotificationServiceBean userNotificationService; @@ -108,18 +107,20 @@ public void setUserTransferToken(String userTransferToken) { this.userTransferToken = userTransferToken; } - ArrayList checkPermisions( AccessToken clientTokenUser, String directory, String globusEndpoint, String principalType, String principal) throws MalformedURLException { + ArrayList checkPermisions(AccessToken clientTokenUser, String directory, String globusEndpoint, + String principalType, String principal) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); ArrayList ids = new ArrayList(); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); - for (int i = 0; i< al.getDATA().size(); i++) { + for (int i = 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); - if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory )) && pr.getPrincipalType().equals(principalType) && - ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) ) { + if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory)) + && pr.getPrincipalType().equals(principalType) + && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { ids.add(pr.getId()); } else { logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType()); @@ -131,27 +132,30 @@ ArrayList checkPermisions( AccessToken clientTokenUser, String director return ids; } - public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { + public void 
updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) + throws MalformedURLException { if (directory != null && !directory.equals("")) { - directory = directory + "/"; + directory = directory + "/"; } logger.info("Start updating permissions." + " Directory is " + directory); String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, null); + ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, null); logger.info("Size of rules " + rules.size()); int count = 0; while (count < rules.size()) { - logger.info("Start removing rules " + rules.get(count) ); + logger.info("Start removing rules " + rules.get(count)); Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPermissions(perm); permissions.setPath(directory); Gson gson = new GsonBuilder().create(); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + rules.get(count)); - logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + rules.get(count)); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + + rules.get(count)); + logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + + rules.get(count)); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(),"PUT", gson.toJson(permissions)); + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", gson.toJson(permissions)); if (result.status != 200) { logger.warning("Cannot update access rule " + rules.get(count)); } else { @@ -163,12 +167,13 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin public void 
deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { - if(ruleId.length() > 0 ) { + if (ruleId.length() > 0) { AccessToken clientTokenUser = getClientToken(); globusLogger.info("Start deleting permissions."); String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); + URL url = new URL( + "https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); MakeRequestResponse result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null); if (result.status != 200) { @@ -180,26 +185,25 @@ public void deletePermision(String ruleId, Logger globusLogger) throws Malformed } - public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, String directory, String globusEndpoint) throws MalformedURLException { - - ArrayList rules = checkPermisions( clientTokenUser, directory, globusEndpoint, principalType, principal); - + public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, + String directory, String globusEndpoint) throws MalformedURLException { + ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, principal); Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); - permissions.setPath(directory + "/" ); + permissions.setPath(directory + "/"); permissions.setPermissions(perm); Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; if (rules.size() == 0) { logger.info("Start creating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/"+ globusEndpoint + "/access"); - result = makeRequest(url, "Bearer", - 
clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", gson.toJson(permissions)); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access"); + result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", + gson.toJson(permissions)); if (result.status == 400) { logger.severe("Path " + permissions.getPath() + " is not valid"); @@ -210,9 +214,10 @@ public int givePermission(String principalType, String principal, String perm, A return result.status; } else { logger.info("Start Updating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/"+ globusEndpoint + "/access/" + rules.get(0)); - result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", gson.toJson(permissions)); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + + rules.get(0)); + result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", + gson.toJson(permissions)); if (result.status == 400) { logger.severe("Path " + permissions.getPath() + " is not valid"); @@ -225,26 +230,27 @@ public int givePermission(String principalType, String principal, String perm, A return result.status; } - public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId ) throws MalformedURLException { + public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId+"/successful_transfers"); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId + + "/successful_transfers"); - MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), - "GET", null); + MakeRequestResponse result = makeRequest(url, 
"Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); if (result.status == 200) { - logger.info(" SUCCESS ====== " ); + logger.info(" SUCCESS ====== "); return true; } return false; } - public Task getTask(AccessToken clientTokenUser, String taskId , Logger globusLogger) throws MalformedURLException { + public Task getTask(AccessToken clientTokenUser, String taskId, Logger globusLogger) throws MalformedURLException { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/"+taskId ); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); - MakeRequestResponse result = makeRequest(url, "Bearer",clientTokenUser.getOtherTokens().get(0).getAccessToken(), - "GET", null); + MakeRequestResponse result = makeRequest(url, "Bearer", + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); Task task = null; @@ -252,7 +258,8 @@ public Task getTask(AccessToken clientTokenUser, String taskId , Logger globusLo task = parseJson(result.jsonResponse, Task.class, false); } if (result.status != 200) { - globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + result.jsonResponse.toString()); + globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + + result.jsonResponse.toString()); } return task; @@ -260,10 +267,10 @@ public Task getTask(AccessToken clientTokenUser, String taskId , Logger globusLo public AccessToken getClientToken() throws MalformedURLException { String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - URL url = new URL("https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); + URL url = new URL( + "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); 
- MakeRequestResponse result = makeRequest(url, "Basic", - basicGlobusToken,"POST", null); + MakeRequestResponse result = makeRequest(url, "Basic", basicGlobusToken, "POST", null); AccessToken clientTokenUser = null; if (result.status == 200) { clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); @@ -271,7 +278,8 @@ public AccessToken getClientToken() throws MalformedURLException { return clientTokenUser; } - public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGlobusToken ) throws UnsupportedEncodingException, MalformedURLException { + public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGlobusToken) + throws UnsupportedEncodingException, MalformedURLException { String serverName = origRequest.getServerName(); if (serverName.equals("localhost")) { logger.severe("Changing localhost to utoronto"); @@ -286,7 +294,7 @@ public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGl + "&grant_type=authorization_code"); logger.info(url.toString()); - MakeRequestResponse result = makeRequest(url, "Basic", basicGlobusToken,"POST", null); + MakeRequestResponse result = makeRequest(url, "Basic", basicGlobusToken, "POST", null); AccessToken accessTokenUser = null; if (result.status == 200) { @@ -299,17 +307,19 @@ public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGl } - - public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { + public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + String jsonString) { String str = null; HttpURLConnection connection = null; int status = 0; try { connection = (HttpURLConnection) url.openConnection(); - //Basic NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 + // Basic + // 
NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 logger.info(authType + " " + authCode); connection.setRequestProperty("Authorization", authType + " " + authCode); - //connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + // connection.setRequestProperty("Content-Type", + // "application/x-www-form-urlencoded"); connection.setRequestMethod(method); if (jsonString != null) { connection.setRequestProperty("Content-Type", "application/json"); @@ -414,6 +424,7 @@ public String getDirectory(String datasetId) { class MakeRequestResponse { public String jsonResponse; public int status; + MakeRequestResponse(String jsonResponse, int status) { this.jsonResponse = jsonResponse; this.status = status; @@ -421,25 +432,28 @@ class MakeRequestResponse { } - private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) throws MalformedURLException { - URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint +"/ls?path=" + directory + "/"); + private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) + throws MalformedURLException { + URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/ls?path=" + + directory + "/"); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(),"GET", null); + clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); logger.info("find directory status:" + result.status); return result; } - public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedEncodingException, MalformedURLException { + public boolean giveGlobusPublicPermissions(String datasetId) + throws UnsupportedEncodingException, MalformedURLException { String globusEndpoint = 
settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); if (globusEndpoint.equals("") || basicGlobusToken.equals("")) { return false; } - //AccessToken clientTokenUser = getClientToken(basicGlobusToken); - AccessToken clientTokenUser = getClientToken( ); + // AccessToken clientTokenUser = getClientToken(basicGlobusToken); + AccessToken clientTokenUser = getClientToken(); if (clientTokenUser == null) { logger.severe("Cannot get client token "); return false; @@ -452,26 +466,22 @@ public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedE if (status.status == 200) { - /* FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); - ArrayList files = fl.getDATA(); - if (files != null) { - for (FileG file: files) { - if (!file.getName().contains("cached") && !file.getName().contains(".thumb")) { - int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, - directory + "/" + file.getName(), globusEndpoint); - logger.info("givePermission status " + perStatus + " for " + file.getName()); - if (perStatus == 409) { - logger.info("Permissions already exist or limit was reached for " + file.getName()); - } else if (perStatus == 400) { - logger.info("No file in Globus " + file.getName()); - } else if (perStatus != 201) { - logger.info("Cannot get permission for " + file.getName()); - } - } - } - }*/ + /* + * FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); + * ArrayList files = fl.getDATA(); if (files != null) { for (FileG file: + * files) { if (!file.getName().contains("cached") && + * !file.getName().contains(".thumb")) { int perStatus = + * givePermission("all_authenticated_users", "", "r", clientTokenUser, directory + * + "/" + file.getName(), globusEndpoint); logger.info("givePermission status " + * + perStatus + " for " + file.getName()); if (perStatus == 409) { + * 
logger.info("Permissions already exist or limit was reached for " + + * file.getName()); } else if (perStatus == 400) { + * logger.info("No file in Globus " + file.getName()); } else if (perStatus != + * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } + */ - int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, globusEndpoint); + int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, + globusEndpoint); logger.info("givePermission status " + perStatus); if (perStatus == 409) { logger.info("Permissions already exist or limit was reached"); @@ -484,55 +494,59 @@ public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedE } else if (status.status == 404) { logger.info("There is no globus directory"); - }else { - logger.severe("Cannot find directory in globus, status " + status ); + } else { + logger.severe("Cannot find directory in globus, status " + status); return false; } return true; } - //Generates the URL to launch the Globus app + // Generates the URL to launch the Globus app public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - + public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; User user = session.getUser(); - + if (user instanceof AuthenticatedUser) { apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) user); } if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { logger.fine("Created apiToken for user: " + user.getIdentifier()); - apiToken = authSvc.generateApiTokenForUser(( AuthenticatedUser) user); + apiToken = authSvc.generateApiTokenForUser((AuthenticatedUser) user); } - String storePrefix =""; + String storePrefix = ""; String driverId = d.getEffectiveStorageDriverId(); try { - storePrefix = DataAccess.getDriverPrefix(driverId); - } catch(Exception 
e) { + storePrefix = DataAccess.getDriverPrefix(driverId); + } catch (Exception e) { logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); } URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); - String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/" + (upload ? "upload":"download") + "?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"+ (df!=null ? "&fileId={fileId}":""); - return tokenUtil.replaceTokensWithValues(appUrl)+"&storePrefix=" + storePrefix; + String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + "/" + + (upload ? "upload" : "download") + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}" + + (df != null ? "&fileId={fileId}" : ""); + return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; } - @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { + public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { Integer countAll = 0; Integer countSuccess = 0; Integer countError = 0; String logTimestamp = logFormatter.format(new Date()); - Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." 
+ "GlobusUpload" + logTimestamp); - String logFileName = "../logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; + Logger globusLogger = Logger.getLogger( + "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); + String logFileName = "../logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + + ".log"; FileHandler fileHandler; boolean fileHandlerSuceeded; try { @@ -554,12 +568,10 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String datasetIdentifier = dataset.getStorageIdentifier(); - - //ToDo - use DataAccess methods? + // ToDo - use DataAccess methods? String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); - Thread.sleep(5000); JsonObject jsonObject = null; @@ -577,25 +589,26 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin try { ruleId = jsonObject.getString("ruleId"); } catch (NullPointerException npe) { - logger.warning("NPE for jsonData object" ); + logger.warning("NPE for jsonData object"); } - // globus task status check + // globus task status check Task task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - - if(ruleId.length() > 0) { + if (ruleId.length() > 0) { deletePermision(ruleId, globusLogger); } - //If success, switch to an EditInProgress lock - do this before removing the GlobusUpload lock - //Keeping a lock through the add datafiles API call avoids a conflicting edit and keeps any open dataset page refreshing until the datafile appears + // If success, switch to an EditInProgress lock - do this before removing the + // GlobusUpload lock + // Keeping a lock through the add datafiles API call avoids a conflicting edit + // and keeps any open dataset page refreshing until the datafile appears if 
(!(taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE"))) { datasetSvc.addDatasetLock(dataset, new DatasetLock(DatasetLock.Reason.EditInProgress, authUser, "Completing Globus Upload")); } - + DatasetLock gLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); if (gLock == null) { logger.log(Level.WARNING, "No lock found for dataset"); @@ -615,16 +628,16 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { - String comment = "Reason : " + taskStatus.split("#") [1] + "
    Short Description : " + taskStatus.split("#")[2]; - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); + String comment = "Reason : " + taskStatus.split("#")[1] + "
    Short Description : " + + taskStatus.split("#")[2]; + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); globusLogger.info("Globus task failed "); - - } - else { + } else { try { // - + List inputList = new ArrayList(); JsonArray filesJsonArray = jsonObject.getJsonArray("files"); @@ -632,13 +645,14 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from externalTool + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from + // externalTool String storageIdentifier = fileJsonObject.getString("storageIdentifier"); String[] bits = storageIdentifier.split(":"); String bucketName = bits[1].replace("/", ""); String fileId = bits[bits.length - 1]; - // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; String fileName = fileJsonObject.getString("fileName"); @@ -665,19 +679,23 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin .filter(Objects::nonNull).collect(Collectors.toList()); if (newfileJsonObject != null) { - if ( !newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { - JsonPatch path = Json.createPatchBuilder().add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder() + .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); fileJsonObject = path.apply(fileJsonObject); - path = Json.createPatchBuilder().add("/mimeType", 
newfileJsonObject.get(0).getString("mime")).build(); + path = Json.createPatchBuilder() + .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); fileJsonObject = path.apply(fileJsonObject); jsonDataSecondAPI.add(fileJsonObject); countSuccess++; } else { - globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); + globusLogger.info(fileName + + " will be skipped from adding to dataset by second API due to missing values "); countError++; } } else { - globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); + globusLogger.info(fileName + + " will be skipped from adding to dataset by second API due to missing values "); countError++; } } @@ -686,22 +704,28 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin globusLogger.info("Successfully generated new JsonData for Second API call"); - - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; + String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; System.out.println("*******====command ==== " + command); String output = addFilesAsync(command, globusLogger); if (output.equalsIgnoreCase("ok")) { - //if(!taskSkippedFiles) - if (countError == 0 ){ - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, dataset.getId(), countSuccess + " files added out of "+ countAll , true); - } - else { - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), 
UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), countSuccess + " files added out of "+ countAll , true); + // if(!taskSkippedFiles) + if (countError == 0) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, + dataset.getId(), countSuccess + " files added out of " + countAll, true); + } else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), + countSuccess + " files added out of " + countAll, true); } globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); } else { - globusLogger.log(Level.SEVERE, "******* Error while executing api/datasets/:persistentId/add call ", command); + globusLogger.log(Level.SEVERE, + "******* Error while executing api/datasets/:persistentId/add call ", command); } } @@ -724,35 +748,31 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin } } - - public String addFilesAsync(String curlCommand, Logger globusLogger) throws ExecutionException, InterruptedException { - CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { + public String addFilesAsync(String curlCommand, Logger globusLogger) + throws ExecutionException, InterruptedException { + CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { try { Thread.sleep(2000); } catch (InterruptedException e) { e.printStackTrace(); } - return (addFiles(curlCommand, globusLogger)); + return (addFiles(curlCommand, globusLogger)); }, executor).exceptionally(ex -> { - globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); + globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); ex.printStackTrace(); return null; }); String result = addFilesFuture.get(); - return result ; + return result; } - - - - private 
String addFiles(String curlCommand, Logger globusLogger) - { + private String addFiles(String curlCommand, Logger globusLogger) { ProcessBuilder processBuilder = new ProcessBuilder(); Process process = null; String line; - String status = ""; + String status = ""; try { globusLogger.info("Call to : " + curlCommand); @@ -760,10 +780,11 @@ private String addFiles(String curlCommand, Logger globusLogger) process = processBuilder.start(); process.waitFor(); - BufferedReader br=new BufferedReader(new InputStreamReader(process.getInputStream())); + BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream())); StringBuilder sb = new StringBuilder(); - while((line=br.readLine())!=null) sb.append(line); + while ((line = br.readLine()) != null) + sb.append(line); globusLogger.info(" API Output : " + sb.toString()); JsonObject jsonObject = null; try (StringReader rdr = new StringReader(sb.toString())) { @@ -773,12 +794,12 @@ private String addFiles(String curlCommand, Logger globusLogger) globusLogger.log(Level.SEVERE, "Error parsing dataset json."); } - status = jsonObject.getString("status"); - } catch (Exception ex) { - globusLogger.log(Level.SEVERE, "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + status = jsonObject.getString("status"); + } catch (Exception ex) { + globusLogger.log(Level.SEVERE, + "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); } - return status; } @@ -786,9 +807,11 @@ private String addFiles(String curlCommand, Logger globusLogger) public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { String logTimestamp = logFormatter.format(new Date()); - Logger globusLogger = Logger.getLogger("edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusDownload" + logTimestamp); + Logger globusLogger = Logger.getLogger( + "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." 
+ "GlobusDownload" + logTimestamp); - String logFileName = "../logs" + File.separator + "globusDownload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; + String logFileName = "../logs" + File.separator + "globusDownload_id_" + dataset.getId() + "_" + logTimestamp + + ".log"; FileHandler fileHandler; boolean fileHandlerSuceeded; try { @@ -821,39 +844,40 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro try { jsonObject.getString("ruleId"); - }catch (NullPointerException npe){ + } catch (NullPointerException npe) { } - // globus task status check - Task task = globusStatusCheck(taskIdentifier,globusLogger); + // globus task status check + Task task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - - if(ruleId.length() > 0) { + + if (ruleId.length() > 0) { deletePermision(ruleId, globusLogger); } - if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { - String comment = "Reason : " + taskStatus.split("#") [1] + "
    Short Description : " + taskStatus.split("#")[2]; - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(),comment, true); + String comment = "Reason : " + taskStatus.split("#")[1] + "
    Short Description : " + + taskStatus.split("#")[2]; + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); globusLogger.info("Globus task failed during download process"); - } - else { - boolean taskSkippedFiles = (task.getSkip_source_errors()==null) ? false : task.getSkip_source_errors(); - if(!taskSkippedFiles) { - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETED, dataset.getId()); - } - else { - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(), ""); + } else { + boolean taskSkippedFiles = (task.getSkip_source_errors() == null) ? false : task.getSkip_source_errors(); + if (!taskSkippedFiles) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETED, + dataset.getId()); + } else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, + dataset.getId(), ""); } } } - Executor executor = Executors.newFixedThreadPool(10); - private Task globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { boolean taskCompletion = false; String status = ""; @@ -901,7 +925,7 @@ private Task globusStatusCheck(String taskId, Logger globusLogger) throws Malfor globusLogger.info("globus transfer task completed successfully"); return task; } - + private String getTaskStatus(Task task) { String status = null; if (task != null) { @@ -922,20 +946,18 @@ private String getTaskStatus(Task task) { } return status; } - - - public JsonObject 
calculateMissingMetadataFields(List inputList, Logger globusLogger) throws InterruptedException, ExecutionException, IOException { + public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) + throws InterruptedException, ExecutionException, IOException { - List> hashvalueCompletableFutures = - inputList.stream().map(iD -> calculateDetailsAsync(iD,globusLogger)).collect(Collectors.toList()); + List> hashvalueCompletableFutures = inputList.stream() + .map(iD -> calculateDetailsAsync(iD, globusLogger)).collect(Collectors.toList()); CompletableFuture allFutures = CompletableFuture .allOf(hashvalueCompletableFutures.toArray(new CompletableFuture[hashvalueCompletableFutures.size()])); - CompletableFuture> allCompletableFuture = allFutures.thenApply(future -> { - return hashvalueCompletableFutures.stream() - .map(completableFuture -> completableFuture.join()) + CompletableFuture> allCompletableFuture = allFutures.thenApply(future -> { + return hashvalueCompletableFutures.stream().map(completableFuture -> completableFuture.join()) .collect(Collectors.toList()); }); @@ -951,17 +973,17 @@ public JsonObject calculateMissingMetadataFields(List inputList, Logger } - private CompletableFuture calculateDetailsAsync(String id, Logger globusLogger) { - //logger.info(" calcualte additional details for these globus id ==== " + id); + private CompletableFuture calculateDetailsAsync(String id, Logger globusLogger) { + // logger.info(" calcualte additional details for these globus id ==== " + id); - return CompletableFuture.supplyAsync( () -> { + return CompletableFuture.supplyAsync(() -> { try { Thread.sleep(2000); } catch (InterruptedException e) { e.printStackTrace(); } try { - return ( calculateDetails(id,globusLogger) ); + return (calculateDetails(id, globusLogger)); } catch (InterruptedException | IOException e) { e.printStackTrace(); } @@ -971,8 +993,8 @@ private CompletableFuture calculateDetailsAsync(String id, Lo }); } - - private fileDetailsHolder 
calculateDetails(String id, Logger globusLogger) throws InterruptedException, IOException { + private FileDetailsHolder calculateDetails(String id, Logger globusLogger) + throws InterruptedException, IOException { int count = 0; String checksumVal = ""; InputStream in = null; @@ -980,8 +1002,8 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw String fullPath = id.split("IDsplit")[1]; String fileName = id.split("IDsplit")[2]; - //ToDo: what if the file doesnot exists in s3 - //ToDo: what if checksum calculation failed + // ToDo: what if the file doesnot exists in s3 + // ToDo: what if checksum calculation failed do { try { @@ -989,11 +1011,12 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw in = dataFileStorageIO.getInputStream(); checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); count = 3; - }catch (IOException ioex) { + } catch (IOException ioex) { count = 3; logger.info(ioex.getMessage()); - globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath + ") does not appear to be an S3 object associated with driver: " ); - }catch (Exception ex) { + globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath + + ") does not appear to be an S3 object associated with driver: "); + } catch (Exception ex) { count = count + 1; ex.printStackTrace(); logger.info(ex.getMessage()); @@ -1002,14 +1025,15 @@ private fileDetailsHolder calculateDetails(String id, Logger globusLogger) throw } while (count < 3); - if(checksumVal.length() == 0 ) { + if (checksumVal.length() == 0) { checksumVal = "NULL"; } String mimeType = calculatemime(fileName); - globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = " + checksumVal + " mimeType = " + mimeType); - return new fileDetailsHolder(fileId, checksumVal, mimeType); - //getBytes(in)+"" ); + globusLogger.info(" File Name " + fileName + " File Details " + fileId + " checksum = " + checksumVal + + " mimeType = " + 
mimeType); + return new FileDetailsHolder(fileId, checksumVal, mimeType); + // getBytes(in)+"" ); // calculatemime(fileName)); } @@ -1027,214 +1051,193 @@ public String calculatemime(String fileName) throws InterruptedException { return finalType; } /* - public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) throws MalformedURLException { - - logger.info("=====Tasklist == dataset id :" + dataset.getId()); - String directory = null; - - try { - - List fileMetadatas = new ArrayList<>(); - - StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - - - - DatasetVersion workingVersion = dataset.getEditVersion(); - - if (workingVersion.getCreateTime() != null) { - workingVersion.setCreateTime(new Timestamp(new Date().getTime())); - } - - directory = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - - System.out.println("======= directory ==== " + directory + " ==== datasetId :" + dataset.getId()); - Map checksumMapOld = new HashMap<>(); - - Iterator fmIt = workingVersion.getFileMetadatas().iterator(); - - while (fmIt.hasNext()) { - FileMetadata fm = fmIt.next(); - if (fm.getDataFile() != null && fm.getDataFile().getId() != null) { - String chksum = fm.getDataFile().getChecksumValue(); - if (chksum != null) { - checksumMapOld.put(chksum, 1); - } - } - } - - List dFileList = new ArrayList<>(); - boolean update = false; - for (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - - String s3ObjectKey = s3ObjectSummary.getKey(); - - - String t = s3ObjectKey.replace(directory, ""); - - if (t.indexOf(".") > 0) { - long totalSize = s3ObjectSummary.getSize(); - String filePath = s3ObjectKey; - String fileName = filePath.split("/")[filePath.split("/").length - 1]; - String fullPath = datasetSIO.getStorageLocation() + "/" + fileName; - - logger.info("Full path " + fullPath); - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - InputStream in = dataFileStorageIO.getInputStream(); - - 
String checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - //String checksumVal = s3ObjectSummary.getETag(); - logger.info("The checksum is " + checksumVal); - if ((checksumMapOld.get(checksumVal) != null)) { - logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == file already exists "); - } else if (filePath.contains("cached") || filePath.contains(".thumb")) { - logger.info(filePath + " is ignored"); - } else { - update = true; - logger.info("datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == new file "); - try { - - DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); //MIME_TYPE_GLOBUS - datafile.setModificationTime(new Timestamp(new Date().getTime())); - datafile.setCreateDate(new Timestamp(new Date().getTime())); - datafile.setPermissionModificationTime(new Timestamp(new Date().getTime())); - - FileMetadata fmd = new FileMetadata(); - - - fmd.setLabel(fileName); - fmd.setDirectoryLabel(filePath.replace(directory, "").replace(File.separator + fileName, "")); - - fmd.setDataFile(datafile); - - datafile.getFileMetadatas().add(fmd); - - FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); - logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " + filePath + " == added to datafile, filemetadata "); - - try { - // We persist "SHA1" rather than "SHA-1". 
- //datafile.setChecksumType(DataFile.ChecksumType.SHA1); - datafile.setChecksumType(DataFile.ChecksumType.MD5); - datafile.setChecksumValue(checksumVal); - } catch (Exception cksumEx) { - logger.info("==== datasetId :" + dataset.getId() + "======Could not calculate checksumType signature for the new file "); - } - - datafile.setFilesize(totalSize); - - dFileList.add(datafile); - - } catch (Exception ioex) { - logger.info("datasetId :" + dataset.getId() + "======Failed to process and/or save the file " + ioex.getMessage()); - return false; - - } - } - } - } - if (update) { - - List filesAdded = new ArrayList<>(); - - if (dFileList != null && dFileList.size() > 0) { - - // Dataset dataset = version.getDataset(); - - for (DataFile dataFile : dFileList) { - - if (dataFile.getOwner() == null) { - dataFile.setOwner(dataset); - - workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); - dataFile.getFileMetadata().setDatasetVersion(workingVersion); - dataset.getFiles().add(dataFile); - - } - - filesAdded.add(dataFile); - - } - - logger.info("==== datasetId :" + dataset.getId() + " ===== Done! 
Finished saving new files to the dataset."); - } - - fileMetadatas.clear(); - for (DataFile addedFile : filesAdded) { - fileMetadatas.add(addedFile.getFileMetadata()); - } - filesAdded = null; - - if (workingVersion.isDraft()) { - - logger.info("Async: ==== datasetId :" + dataset.getId() + " ==== inside draft version "); - - Timestamp updateTime = new Timestamp(new Date().getTime()); - - workingVersion.setLastUpdateTime(updateTime); - dataset.setModificationTime(updateTime); - - - for (FileMetadata fileMetadata : fileMetadatas) { - - if (fileMetadata.getDataFile().getCreateDate() == null) { - fileMetadata.getDataFile().setCreateDate(updateTime); - fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); - } - fileMetadata.getDataFile().setModificationTime(updateTime); - } - - - } else { - logger.info("datasetId :" + dataset.getId() + " ==== inside released version "); - - for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { - for (FileMetadata fileMetadata : fileMetadatas) { - if (fileMetadata.getDataFile().getStorageIdentifier() != null) { - - if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion.getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { - workingVersion.getFileMetadatas().set(i, fileMetadata); - } - } - } - } - - - } - - - try { - Command cmd; - logger.info("Async: ==== datasetId :" + dataset.getId() + " ======= UpdateDatasetVersionCommand START in globus function "); - cmd = new UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, (HttpServletRequest) null)); - ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); - //new DataverseRequest(authenticatedUser, (HttpServletRequest) null) - //dvRequestService.getDataverseRequest() - commandEngine.submit(cmd); - } catch (CommandException ex) { - logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + "======CommandException updating DatasetVersion from batch job: " + ex.getMessage()); - return false; - } - - 
logger.info("==== datasetId :" + dataset.getId() + " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); - - //return true; - } - - } catch (Exception e) { - String message = e.getMessage(); - - logger.info("==== datasetId :" + dataset.getId() + " ======= GLOBUS CALL Exception ============== " + message); - e.printStackTrace(); - return false; - //return error(Response.Status.INTERNAL_SERVER_ERROR, "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. Message was '" + message + "'."); - } - - String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - AccessToken clientTokenUser = getClientToken(basicGlobusToken); - updatePermision(clientTokenUser, directory, "identity", "r"); - return true; - } - -*/ + * public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) + * throws MalformedURLException { + * + * logger.info("=====Tasklist == dataset id :" + dataset.getId()); String + * directory = null; + * + * try { + * + * List fileMetadatas = new ArrayList<>(); + * + * StorageIO datasetSIO = DataAccess.getStorageIO(dataset); + * + * + * + * DatasetVersion workingVersion = dataset.getEditVersion(); + * + * if (workingVersion.getCreateTime() != null) { + * workingVersion.setCreateTime(new Timestamp(new Date().getTime())); } + * + * directory = dataset.getAuthorityForFileStorage() + "/" + + * dataset.getIdentifierForFileStorage(); + * + * System.out.println("======= directory ==== " + directory + + * " ==== datasetId :" + dataset.getId()); Map checksumMapOld + * = new HashMap<>(); + * + * Iterator fmIt = workingVersion.getFileMetadatas().iterator(); + * + * while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); if (fm.getDataFile() + * != null && fm.getDataFile().getId() != null) { String chksum = + * fm.getDataFile().getChecksumValue(); if (chksum != null) { + * checksumMapOld.put(chksum, 1); } } } + * + * List dFileList = new ArrayList<>(); 
boolean update = false; for + * (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { + * + * String s3ObjectKey = s3ObjectSummary.getKey(); + * + * + * String t = s3ObjectKey.replace(directory, ""); + * + * if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String + * filePath = s3ObjectKey; String fileName = + * filePath.split("/")[filePath.split("/").length - 1]; String fullPath = + * datasetSIO.getStorageLocation() + "/" + fileName; + * + * logger.info("Full path " + fullPath); StorageIO dataFileStorageIO = + * DataAccess.getDirectStorageIO(fullPath); InputStream in = + * dataFileStorageIO.getInputStream(); + * + * String checksumVal = FileUtil.calculateChecksum(in, + * DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); + * logger.info("The checksum is " + checksumVal); if + * ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + + * dataset.getId() + "======= filename ==== " + filePath + + * " == file already exists "); } else if (filePath.contains("cached") || + * filePath.contains(".thumb")) { logger.info(filePath + " is ignored"); } else + * { update = true; logger.info("datasetId :" + dataset.getId() + + * "======= filename ==== " + filePath + " == new file "); try { + * + * DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); + * //MIME_TYPE_GLOBUS datafile.setModificationTime(new Timestamp(new + * Date().getTime())); datafile.setCreateDate(new Timestamp(new + * Date().getTime())); datafile.setPermissionModificationTime(new Timestamp(new + * Date().getTime())); + * + * FileMetadata fmd = new FileMetadata(); + * + * + * fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory, + * "").replace(File.separator + fileName, "")); + * + * fmd.setDataFile(datafile); + * + * datafile.getFileMetadatas().add(fmd); + * + * FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); + * logger.info("==== datasetId :" + dataset.getId() + "======= 
filename ==== " + * + filePath + " == added to datafile, filemetadata "); + * + * try { // We persist "SHA1" rather than "SHA-1". + * //datafile.setChecksumType(DataFile.ChecksumType.SHA1); + * datafile.setChecksumType(DataFile.ChecksumType.MD5); + * datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) { + * logger.info("==== datasetId :" + dataset.getId() + + * "======Could not calculate checksumType signature for the new file "); } + * + * datafile.setFilesize(totalSize); + * + * dFileList.add(datafile); + * + * } catch (Exception ioex) { logger.info("datasetId :" + dataset.getId() + + * "======Failed to process and/or save the file " + ioex.getMessage()); return + * false; + * + * } } } } if (update) { + * + * List filesAdded = new ArrayList<>(); + * + * if (dFileList != null && dFileList.size() > 0) { + * + * // Dataset dataset = version.getDataset(); + * + * for (DataFile dataFile : dFileList) { + * + * if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); + * + * workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); + * dataFile.getFileMetadata().setDatasetVersion(workingVersion); + * dataset.getFiles().add(dataFile); + * + * } + * + * filesAdded.add(dataFile); + * + * } + * + * logger.info("==== datasetId :" + dataset.getId() + + * " ===== Done! 
Finished saving new files to the dataset."); } + * + * fileMetadatas.clear(); for (DataFile addedFile : filesAdded) { + * fileMetadatas.add(addedFile.getFileMetadata()); } filesAdded = null; + * + * if (workingVersion.isDraft()) { + * + * logger.info("Async: ==== datasetId :" + dataset.getId() + + * " ==== inside draft version "); + * + * Timestamp updateTime = new Timestamp(new Date().getTime()); + * + * workingVersion.setLastUpdateTime(updateTime); + * dataset.setModificationTime(updateTime); + * + * + * for (FileMetadata fileMetadata : fileMetadatas) { + * + * if (fileMetadata.getDataFile().getCreateDate() == null) { + * fileMetadata.getDataFile().setCreateDate(updateTime); + * fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); } + * fileMetadata.getDataFile().setModificationTime(updateTime); } + * + * + * } else { logger.info("datasetId :" + dataset.getId() + + * " ==== inside released version "); + * + * for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { for + * (FileMetadata fileMetadata : fileMetadatas) { if + * (fileMetadata.getDataFile().getStorageIdentifier() != null) { + * + * if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion. 
+ * getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { + * workingVersion.getFileMetadatas().set(i, fileMetadata); } } } } + * + * + * } + * + * + * try { Command cmd; logger.info("Async: ==== datasetId :" + + * dataset.getId() + + * " ======= UpdateDatasetVersionCommand START in globus function "); cmd = new + * UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, + * (HttpServletRequest) null)); ((UpdateDatasetVersionCommand) + * cmd).setValidateLenient(true); //new DataverseRequest(authenticatedUser, + * (HttpServletRequest) null) //dvRequestService.getDataverseRequest() + * commandEngine.submit(cmd); } catch (CommandException ex) { + * logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + + * "======CommandException updating DatasetVersion from batch job: " + + * ex.getMessage()); return false; } + * + * logger.info("==== datasetId :" + dataset.getId() + + * " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); + * + * //return true; } + * + * } catch (Exception e) { String message = e.getMessage(); + * + * logger.info("==== datasetId :" + dataset.getId() + + * " ======= GLOBUS CALL Exception ============== " + message); + * e.printStackTrace(); return false; //return + * error(Response.Status.INTERNAL_SERVER_ERROR, + * "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. 
Message was '" + * + message + "'."); } + * + * String basicGlobusToken = + * settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + * AccessToken clientTokenUser = getClientToken(basicGlobusToken); + * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } + * + */ } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java index 4b2a56a110d..59c3767d848 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/Task.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.globus; - public class Task { private String DATA_TYPE; @@ -86,6 +85,8 @@ public void setNice_status(String nice_status) { this.nice_status = nice_status; } - public String getNice_status_short_description() { return nice_status_short_description; } + public String getNice_status_short_description() { + return nice_status_short_description; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 3ac2778ae97..c2b1016dbb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -36,7 +36,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.license.License; -import edu.harvard.iq.dataverse.globus.fileDetailsHolder; +import edu.harvard.iq.dataverse.globus.FileDetailsHolder; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; @@ -344,7 +344,7 @@ public static JsonObjectBuilder json(Dataset ds) { .add("metadataLanguage", ds.getMetadataLanguage()); } - public static JsonObjectBuilder 
json(fileDetailsHolder ds) { + public static JsonObjectBuilder json(FileDetailsHolder ds) { return Json.createObjectBuilder().add(ds.getStorageID() , Json.createObjectBuilder() .add("id", ds.getStorageID() ) From c6e362cabdb62f126d66955056ac7fad7c45bbe9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 16:28:46 -0400 Subject: [PATCH 124/161] per reviewDog --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 4 +++- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 2a3de14ee5b..c088c95e350 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3329,7 +3329,9 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, return wr.getResponse(); } - dataset.getLocks().forEach(dl -> {logger.info(dl.toString());}); + dataset.getLocks().forEach(dl -> { + logger.info(dl.toString()); + }); //------------------------------------ // (2a) Make sure dataset does not have package file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 74be123027e..c496883b8c0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -18,8 +18,6 @@ import javax.json.JsonPatch; import javax.servlet.http.HttpServletRequest; -import org.apache.commons.lang.StringUtils; - import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -1042,7 +1040,7 @@ public String calculatemime(String fileName) throws InterruptedException { String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; String type = 
FileUtil.determineFileTypeByNameAndExtension(fileName); - if (!StringUtils.isBlank(type)) { + if (!type.isBlank()) { if (FileUtil.useRecognizedType(finalType, type)) { finalType = type; } From 607d8b12344127d716d2438e2b0b21335f9d3bc5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 16:35:22 -0400 Subject: [PATCH 125/161] remove unused imports --- .../iq/dataverse/DatasetServiceBean.java | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 2aa81ff4bdb..e1ca5c19a90 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -2,7 +2,6 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -17,33 +16,20 @@ import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; import edu.harvard.iq.dataverse.export.ExportService; -import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; -import edu.harvard.iq.dataverse.globus.Task; -import edu.harvard.iq.dataverse.globus.FileDetailsHolder; import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import 
edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.*; -import java.net.MalformedURLException; -import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.*; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executor; -import java.util.concurrent.Executors; import java.util.logging.FileHandler; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; -import java.util.stream.IntStream; import javax.ejb.Asynchronous; import javax.ejb.EJB; import javax.ejb.EJBException; @@ -51,7 +37,6 @@ import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.inject.Named; -import javax.json.*; import javax.persistence.EntityManager; import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; @@ -59,13 +44,8 @@ import javax.persistence.StoredProcedureQuery; import javax.persistence.TypedQuery; import org.apache.commons.lang3.RandomStringUtils; -import org.apache.commons.lang.StringUtils; import org.ocpsoft.common.util.Strings; - -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; - /** * * @author skraffmiller From 1e3f6da1fb19b734bdc12047af9cb71d773b4ebc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 17:01:14 -0400 Subject: [PATCH 126/161] add links --- doc/sphinx-guides/source/installation/config.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 1bbc601da4b..293218b432a 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -238,6 +238,8 @@ As for the "Remote only" authentication mode, it means that: - 
``:DefaultAuthProvider`` has been set to use the desired authentication provider - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. +.. _file-storage: + File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores ----------------------------------------------------------------------------------------------------- @@ -672,7 +674,7 @@ In addition to having the type "remote" and requiring a label, Trusted Remote St These and other available options are described in the table below. Trusted remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity -and/or managing access to a secure enclave. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. +and/or managing access to a secure enclave. See :doc:`/developers/big-data-support` for additional information on how to use a trusted remote store. 
For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. .. table:: :align: left From 3865e228fda5d4b290e816c64e757f2bfd748165 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 17:01:33 -0400 Subject: [PATCH 127/161] partial update --- .../source/developers/big-data-support.rst | 63 ++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 21675bd4960..72fbf185202 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -1,12 +1,12 @@ Big Data Support ================ -Big data support is highly experimental. Eventually this content will move to the Installation Guide. +Big data support includes some highly experimental options. Eventually more of this content will move to the Installation Guide. .. contents:: |toctitle| :local: -Various components need to be installed and/or configured for big data support. +Various components will need to be installed and/or configured for big data support via the methods described below. S3 Direct Upload and Download ----------------------------- @@ -61,6 +61,65 @@ Alternatively, you can enable CORS using the AWS S3 web interface, using json-en Since the direct upload mechanism creates the final file rather than an intermediate temporary file, user actions, such as neither saving or canceling an upload session before closing the browser page, can leave an abandoned file in the store. The direct upload mechanism attempts to use S3 Tags to aid in identifying/removing such files. Upon upload, files are given a "dv-state":"temp" tag which is removed when the dataset changes are saved and the new file(s) are added in the Dataverse installation. Note that not all S3 implementations support Tags: Minio does not. 
WIth such stores, direct upload works, but Tags are not used. +Trusted Remote Storage with the ``remote`` Store Type +----------------------------------------------------- + +For very large, and/or very sensitive data, it may not make sense to transfer or copy files to Dataverse at all. The experimental ``remote`` store type in the Dataverse software now supports this use case. + +With this storage option Dataverse stores a URL reference for the file rather than transferring the file bytes to a store managed directly by Dataverse. Basic configuration for a remote store is described at :ref:`file-storage` in the Configuration Guide. + + +A lightweight option for supporting file sizes beyond a few gigabytes - a size that can cause performance issues when uploaded through a Dataverse installation itself - is to configure an S3 store to provide direct upload and download via 'pre-signed URLs'. When these options are configured, file uploads and downloads are made directly to and from a configured S3 store using secure (https) connections that enforce a Dataverse installation's access controls. (The upload and download URLs are signed with a unique key that only allows access for a short time period and a Dataverse installation will only generate such a URL if the user has permission to upload/download the specific file in question.) + +This option can handle files >40GB and could be appropriate for files up to a TB. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to a Dataverse installation. 
+ +To configure these options, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: + +``./asadmin create-jvm-options "-Ddataverse.files..download-redirect=true"`` + +``./asadmin create-jvm-options "-Ddataverse.files..upload-redirect=true"`` + + +With multiple stores configured, it is possible to configure one S3 store with direct upload and/or download to support large files (in general or for specific Dataverse collections) while configuring only direct download, or no direct access for another store. + +The direct upload option now switches between uploading the file in one piece (up to 1 GB by default) and sending it as multiple parts. The default can be changed by setting: + +``./asadmin create-jvm-options "-Ddataverse.files..min-part-size="`` + +For AWS, the minimum allowed part size is 5*1024*1024 bytes and the maximum is 5 GB (5*1024**3). Other providers may set different limits. + +It is also possible to set file upload size limits per store. See the :MaxFileUploadSizeInBytes setting described in the :doc:`/installation/config` guide. + +At present, one potential drawback for direct-upload is that files are only partially 'ingested', tabular and FITS files are processed, but zip files are not unzipped, and the file contents are not inspected to evaluate their mimetype. This could be appropriate for large files, or it may be useful to completely turn off ingest processing for performance reasons (ingest processing requires a copy of the file to be retrieved by the Dataverse installation from the S3 store). 
A store using direct upload can be configured to disable all ingest processing for files above a given size limit: + +``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` + + +**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers is to allow cross site (CORS) requests on your S3 store. +The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/GlobalDataverseCommunityConsortium/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. + +``aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json`` + +with the contents of the file cors.json as follows: + +.. code-block:: json + + { + "CORSRules": [ + { + "AllowedOrigins": ["*"], + "AllowedHeaders": ["*"], + "AllowedMethods": ["PUT", "GET"], + "ExposeHeaders": ["ETag"] + } + ] + } + +Alternatively, you can enable CORS using the AWS S3 web interface, using json-encoded rules as in the example above. + +Since the direct upload mechanism creates the final file rather than an intermediate temporary file, user actions, such as neither saving or canceling an upload session before closing the browser page, can leave an abandoned file in the store. The direct upload mechanism attempts to use S3 Tags to aid in identifying/removing such files. Upon upload, files are given a "dv-state":"temp" tag which is removed when the dataset changes are saved and the new file(s) are added in the Dataverse installation. Note that not all S3 implementations support Tags: Minio does not. WIth such stores, direct upload works, but Tags are not used. 
+ + Data Capture Module (DCM) ------------------------- From ef17dd13900bd8fddafc6abef5db677c1b3f38fc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 17:34:56 -0400 Subject: [PATCH 128/161] add some validation and test --- .../dataaccess/RemoteOverlayAccessIO.java | 19 +++++++++++++++++-- .../dataaccess/RemoteOverlayAccessIOTest.java | 14 +++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 0e18c46243f..05773888533 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -12,6 +12,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.Channels; @@ -80,8 +82,9 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) this.setIsLocalFile(false); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - // TODO: validate the storage location supplied urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(urlPath); + logger.fine("Base URL: " + urlPath); } @@ -90,10 +93,22 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE this.setIsLocalFile(false); configureStores(null, driverId, storageLocation); - // TODO: validate the storage location supplied urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(urlPath); logger.fine("Base URL: " + urlPath); } + + private void validatePath(String path) throws IOException { + try { + URI absoluteURI = new URI(baseUrl 
+ "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); + } + } catch(URISyntaxException use) { + throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); + } + } + @Override public void open(DataAccessOption... options) throws IOException { diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index c85a7e6adae..fc44984b263 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -30,14 +30,15 @@ public class RemoteOverlayAccessIOTest { private Dataset dataset; private DataFile datafile; - private String logoPath = "resources/images/dataverse_project_logo.svg"; + private DataFile badDatafile; + private String logoPath = "images/dataverse_project_logo.svg"; private String pid = "10.5072/F2/ABCDEF"; @BeforeEach public void setUp() { System.setProperty("dataverse.files.test.type", "remote"); System.setProperty("dataverse.files.test.label", "testOverlay"); - System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org"); + System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org/resources"); System.setProperty("dataverse.files.test.base-store", "file"); System.setProperty("dataverse.files.test.download-redirect", "true"); System.setProperty("dataverse.files.test.remote-store-name", "DemoDataCorp"); @@ -50,6 +51,9 @@ public void setUp() { datafile.setOwner(dataset); datafile.setStorageIdentifier("test://" + logoPath); + badDatafile = MocksFactory.makeDataFile(); + badDatafile.setOwner(dataset); + badDatafile.setStorageIdentifier("test://../.." 
+ logoPath); } @AfterEach @@ -67,7 +71,7 @@ public void tearDown() { } @Test - void testRemoteOverlayFile() throws IOException { + void testRemoteOverlayFiles() throws IOException { // We can read the storageIdentifier and get the driver assertTrue(datafile.getStorageIdentifier() .startsWith(DataAccess.getStorgageDriverFromIdentifier(datafile.getStorageIdentifier()))); @@ -98,6 +102,10 @@ void testRemoteOverlayFile() throws IOException { assertTrue(Paths .get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), pid, logoPath + ".auxobject") .equals(remoteIO.getAuxObjectAsPath("auxobject"))); + IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(badDatafile), + "Expected getStorageIO() to throw, but it didn't"); + // 'test' is the driverId in the IOException messages + assertTrue(thrown.getMessage().contains("test")); } From 13a7cf492b615a4da7754d76056c240e37118331 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 18:23:08 -0400 Subject: [PATCH 129/161] remote store docs --- .../source/developers/big-data-support.rst | 73 +++++++++---------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 72fbf185202..846bc7a9a1a 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -13,7 +13,7 @@ S3 Direct Upload and Download A lightweight option for supporting file sizes beyond a few gigabytes - a size that can cause performance issues when uploaded through a Dataverse installation itself - is to configure an S3 store to provide direct upload and download via 'pre-signed URLs'. When these options are configured, file uploads and downloads are made directly to and from a configured S3 store using secure (https) connections that enforce a Dataverse installation's access controls. 
(The upload and download URLs are signed with a unique key that only allows access for a short time period and a Dataverse installation will only generate such a URL if the user has permission to upload/download the specific file in question.) -This option can handle files >40GB and could be appropriate for files up to a TB. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to a Dataverse installation. +This option can handle files >300GB and could be appropriate for files up to a TB or larger. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to a Dataverse installation. To configure these options, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: @@ -32,7 +32,7 @@ For AWS, the minimum allowed part size is 5*1024*1024 bytes and the maximum is 5 It is also possible to set file upload size limits per store. See the :MaxFileUploadSizeInBytes setting described in the :doc:`/installation/config` guide. -At present, one potential drawback for direct-upload is that files are only partially 'ingested', tabular and FITS files are processed, but zip files are not unzipped, and the file contents are not inspected to evaluate their mimetype. This could be appropriate for large files, or it may be useful to completely turn off ingest processing for performance reasons (ingest processing requires a copy of the file to be retrieved by the Dataverse installation from the S3 store). 
A store using direct upload can be configured to disable all ingest processing for files above a given size limit: +At present, one potential drawback for direct-upload is that files are only partially 'ingested' - tabular and FITS files are processed, but zip files are not unzipped, and the file contents are not inspected to evaluate their mimetype. This could be appropriate for large files, or it may be useful to completely turn off ingest processing for performance reasons (ingest processing requires a copy of the file to be retrieved by the Dataverse installation from the S3 store). A store using direct upload can be configured to disable all ingest processing for files above a given size limit: ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` @@ -68,58 +68,53 @@ For very large, and/or very sensitive data, it may not make sense to transfer or With this storage option Dataverse stores a URL reference for the file rather than transferring the file bytes to a store managed directly by Dataverse. Basic configuration for a remote store is described at :ref:`file-storage` in the Configuration Guide. +Once the store is configured, it can be assigned to a collection or individual datasets as with other stores. In a dataset using this store, users can reference remote files, currently only via API, which will then appear the same basic way as other datafiles. If the store has been configured with a remote-store-name or remote-store-url, the dataset file table will include this information for remote files. (Users can also upload smaller files via the UI or API which will be stored in the configured base store.) -A lightweight option for supporting file sizes beyond a few gigabytes - a size that can cause performance issues when uploaded through a Dataverse installation itself - is to configure an S3 store to provide direct upload and download via 'pre-signed URLs'. 
When these options are configured, file uploads and downloads are made directly to and from a configured S3 store using secure (https) connections that enforce a Dataverse installation's access controls. (The upload and download URLs are signed with a unique key that only allows access for a short time period and a Dataverse installation will only generate such a URL if the user has permission to upload/download the specific file in question.) +The remote store leverages the same upload syntax as the :doc:`/developers/s3-direct-upload-api` (which itself uses the standard file upload API call): -This option can handle files >40GB and could be appropriate for files up to a TB. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to a Dataverse installation. +Rather than sending the file bytes, metadata for the remote file is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. 
For remote references, the jsonData object must also include values for: -To configure these options, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: -``./asadmin create-jvm-options "-Ddataverse.files..download-redirect=true"`` + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings -``./asadmin create-jvm-options "-Ddataverse.files..upload-redirect=true"`` +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +.. code-block:: bash -With multiple stores configured, it is possible to configure one S3 store with direct upload and/or download to support large files (in general or for specific Dataverse collections) while configuring only direct download, or no direct access for another store. + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'trs://images/dataverse_project_logo.svg', 'fileName':'dataverse_logo.svg', 'mimeType':'image/svg+xml', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" -The direct upload option now switches between uploading the file in one piece (up to 1 GB by default) and sending it as multiple parts. 
The default can be changed by setting: + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" -``./asadmin create-jvm-options "-Ddataverse.files..min-part-size="`` +The variant allowing multiple files to be added at once that is discussed in the :doc:`/developers/s3-direct-upload-api` document can also be used. -For AWS, the minimum allowed part size is 5*1024*1024 bytes and the maximum is 5 GB (5*1024**3). Other providers may set different limits. +Considerations: -It is also possible to set file upload size limits per store. See the :MaxFileUploadSizeInBytes setting described in the :doc:`/installation/config` guide. +* Remote stores are configured with a base-url which limits what files can be referenced, i.e. the absolute URL for the file is /. +* Admins are trusting the organization managing the site/service at base-url to maintain the referenced files for as long as the Dataverse instance needs them. Formal agreements are recommended for production. +* For large files, direct-download should always be used with a remote store. (Otherwise the Dataverse will be involved in the download.) +* For simple websites, a remote store should be marked public which will turn off restriction and embargo functionality in Dataverse (since Dataverse cannot restrict access to the file on the remote website) +* Remote stores can be configured with a secret-key. This key will be used to sign URLs when Dataverse retrieves the file content or redirects a user for download. If the remote service is able to validate the signature and reject invalid requests, the remote store mechanism can be used to manage restricted and embargoed files, access requests in Dataverse, etc. Dataverse contains Java code that validates these signatures which could be used, for example, to create a validation proxy in front of a web server to allow Dataverse to manage access.
The secret-key is a shared secret between Dataverse and the remote service and is not shared with/is not accessible by users or those with access to user's machines. +* Sophisticated remote services may wish to register file URLs that do not directly reference the file contents (bytes) but instead direct the user to a website where further information about the remote service's download process can be found. +* Due to the current design, ingest cannot be done on remote files and administrators should disable ingest when using a remote store. This can be done by setting the ingest size limit for the store to 0 and/or using the recently added option to not perform tabular ingest on upload. +* Dataverse will normally try to access the file contents itself, i.e. for ingest (in future versions), full-text indexing, thumbnail creation, etc. This processing may not be desirable for large/sensitive data, and, for the case where the URL does not reference the file itself, would not be possible. At present, administrators should configure the relevant size limits to avoid such actions. +* The current implementation of remote stores is experimental in the sense that future work to enhance it is planned. This work may result in changes to how the store works and lead to additional work when upgrading for sites that start using this mechanism now. -A lightweight option for supporting file sizes beyond a few gigabytes - a size that can cause performance issues when uploaded through a Dataverse installation itself - is to configure an S3 store to provide direct upload and download via 'pre-signed URLs'.
A store using direct upload can be configured to disable all ingest processing for files above a given size limit: +To configure the options mentioned above, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: +``./asadmin create-jvm-options "-Ddataverse.files..download-redirect=true"`` +``./asadmin create-jvm-options "-Ddataverse.files..secret-key=somelongrandomalphanumerickeythelongerthebetter123456"`` +``./asadmin create-jvm-options "-Ddataverse.files..public=true"`` ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` - -**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers is to allow cross site (CORS) requests on your S3 store. -The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/GlobalDataverseCommunityConsortium/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. - -``aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json`` - -with the contents of the file cors.json as follows: - -.. code-block:: json - - { - "CORSRules": [ - { - "AllowedOrigins": ["*"], - "AllowedHeaders": ["*"], - "AllowedMethods": ["PUT", "GET"], - "ExposeHeaders": ["ETag"] - } - ] - } - -Alternatively, you can enable CORS using the AWS S3 web interface, using json-encoded rules as in the example above. - -Since the direct upload mechanism creates the final file rather than an intermediate temporary file, user actions, such as neither saving or canceling an upload session before closing the browser page, can leave an abandoned file in the store. 
The direct upload mechanism attempts to use S3 Tags to aid in identifying/removing such files. Upon upload, files are given a "dv-state":"temp" tag which is removed when the dataset changes are saved and the new file(s) are added in the Dataverse installation. Note that not all S3 implementations support Tags: Minio does not. With such stores, direct upload works, but Tags are not used. - - Data Capture Module (DCM) ------------------------- From 85ebefab24254fd104cc580caa6a2c4b8c612481 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 18:27:46 -0400 Subject: [PATCH 130/161] typo in method name --- src/main/java/edu/harvard/iq/dataverse/FilePage.java | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Access.java | 2 +- .../edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java | 2 +- .../java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 4 ++-- .../iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index 6a84037f1ff..7f2c6dfca5c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -1169,7 +1169,7 @@ public String getIngestMessage() { //Determines whether this File uses a public store and therefore doesn't support embargoed or restricted files public boolean isHasPublicStore() { - return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(DataAccess.getStorgageDriverFromIdentifier(file.getStorageIdentifier()))); + return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(DataAccess.getStorageDriverFromIdentifier(file.getStorageIdentifier()))); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 1aa7a8bab9e..1bd414476f9 100644 ---
a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -333,7 +333,7 @@ public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs dInfo.addServiceAvailable(new OptionalAccessService("subset", "text/tab-separated-values", "variables=<LIST>", "Column-wise Subsetting")); } - if(systemConfig.isGlobusDownload() && systemConfig.getGlobusStoresList().contains(DataAccess.getStorgageDriverFromIdentifier(df.getStorageIdentifier()))) { + if(systemConfig.isGlobusDownload() && systemConfig.getGlobusStoresList().contains(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { dInfo.addServiceAvailable(new OptionalAccessService("GlobusTransfer", df.getContentType(), "format=GlobusTransfer", "Download via Globus")); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index e3a08fc5cc8..e430867fff8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -208,7 +208,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } if (systemConfig.isGlobusDownload() && systemConfig.getGlobusStoresList() - .contains(DataAccess.getStorgageDriverFromIdentifier(dataFile.getStorageIdentifier()))) { + .contains(DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier()))) { if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index e4851be4ad5..bc0794a1932 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -64,7 +64,7 @@ public static StorageIO getStorageIO(T 
dvObject) throws - public static String getStorgageDriverFromIdentifier(String storageIdentifier) { + public static String getStorageDriverFromIdentifier(String storageIdentifier) { int separatorIndex = storageIdentifier.indexOf(SEPARATOR); String driverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; // default @@ -81,7 +81,7 @@ public static StorageIO getStorageIO(T dvObject, DataAcc throw new IOException("getDataAccessObject: null or invalid datafile."); } - String storageDriverId = getStorgageDriverFromIdentifier(dvObject.getStorageIdentifier()); + String storageDriverId = getStorageDriverFromIdentifier(dvObject.getStorageIdentifier()); return getStorageIO(dvObject, req, storageDriverId); } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index fc44984b263..122f84c5c19 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -74,7 +74,7 @@ public void tearDown() { void testRemoteOverlayFiles() throws IOException { // We can read the storageIdentifier and get the driver assertTrue(datafile.getStorageIdentifier() - .startsWith(DataAccess.getStorgageDriverFromIdentifier(datafile.getStorageIdentifier()))); + .startsWith(DataAccess.getStorageDriverFromIdentifier(datafile.getStorageIdentifier()))); // We can get the driver type from it's ID assertTrue(DataAccess.getDriverType("test").equals(System.getProperty("dataverse.files.test.type"))); // When we get a StorageIO for the file, it is the right type From 59b286dd8e1d10a22670a40cfedefa1d42bf7e21 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 8 Aug 2022 13:15:25 -0400 Subject: [PATCH 131/161] Error handling or default on required params --- doc/sphinx-guides/source/installation/config.rst | 10 +++++----- .../dataaccess/RemoteOverlayAccessIO.java | 15 ++++++++++++++- 2 
files changed, 19 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index d72c631481c..9fe4abda64f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -679,20 +679,20 @@ and/or managing access to a secure enclave. See :doc:`/developers/big-data-suppo .. table:: :align: left - =========================================== ================== ========================================================================== ============= + =========================================== ================== ========================================================================== =================== JVM Option Value Description Default value - =========================================== ================== ========================================================================== ============= + =========================================== ================== ========================================================================== =================== dataverse.files..type ``remote`` **Required** to mark this storage as remote. (none) dataverse.files..label **Required** label to be shown in the UI for this storage. (none) dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) - dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (none) + dataverse.files..base-store **Optional** The id of a base store (of type file, s3, or swift). (the default store) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` - dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) + dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. 
(none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) - =========================================== ================== ========================================================================== ============= + =========================================== ================== ========================================================================== =================== diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 05773888533..a680ce7a06c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -448,11 +448,24 @@ int getUrlExpirationMinutes() { private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); + } else { + try { + new URI(baseUrl); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type"); + String baseDriverType = System.getProperty("dataverse.files." 
+ baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + if(dvObject instanceof Dataset) { baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { From 7b11b1196d2baff746f83b7c0d96aaf38d553dd1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 8 Aug 2022 15:14:15 -0400 Subject: [PATCH 132/161] sanity check to make sure driver being specified in addFile exists --- .../iq/dataverse/datasetutility/OptionalFileParams.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 25240349bfb..cd234dfc335 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -16,6 +16,7 @@ import edu.harvard.iq.dataverse.DataFileTag; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.api.Util; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.util.BundleUtil; import java.lang.reflect.Type; @@ -371,8 +372,12 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // get storage identifier as string // ------------------------------- if ((jsonObj.has(STORAGE_IDENTIFIER_ATTR_NAME)) && (!jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).isJsonNull())){ - - this.storageIdentifier = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + //Basic sanity check that driver specified is defined. 
+ String storageId = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + String type = DataAccess.getDriverType(DataAccess.getStorageDriverFromIdentifier(storageId)); + if(!type.equals("tmp")&& !type.equals("Undefined")) { + this.storageIdentifier = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + } } // ------------------------------- From 13dcf076c64e7f6403c04cfc6f92c4802389ac07 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 8 Aug 2022 15:45:17 -0400 Subject: [PATCH 133/161] only get value from json once --- .../harvard/iq/dataverse/datasetutility/OptionalFileParams.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index cd234dfc335..ad141998b15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -376,7 +376,7 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ String storageId = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); String type = DataAccess.getDriverType(DataAccess.getStorageDriverFromIdentifier(storageId)); if(!type.equals("tmp")&& !type.equals("Undefined")) { - this.storageIdentifier = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + this.storageIdentifier = storageId; } } From 682c947692442bdab9c712af09e0b3021d9e968c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 8 Aug 2022 17:52:20 -0400 Subject: [PATCH 134/161] also fix typo in xhtml --- src/main/webapp/file-download-button-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index dd57a0371ba..5de0468cdfd 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ 
b/src/main/webapp/file-download-button-fragment.xhtml @@ -59,7 +59,7 @@ -
  • +
  • Date: Tue, 9 Aug 2022 14:07:45 -0400 Subject: [PATCH 135/161] add separate downloadRedirectEnabled for aux objects method --- .../iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 4 ++++ .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 4 ++++ .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index a680ce7a06c..2e53c82d184 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -411,6 +411,10 @@ public boolean downloadRedirectEnabled() { } return false; } + + public boolean downloadRedirectEnabled(String auxObjectTag) { + return baseStore.downloadRedirectEnabled(auxObjectTag); + } @Override public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index fc9111b4b81..a82ff9d6f4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -851,6 +851,10 @@ public boolean downloadRedirectEnabled() { } return false; } + + public boolean downloadRedirectEnabled(String auxObjectTag) { + return downloadRedirectEnabled(); + } /** * Generates a temporary URL for a direct S3 download; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 6888bc7d1fb..0989963a417 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -593,6 +593,10 @@ public 
boolean downloadRedirectEnabled() { return false; } + public boolean downloadRedirectEnabled(String auxObjectTag) { + return false; + } + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } From 3f1f8200a359e56c196cec2715d469ab147ad88a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 Aug 2022 14:11:08 -0400 Subject: [PATCH 136/161] check baseStore redirect setting for aux files --- .../harvard/iq/dataverse/api/DownloadInstanceWriter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index e430867fff8..522e033656a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -130,7 +130,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] auxiliaryTag = ImageThumbConverter.THUMBNAIL_SUFFIX + (requestedSize > 0 ? requestedSize : ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE); - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { auxiliaryType = ImageThumbConverter.THUMBNAIL_MIME_TYPE; String fileName = storageIO.getFileName(); if (fileName != null) { @@ -149,7 +149,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] auxiliaryTag = auxiliaryTag + "_" + auxVersion; } - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { String fileExtension = getFileExtension(di.getAuxiliaryFile()); auxiliaryFileName = storageIO.getFileName() + "." 
+ auxiliaryTag + fileExtension; auxiliaryType = di.getAuxiliaryFile().getContentType(); @@ -172,7 +172,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // it has been cached already. auxiliaryTag = di.getConversionParamValue(); - if (isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { + if (storageIO.downloadRedirectEnabled(auxiliaryTag) && isAuxiliaryObjectCached(storageIO, auxiliaryTag)) { auxiliaryType = di.getServiceFormatType(di.getConversionParam(), auxiliaryTag); auxiliaryFileName = FileUtil.replaceExtension(storageIO.getFileName(), auxiliaryTag); } else { From 93c1004b5916dfc8cb5af71afaf48f605a35c67d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 12 Aug 2022 17:28:47 -0400 Subject: [PATCH 137/161] globus guide docs --- .../source/developers/big-data-support.rst | 18 +++++++++++++++++ .../source/installation/config.rst | 20 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 71822f53b1b..3055aac711a 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -115,6 +115,24 @@ To configure the options mentioned above, an administrator must set two JVM opti ``./asadmin create-jvm-options "-Ddataverse.files..public=true"`` ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` +Globus File Transfer +-------------------- + +When configured to use a Globus accessible S3 store and interact with a community-developed Dataverse Globus app, Dataverse can support the transfer of files via Globus for upload and download. Due to differences in the access control models of Dataverse and Globus, enabling this capability on a store will disable restriction and embargo capabilities in that store. 
+ +Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: +* robust file transfer capable of restarting after network or endpoint failures, +* third-party transfer, which enables a user running Dataverse in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by Dataverse. + +This mechanism requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution. Users will need a Globus account which could be obtained via their institution or directly from Globus (no cost). + +The setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. + +As described in that document, Globus transfers can be initiated by choosing the Globus option on the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, Globus Transfer is one of the downloads in the file download menu. + +An overview of the control and data transfer interactions between components was presented at the Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. + + Data Capture Module (DCM) ------------------------- diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 9fe4abda64f..b8c2167784b 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2903,3 +2903,23 @@ For configuration details, see :ref:`mute-notifications`. Overrides the default empty list of never muted notifications. Never muted notifications cannot be muted by the users. Always muted notifications are grayed out and are not adjustable by the user.
For configuration details, see :ref:`mute-notifications`. + +:BasicGlobusToken ++++++++++++++++++ + +BasicGlobusToken encodes credentials for Globus Integration - see :doc:`/developers/big-data-support` for details. + +:GlobusEndpoint ++++++++++++++++ + +GlobusEndpoint is Globus endpoint id used with Globus Integration - see :doc:`/developers/big-data-support` for details. + +:GlobusStores ++++++++++++++ + +A comma-separated list of the S3 stores that are configured to support Globus Integration - see :doc:`/developers/big-data-support` for details. + +:GlobusAppURL ++++++++++++++ + +The URL where the Dataverse Globus app has been deployed to support Globus Integration - see :doc:`/developers/big-data-support` for details. \ No newline at end of file From f271e7d4cea273eaa4c015a4154c4149416289b0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 17 Aug 2022 12:47:49 -0400 Subject: [PATCH 138/161] typo --- src/main/java/edu/harvard/iq/dataverse/DataFile.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index b0e39d95e45..cb43dff0e20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -608,7 +608,7 @@ public String getFriendlySize() { if (filesize != null) { return FileSizeChecker.bytesToHumanReadable(filesize); } else { - return BundleUtil.getStringFromBundle("file.sizeNotAvilable"); + return BundleUtil.getStringFromBundle("file.sizeNotAvailable"); } } From a97aaeb5b16ede67a50e5d34c0159c44d44df0f8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 18 Aug 2022 08:49:13 -0400 Subject: [PATCH 139/161] Add test for bad remote URLs --- .../iq/dataverse/dataaccess/DataAccess.java | 23 +++++++++++++++++++ .../dataaccess/RemoteOverlayAccessIO.java | 17 +++++++++++++- .../iq/dataverse/dataaccess/StorageIO.java | 16 +++++++++++++ .../datasetutility/OptionalFileParams.java | 9 
++++---- .../dataaccess/RemoteOverlayAccessIOTest.java | 8 +++++++ 5 files changed, 68 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 285bef02272..bccaf58edfc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -342,4 +342,27 @@ public static boolean uploadToDatasetAllowed(Dataset d, String storageIdentifier } return allowed; } + + + //Method to verify that a submitted storageIdentifier (i.e. in direct/remote uploads) is consistent with the store's configuration. + public static boolean isValidDirectStorageIdentifier(String storageId) { + String driverId = DataAccess.getStorageDriverFromIdentifier(storageId); + String storageType = DataAccess.getDriverType(driverId); + if (storageType.equals("tmp") || storageType.equals("Undefined")) { + return false; + } + switch (storageType) { + case FILE: + return FileAccessIO.isValidIdentifier(driverId, storageId); + case SWIFT: + return SwiftAccessIO.isValidIdentifier(driverId, storageId); + case S3: + return S3AccessIO.isValidIdentifier(driverId, storageId); + case REMOTE: + return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId); + default: + logger.warning("Request to validate for storage driver: " + driverId); + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index b80478baa92..bc421949ed7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -611,8 +611,23 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } + protected static boolean isValidIdentifier(String driverId, String 
storageId) { + String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); + try { + URI absoluteURI = new URI(baseUrl + "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); + return false; + } + } catch(URISyntaxException use) { + logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); + return false; + } + return true; + } + public static String getBaseStoreIdFor(String driverId) { return System.getProperty("dataverse.files." + driverId + ".base-store"); } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 0989963a417..5ed8d16ffc8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -39,6 +39,8 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; //import org.apache.commons.httpclient.Header; //import org.apache.commons.httpclient.methods.GetMethod; @@ -604,4 +606,18 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary public static String getDriverPrefix(String driverId) { return driverId+ DataAccess.SEPARATOR; } + + //Check that storageIdentifier is consistent with store's config + //False will prevent direct uploads + protected static boolean isValidIdentifier(String driverId, String storageId) { + return true; + } + + //Utility to verify the standard UUID pattern for stored files. 
+ protected static boolean usesStandardNamePattern(String identifier) { + + Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$"); + Matcher m = r.matcher(identifier); + return m.find(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 080132409f5..959dbc4e262 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -372,14 +372,15 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ // get storage identifier as string // ------------------------------- if ((jsonObj.has(STORAGE_IDENTIFIER_ATTR_NAME)) && (!jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).isJsonNull())){ - // Basic sanity check that driver specified is defined. Note that being able to + // Basic sanity check that driver specified is defined and the overall + // identifier is consistent with that store's config. Note that being able to // specify a driver that does not support direct uploads is currently used with // out-of-band uploads, e.g. for bulk migration. 
String storageId = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); - String type = DataAccess.getDriverType(DataAccess.getStorageDriverFromIdentifier(storageId)); - if(!type.equals("tmp")&& !type.equals("Undefined")) { - this.storageIdentifier = storageId; + if (DataAccess.isValidDirectStorageIdentifier(storageId)) { + this.storageIdentifier = storageId; } + } // ------------------------------- diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index 2dd3f372ce1..f66b3306dda 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -113,4 +113,12 @@ void testRemoteOverlayFiles() throws IOException { } + @Test + void testRemoteOverlayIdentifierFormats() throws IOException { + + assertTrue(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier())); + assertFalse(DataAccess.isValidDirectStorageIdentifier(badDatafile.getStorageIdentifier())); + assertFalse(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier().replace("test", "bad"))); + } + } From 90a4b7fc80e1e4ec52843736e946a1ec57550d31 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 18 Aug 2022 08:49:46 -0400 Subject: [PATCH 140/161] note re 404 URLs --- doc/sphinx-guides/source/developers/big-data-support.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 23bbca52b8b..b6cd6dbb93c 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -103,6 +103,7 @@ The variant allowing multiple files to be added once that is discussed in the :d Considerations: * Remote stores are configured with a base-url which 
limits what files can be referenced, i.e. the absolute URL for the file is /. +* The current store will not prevent you from providing a relative URL that results in a 404 when resolved. (I.e. if you make a typo). You should check to make sure the file exists at the location you specify - by trying to download in Dataverse, by checking to see that Dataverse was able to get the file size (which it does by doing a HEAD call to that location), or just manually trying the URL in your browser. * Admins are trusting the organization managing the site/service at base-url to maintain the referenced files for as long as the Dataverse instance needs them. Formal agreements are recommended for production * For large files, direct-download should always be used with a remote store. (Otherwise the Dataverse will be involved in the download.) * For simple websites, a remote store should be marked public which will turn off restriction and embargo functionality in Dataverse (since Dataverse cannot restrict access to the file on the remote website) @@ -110,7 +111,7 @@ Considerations: * Sophisticated remote services may wish to register file URLs that do not directly reference the file contents (bytes) but instead direct the user to a website where further information about the remote service's download process can be found. * Due to the current design, ingest cannot be done on remote files and administrators should disable ingest when using a remote store. This can be done by setting the ingest size limit for the store to 0 and/or using the recently added option to not perform tabular ingest on upload. * Dataverse will normally try to access the file contents itself, i.e. for ingest (in future versions), full-text indexing, thumbnail creation, etc. This processing may not be desirable for large/sensitive data, and, for the case where the URL does not reference the file itself, would not be possible. 
At present, administrators should configure the relevant size limits to avoid such actions. -* The current implementation of remote stores is experimental in the sense that future work to enahnce it is planned. This work may result in changes to how the store works and lead to additional work when upgrading for sites that start using this mechanism now. +* The current implementation of remote stores is experimental in the sense that future work to enhance it is planned. This work may result in changes to how the store works and lead to additional work when upgrading for sites that start using this mechanism now. To configure the options mentioned above, an administrator must set two JVM options for the Dataverse installation using the same process as for other configuration options: From 08a2b485e085f25e929514dee1bdffec43686527 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 29 Aug 2022 11:57:36 -0400 Subject: [PATCH 141/161] add constructor for dataset/datafile for Globus download --- .../iq/dataverse/globus/GlobusServiceBean.java | 2 +- .../harvard/iq/dataverse/util/URLTokenUtil.java | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index c496883b8c0..59ec7e752e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -524,7 +524,7 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) } catch (Exception e) { logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); } - URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode); String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + "/" + (upload ? 
"upload" : "download") + "?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}" diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 71d9377b282..9daa9137f22 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -55,13 +55,26 @@ public URLTokenUtil(DataFile dataFile, ApiToken apiToken, FileMetadata fileMetad * @param apiToken The apiToken can be null */ public URLTokenUtil(Dataset dataset, ApiToken apiToken, String localeCode) { + this(dataset, null, apiToken, localeCode); + } + + /** + * Dataset level + * + * @param dataset Required. + * @param datafile Optional. + * @param apiToken Optional The apiToken can be null + * @localeCode Optional + * + */ + public URLTokenUtil(Dataset dataset, DataFile datafile, ApiToken apiToken, String localeCode) { if (dataset == null) { String error = "A Dataset is required."; logger.warning("Error in URLTokenUtil constructor: " + error); throw new IllegalArgumentException(error); } this.dataset = dataset; - this.dataFile = null; + this.dataFile = datafile; this.fileMetadata = null; this.apiToken = apiToken; this.localeCode = localeCode; From a243db6a93956d49a33f3323171f5730b7378e31 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 29 Aug 2022 11:59:53 -0400 Subject: [PATCH 142/161] nest if to avoid refresh of aipitoken for guest --- .../harvard/iq/dataverse/globus/GlobusServiceBean.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 59ec7e752e1..db89242dd43 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -512,10 +512,11 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) if (user instanceof AuthenticatedUser) { apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) user); - } - if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { - logger.fine("Created apiToken for user: " + user.getIdentifier()); - apiToken = authSvc.generateApiTokenForUser((AuthenticatedUser) user); + + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for user: " + user.getIdentifier()); + apiToken = authSvc.generateApiTokenForUser((AuthenticatedUser) user); + } } String storePrefix = ""; String driverId = d.getEffectiveStorageDriverId(); From 84b393c48f2d720e06cf08ebbf04b9936cb99ae3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 30 Aug 2022 10:42:26 -0400 Subject: [PATCH 143/161] handle null mimetype --- .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index db89242dd43..3bd457b50e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1041,7 +1041,7 @@ public String calculatemime(String fileName) throws InterruptedException { String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; String type = FileUtil.determineFileTypeByNameAndExtension(fileName); - if (!type.isBlank()) { + if (type!=null && !type.isBlank()) { if (FileUtil.useRecognizedType(finalType, type)) { finalType = type; } From ee153e0e5a29121fcf061afc7e5dfe2de95206cb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 12:26:33 -0400 Subject: [PATCH 144/161] Add dataset-level Globus download --- 
.../edu/harvard/iq/dataverse/DatasetPage.java | 17 +++++++++++ .../externaltools/ExternalToolHandler.java | 6 +--- .../dataverse/globus/GlobusServiceBean.java | 30 ++++++++++++++++--- .../iq/dataverse/util/URLTokenUtil.java | 6 ++++ src/main/webapp/dataset.xhtml | 8 +++++ 5 files changed, 58 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 35eda0682b2..0a8db69bf5b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -113,6 +113,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.export.SchemaDotOrgExporter; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; @@ -251,6 +252,8 @@ public enum DisplayMode { LicenseServiceBean licenseServiceBean; @Inject DataFileCategoryServiceBean dataFileCategoryService; + @Inject + GlobusServiceBean globusService; private Dataset dataset = new Dataset(); @@ -6045,4 +6048,18 @@ public boolean downloadingRestrictedFiles() { public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } + + public void startGlobusTransfer() { + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = 
privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 3ed11602e85..33d8c2d0d54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -2,10 +2,8 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.URLTokenUtil; import java.io.StringReader; @@ -106,8 +104,6 @@ public void setApiToken(ApiToken apiToken) { public String getExploreScript() { String toolUrl = this.getToolUrlWithQueryParams(); logger.fine("Exploring with " + toolUrl); - String msg = BundleUtil.getStringFromBundle("externaltools.enable.browser.popups"); - String script = "const newWin = window.open('" + toolUrl + "', target='_blank'); if (!newWin || newWin.closed || typeof newWin.closed == \"undefined\") {alert(\"" + msg + "\");}"; - return script; + return getScriptForUrl(toolUrl); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 3bd457b50e0..bb966ff7f16 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -525,14 +525,36 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) } catch 
(Exception e) { logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); } + //Use URLTokenUtil for params currently in common with external tools. URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode); - String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + "/" - + (upload ? "upload" : "download") - + "?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}" - + (df != null ? "&fileId={fileId}" : ""); + String appUrl; + if (upload) { + appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + } else { + if (df == null) { + appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + + "/download?datasetPid={datasetPid}&siteUrl={siteUrl}" + + ((apiToken != null) ? "&apiToken={apiToken}" : "") + + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + } else { + String rawStorageId = df.getStorageIdentifier(); + rawStorageId=rawStorageId.substring(rawStorageId.lastIndexOf(":")+1); + appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") + + "/download-file?datasetPid={datasetPid}&siteUrl={siteUrl}" + + ((apiToken != null) ? 
"&apiToken={apiToken}" : "") + + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}&fileId={fileId}&storageIdentifier=" + + rawStorageId + "&fileName=" + df.getCurrentName(); + } + } return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; } + public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { + return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, null)); + + } + @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 9daa9137f22..b3d5f9d6b74 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -184,6 +184,12 @@ private String getTokenValue(String value) { } throw new IllegalArgumentException("Cannot replace reserved word: " + value); } + + public static String getScriptForUrl(String url) { + String msg = BundleUtil.getStringFromBundle("externaltools.enable.browser.popups"); + String script = "const newWin = window.open('" + url + "', target='_blank'); if (!newWin || newWin.closed || typeof newWin.closed == \"undefined\") {alert(\"" + msg + "\");}"; + return script; + } public enum ReservedWord { diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index a1d3db43e72..1bb862721a5 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -217,6 +217,14 @@ + +
  • + + + +
  • +
    + From 8bad27543bbfc0ef620e6bd82c5f190bae9c75c8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 12:27:03 -0400 Subject: [PATCH 145/161] Make file-level globus download optional --- .../java/edu/harvard/iq/dataverse/SettingsWrapper.java | 8 ++++++++ src/main/java/edu/harvard/iq/dataverse/api/Access.java | 2 +- .../harvard/iq/dataverse/api/DownloadInstanceWriter.java | 2 +- .../iq/dataverse/settings/SettingsServiceBean.java | 4 ++-- .../java/edu/harvard/iq/dataverse/util/SystemConfig.java | 4 ++++ src/main/webapp/file-download-button-fragment.xhtml | 2 +- 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 3d4a88567ea..aa40423000d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -97,6 +97,7 @@ public class SettingsWrapper implements java.io.Serializable { private Boolean globusUpload = null; private Boolean globusDownload = null; + private Boolean globusFileDownload = null; private String globusAppUrl = null; @@ -315,6 +316,13 @@ public boolean isGlobusDownload() { return globusDownload; } + public boolean isGlobusFileDownload() { + if (globusFileDownload == null) { + globusFileDownload = systemConfig.isGlobusFileDownload(); + } + return globusFileDownload; + } + public boolean isGlobusEnabledStorageDriver(String driverId) { if (globusStoreList == null) { globusStoreList = systemConfig.getGlobusStoresList(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 1bd414476f9..abeedf23b59 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -333,7 +333,7 @@ public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs dInfo.addServiceAvailable(new 
OptionalAccessService("subset", "text/tab-separated-values", "variables=<LIST>", "Column-wise Subsetting")); } - if(systemConfig.isGlobusDownload() && systemConfig.getGlobusStoresList().contains(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + if(systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList().contains(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { dInfo.addServiceAvailable(new OptionalAccessService("GlobusTransfer", df.getContentType(), "format=GlobusTransfer", "Download via Globus")); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index 522e033656a..01f627ea23b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -207,7 +207,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } } - if (systemConfig.isGlobusDownload() && systemConfig.getGlobusStoresList() + if (systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList() .contains(DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier()))) { if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 1d1b36af372..404dabd1f4e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -466,10 +466,10 @@ Whether Harvesting (OAI) service is enabled * */ GlobusAppUrl, - /**Client id for Globus application + /**Enable single-file download/transfers for Globus * */ - //GlobusClientId, + GlobusSingleFileTransfer, /** * Optional external executables to run on the 
metadata for dataverses * and datasets being published; as an extra validation step, to diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 8cabc0de44d..2447d0dae3f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1032,6 +1032,10 @@ public boolean isHTTPDownload() { public boolean isGlobusDownload() { return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), false); } + + public boolean isGlobusFileDownload() { + return (isGlobusDownload() && settingsService.isTrueForKey(SettingsServiceBean.Key.GlobusSingleFileTransfer, false)); + } public List getGlobusStoresList() { String globusStores = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusStores, ""); diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 5de0468cdfd..ac1ec525b44 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -59,7 +59,7 @@ -
  • +
  • Date: Wed, 14 Sep 2022 12:35:09 -0400 Subject: [PATCH 146/161] change setting name per review, add doc re: single file setting --- .../source/installation/config.rst | 8 ++++++-- .../dataverse/globus/GlobusServiceBean.java | 19 +++++++++---------- .../settings/SettingsServiceBean.java | 4 ++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index c8587c8b199..96848bcd633 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2959,10 +2959,10 @@ The URL of an LDN Inbox to which the LDN Announce workflow step will send messag The list of parent dataset field names for which the LDN Announce workflow step should send messages. See :doc:`/developers/workflows` for details. -:BasicGlobusToken +:GlobusBasicToken +++++++++++++++++ -BasicGlobusToken encodes credentials for Globus Integration - see :doc:`/developers/big-data-support` for details. +GlobusBasicToken encodes credentials for Globus Integration - see :doc:`/developers/big-data-support` for details. :GlobusEndpoint +++++++++++++++ @@ -2979,3 +2979,7 @@ A comma-separated list of the S3 stores that are configured to support Globus In The URL where the Dataverse Globus app has been deployed to support Globus Integration - see :doc:`/developers/big-data-support` for details. +:GlobusSingleFileTransfer ++++++++++++++++++++++++++ + +A true/false option to add a Globus transfer option to the file download menu which is not yet fully supported in the dataverse-globus app - see :doc:`/developers/big-data-support` for details. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index bb966ff7f16..cc75c1de378 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -264,11 +264,11 @@ public Task getTask(AccessToken clientTokenUser, String taskId, Logger globusLog } public AccessToken getClientToken() throws MalformedURLException { - String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); + String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); URL url = new URL( "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); - MakeRequestResponse result = makeRequest(url, "Basic", basicGlobusToken, "POST", null); + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); AccessToken clientTokenUser = null; if (result.status == 200) { clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); @@ -276,7 +276,7 @@ public AccessToken getClientToken() throws MalformedURLException { return clientTokenUser; } - public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGlobusToken) + public AccessToken getAccessToken(HttpServletRequest origRequest, String globusBasicToken) throws UnsupportedEncodingException, MalformedURLException { String serverName = origRequest.getServerName(); if (serverName.equals("localhost")) { @@ -292,7 +292,7 @@ public AccessToken getAccessToken(HttpServletRequest origRequest, String basicGl + "&grant_type=authorization_code"); logger.info(url.toString()); - MakeRequestResponse result = makeRequest(url, "Basic", basicGlobusToken, "POST", null); + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); 
AccessToken accessTokenUser = null; if (result.status == 200) { @@ -446,11 +446,10 @@ public boolean giveGlobusPublicPermissions(String datasetId) throws UnsupportedEncodingException, MalformedURLException { String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - String basicGlobusToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - if (globusEndpoint.equals("") || basicGlobusToken.equals("")) { + String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); + if (globusEndpoint.equals("") || globusBasicToken.equals("")) { return false; } - // AccessToken clientTokenUser = getClientToken(basicGlobusToken); AccessToken clientTokenUser = getClientToken(); if (clientTokenUser == null) { logger.severe("Cannot get client token "); @@ -1255,9 +1254,9 @@ public String calculatemime(String fileName) throws InterruptedException { * "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. 
Message was '" * + message + "'."); } * - * String basicGlobusToken = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.BasicGlobusToken, ""); - * AccessToken clientTokenUser = getClientToken(basicGlobusToken); + * String globusBasicToken = + * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); + * AccessToken clientTokenUser = getClientToken(globusBasicToken); * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } * */ diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 404dabd1f4e..ac8795e4be4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -451,9 +451,9 @@ Whether Harvesting (OAI) service is enabled ExportInstallationAsDistributorOnlyWhenNotSet, /** - * BasicGlobusToken for Globus Application + * Basic Globus Token for Globus Application */ - BasicGlobusToken, + GlobusBasicToken, /** * GlobusEndpoint is Globus endpoint for Globus application */ From 5c2d4e8a6e6d0714acbbcc830c093bf15787bac3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 12:37:53 -0400 Subject: [PATCH 147/161] alter download info --- doc/sphinx-guides/source/developers/big-data-support.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index b6cd6dbb93c..76717d1c6b5 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -133,7 +133,7 @@ This mechanism requires use of the Globus S3 connector which requires a paid Glo The setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. 
-As described in that document, Globus transfers can be initiated by choosing the Globus option on the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, Globus Transfer is one of the downloads in the file download menu. +As described in that document, Globus transfers can be initiated by choosing the Globus option on the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, Globus Transfer is one of the downloads in the dataset access menu and optionally the file download menu (if/when supported in the dataverse-globus app). An overview of the control and data transfer interactions between components was presented at the Dataverse Community Meeting and can be viewed in the `Itegrations and Tools Session Video `_ around the 1 hr 28 min mark. From 40c9ba14d8b4f2712275e989d6421e5a4894b294 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 12:45:13 -0400 Subject: [PATCH 148/161] update youtube link per review --- doc/sphinx-guides/source/developers/big-data-support.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 76717d1c6b5..1809a8b71ca 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -135,7 +135,7 @@ The setup required to enable Globus is described in the `Community Dataverse-Glo As described in that document, Globus transfers can be initiated by choosing the Globus option on the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, Globus Transfer is one of the downloads in the dataset access menu and optionally the file download menu (if/when supported in the dataverse-globus app). 
-An overview of the control and data transfer interactions between components was presented at the Dataverse Community Meeting and can be viewed in the `Itegrations and Tools Session Video `_ around the 1 hr 28 min mark. +An overview of the control and data transfer interactions between components was presented at the Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. Data Capture Module (DCM) ------------------------- From 3dfb8f2484af6c1e6bac2e7f929ac7cd31232bff Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 12:46:20 -0400 Subject: [PATCH 149/161] Apply suggestions from code review Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/developers/big-data-support.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index b6cd6dbb93c..43923a389ca 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -123,7 +123,7 @@ To configure the options mentioned above, an administrator must set two JVM opti Globus File Transfer -------------------- -When configured to use a Globus accessible S3 store and interact with a community-developed Dataverse Globus app, Dataverse can support the transfer of files via Globus for upload and download. Due to differences in the access control models of Dataverse and Globus, enabling this capability on a store will disable restriction and embargo capabilities in that store. +When configured to use a Globus accessible S3 store and interact with a community-developed Dataverse Globus app, Dataverse can support the transfer of files via Globus for upload and download. 
Due to differences in the access control models of Dataverse and Globus, enabling this capability on a store will disable the ability to restrict and embargo files in that store. Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: * robust file transfer capable of restarting after network or endpoint failures, From 77cbc3ae4e643d07888ec215d6235aa682451527 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 16:40:59 -0400 Subject: [PATCH 150/161] refactor per qa --- .../iq/dataverse/globus/GlobusServiceBean.java | 16 ++++++++-------- .../globus/{Task.java => GlobusTask.java} | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) rename src/main/java/edu/harvard/iq/dataverse/globus/{Task.java => GlobusTask.java} (98%) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index cc75c1de378..09415aa023f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -243,17 +243,17 @@ public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId return false; } - public Task getTask(AccessToken clientTokenUser, String taskId, Logger globusLogger) throws MalformedURLException { + public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); MakeRequestResponse result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - Task task = null; + GlobusTask task = null; if (result.status == 200) { - task = parseJson(result.jsonResponse, Task.class, false); + task = parseJson(result.jsonResponse, GlobusTask.class, false); } if (result.status != 200) { 
globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " @@ -613,7 +613,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin } // globus task status check - Task task = globusStatusCheck(taskIdentifier, globusLogger); + GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { @@ -869,7 +869,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } // globus task status check - Task task = globusStatusCheck(taskIdentifier, globusLogger); + GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { @@ -898,10 +898,10 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro Executor executor = Executors.newFixedThreadPool(10); - private Task globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { + private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { boolean taskCompletion = false; String status = ""; - Task task = null; + GlobusTask task = null; do { try { globusLogger.info("checking globus transfer task " + taskId); @@ -946,7 +946,7 @@ private Task globusStatusCheck(String taskId, Logger globusLogger) throws Malfor return task; } - private String getTaskStatus(Task task) { + private String getTaskStatus(GlobusTask task) { String status = null; if (task != null) { status = task.getStatus(); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java similarity index 98% rename from src/main/java/edu/harvard/iq/dataverse/globus/Task.java rename to src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java index 59c3767d848..c2b01779f4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/Task.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java @@ -1,6 +1,6 @@ package edu.harvard.iq.dataverse.globus; -public class Task { +public class GlobusTask { private String DATA_TYPE; private String type; From 280912299e7d34da5e07e92ba0d540e2a117a067 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 17:03:56 -0400 Subject: [PATCH 151/161] make polling configurable per qa --- doc/sphinx-guides/source/installation/config.rst | 5 +++++ .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 4 +++- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 4 ++++ .../java/edu/harvard/iq/dataverse/util/SystemConfig.java | 6 +++--- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 156cea6dac2..27d660796ec 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3003,6 +3003,11 @@ A comma-separated list of the S3 stores that are configured to support Globus In The URL where the Dataverse Globus app has been deployed to support Globus Integration - see :doc:`/developers/big-data-support` for details. +:GlobusPollingInterval +++++++++++++++++++++++ + +The interval in seconds between Dataverse calls to Globus to check on upload progress. Defaults to 50 seconds. 
+ :GlobusSingleFileTransfer +++++++++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 09415aa023f..9d80c5cc280 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -49,6 +49,7 @@ import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -902,10 +903,11 @@ private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws boolean taskCompletion = false; String status = ""; GlobusTask task = null; + int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); do { try { globusLogger.info("checking globus transfer task " + taskId); - Thread.sleep(50000); + Thread.sleep(pollingInterval * 1000); AccessToken clientTokenUser = getClientToken(); // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); task = getTask(clientTokenUser, taskId, globusLogger); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 21d99bcf9d0..bb68152eeba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -474,6 +474,10 @@ Whether Harvesting (OAI) service is enabled * */ GlobusAppUrl, + /** Globus Polling Interval how long in seconds Dataverse waits between checks on Globus upload status checks + * + */ + GlobusPollingInterval, /**Enable single-file 
download/transfers for Globus * */ diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 2447d0dae3f..7abd0d02065 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -422,7 +422,7 @@ public static long getLongLimitFromStringOrDefault(String limitSetting, Long def if (limitSetting != null && !limitSetting.equals("")) { try { - limit = new Long(limitSetting); + limit = Long.valueOf(limitSetting); } catch (NumberFormatException nfe) { limit = null; } @@ -431,12 +431,12 @@ public static long getLongLimitFromStringOrDefault(String limitSetting, Long def return limit != null ? limit : defaultValue; } - static int getIntLimitFromStringOrDefault(String limitSetting, Integer defaultValue) { + public static int getIntLimitFromStringOrDefault(String limitSetting, Integer defaultValue) { Integer limit = null; if (limitSetting != null && !limitSetting.equals("")) { try { - limit = new Integer(limitSetting); + limit = Integer.valueOf(limitSetting); } catch (NumberFormatException nfe) { limit = null; } From b714cf713a264355951bef31b9e7a509407c1636 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Sep 2022 17:18:23 -0400 Subject: [PATCH 152/161] message/email tweaks --- src/main/java/propertyFiles/Bundle.properties | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 43cae4eec46..edc7cd3de95 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -232,16 +232,16 @@ notification.access.revoked.datafile=You have been removed from a role in {0}. notification.checksumfail=One or more files in your upload failed checksum validation for dataset {1}. Please re-run the upload script. 
If the problem persists, please contact support. notification.ingest.completed=Your Dataset {2} has one or more tabular files that completed the tabular ingest process. These files will be available for download in their original formats and other formats for enhanced archival purposes after you publish the dataset. The archival .tab files are displayed in the file table. Please see the guides for more information about ingest and support for tabular files. notification.ingest.completedwitherrors=Your Dataset {2} has one or more tabular files that have been uploaded successfully but are not supported for tabular ingest. After you publish the dataset, these files will not have additional archival features. Please see the guides for more information about ingest and support for tabular files.

    Files with incomplete ingest:{5} -notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId={1}) has been successfully uploaded and verified. -notification.mail.globus.upload.completed=Dataset {2} has been successfully uploaded via Globus and verified.

    {3}
    -notification.mail.globus.download.completed=Files from the dataset {2} has been successfully downloaded via Globus.

    {3}
    -notification.mail.globus.upload.completedWithErrors=Dataset {2} : uploading files via Globus has been completed with errors.

    {3}
    -notification.mail.globus.download.completedWithErrors=Files from the dataset {2} : downloading files via Globus has been completed with errors.

    {3}
    -notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. -notification.globus.upload.completed=Dataset {1} has been successfully uploaded via Globus and verified. -notification.globus.download.completed=Files from the dataset {1} has been successfully downloaded via Globus. -notification.globus.upload.completedWithErrors=Dataset {1} : uploading files via Globus has been completed with errors. -notification.globus.download.completedWithErrors=Files from the dataset {1} : downloading files via Globus has been completed with errors. +notification.mail.import.filesystem=Globus transfer to Dataset {2} ({0}/dataset.xhtml?persistentId={1}) was successful. File(s) have been uploaded and verified. +notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

    {3}
    +notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.

    {3}
    +notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} : transferring files via Globus is complete with errors.

    {3}
    +notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} : transferring files via Globus is complete with errors.

    {3}
    +notification.import.filesystem=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. +notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. +notification.globus.download.completed=Globus transfer from the dataset {1} was successful. +notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} : uploading files via Globus is complete with errors. +notification.globus.download.completedWithErrors=Globus transfer from the dataset {1} : downloading files via Globus is complete with errors. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification From a7055fb7ed8b13bdb0947dba77d07b95df2c9647 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Sep 2022 14:54:40 -0400 Subject: [PATCH 153/161] cut/paste error --- .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 0931de98721..f28c1b683fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -605,7 +605,7 @@ public static String getDriverPrefix(String driverId) { } public static boolean isDirectUploadEnabled(String driverId) { - return Boolean.getBoolean(System.getProperty("dataverse.files." + driverId + ".download-redirect", "false")); + return Boolean.getBoolean(System.getProperty("dataverse.files." 
+ driverId + ".upload-redirect", "false")); } //Check that storageIdentifier is consistent with store's config From 3320036b98cecce3634dab17a7f9d15a8b3b3636 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Sep 2022 15:46:19 -0400 Subject: [PATCH 154/161] fix Boolean conversion --- .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index f28c1b683fd..90e4a54dbe8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -605,7 +605,7 @@ public static String getDriverPrefix(String driverId) { } public static boolean isDirectUploadEnabled(String driverId) { - return Boolean.getBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect", "false")); + return Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect")); } //Check that storageIdentifier is consistent with store's config From aa8e1d2868794195f9867d6b18226af049b1ee6b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 16 Sep 2022 14:01:13 -0400 Subject: [PATCH 155/161] simplify messages --- src/main/java/propertyFiles/Bundle.properties | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index edc7cd3de95..49480b001c9 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -235,13 +235,13 @@ notification.ingest.completedwitherrors=Your Dataset {2} was successful. File(s) have been uploaded and verified.

    {3}
    notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.

    {3}
    -notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} : transferring files via Globus is complete with errors.

    {3}
    -notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} : transferring files via Globus is complete with errors.

    {3}
    +notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.

    {3}
    +notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.

    {3}
    notification.import.filesystem=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified. notification.globus.download.completed=Globus transfer from the dataset {1} was successful. -notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} : uploading files via Globus is complete with errors. -notification.globus.download.completedWithErrors=Globus transfer from the dataset {1} : downloading files via Globus is complete with errors. +notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} is complete with errors. +notification.globus.download.completedWithErrors=Globus transfer from the dataset {1} is complete with errors. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification From 26a7a72a9a0c5ace2e2a21549b40b10e6bf8b167 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 16 Sep 2022 14:15:42 -0400 Subject: [PATCH 156/161] word change per QA re: checking status --- src/main/java/propertyFiles/Bundle.properties | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 49480b001c9..26a20c40cb3 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1553,7 +1553,7 @@ dataset.message.publish.remind.draft.filePage=When ready for sharing, please go to the dataset page to submit it for review. dataset.message.publishSuccess=This dataset has been published. dataset.message.publishGlobusFailure.details=Could not publish Globus data. -dataset.message.publishGlobusFailure=Error with publidhing data. +dataset.message.publishGlobusFailure=Error with publishing data. dataset.message.GlobusError=Cannot go to Globus. 
dataset.message.only.authenticatedUsers=Only authenticated users may release Datasets. dataset.message.deleteSuccess=This dataset has been deleted. @@ -1743,7 +1743,7 @@ file.rsyncUpload.httpUploadDisabledDueToRsyncFileExistingAndPublished=HTTP uploa file.rsyncUpload.rsyncUploadDisabledDueFileUploadedViaHttp=Upload with rsync + SSH is disabled for this dataset because you have already uploaded files via HTTP. If you would like to switch to rsync upload, then you must first remove all uploaded files from this dataset. Once this dataset is published, the chosen upload method is permanently locked in. file.rsyncUpload.rsyncUploadDisabledDueFileUploadedViaHttpAndPublished=Upload with rsync + SSH is disabled for this dataset because you have already uploaded files via HTTP and published the dataset. file.globusUpload.inProgressMessage.summary=Globus Transfer in Progress -file.globusUpload.inProgressMessage.details=This dataset is locked while the data files are being transferred and verified. Large transfers may take significant time. +file.globusUpload.inProgressMessage.details=This dataset is locked while the data files are being transferred and verified. Large transfers may take significant time. You can check transfer status at https://app.globus.org/activity. 
file.metaData.checksum.copy=Click to copy file.metaData.dataFile.dataTab.unf=UNF file.metaData.dataFile.dataTab.variables=Variables From c93832080c14af0c3ad539bc662966d771149e8a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 16 Sep 2022 15:27:53 -0400 Subject: [PATCH 157/161] fix bullets #8891 --- doc/sphinx-guides/source/developers/big-data-support.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 103e4fb579b..d7456ed9765 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -126,6 +126,7 @@ Globus File Transfer When configured to use a Globus accessible S3 store and interact with a community-developed Dataverse Globus app, Dataverse can support the transfer of files via Globus for upload and download. Due to differences in the access control models of Dataverse and Globus, enabling this capability on a store will disable the ability to restrict and embargo files in that store. Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: + * robust file transfer capable of restarting after network or endpoint failures, * third-party transfer, which enables a user running Dataverse in their desktop browser to initiate transfer of their files from a remote endpoint, i.e. on a local high-performance computing cluster), directly to an S3 store managed by Dataverse. 
From 9d8f0e63f30824f67e8c722d9bbb0472e016f18a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 16 Sep 2022 17:02:28 -0400 Subject: [PATCH 158/161] doc suggestions #8891 --- .../source/developers/big-data-support.rst | 22 ++++++++++++++----- .../source/installation/config.rst | 14 +++++++----- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index d7456ed9765..0782fd239a1 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -120,23 +120,33 @@ To configure the options mentioned above, an administrator must set two JVM opti ``./asadmin create-jvm-options "-Ddataverse.files..public=true"`` ``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` +.. _globus-support: + Globus File Transfer -------------------- -When configured to use a Globus accessible S3 store and interact with a community-developed Dataverse Globus app, Dataverse can support the transfer of files via Globus for upload and download. Due to differences in the access control models of Dataverse and Globus, enabling this capability on a store will disable the ability to restrict and embargo files in that store. +Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. + +Users can transfer files via `Globus `_ into and out of datasets when their Dataverse installation is configured to use a Globus accessible S3 store and a community-developed `dataverse-globus `_ "transfer" app has been properly installed and configured. + +Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store. + +As Globus aficionados know, Globus endpoints can be in a variety of places, from data centers to personal computers. 
This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: -* robust file transfer capable of restarting after network or endpoint failures, -* third-party transfer, which enables a user running Dataverse in their desktop browser to initiate transfer of their files from a remote endpoint, i.e. on a local high-performance computing cluster), directly to an S3 store managed by Dataverse. +* robust file transfer capable of restarting after network or endpoint failures +* third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation -This mechanism requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution. Users will need a Globus account which could be obtained via their institution or directly from Globus (no cost). +Globus transfer requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution. Users will need a Globus account which could be obtained via their institution or directly from Globus (at no cost). The setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. -As described in that document, Globus transfers can be initiated by choosing the Globus option on the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) 
Analogously, Globus Transfer is one of the downloads in the dataset access menu and optionally the file download menu (if/when supported in the dataverse-globus app). +As described in that document, Globus transfers can be initiated by choosing the Globus option in the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, "Globus Transfer" is one of the download options in the "Access Dataset" menu and optionally the file landing page download menu (if/when supported in the dataverse-globus app). + +An overview of the control and data transfer interactions between components was presented at the 2022 Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. -An overview of the control and data transfer interactions between components was presented at the Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. +See also :ref:`Globus settings <:GlobusBasicToken>`. Data Capture Module (DCM) ------------------------- diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 27d660796ec..17d88c8ea31 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2983,32 +2983,34 @@ The URL of an LDN Inbox to which the LDN Announce workflow step will send messag The list of parent dataset field names for which the LDN Announce workflow step should send messages. See :doc:`/developers/workflows` for details. +.. _:GlobusBasicToken: + :GlobusBasicToken +++++++++++++++++ -GlobusBasicToken encodes credentials for Globus Integration - see :doc:`/developers/big-data-support` for details. +GlobusBasicToken encodes credentials for Globus integration. See :ref:`globus-support` for details. 
:GlobusEndpoint +++++++++++++++ -GlobusEndpoint is Globus endpoint id used with Globus Integration - see :doc:`/developers/big-data-support` for details. +GlobusEndpoint is Globus endpoint id used with Globus integration. See :ref:`globus-support` for details. :GlobusStores +++++++++++++ -A comma-separated list of the S3 stores that are configured to support Globus Integration - see :doc:`/developers/big-data-support` for details. +A comma-separated list of the S3 stores that are configured to support Globus integration. See :ref:`globus-support` for details. :GlobusAppURL +++++++++++++ -The URL where the Dataverse Globus app has been deployed to support Globus Integration - see :doc:`/developers/big-data-support` for details. +The URL where the `dataverse-globus `_ "transfer" app has been deployed to support Globus integration. See :ref:`globus-support` for details. :GlobusPollingInterval ++++++++++++++++++++++ -The interval in seconds between Dataverse calls to Globus to check on upload progress. Defaults to 50 seconds. +The interval in seconds between Dataverse calls to Globus to check on upload progress. Defaults to 50 seconds. See :ref:`globus-support` for details. :GlobusSingleFileTransfer +++++++++++++++++++++++++ -A true/false option to add a Globus transfer option to the file download menu which is not yet fully supported in the dataverse-globus app - see :doc:`/developers/big-data-support` for details. +A true/false option to add a Globus transfer option to the file download menu which is not yet fully supported in the dataverse-globus app. See :ref:`globus-support` for details. 
From c352da8c31d5b4314163a6459df7c4a3f516ad58 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Jul 2022 15:29:07 -0400 Subject: [PATCH 159/161] fix mimetype detection when using temp file copy #7527 --- .../command/impl/RedetectFileTypeCommand.java | 5 ++- .../iq/dataverse/util/FileTypeDetection.java | 12 ------ .../harvard/iq/dataverse/util/FileUtil.java | 3 ++ .../dataverse/util/FileTypeDetectionTest.java | 42 ------------------- 4 files changed, 6 insertions(+), 56 deletions(-) delete mode 100644 src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java delete mode 100644 src/test/java/edu/harvard/iq/dataverse/util/FileTypeDetectionTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java index 8eeca0cb4cd..286b107a5fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java @@ -12,7 +12,8 @@ import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.util.EjbUtil; -import edu.harvard.iq.dataverse.util.FileTypeDetection; +import edu.harvard.iq.dataverse.util.FileUtil; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -62,7 +63,7 @@ public DataFile execute(CommandContext ctxt) throws CommandException { } logger.fine("target file: " + localFile); - String newlyDetectedContentType = FileTypeDetection.determineFileType(localFile); + String newlyDetectedContentType = FileUtil.determineFileType(localFile, fileToRedetect.getDisplayName()); fileToRedetect.setContentType(newlyDetectedContentType); } catch (IOException ex) { throw new CommandException("Exception while attempting to get the bytes of the file during file type redetection: " + ex.getLocalizedMessage(), 
this); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java b/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java deleted file mode 100644 index 52515c00524..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java +++ /dev/null @@ -1,12 +0,0 @@ -package edu.harvard.iq.dataverse.util; - -import java.io.File; -import java.io.IOException; - -public class FileTypeDetection { - - public static String determineFileType(File file) throws IOException { - return FileUtil.determineFileType(file, file.getName()); - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index a4fe550768e..339de904f9e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -546,6 +546,9 @@ public static String determineFileType(File f, String fileName) throws IOExcepti } } + if(fileType==null) { + fileType = MIME_TYPE_UNDETERMINED_DEFAULT; + } logger.fine("returning fileType "+fileType); return fileType; } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileTypeDetectionTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileTypeDetectionTest.java deleted file mode 100644 index 5d2b9b4d56a..00000000000 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileTypeDetectionTest.java +++ /dev/null @@ -1,42 +0,0 @@ -package edu.harvard.iq.dataverse.util; - -import java.io.File; -import java.io.IOException; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.apache.commons.io.FileUtils; -import org.junit.AfterClass; -import static org.junit.Assert.assertEquals; -import org.junit.BeforeClass; -import org.junit.Test; - -public class FileTypeDetectionTest { - - static String baseDirForConfigFiles = "/tmp"; - - @BeforeClass - public static void setUpClass() { - System.setProperty("com.sun.aas.instanceRoot", baseDirForConfigFiles); - 
String testFile1Src = "conf/jhove/jhove.conf"; - String testFile1Tmp = baseDirForConfigFiles + "/config/jhove.conf"; - try { - FileUtils.copyFile(new File(testFile1Src), new File(testFile1Tmp)); - } catch (IOException ex) { - Logger.getLogger(JhoveFileTypeTest.class.getName()).log(Level.SEVERE, null, ex); - } - } - - @AfterClass - public static void tearDownClass() { - // SiteMapUtilTest relies on com.sun.aas.instanceRoot being null. - System.clearProperty("com.sun.aas.instanceRoot"); - } - - @Test - public void testDetermineFileTypeJupyterNoteboook() throws Exception { - File file = new File("src/test/java/edu/harvard/iq/dataverse/util/irc-metrics.ipynb"); - // https://jupyter.readthedocs.io/en/latest/reference/mimetype.html - assertEquals("application/x-ipynb+json", FileTypeDetection.determineFileType(file)); - } - -} From 272dd4d559101ce5bf1316dae15179f7f783805d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Jul 2022 15:44:43 -0400 Subject: [PATCH 160/161] add test to check that java.io.File name is not used --- .../edu/harvard/iq/dataverse/util/FileUtilTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 226c677ed0f..67f1f10b891 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -329,6 +329,18 @@ public void testDetermineFileTypeByName() { fail("File does not exist: " + file.toPath().toString()); } } + + @Test + public void testDetermineFileTypeFromName() { + //Verify that name of the local file isn't used in determining the type (as we often use *.tmp when the real name has a different extension) + try { + File file = File.createTempFile("empty", "png"); + assertEquals("text/plain", FileUtil.determineFileType(file, "something.txt")); + } catch (IOException ex) { + 
Logger.getLogger(FileUtilTest.class.getName()).log(Level.SEVERE, null, ex); + } + + } // isThumbnailSuppported() has been moved from DataFileService to FileUtil: /** From c554ecc0622188c1ada2500f34efc95bfd21ccd1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 19 Sep 2022 11:20:45 -0400 Subject: [PATCH 161/161] rename test to avoid naming conflict #8891 --- src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java index 4cc726203a9..01fb8aad6cf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java @@ -343,7 +343,7 @@ public void testDetermineFileTypeByName() { } @Test - public void testDetermineFileTypeFromName() { + public void testDetermineFileTypeFromNameLocalFile() { //Verify that name of the local file isn't used in determining the type (as we often use *.tmp when the real name has a different extension) try { File file = File.createTempFile("empty", "png");