From b2ab9ad3a3ec4d03e2197d4b31866938db00d3aa Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 28 Mar 2019 13:23:12 -0400 Subject: [PATCH 1/4] Facet Category and Labels --- src/main/java/Bundle.properties | 89 +++++++++++++ src/main/java/Bundle_fr.properties | 88 +++++++++++++ src/main/java/citation_fr.properties | 6 +- .../search/SearchIncludeFragment.java | 13 +- .../dataverse/search/SearchServiceBean.java | 123 +++++++++++++----- .../iq/dataverse/search/SolrSearchResult.java | 2 +- 6 files changed, 279 insertions(+), 42 deletions(-) diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index d7560a30af5..fe49da82b6e 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -2289,3 +2289,92 @@ rtabfileparser.ioexception.boolean=Unexpected value for the Boolean variable ({0 rtabfileparser.ioexception.read=Couldn't read Boolean variable ({0})! rtabfileparser.ioexception.parser1=R Tab File Parser: Could not obtain varQnty from the dataset metadata. rtabfileparser.ioexception.parser2=R Tab File Parser: varQnty=0 in the dataset metadata! 
+ +#FacetCategory - staticSearchFields +staticSearchFields.id=Id +staticSearchFields.dataverseVersionIndexedBy_s=Dataverse Version Indexed By +staticSearchFields.name=Name +staticSearchFields.description=Description +staticSearchFields.identifier=Identifier +staticSearchFields.metadataSource=Metadata Source +staticSearchFields.isHarvested=Is Harvested +staticSearchFields.persistentUrl=Persistent Url +staticSearchFields.unf=Unf +staticSearchFields.dvName=Dataverse Name +staticSearchFields.dvAlias=Dataverse Alias +staticSearchFields.dvAffiliation=Dataverse Affiliation +staticSearchFields.dvDescription=Dataverse Description +staticSearchFields.dvCategory=Dataverse Category +staticSearchFields.dvSubject=Dataverse Subject +staticSearchFields.subject_ss=Subject +staticSearchFields.categoryOfDataverse=Category Of Dataverse +staticSearchFields.identifierOfDataverse=Identifier Of Dataverse +staticSearchFields.affiliation_ss=Affiliation +staticSearchFields.fileName=File Name +staticSearchFields.fileDescription=File Description +staticSearchFields.filePersistentId=File Persistent Id +staticSearchFields.fileType=File Type Searchable +staticSearchFields.fileTypeDisplay=File Type Friendly +staticSearchFields.fileContentType=File Content Type +staticSearchFields.fileTypeGroupFacet=File Type +staticSearchFields.fileSizeInBytes=File Size In Bytes +staticSearchFields.fileMd5=File Md5 +staticSearchFields.fileChecksumType=File Checksum Type +staticSearchFields.fileChecksumValue=File Checksum Value +staticSearchFields.fileNameWithoutExtension=Filename Without Extension +staticSearchFields.fileTag=File Tag +staticSearchFields.fileTags=File Tag Searchable +staticSearchFields.tabularDataTag=Tabdata Tag +staticSearchFields.fileAccess=Access +staticSearchFields.subtreePaths=Subtree +staticSearchFields.score=Relevance +staticSearchFields.dvObjectType=Type +staticSearchFields.nameSort=Name Sort +staticSearchFields.publicationDate=Publication Year +staticSearchFields.dateSort=Release Or Create 
Date +staticSearchFields.definitionPointDocId=Definition Point +staticSearchFields.definitionPointDvObjectId=Definition Point Dvobject Id +staticSearchFields.discoverableBy=Discoverable By +staticSearchFields.publicationStatus=Publication Status +staticSearchFields.entityId=Entity Id +staticSearchFields.parentName=Parent Name +staticSearchFields.parentId=Parent Id +staticSearchFields.parentIdentifier=Parent Identifier +staticSearchFields.parentCitation=Parent Citation +staticSearchFields.dsDescriptionValue=Dataset Description +staticSearchFields.citation=Dataset Citation +staticSearchFields.citationHtml=Dataset Citation Html +staticSearchFields.deaccessionReason=Dataset Deaccession Reason +staticSearchFields.dsPublicationDate=Dataset Publication Date +staticSearchFields.dsPersistentId=Dataset Persistent Id +staticSearchFields.datasetVersionId=Dataset Version Id +staticSearchFields.variableName=Variable Name +staticSearchFields.variableLabel=Variable Label +staticSearchFields._text_=Full Text + +#dataverse category - Facet Labels +Researcher=Researcher +Research\u0020Project=Research Project +Journal=Journal +Organization\u0020or\u0020Institution=Organization or Institution +Teaching\u0020Course=Teaching Course +Research\u0020Group=Research Group +Laboratory=Laboratory +Department=Department +Uncategorized=Uncategorized + +#filetype - Facet Labels +Document=Document +Text=Text +Tabular\u0020Data=Tabular Data +Data=Data +FITS=FITS +Shape=Shape +ZIP=ZIP +Image=Image +Network\u0020Data=Network Data +Unknown=Unknown + +#access - Facet Labels +Public=Public +Restricted=Restricted diff --git a/src/main/java/Bundle_fr.properties b/src/main/java/Bundle_fr.properties index eb7bcc63d84..1ff0e590540 100644 --- a/src/main/java/Bundle_fr.properties +++ b/src/main/java/Bundle_fr.properties @@ -2200,3 +2200,91 @@ passwdVal.passwdReq.letter=lettre passwdVal.passwdReq.numeral=chiffre passwdVal.passwdReq.special=caractère spécial dataretrieverAPI.noMsgResultsFound=Désolé, aucun 
résultat n'a été trouvé. + +#FacetCategory - staticSearchFields +staticSearchFields.id=Id_fr +staticSearchFields.dataverseVersionIndexedBy_s=Dataverse Version Indexed By_fr +staticSearchFields.name=Name_fr +staticSearchFields.description=Description_fr +staticSearchFields.identifier=Identifier_fr +staticSearchFields.metadataSource=Source des métadonnées +staticSearchFields.isHarvested=Is Harvested_fr +staticSearchFields.persistentUrl=Persistent Url_fr +staticSearchFields.unf=Unf_fr +staticSearchFields.dvName=Dataverse Name_fr +staticSearchFields.dvAlias=Dataverse Alias_fr +staticSearchFields.dvAffiliation=Dataverse Affiliation_fr +staticSearchFields.dvDescription=Dataverse Description_fr +staticSearchFields.dvCategory=Catégorie de dataverse +staticSearchFields.dvSubject=Sujet de dataverse +staticSearchFields.subject_ss=Sujet +staticSearchFields.categoryOfDataverse=Catégorie de Dataverse +staticSearchFields.identifierOfDataverse=Identifier Of Dataverse_fr +staticSearchFields.affiliation_ss=Affiliation_fr +staticSearchFields.fileName=File Name_fr +staticSearchFields.fileDescription=File Description_fr +staticSearchFields.filePersistentId=File Persistent Id_fr +staticSearchFields.fileType=File Type Searchable_fr +staticSearchFields.fileTypeDisplay=File Type Friendly_fr +staticSearchFields.fileContentType=File Content Type_fr +staticSearchFields.fileTypeGroupFacet=Type de fichier +staticSearchFields.fileSizeInBytes=File Size In Bytes_fr +staticSearchFields.fileMd5=File Md5_fr +staticSearchFields.fileChecksumType=File Checksum Type_fr +staticSearchFields.fileChecksumValue=File Checksum Value_fr +staticSearchFields.fileNameWithoutExtension=Filename Without Extension_fr +staticSearchFields.fileTag=Libellé de fichier +staticSearchFields.fileTags=File Tag Searchable_fr +staticSearchFields.tabularDataTag=Tabdata Tag_fr +staticSearchFields.fileAccess=Accès +staticSearchFields.subtreePaths=Subtree_fr +staticSearchFields.score=Relevance_fr 
+staticSearchFields.dvObjectType=Type_fr +staticSearchFields.nameSort=Name Sort_fr +staticSearchFields.publicationDate=Année de publication +staticSearchFields.dateSort=Release Or Create Date_fr +staticSearchFields.definitionPointDocId=Definition Point_fr +staticSearchFields.definitionPointDvObjectId=Definition Point Dvobject Id_fr +staticSearchFields.discoverableBy=Discoverable By_fr +staticSearchFields.publicationStatus=Statut de publication +staticSearchFields.entityId=Entity Id_fr +staticSearchFields.parentName=Parent Name_fr +staticSearchFields.parentId=Parent Id_fr +staticSearchFields.parentIdentifier=Parent Identifier_fr +staticSearchFields.parentCitation=Parent Citation_fr +staticSearchFields.dsDescriptionValue=Dataset Description_fr +staticSearchFields.citation=Dataset Citation_fr +staticSearchFields.citationHtml=Dataset Citation Html_fr +staticSearchFields.deaccessionReason=Dataset Deaccession Reason_fr +staticSearchFields.dsPublicationDate=Dataset Publication Date_fr +staticSearchFields.dsPersistentId=Dataset Persistent Id_fr +staticSearchFields.datasetVersionId=Dataset Version Id_fr +staticSearchFields.variableName=Variable Name_fr +staticSearchFields.variableLabel=Variable Label_fr +staticSearchFields._text_=Full Text_fr + +#dataverse category - Facet Labels +Researcher=Chercheur +Research\u0020Project=Projet de recherche +Journal=Revue +Organization\u0020or\u0020Institution=Organisation ou établissement +Teaching\u0020Course=Cours +Research\u0020Group=Groupe de recherche +Laboratory=Laboratoire +Uncategorized=Sans catégorie + +#filetype - Facet Labels +Document=Document +Text=Texte +Tabular\u0020Data=Données tabulaires +Data=Données +FITS=FITS +Shape=Formes +ZIP=ZIP +Image=Image +Network\u0020Data=Données en réseau +Unknown=Inconnu + +#access - Facet Labels +Public=Publique +Restricted=Restreint diff --git a/src/main/java/citation_fr.properties b/src/main/java/citation_fr.properties index bfa135e8bc9..7e635576192 100644 --- 
a/src/main/java/citation_fr.properties +++ b/src/main/java/citation_fr.properties @@ -234,13 +234,13 @@ datasetfieldtype.dataSources.watermark= datasetfieldtype.originOfSources.watermark= datasetfieldtype.characteristicOfSources.watermark= datasetfieldtype.accessToSources.watermark= -controlledvocabulary.subject.agricultural_sciences=Sciences de l'agriculture +controlledvocabulary.subject.agricultural_sciences=Sciences de l\u2019agriculture controlledvocabulary.subject.arts_and_humanities=Arts et sciences humaines controlledvocabulary.subject.astronomy_and_astrophysics=Astronomie et astrophysique controlledvocabulary.subject.business_and_management=Affaires et gestion controlledvocabulary.subject.chemistry=Chimie -controlledvocabulary.subject.computer_and_information_science=Informatique et science de l'information -controlledvocabulary.subject.earth_and_environmental_sciences=Sciences de la terre et de l'environnement +controlledvocabulary.subject.computer_and_information_science=Informatique et science de l\u2019information +controlledvocabulary.subject.earth_and_environmental_sciences=Sciences de la terre et de l\u2019environnement controlledvocabulary.subject.engineering=Génie controlledvocabulary.subject.law=Droit controlledvocabulary.subject.mathematical_sciences=Sciences mathématiques diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index f75671515f5..d591a55b187 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import 
edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; @@ -427,9 +428,9 @@ public void search(boolean onlyDataRelatedToMe) { } // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 - previewCountbyType.put("dataverses", 0L); - previewCountbyType.put("datasets", 0L); - previewCountbyType.put("files", 0L); + previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), 0L); + previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), 0L); + previewCountbyType.put(BundleUtil.getStringFromBundle("files"), 0L); if (solrQueryResponseAllTypes != null) { for (FacetCategory facetCategory : solrQueryResponseAllTypes.getTypeFacetCategories()) { for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { @@ -764,15 +765,15 @@ public void setTypeFilterQuery(String typeFilterQuery) { } public Long getFacetCountDatasets() { - return findFacetCountByType("datasets"); + return findFacetCountByType(BundleUtil.getStringFromBundle("datasets")); } public Long getFacetCountDataverses() { - return findFacetCountByType("dataverses"); + return findFacetCountByType(BundleUtil.getStringFromBundle("dataverses")); } public Long getFacetCountFiles() { - return findFacetCountByType("files"); + return findFacetCountByType(BundleUtil.getStringFromBundle("files")); } public String getSearchFieldRelevance() { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 77a5e3ef563..8f8abe75c3f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -30,6 +30,7 @@ import java.util.ListIterator; import java.util.Map; import java.util.Set; +import java.util.MissingResourceException; import java.util.logging.Level; import java.util.logging.Logger; import javax.annotation.PostConstruct; @@ -78,13 +79,13 @@ public class SearchServiceBean { SystemConfig 
systemConfig; private SolrClient solrServer; - + @PostConstruct public void init() { String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; solrServer = new HttpSolrClient.Builder(urlString).build(); } - + @PreDestroy public void close() { if (solrServer != null) { @@ -96,7 +97,7 @@ public void close() { solrServer = null; } } - + /** * Import note: "onlyDatatRelatedToMe" relies on filterQueries for providing * access to Private Data for the correct user @@ -120,7 +121,7 @@ public void close() { public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true); } - + /** * Import note: "onlyDatatRelatedToMe" relies on filterQueries for providing * access to Private Data for the correct user @@ -138,7 +139,7 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List datasetFields = datasetFieldService.findAllOrderedById(); @@ -243,7 +244,7 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List facetLabelList = new ArrayList<>(); int numMetadataSources = 0; + String metadataBlockName = ""; + String datasetFieldName = ""; + /** + * To find the metadata block name to which the facetField belongs to + * ===facetField: authorName_ss metadatablockname : citation + * ===facetField: dvCategory metadatablockname : "" + */ + for (DatasetFieldType datasetField : datasetFields) { + String solrFieldNameForDataset = datasetField.getSolrField().getNameFacetable(); + if (solrFieldNameForDataset != null && facetField.getName().equals(solrFieldNameForDataset)) { + metadataBlockName = datasetField.getMetadataBlock().getName() ; + datasetFieldName = datasetField.getName(); + break; + 
} + } + + for (FacetField.Count facetFieldCount : facetField.getValues()) { /** * @todo we do want to show the count for each facet */ // logger.info("field: " + facetField.getName() + " " + facetFieldCount.getName() + " (" + facetFieldCount.getCount() + ")"); + String localefriendlyName = null; if (facetFieldCount.getCount() > 0) { - FacetLabel facetLabel = new FacetLabel(facetFieldCount.getName(), facetFieldCount.getCount()); + if(metadataBlockName.length() > 0 ) { + localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName); + } else { + localefriendlyName = BundleUtil.getStringFromBundle(facetFieldCount.getName()); + if(localefriendlyName == null){ + localefriendlyName = facetFieldCount.getName(); + } + } + FacetLabel facetLabel = new FacetLabel(localefriendlyName, facetFieldCount.getCount()); // quote field facets facetLabel.setFilterQuery(facetField.getName() + ":\"" + facetFieldCount.getName() + "\""); facetLabelList.add(facetLabel); if (facetField.getName().equals(SearchFields.PUBLICATION_STATUS)) { - if (facetLabel.getName().equals(IndexServiceBean.getUNPUBLISHED_STRING())) { + if (facetFieldCount.getName().equals(IndexServiceBean.getUNPUBLISHED_STRING())) { unpublishedAvailable = true; - } else if (facetLabel.getName().equals(IndexServiceBean.getDRAFT_STRING())) { + } else if (facetFieldCount.getName().equals(IndexServiceBean.getDRAFT_STRING())) { draftsAvailable = true; - } else if (facetLabel.getName().equals(IndexServiceBean.getDEACCESSIONED_STRING())) { + } else if (facetFieldCount.getName().equals(IndexServiceBean.getDEACCESSIONED_STRING())) { deaccessionedAvailable = true; } } @@ -676,14 +703,20 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List 0) { + facetCategory.setFriendlyName(friendlyName); + } else { + String[] parts = name.split("_"); + StringBuilder stringBuilder = new StringBuilder(); + for (String part : parts) { + stringBuilder.append(getCapitalizedName(part.toLowerCase()) + 
" "); + } + String friendlyNameWithTrailingSpace = stringBuilder.toString(); + friendlyName = friendlyNameWithTrailingSpace.replaceAll(" $", ""); + facetCategory.setFriendlyName(friendlyName); } - String friendlyNameWithTrailingSpace = stringBuilder.toString(); - String friendlyName = friendlyNameWithTrailingSpace.replaceAll(" $", ""); - facetCategory.setFriendlyName(friendlyName); + // logger.info("adding <<<" + staticSearchField + ":" + friendlyName + ">>>"); staticSolrFieldFriendlyNamesBySolrField.put(staticSearchField, friendlyName); // stop examining the declared/static fields in the SearchFields object. we found a match @@ -771,6 +804,31 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List0) { + return output; + } + else + return title; + } + + public String getCapitalizedName(String name) { return Character.toUpperCase(name.charAt(0)) + name.substring(1); } @@ -806,7 +864,7 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ } // ---------------------------------------------------- - // (1) Is this a GuestUser? + // (1) Is this a GuestUser? // Yes, see if GuestUser is part of any groups such as IP Groups. // ---------------------------------------------------- if (user instanceof GuestUser) { @@ -844,9 +902,9 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); // ---------------------------------------------------- - // (3) Is this a Super User? + // (3) Is this a Super User? // Yes, give back everything - // ---------------------------------------------------- + // ---------------------------------------------------- if (au.isSuperuser()) { // dangerous because this user will be able to see // EVERYTHING in Solr with no regard to permissions! 
@@ -858,7 +916,7 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ // (4) User is logged in AND onlyDatatRelatedToMe == true // Yes, give back everything -> the settings will be in // the filterqueries given to search - // ---------------------------------------------------- + // ---------------------------------------------------- if (onlyDatatRelatedToMe == true) { if (systemConfig.myDataDoesNotUsePermissionDocs()) { logger.fine("old 4.2 behavior: MyData is not using Solr permission docs"); @@ -931,3 +989,4 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ } } + diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index 71ca064852b..3754fa18283 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -901,7 +901,7 @@ public void setReleaseOrCreateDate(Date releaseOrCreateDate) { } public String getDateToDisplayOnCard() { - return DateUtil.formatDate(dateToDisplayOnCard,"MMM dd, yyyy"); + return DateUtil.formatDate(dateToDisplayOnCard,"MMM dd, yyyy"); } public void setDateToDisplayOnCard(String dateToDisplayOnCard) { From 4e15e4e9763ac79688e0d99e129fede397d0766d Mon Sep 17 00:00:00 2001 From: chenganj Date: Thu, 28 Mar 2019 13:27:32 -0400 Subject: [PATCH 2/4] merge with develop --- conf/docker-aio/1prep.sh | 1 - conf/docker-aio/testscripts/post | 1 - conf/solr/7.3.1/schema.xml | 2 - doc/release-notes/5565-file-folder-names.md | 3 ++ doc/release-notes/5649-upgrade-postgresql.md | 1 + .../branding/custom-homepage-dynamic.html | 2 +- .../_static/util/pg8-createsequence-prep.sql | 21 -------- doc/sphinx-guides/source/api/native-api.rst | 6 +-- .../source/developers/big-data-support.rst | 2 + .../source/developers/sql-upgrade-scripts.rst | 2 +- .../source/installation/config.rst | 5 +- 
.../source/installation/prerequisites.rst | 13 +++-- .../source/user/dataset-management.rst | 18 +++++-- .../source/user/find-use-data.rst | 2 +- scripts/deploy/phoenix.dataverse.org/post | 1 - src/main/java/Bundle.properties | 9 +++- src/main/java/ValidationMessages.properties | 2 +- .../iq/dataverse/EditDatafilesPage.java | 30 ++++------- .../dataverse/FileDirectoryNameValidator.java | 43 +++++++++++++++ .../harvard/iq/dataverse/FileMetadata.java | 12 ++++- .../dataverse/ManageFilePermissionsPage.java | 15 ++++-- .../iq/dataverse/RoleAssigneeServiceBean.java | 24 ++++++++- .../edu/harvard/iq/dataverse/Template.java | 4 +- .../ValidateDataFileDirectoryName.java | 32 +++++++++++ .../AuthenticationServiceBean.java | 16 ++++++ .../datasetutility/OptionalFileParams.java | 40 ++++++++++++-- .../iq/dataverse/search/IndexServiceBean.java | 13 ----- .../iq/dataverse/search/SearchFields.java | 5 +- .../dataverse/search/SearchServiceBean.java | 2 - .../iq/dataverse/search/SolrSearchResult.java | 8 +-- .../harvard/iq/dataverse/util/FileUtil.java | 12 +++-- .../harvard/iq/dataverse/util/StringUtil.java | 30 +++++++++++ ...11.0.1__5565-sanitize-directory-labels.sql | 9 ++++ src/main/webapp/editFilesFragment.xhtml | 19 ++++++- src/main/webapp/file.xhtml | 8 +++ src/main/webapp/filesFragment.xhtml | 5 +- .../webapp/permissions-manage-files.xhtml | 9 ++-- src/main/webapp/search-include-fragment.xhtml | 13 ++--- .../FileDirectoryNameValidatorTest.java | 40 ++++++++++++++ .../iq/dataverse/api/FileMetadataIT.java | 15 +++--- .../edu/harvard/iq/dataverse/api/FilesIT.java | 54 ++++++++++--------- .../iq/dataverse/api/MakeDataCountApiIT.java | 1 + .../edu/harvard/iq/dataverse/api/UsersIT.java | 2 +- .../iq/dataverse/ingest/IngestUtilTest.java | 13 +++-- ...t-with-files-invalid-directory-labels.json | 8 --- .../json/complete-dataset-with-files.json | 8 --- 46 files changed, 402 insertions(+), 179 deletions(-) create mode 100644 doc/release-notes/5565-file-folder-names.md create mode 
100644 doc/release-notes/5649-upgrade-postgresql.md delete mode 100644 doc/sphinx-guides/source/_static/util/pg8-createsequence-prep.sql create mode 100644 src/main/java/edu/harvard/iq/dataverse/FileDirectoryNameValidator.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/ValidateDataFileDirectoryName.java create mode 100644 src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql create mode 100644 src/test/java/edu/harvard/iq/dataverse/FileDirectoryNameValidatorTest.java diff --git a/conf/docker-aio/1prep.sh b/conf/docker-aio/1prep.sh index 678b53e6ff4..482c683a792 100755 --- a/conf/docker-aio/1prep.sh +++ b/conf/docker-aio/1prep.sh @@ -10,7 +10,6 @@ cp ../jhove/jhove.conf testdata/ cp ../jhove/jhoveConfig.xsd testdata/ cd ../../ cp -r scripts conf/docker-aio/testdata/ -cp doc/sphinx-guides/source/_static/util/pg8-createsequence-prep.sql conf/docker-aio/testdata/doc/sphinx-guides/source/_static/util/ cp doc/sphinx-guides/source/_static/util/createsequence.sql conf/docker-aio/testdata/doc/sphinx-guides/source/_static/util/ # not using dvinstall.zip for setupIT.bash; but still used in install.bash for normal ops diff --git a/conf/docker-aio/testscripts/post b/conf/docker-aio/testscripts/post index 0a9d4c43155..f38a704e454 100755 --- a/conf/docker-aio/testscripts/post +++ b/conf/docker-aio/testscripts/post @@ -3,7 +3,6 @@ cd scripts/api ./setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out cd ../.. 
psql -U dvnapp dvndb -f scripts/database/reference_data.sql -psql -U dvnapp dvndb -f doc/sphinx-guides/source/_static/util/pg8-createsequence-prep.sql psql -U dvnapp dvndb -f doc/sphinx-guides/source/_static/util/createsequence.sql scripts/search/tests/publish-dataverse-root #git checkout scripts/api/data/dv-root.json diff --git a/conf/solr/7.3.1/schema.xml b/conf/solr/7.3.1/schema.xml index 9794ab9f152..71b9c7b57d4 100644 --- a/conf/solr/7.3.1/schema.xml +++ b/conf/solr/7.3.1/schema.xml @@ -162,8 +162,6 @@ - - diff --git a/doc/release-notes/5565-file-folder-names.md b/doc/release-notes/5565-file-folder-names.md new file mode 100644 index 00000000000..0461d65473d --- /dev/null +++ b/doc/release-notes/5565-file-folder-names.md @@ -0,0 +1,3 @@ +In this release users are given an option to edit the folder names in the file metadata. Strict validation rules for the folder names are also introduced. Only the following characters are allowed: the alphanumerics, '_', '-', '.' and ' ' (white space). Some datafiles in your Dataverse may already have folder names saved in the database (if they were extracted from uploaded zip archives with folder structure). The following sanitizing rules will be applied to all the existing folder names in the database: any invalid characters will be replaced by the '.' character. Any sequences of dots will be further replaced with a single dot. For example, the folder name ``data&info/code=@137`` will be converted to ``data.info/code.137``. This update will be automatically applied to the database the first time this release is deployed. + + diff --git a/doc/release-notes/5649-upgrade-postgresql.md b/doc/release-notes/5649-upgrade-postgresql.md new file mode 100644 index 00000000000..4b65c524eec --- /dev/null +++ b/doc/release-notes/5649-upgrade-postgresql.md @@ -0,0 +1 @@ +Upgrade your version of PostgreSQL to at least 9.3. Version 9.6 is recommended. 
diff --git a/doc/sphinx-guides/source/_static/installation/files/var/www/dataverse/branding/custom-homepage-dynamic.html b/doc/sphinx-guides/source/_static/installation/files/var/www/dataverse/branding/custom-homepage-dynamic.html index 4845eedea14..d648f530290 100644 --- a/doc/sphinx-guides/source/_static/installation/files/var/www/dataverse/branding/custom-homepage-dynamic.html +++ b/doc/sphinx-guides/source/_static/installation/files/var/www/dataverse/branding/custom-homepage-dynamic.html @@ -191,7 +191,7 @@
Past 30 Days
-
All
+
Total
...
...
diff --git a/doc/sphinx-guides/source/_static/util/pg8-createsequence-prep.sql b/doc/sphinx-guides/source/_static/util/pg8-createsequence-prep.sql deleted file mode 100644 index 740ba6ce8b6..00000000000 --- a/doc/sphinx-guides/source/_static/util/pg8-createsequence-prep.sql +++ /dev/null @@ -1,21 +0,0 @@ --- handle absence of CREATE OR REPLACE LANGUAGE for postgresql 8.4 or older --- courtesy of the postgres wiki: https://wiki.postgresql.org/wiki/CREATE_OR_REPLACE_LANGUAGE -CREATE OR REPLACE FUNCTION make_plpgsql() -RETURNS VOID -LANGUAGE SQL -AS $$ -CREATE LANGUAGE plpgsql; -$$; - -SELECT - CASE - WHEN EXISTS( - SELECT 1 - FROM pg_catalog.pg_language - WHERE lanname='plpgsql' - ) - THEN NULL - ELSE make_plpgsql() END; - -DROP FUNCTION make_plpgsql(); - diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 469a375bcca..87d8d8fee88 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -440,7 +440,7 @@ Add a file to an existing Dataset. Description and tags are optional:: A more detailed "add" example using curl:: - curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@data.tsv' -F 'jsonData={"description":"My description.","categories":["Data"], "restrict":"true"}' "https://example.dataverse.edu/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID" + curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@data.tsv' -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"true"}' "https://example.dataverse.edu/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID" Example python code to add a file. This may be run by changing these parameters in the sample code: @@ -719,12 +719,12 @@ A curl example using a ``pid``:: Replacing Files ~~~~~~~~~~~~~~~ -Replace an existing file where ``id`` is the database id of the file to replace or ``pid`` is the persistent id (DOI or Handle) of the file. 
Note that metadata such as description and tags are not carried over from the file being replaced +Replace an existing file where ``id`` is the database id of the file to replace or ``pid`` is the persistent id (DOI or Handle) of the file. Note that metadata such as description, directoryLabel (File Path) and tags are not carried over from the file being replaced .. code-block:: bash curl -H "X-Dataverse-key:$API_TOKEN" -X POST -F 'file=@data.tsv' \ - -F 'jsonData={"description":"My description.","categories":["Data"],"forceReplace":false}'\ + -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"],"forceReplace":false}'\ "https://demo.dataverse.org/api/files/$FILE_ID/replace" Example python code to replace a file. This may be run by changing these parameters in the sample code: diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 18737066852..44e73c48f03 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -103,6 +103,7 @@ Optional steps for setting up the S3 Docker DCM Variant - Add AWS bucket info to dcmsrv - Add AWS credentials to ``~/.aws/credentials`` + - ``[default]`` - ``aws_access_key_id =`` - ``aws_secret_access_key =`` @@ -117,6 +118,7 @@ Optional steps for setting up the S3 Docker DCM Variant - Add AWS bucket info to Dataverse - Add AWS credentials to ``~/.aws/credentials`` + - ``[default]`` - ``aws_access_key_id =`` - ``aws_secret_access_key =`` diff --git a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst index 07205a5d365..3c7e21285d4 100644 --- a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst +++ b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst @@ -26,7 +26,7 @@ How to Create a SQL Upgrade Script We assume you have already read the 
:doc:`version-control` section and have been keeping your feature branch up to date with the "develop" branch. -Create a new file called something like ``V4.11__5513-database-variablemetadata.sql`` in the ``src/main/resources/db/migration`` directory. Use the previously released version (4.11 in the example above) rather than what we guess will be the next released version (which is often uncertain). For the "description" you should the name of your branch, which should include the GitHub issue you are working on, as in the example above. To read more about Flyway file naming conventions, see https://flywaydb.org/documentation/migrations#naming +Create a new file called something like ``V4.11.0.1__5565-sanitize-directory-labels.sql`` in the ``src/main/resources/db/migration`` directory. Use a version like "4.11.0.1" in the example above where the previously released version was 4.11, ensuring that the version number is unique. For the "description" you should use the name of your branch, which should include the GitHub issue you are working on, as in the example above. To read more about Flyway file naming conventions, see https://flywaydb.org/documentation/migrations#naming The SQL migration script you wrote will be part of the war file and executed when the war file is deployed. To see a history of Flyway database migrations that have been applied, look at the ``flyway_schema_history`` table. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 4c6efcf7a86..923d9bca600 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1065,9 +1065,6 @@ in any other SQL flavor - the standard JPA code in the application simply expect the database to have a saved function ("stored procedure") named ``generateIdentifierAsSequentialNumber`` with the single return argument ``identifier``. 
-For systems using Postgresql 8.4 or older, the procedural language `plpgsql` should be enabled first. -We have provided an example :download:`here `. - Please note that ``:IdentifierGenerationStyle`` also plays a role for the "identifier" for files. See the section on ``:DataFilePIDFormat`` below for more details. .. _:DataFilePIDFormat: @@ -1111,6 +1108,8 @@ If you don't want to register file-based PIDs for your installation, set: Note: File-level PID registration was added in 4.9 and is required until version 4.9.3. +.. _:IndependentHandleService: + :IndependentHandleService +++++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index 93f840572e3..ac7a49e6ba6 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -124,9 +124,9 @@ PostgreSQL Installing PostgreSQL ======================= -Version 9.x is required. Previous versions have not been tested. +Version 9.3 is required. Previous versions have not been tested. -Version 9.6 is anticipated as an "LTS" release in RHEL and on other platforms:: +Version 9.6 is strongly recommended:: # yum install -y https://download.postgresql.org/pub/repos/yum/9.6/redhat/rhel-7-x86_64/pgdg-centos96-9.6-3.noarch.rpm # yum makecache fast @@ -135,8 +135,11 @@ Version 9.6 is anticipated as an "LTS" release in RHEL and on other platforms:: # /usr/bin/systemctl start postgresql-9.6 # /usr/bin/systemctl enable postgresql-9.6 -Note these steps are specific to RHEL/CentOS 7. For RHEL/CentOS 6 use:: +Note that the steps above are specific to RHEL/CentOS 7. 
For RHEL/CentOS 6 use:: + # yum install -y https://download.postgresql.org/pub/repos/yum/9.6/redhat/rhel-6-x86_64/pgdg-centos96-9.6-3.noarch.rpm + # yum makecache fast + # yum install -y postgresql96-server # service postgresql-9.6 initdb # service postgresql-9.6 start @@ -176,10 +179,6 @@ Configuring Database Access for the Dataverse Application (and the Dataverse Ins # systemctl restart postgresql-9.6 - or on RHEL/CentOS 6:: - - # service postgresql restart - On MacOS X a "Reload Configuration" icon is usually supplied in the PostgreSQL application folder. Or you could look up the process id of the PostgreSQL postmaster process, and send it the SIGHUP signal:: kill -1 PROCESS_ID diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 413f43d0fbb..d3dcfef7d5f 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -180,11 +180,15 @@ Metadata found in the header section of `Flexible Image Transport System (FITS) Compressed Files ---------------- -Compressed files in zip format are unpacked automatically. If it fails to unpack, for whatever reason, it will upload as is. If the number of files inside are more than a set limit (1,000 by default, configurable by the Administrator), you will get an error message and the zip file will uploads as is. +Compressed files in .zip format are unpacked automatically. If a .zip file fails to unpack for whatever reason, it will upload as is. If the number of files inside are more than a set limit (1,000 by default, configurable by the Administrator), you will get an error message and the .zip file will upload as is. + +If the uploaded .zip file contains a folder structure, Dataverse will keep track of this structure. A file's location within this folder structure is displayed in the file metadata as the File Path. 
When you download the contents of the dataset, this folder structure will be preserved and files will appear in their original locations. + +These folder names are subject to strict validation rules. Only the following characters are allowed: the alphanumerics, '_', '-', '.' and ' ' (white space). When a zip archive is uploaded, the folder names are automatically sanitized, with any invalid characters replaced by the '.' character. Any sequences of dots are further replaced with a single dot. For example, the folder name ``data&info/code=@137`` will be converted to ``data.info/code.137``. When uploading through the Web UI, the user can change the values further on the edit form presented, before clicking the 'Save' button. + +.. note:: If you upload multiple .zip files to one dataset, any subdirectories that are identical across multiple .zips will be merged together when the user downloads the full dataset. -.. note:: If the uploaded zip file contains sub-folders, the names of the folders will be preserved as the ``DirectoryLabel`` attributes in the ``FileMetadata`` objects of the corresponding individual datafiles. As of writing this - v.4.11 - these labels are only used to restore the folder structure in multi-file, zipped download bundles (see :doc:`/api/dataaccess` for more information). In the future folder structure will be supported for organizing files on the dataset page as well. -Support for unpacking tar files will be added when this ticket is closed: https://github.com/IQSS/dataverse/issues/2195. Other File Types ---------------- @@ -212,6 +216,14 @@ You will not have to leave the dataset page to complete these action, except for If you restrict files, you will also prompted with a popup asking you to fill out the Terms of Access for the files. If Terms of Access already exist, you will be asked to confirm them. Note that some Dataverse installations do not allow for file restrictions. 
+File Path +--------- + +The File Path metadata field is Dataverse's way of representing a file's location in a folder structure. When a user uploads a .zip file containing a folder structure, Dataverse automatically fills in the File Path information for each file contained in the .zip. If a user downloads the full dataset or a selection of files from it, they will receive a folder structure with each file positioned according to its File Path. + +A file's File Path can be manually added or edited on the Edit Files page. Changing a file's File Path will change its location in the folder structure that is created when a user downloads the full dataset or a selection of files from it. + + File Tags --------- diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst index 8d19747fb18..3fd2c6439b2 100755 --- a/doc/sphinx-guides/source/user/find-use-data.rst +++ b/doc/sphinx-guides/source/user/find-use-data.rst @@ -74,7 +74,7 @@ You can find the citation for the dataset at the top of the dataset page in a bl Download Files -------------- -Within the Files tab on a dataset page, you can download the files in that dataset. To download more than one file at a time, select the files you would like to download and then click the Download button above the files. The selected files will download in zip format. +Within the Files tab on a dataset page, you can download the files in that dataset. To download more than one file at a time, select the files you would like to download and then click the Download button above the files. The selected files will download in .zip format that preserves any folder structure that the dataset owner had set up. You may also download a file from its file page by clicking the Download button in the upper right corner of the page, or by :ref:`url_download` under the Metadata tab on the lower half of the page. 
diff --git a/scripts/deploy/phoenix.dataverse.org/post b/scripts/deploy/phoenix.dataverse.org/post index 6348e83f24d..48aff3d8f16 100755 --- a/scripts/deploy/phoenix.dataverse.org/post +++ b/scripts/deploy/phoenix.dataverse.org/post @@ -3,7 +3,6 @@ cd scripts/api ./setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out cd ../.. psql -U dvnapp dvndb -f scripts/database/reference_data.sql -psql -U dvnapp dvndb -f doc/sphinx-guides/source/_static/util/pg8-createsequence-prep.sql psql -U dvnapp dvndb -f doc/sphinx-guides/source/_static/util/createsequence.sql curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE scripts/search/tests/publish-dataverse-root diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index fe49da82b6e..b4454c67fac 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -775,8 +775,10 @@ dataverse.results.btn.addData=Add Data dataverse.results.btn.addData.newDataverse=New Dataverse dataverse.results.btn.addData.newDataset=New Dataset dataverse.results.dialog.addDataGuest.header=Add Data -dataverse.results.dialog.addDataGuest.msg=You need to Log In to create a dataverse or add a dataset. -dataverse.results.dialog.addDataGuest.msg.signup=You need to Sign Up or Log In to create a dataverse or add a dataset. +dataverse.results.dialog.addDataGuest.msg=Log in to create a dataverse or add a dataset. +dataverse.results.dialog.addDataGuest.msg.signup=Sign up or log in to create a dataverse or add a dataset. 
+dataverse.results.dialog.addDataGuest.signup.title=Sign Up for a Dataverse Account +dataverse.results.dialog.addDataGuest.login.title=Log into your Dataverse Account dataverse.results.types.dataverses=Dataverses dataverse.results.types.datasets=Datasets dataverse.results.types.files=Files @@ -903,6 +905,7 @@ dataverse.permissionsFiles.usersOrGroups.description=All the users and groups th dataverse.permissionsFiles.usersOrGroups.tabHeader.userOrGroup=User/Group Name (Affiliation) dataverse.permissionsFiles.usersOrGroups.tabHeader.id=ID dataverse.permissionsFiles.usersOrGroups.tabHeader.email=Email +dataverse.permissionsFiles.usersOrGroups.tabHeader.authentication=Authentication dataverse.permissionsFiles.usersOrGroups.tabHeader.files=Files dataverse.permissionsFiles.usersOrGroups.tabHeader.access=Access dataverse.permissionsFiles.usersOrGroups.file=File @@ -1726,6 +1729,8 @@ file.metadataTab.fileMetadata.type.label=Type file.metadataTab.fileMetadata.description.label=Description file.metadataTab.fileMetadata.publicationDate.label=Publication Date file.metadataTab.fileMetadata.depositDate.label=Deposit Date +file.metadataTab.fileMetadata.hierarchy.label=File Path +file.metadataTab.fileMetadata.hierarchy.tip=Hierarchical directory structure path used to display file organization and support reproducibility. file.metadataTab.fitsMetadata.header=FITS Metadata file.versionDifferences.noChanges=No changes associated with this version diff --git a/src/main/java/ValidationMessages.properties b/src/main/java/ValidationMessages.properties index 59b2492dcd1..5744b6c4dbd 100644 --- a/src/main/java/ValidationMessages.properties +++ b/src/main/java/ValidationMessages.properties @@ -30,7 +30,7 @@ alias.illegalCharacters=Alias cannot be empty. Valid characters are a-Z, 0-9, '_ custom.response=Please enter a response. custom.questiontext=Please enter question text. filename.illegalCharacters=File Name cannot contain any of the following characters: \ / : * ? " < > | ; # . 
-directoryname.illegalCharacters=Directory Name cannot contain leading or trailing file separators. +directoryname.illegalCharacters=Directory Name cannot contain invalid characters. Valid characters are a-Z, 0-9, '_', '-', '.', '\\', '/' and ' ' (white space). filename.blank=Please specify a file name. diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index b5583458b77..fab6ce1b0fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -69,13 +69,19 @@ import org.apache.commons.httpclient.methods.GetMethod; import java.text.DateFormat; import java.util.Arrays; +import java.util.HashSet; import java.util.ResourceBundle; import java.util.Set; import java.util.logging.Level; import javax.faces.event.AjaxBehaviorEvent; import javax.faces.event.FacesEvent; +import javax.faces.event.ValueChangeEvent; import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServletResponse; +import javax.validation.ConstraintViolation; +import javax.validation.Validation; +import javax.validation.Validator; +import javax.validation.ValidatorFactory; import org.apache.commons.lang.StringUtils; import org.primefaces.context.RequestContext; @@ -1060,26 +1066,6 @@ public String saveReplacementFile() throws FileReplaceException{ public String save() { - - /* - // Validate - Set constraintViolations = workingVersion.validate(); - if (!constraintViolations.isEmpty()) { - //JsfHelper.addFlashMessage(getBundleString("dataset.message.validationError")); - logger.fine("Constraint violation detected on SAVE: "+constraintViolations.toString()); - JH.addMessage(FacesMessage.SEVERITY_ERROR, getBundleString("dataset.message.validationError")); - - //FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Validation Error", "See below for details.")); - return ""; - } - 
}*/ - - // Once all the filemetadatas pass the validation, we'll only allow the user - // to try to save once; (this it to prevent them from creating multiple - // DRAFT versions, if the page gets stuck in that state where it - // successfully creates a new version, but can't complete the remaining - // tasks. -- L.A. 4.2 - if (!saveEnabled) { return ""; } @@ -2760,6 +2746,10 @@ public void handleDescriptionChange(final AjaxBehaviorEvent event) { public void handleNameChange(final AjaxBehaviorEvent event) { datasetUpdateRequired = true; } + + public void handleFileDirectoryChange(final ValueChangeEvent event) { + datasetUpdateRequired = true; + } /* * Items for the "Advanced (Ingest) Options" popup. diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDirectoryNameValidator.java b/src/main/java/edu/harvard/iq/dataverse/FileDirectoryNameValidator.java new file mode 100644 index 00000000000..2482b5cd3b7 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/FileDirectoryNameValidator.java @@ -0,0 +1,43 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.util.StringUtil; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.validation.ConstraintValidator; +import javax.validation.ConstraintValidatorContext; + +/** + * + * @author skraffmi + */ +public class FileDirectoryNameValidator implements ConstraintValidator { + + @Override + public void initialize(ValidateDataFileDirectoryName constraintAnnotation) { + + } + + @Override + public boolean isValid(String value, ConstraintValidatorContext context) { + return isFileDirectoryNameValid(value, context); + + } + + public static boolean isFileDirectoryNameValid(String value, ConstraintValidatorContext context) { + + if (value == null || value.isEmpty()) { + return true; + } + String validCharacters = "[\\w\\\\/. -]+"; + Pattern p = Pattern.compile(validCharacters); + Matcher m = p.matcher(value); + return m.matches(); + + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java index cdce9db23ba..d3b9fc76332 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java @@ -33,6 +33,7 @@ import javax.persistence.Version; import edu.harvard.iq.dataverse.util.DateUtil; +import edu.harvard.iq.dataverse.util.StringUtil; import org.hibernate.validator.constraints.NotBlank; import javax.validation.constraints.Pattern; @@ -55,10 +56,11 @@ public class FileMetadata implements Serializable { @Column( nullable=false ) private String label = ""; - @Pattern(regexp="|[^/\\\\]|^[^/\\\\]+.*[^/\\\\]+$", - message = "{directoryname.illegalCharacters}") + + @ValidateDataFileDirectoryName(message = "{directoryname.illegalCharacters}") @Expose @Column ( nullable=true ) + private String directoryLabel; @Column(columnDefinition = "TEXT") private String description = ""; @@ -120,6 +122,12 @@ public String getDirectoryLabel() { } public void 
setDirectoryLabel(String directoryLabel) { + //Strip off beginning and ending \ // - . + // and replace any sequences/combinations of / and \ with a single / + if (directoryLabel != null) { + directoryLabel = StringUtil.sanitizeFileDirectory(directoryLabel); + } + this.directoryLabel = directoryLabel; } diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java index 36df71265cc..012332f747d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.authorization.AuthenticationProvider; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; @@ -138,7 +139,7 @@ private void initMaps() { for (RoleAssignment ra : ras) { // for files, only show role assignments which can download if (ra.getRole().permissions().contains(Permission.DownloadFile)) { - raList.add(new RoleAssignmentRow(ra, roleAssigneeService.getRoleAssignee(ra.getAssigneeIdentifier()).getDisplayInfo())); + raList.add(new RoleAssignmentRow(ra, roleAssigneeService.getRoleAssignee(ra.getAssigneeIdentifier(), true).getDisplayInfo())); addFileToRoleAssignee(ra); } } @@ -150,17 +151,21 @@ private void initMaps() { List requestedFiles = fileAccessRequestMap.get(au); if (requestedFiles == null) { requestedFiles = new ArrayList<>(); - fileAccessRequestMap.put(au, requestedFiles); + AuthenticatedUser withProvider = authenticationService.getAuthenticatedUserWithProvider(au.getUserIdentifier()); + fileAccessRequestMap.put(withProvider, requestedFiles); } - - requestedFiles.add(file); - + requestedFiles.add(file); } } } } + public String getAuthProviderFriendlyName(String authProviderId){ + + return 
AuthenticationProvider.getFriendlyName(authProviderId); + } + private void addFileToRoleAssignee(RoleAssignment assignment) { RoleAssignee ra = roleAssigneeService.getRoleAssignee(assignment.getAssigneeIdentifier()); List assignments = roleAssigneeMap.get(ra); diff --git a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java index 7e618dacebd..b31b55b2e4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java @@ -75,6 +75,24 @@ protected void setup() { * identifier that doesn't start with one of the supported characters. */ public RoleAssignee getRoleAssignee(String identifier) { + if (identifier == null || identifier.isEmpty()) { + throw new IllegalArgumentException("Identifier cannot be null or empty string."); + } + return (getRoleAssignee(identifier, false)); + } + + /** + * @param identifier An identifier beginning with ":" (builtin), "@" + * @param augmented boolean to decide whether to get provider information + * ({@link AuthenticatedUser}), "&" ({@link Group}), or "#" + * ({@link PrivateUrlUser}). + * + * @return A RoleAssignee (User or Group) or null. + * + * @throws IllegalArgumentException if you pass null, empty string, or an + * identifier that doesn't start with one of the supported characters. 
+ */ + public RoleAssignee getRoleAssignee(String identifier, Boolean augmented) { if (identifier == null || identifier.isEmpty()) { throw new IllegalArgumentException("Identifier cannot be null or empty string."); } @@ -82,7 +100,11 @@ public RoleAssignee getRoleAssignee(String identifier) { case ':': return predefinedRoleAssignees.get(identifier); case '@': - return authSvc.getAuthenticatedUser(identifier.substring(1)); + if (!augmented){ + return authSvc.getAuthenticatedUser(identifier.substring(1)); + } else { + return authSvc.getAuthenticatedUserWithProvider(identifier.substring(1)); + } case '&': return groupSvc.getGroup(identifier.substring(1)); case '#': diff --git a/src/main/java/edu/harvard/iq/dataverse/Template.java b/src/main/java/edu/harvard/iq/dataverse/Template.java index 05307d89ff6..b01b0a2b792 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Template.java +++ b/src/main/java/edu/harvard/iq/dataverse/Template.java @@ -26,6 +26,8 @@ import javax.persistence.TemporalType; import javax.persistence.Transient; import javax.validation.constraints.Size; + +import edu.harvard.iq.dataverse.util.DateUtil; import org.hibernate.validator.constraints.NotBlank; /** @@ -91,7 +93,7 @@ public void setCreateTime(Date createTime) { } public String getCreateDate() { - return new SimpleDateFormat("MMMM d, yyyy").format(createTime); + return DateUtil.formatDate(createTime); } @OneToOne(cascade = {CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}, orphanRemoval=true) diff --git a/src/main/java/edu/harvard/iq/dataverse/ValidateDataFileDirectoryName.java b/src/main/java/edu/harvard/iq/dataverse/ValidateDataFileDirectoryName.java new file mode 100644 index 00000000000..94e33d6220a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/ValidateDataFileDirectoryName.java @@ -0,0 +1,32 @@ +/* + * To change this license header, choose License Headers in Project Properties. 
+ * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package edu.harvard.iq.dataverse; + +import java.lang.annotation.Documented; +import static java.lang.annotation.ElementType.FIELD; +import java.lang.annotation.Retention; +import static java.lang.annotation.RetentionPolicy.RUNTIME; +import java.lang.annotation.Target; +import javax.validation.Constraint; +import javax.validation.Payload; + +/** + * + * @author skraffmi + */ +@Target({FIELD}) +@Retention(RUNTIME) +@Constraint(validatedBy = {FileDirectoryNameValidator.class}) +@Documented +public @interface ValidateDataFileDirectoryName { + + String message() default "Failed Validation for Validate Data File Directory Name"; + + Class[] groups() default {}; + + Class[] payload() default {}; + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index 79d704804ed..70e8c092df3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -303,6 +303,22 @@ public AuthenticatedUser getAuthenticatedUser( String identifier ) { } } + public AuthenticatedUser getAuthenticatedUserWithProvider( String identifier ) { + try { + AuthenticatedUser authenticatedUser = em.createNamedQuery("AuthenticatedUser.findByIdentifier", AuthenticatedUser.class) + .setParameter("identifier", identifier) + .getSingleResult(); + AuthenticatedUserLookup aul = em.createNamedQuery("AuthenticatedUserLookup.findByAuthUser", AuthenticatedUserLookup.class) + .setParameter("authUser", authenticatedUser) + .getSingleResult(); + authenticatedUser.setAuthProviderId(aul.getAuthenticationProviderId()); + + return authenticatedUser; + } catch ( NoResultException nre ) { + return null; + } + } + public AuthenticatedUser getAdminUser() { try { return 
em.createNamedQuery("AuthenticatedUser.findAdminUser", AuthenticatedUser.class) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 5c0631d95d6..14dbe45248b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -42,6 +42,9 @@ public class OptionalFileParams { private String description; public static final String DESCRIPTION_ATTR_NAME = "description"; + private String directoryLabel; + public static final String DIRECTORY_LABEL_ATTR_NAME = "directoryLabel"; + private List categories; public static final String CATEGORIES_ATTR_NAME = "categories"; @@ -99,7 +102,15 @@ public void setDescription(String description){ public String getDescription(){ return this.description; } - + + public String getDirectoryLabel() { + return directoryLabel; + } + + public void setDirectoryLabel(String directoryLabel) { + this.directoryLabel = directoryLabel; + } + public void setRestriction(boolean restrict){ this.restrict = restrict; } @@ -128,6 +139,13 @@ public boolean hasDescription(){ } return true; } + + public boolean hasDirectoryLabel(){ + if ((directoryLabel == null)||(this.directoryLabel.isEmpty())){ + return false; + } + return true; + } /** * Set tags @@ -192,7 +210,15 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ this.description = jsonObj.get(DESCRIPTION_ATTR_NAME).getAsString(); } - + + // ------------------------------- + // get directory label as string + // ------------------------------- + if ((jsonObj.has(DIRECTORY_LABEL_ATTR_NAME)) && (!jsonObj.get(DIRECTORY_LABEL_ATTR_NAME).isJsonNull())){ + + this.directoryLabel = jsonObj.get(DIRECTORY_LABEL_ATTR_NAME).getAsString(); + } + // ------------------------------- // get restriction as boolean // ------------------------------- @@ -295,8 
+321,14 @@ public void addOptionalParams(DataFile df) throws DataFileTagException{ if (hasDescription()){ fm.setDescription(this.getDescription()); } - - + + // --------------------------- + // Add directory label (path) + // --------------------------- + if (hasDirectoryLabel()){ + fm.setDirectoryLabel(this.getDirectoryLabel()); + } + // --------------------------- // Add categories // --------------------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 2896e623af4..801e377cd09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -183,11 +183,9 @@ public Future indexDataverse(Dataverse dataverse) { if (dataverse.isReleased()) { solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING); solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataverse.getPublicationDate()); - solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(dataverse.getPublicationDate())); } else { solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING); solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataverse.getCreateDate()); - solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(dataverse.getCreateDate())); } /* We don't really have harvested dataverses yet; (I have in fact just removed the isHarvested() method from the Dataverse object) -- L.A. 
@@ -724,7 +722,6 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset) { } } solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate); - solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate)); if (state.equals(indexableDataset.getDatasetState().PUBLISHED)) { solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING); @@ -1059,7 +1056,6 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset) { } } datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate); - datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate)); if (majorVersionReleaseDate == null && !datafile.isHarvested()) { datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING); @@ -1332,15 +1328,6 @@ public String removeSolrDocFromIndex(String doomed) { return response; } - public String convertToFriendlyDate(Date dateAsDate) { - if (dateAsDate == null) { - dateAsDate = new Date(); - } - // using DateFormat.MEDIUM for May 5, 2014 to match what's in DVN 3.x - DateFormat format = DateFormat.getDateInstance(DateFormat.MEDIUM); - String friendlyDate = format.format(dateAsDate); - return friendlyDate; - } private List findSolrDocIdsForDraftFilesToDelete(Dataset datasetWithDraftFilesToDelete) { List solrIdsOfFilesToDelete = new ArrayList<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java index d5a5b92a502..312b70fe7fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java @@ -205,10 +205,7 @@ public class SearchFields { // PUBLICATION_YEAR used to be called PUBLICATION_DATE. 
public static final String PUBLICATION_YEAR = "publicationDate"; public static final String RELEASE_OR_CREATE_DATE = "dateSort"; - /** - * i.e. "Mar 17, 2015" - */ - public static final String RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT = "dateFriendly"; + public static final String DEFINITION_POINT = "definitionPointDocId"; public static final String DEFINITION_POINT_DVOBJECT_ID = "definitionPointDvObjectId"; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 8f8abe75c3f..6cfef229fb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -401,7 +401,6 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List createDataFiles(DatasetVersion version, InputStream if (!fileEntryName.equals(shortName)) { // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), - // we'll extract the directory name, then a) strip the leading and trailing slashes; - // and b) replace all the back slashes with regular ones and b) replace any multiple - // slashes with a single slash: - String directoryName = fileEntryName.replaceFirst("[\\/][\\/]*[^\\/]*$", "").replaceFirst("^[\\/]*", "").replaceAll("[\\/][\\/]*", "/"); - if (!"".equals(directoryName)) { + // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all + // the leading, trailing and duplicate slashes; then replace all the characters that + // don't pass our validation rules. 
+ String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); + directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); + //if (!"".equals(directoryName)) { + if (!StringUtil.isEmpty(directoryName)) { logger.fine("setting the directory label to " + directoryName); datafile.getFileMetadata().setDirectoryLabel(directoryName); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java index ddacc681f76..a38079dca0c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java @@ -180,6 +180,36 @@ public static String decrypt(String value, String password ) { } } + public static String sanitizeFileDirectory(String value) { + return sanitizeFileDirectory(value, false); + } + + public static String sanitizeFileDirectory(String value, boolean aggressively){ + // Replace all the combinations of slashes and backslashes with one single + // backslash: + value = value.replaceAll("[\\\\/][\\\\/]*", "/"); + + if (aggressively) { + value = value.replaceAll("[^A-Za-z0-9_ ./\\-]+", "."); + value = value.replaceAll("\\.\\.+", "."); + } + + // Strip any leading or trailing slashes, whitespaces, '-' or '.': + while (value.startsWith("/") || value.startsWith("-") || value.startsWith(".") || value.startsWith(" ")){ + value = value.substring(1); + } + while (value.endsWith("/") || value.endsWith("-") || value.endsWith(".") || value.endsWith(" ")){ + value = value.substring(0, value.length() - 1); + } + + if ("".equals(value)) { + return null; + } + + return value; + } + + private static SecretKeySpec generateKeyFromString(final String secKey) throws UnsupportedEncodingException, NoSuchAlgorithmException { byte[] key = (secKey).getBytes("UTF-8"); MessageDigest sha = MessageDigest.getInstance("SHA-1"); diff --git a/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql 
b/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql new file mode 100644 index 00000000000..3d3ed777c9f --- /dev/null +++ b/src/main/resources/db/migration/V4.11.0.1__5565-sanitize-directory-labels.sql @@ -0,0 +1,9 @@ +-- replace any sequences of slashes and backslashes with a single slash: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/\\][/\\]*', '/', 'g'); +-- strip (and replace with a .) any characters that are no longer allowed in the directory labels: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[^A-Za-z0-9_ ./\-]+', '.', 'g'); +-- now replace any sequences of .s with a single .: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '\.\.+', '.', 'g'); +-- get rid of any leading or trailing slashes, spaces, '-'s and '.'s: +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '^[/ .\-]+', '', ''); +UPDATE filemetadata SET directoryLabel = regexp_replace(directoryLabel, '[/ \.\-]+$', '', ''); diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 09e202c8761..08ae1cf89b8 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -296,6 +296,7 @@ + + + + + + + + + + + +
@@ -501,7 +518,7 @@
- +
diff --git a/src/main/webapp/file.xhtml b/src/main/webapp/file.xhtml index d16430690e7..26e0f766117 100644 --- a/src/main/webapp/file.xhtml +++ b/src/main/webapp/file.xhtml @@ -503,6 +503,14 @@ +
+ +
+ +
+