From 681ac6bdcfbfeb0c26f0358bf2f7ead3f21decc3 Mon Sep 17 00:00:00 2001 From: Max Burnette Date: Mon, 22 Feb 2021 10:32:20 -0600 Subject: [PATCH 1/7] add some sorting logic (strings are broken) --- app/api/Search.scala | 8 +++-- app/services/ElasticsearchPlugin.scala | 45 ++++++++++++++++++++------ conf/routes | 2 +- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/app/api/Search.scala b/app/api/Search.scala index 405e065aa..603736ae4 100644 --- a/app/api/Search.scala +++ b/app/api/Search.scala @@ -22,7 +22,7 @@ class Search @Inject() ( /** Search using a simple text string with filters */ def search(query: String, resource_type: Option[String], datasetid: Option[String], collectionid: Option[String], spaceid: Option[String], folderid: Option[String], field: Option[String], tag: Option[String], - from: Option[Int], size: Option[Int], page: Option[Int]) = PermissionAction(Permission.ViewDataset) { implicit request => + from: Option[Int], size: Option[Int], page: Option[Int], sort: Option[String], order: Option[String]) = PermissionAction(Permission.ViewDataset) { implicit request => current.plugin[ElasticsearchPlugin] match { case Some(plugin) => { // If from is specified, use it. Otherwise use page * size of page if possible, otherwise use 0. 
@@ -42,7 +42,9 @@ class Search @Inject() ( (spaceid match {case Some(x) => s"&spaceid=$x" case None => ""}) + (folderid match {case Some(x) => s"&folderid=$x" case None => ""}) + (field match {case Some(x) => s"&field=$x" case None => ""}) + - (tag match {case Some(x) => s"&tag=$x" case None => ""}) + (tag match {case Some(x) => s"&tag=$x" case None => ""}) + + (sort match {case Some(x) => s"&sort=$x" case None => ""}) + + (order match {case Some(x) => s"&order=$x" case None => ""}) // Add space filter to search here as a simple permissions check val superAdmin = request.user match { @@ -54,7 +56,7 @@ class Search @Inject() ( else spaces.listAccess(0, Set[Permission](Permission.ViewSpace), request.user, true, true, false, false).map(sp => sp.id) - val response = plugin.search(query, resource_type, datasetid, collectionid, spaceid, folderid, field, tag, from_index, size, permitted, request.user) + val response = plugin.search(query, resource_type, datasetid, collectionid, spaceid, folderid, field, tag, from_index, size, sort, order, permitted, request.user) val result = SearchUtils.prepareSearchResponse(response, source_url, request.user) Ok(toJson(result)) } diff --git a/app/services/ElasticsearchPlugin.scala b/app/services/ElasticsearchPlugin.scala index 1bc3677bb..f2928fed8 100644 --- a/app/services/ElasticsearchPlugin.scala +++ b/app/services/ElasticsearchPlugin.scala @@ -29,6 +29,7 @@ import play.api.libs.json._ import _root_.util.SearchUtils import org.apache.commons.lang.StringUtils import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest +import org.elasticsearch.search.sort.SortOrder /** @@ -130,7 +131,8 @@ class ElasticsearchPlugin(application: Application) extends Plugin { * "field_leaf_key": name of immediate field only, e.g. 
'lines' */ val queryObj = prepareElasticJsonQuery(query, grouping, permitted, user) - accumulatePageResult(queryObj, user, from.getOrElse(0), size.getOrElse(maxResults)) + // TODO: Support sorting in GUI search + accumulatePageResult(queryObj, user, from.getOrElse(0), size.getOrElse(maxResults), None, None) } /** @@ -152,8 +154,8 @@ class ElasticsearchPlugin(application: Application) extends Plugin { */ def search(query: String, resource_type: Option[String], datasetid: Option[String], collectionid: Option[String], spaceid: Option[String], folderid: Option[String], field: Option[String], tag: Option[String], - from: Option[Int], size: Option[Int], permitted: List[UUID], user: Option[User], - index: String = nameOfIndex): ElasticsearchResult = { + from: Option[Int], size: Option[Int], sort: Option[String], order: Option[String], permitted: List[UUID], + user: Option[User], index: String = nameOfIndex): ElasticsearchResult = { // Convert any parameters from API into the query syntax equivalent so we can parse it all together later var expanded_query = query @@ -166,16 +168,16 @@ class ElasticsearchPlugin(application: Application) extends Plugin { folderid.foreach(fid => expanded_query += s" in:$fid") val queryObj = prepareElasticJsonQuery(expanded_query.stripPrefix(" "), permitted, user) - accumulatePageResult(queryObj, user, from.getOrElse(0), size.getOrElse(maxResults)) + accumulatePageResult(queryObj, user, from.getOrElse(0), size.getOrElse(maxResults), sort, order) } /** Perform search, check permissions, and keep searching again if page isn't filled with permitted resources */ def accumulatePageResult(queryObj: XContentBuilder, user: Option[User], from: Int, size: Int, - index: String = nameOfIndex): ElasticsearchResult = { + sort: Option[String], order: Option[String], index: String = nameOfIndex): ElasticsearchResult = { var total_results = ListBuffer.empty[ResourceRef] // Fetch initial page & filter by permissions - val (results, total_size) = 
_search(queryObj, index, Some(from), Some(size)) + val (results, total_size) = _search(queryObj, index, Some(from), Some(size), sort, order) Logger.debug(s"Found ${results.length} results with ${total_size} total") val filtered = checkResultPermissions(results, user) Logger.debug(s"Permission to see ${filtered.length} results") @@ -187,7 +189,7 @@ class ElasticsearchPlugin(application: Application) extends Plugin { var exhausted = false while (total_results.length < size && !exhausted) { Logger.debug(s"Only have ${total_results.length} total results; searching for ${size*2} more from ${new_from}") - val (results, total_size) = _search(queryObj, index, Some(new_from), Some(size*2)) + val (results, total_size) = _search(queryObj, index, Some(new_from), Some(size*2), sort, order) Logger.debug(s"Found ${results.length} results with ${total_size} total") if (results.length == 0 || new_from+results.length == total_size) exhausted = true // No more results to find val filtered = checkResultPermissions(results, user) @@ -251,17 +253,33 @@ class ElasticsearchPlugin(application: Application) extends Plugin { /*** Execute query and return list of results and total result count as tuple */ def _search(queryObj: XContentBuilder, index: String = nameOfIndex, - from: Option[Int] = Some(0), size: Option[Int] = Some(maxResults)): (List[ResourceRef], Long) = { + from: Option[Int] = Some(0), size: Option[Int] = Some(maxResults), + sort: Option[String], order: Option[String]): (List[ResourceRef], Long) = { connect() val response = client match { case Some(x) => { - Logger.info("Searching Elasticsearch: "+queryObj.string()) + Logger.debug("Searching Elasticsearch: " + queryObj.string()) var responsePrep = x.prepareSearch(index) .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) .setQuery(queryObj) responsePrep = responsePrep.setFrom(from.getOrElse(0)) responsePrep = responsePrep.setSize(size.getOrElse(maxResults)) + // Default to ascending if no order provided but a field is + val 
searchOrder = order match { + case Some("asc") => SortOrder.ASC + case Some("desc") => SortOrder.DESC + case Some("DESC") => SortOrder.DESC + case _ => SortOrder.ASC + } + // Default to name field if order is provided but no field is + sort match { + case Some(x) => responsePrep = responsePrep.addSort(x, searchOrder) + case None => order match { + case Some(o) => responsePrep = responsePrep.addSort("name", searchOrder) + case None => {} + } + } val response = responsePrep.setExplain(true).execute().actionGet() Logger.debug("Search hits: " + response.getHits().getTotalHits()) @@ -697,6 +715,13 @@ class ElasticsearchPlugin(application: Application) extends Plugin { * as strings for datatypes besides Objects. In the future, this could * be removed, but only once the Search API better supports those data types (e.g. Date). */ + + /** SUPPORTING SORT FIELDS, ADD A .RAW SUBFIELD + * https://stackoverflow.com/questions/34493947/elasticsearch-is-not-sorting-the-results + * https://discuss.elastic.co/t/how-to-create-dynamic-template-for-nested-objects/187310 + * https://github.com/elastic/elasticsearch/issues/16945 + */ + """{"clowder_object": { |"numeric_detection": true, |"properties": { @@ -900,7 +925,7 @@ class ElasticsearchPlugin(application: Application) extends Plugin { } } - // If a term is specified that isn't in this list, it's assumed to be a metadata field + // If a term is specified that isn't in this list, it's assumed to be a metadata field (for sorting and filtering) val official_terms = List("name", "creator", "email", "resource_type", "in", "contains", "tag", "exists", "missing") // Create list of (key, operator, value) for passing to builder diff --git a/conf/routes b/conf/routes index 64a506b4b..5718830e6 100644 --- a/conf/routes +++ b/conf/routes @@ -663,7 +663,7 @@ DELETE /api/sections/:id # ---------------------------------------------------------------------- GET /api/search/json @api.Search.searchJson(query: String ?= "", grouping: String ?= "AND", 
from: Option[Int], size: Option[Int]) GET /api/search/multimediasearch @api.Search.searchMultimediaIndex(section_id: UUID) -GET /api/search @api.Search.search(query: String ?= "", resource_type: Option[String], datasetid: Option[String], collectionid: Option[String], spaceid: Option[String], folderid: Option[String], field: Option[String], tag: Option[String], from: Option[Int], size: Option[Int], page: Option[Int]) +GET /api/search @api.Search.search(query: String ?= "", resource_type: Option[String], datasetid: Option[String], collectionid: Option[String], spaceid: Option[String], folderid: Option[String], field: Option[String], tag: Option[String], from: Option[Int], size: Option[Int], page: Option[Int], sort: Option[String], order: Option[String]) # ---------------------------------------------------------------------- # GEOSTREAMS ENDPOINT From a558af80e408e93af22c63a02a0f4ac0e6d97d51 Mon Sep 17 00:00:00 2001 From: Max Burnette Date: Tue, 2 Mar 2021 10:23:11 -0600 Subject: [PATCH 2/7] Add better support for name sorting --- app/api/Admin.scala | 6 +++++ app/services/ElasticsearchPlugin.scala | 23 +++++++++++-------- app/services/mongodb/ElasticsearchQueue.scala | 8 +++++++ conf/routes | 1 + 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/app/api/Admin.scala b/app/api/Admin.scala index ae5f8a857..d8c878df0 100644 --- a/app/api/Admin.scala +++ b/app/api/Admin.scala @@ -180,4 +180,10 @@ class Admin @Inject() (userService: UserService, if (success) Ok(toJson(Map("status" -> "reindex successfully queued"))) else BadRequest(toJson(Map("status" -> "reindex queuing failed, Elasticsearch may be disabled"))) } + + def deleteIndex = ServerAdminAction { implicit request => + val success = esqueue.queue("delete_index") + if (success) Ok(toJson(Map("status" -> "deindex successfully queued"))) + else BadRequest(toJson(Map("status" -> "deindex queuing failed, Elasticsearch may be disabled"))) + } } diff --git a/app/services/ElasticsearchPlugin.scala 
b/app/services/ElasticsearchPlugin.scala index f2928fed8..6ea770215 100644 --- a/app/services/ElasticsearchPlugin.scala +++ b/app/services/ElasticsearchPlugin.scala @@ -259,8 +259,13 @@ class ElasticsearchPlugin(application: Application) extends Plugin { val response = client match { case Some(x) => { Logger.debug("Searching Elasticsearch: " + queryObj.string()) + + // Exclude _sort fields in response object + var sortFilter = jsonBuilder().startObject().startArray("exclude").value("*._sort").endArray().endObject() + var responsePrep = x.prepareSearch(index) .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) + .setSource(sortFilter) .setQuery(queryObj) responsePrep = responsePrep.setFrom(from.getOrElse(0)) @@ -276,7 +281,7 @@ class ElasticsearchPlugin(application: Application) extends Plugin { sort match { case Some(x) => responsePrep = responsePrep.addSort(x, searchOrder) case None => order match { - case Some(o) => responsePrep = responsePrep.addSort("name", searchOrder) + case Some(o) => responsePrep = responsePrep.addSort("name._sort", searchOrder) case None => {} } } @@ -309,8 +314,11 @@ class ElasticsearchPlugin(application: Application) extends Plugin { .field("type", "custom") .field("tokenizer", "uax_url_email") .endObject() - .endObject() - .endObject() + .endObject().startObject("normalizer") + .startObject("case_insensitive") + .field("filter", "lowercase") + .endObject() + .endObject().endObject() .startObject("index") .startObject("mapping") .field("ignore_malformed", true) @@ -716,16 +724,13 @@ class ElasticsearchPlugin(application: Application) extends Plugin { * be removed, but only once the Search API better supports those data types (e.g. Date). 
*/ - /** SUPPORTING SORT FIELDS, ADD A .RAW SUBFIELD - * https://stackoverflow.com/questions/34493947/elasticsearch-is-not-sorting-the-results - * https://discuss.elastic.co/t/how-to-create-dynamic-template-for-nested-objects/187310 - * https://github.com/elastic/elasticsearch/issues/16945 - */ + // TODO: With Elastic 6.8+ we can use "normalizer": "case_insensitive" for _sort fields """{"clowder_object": { |"numeric_detection": true, |"properties": { - |"name": {"type": "string"}, + |"name": {"type": "string", "fields": { + | "_sort": {"type":"string", "index": "not_analyzed"}}}, |"description": {"type": "string"}, |"resource_type": {"type": "string", "include_in_all": false}, |"child_of": {"type": "string", "include_in_all": false}, diff --git a/app/services/mongodb/ElasticsearchQueue.scala b/app/services/mongodb/ElasticsearchQueue.scala index a27bb277e..50fa9e7ab 100644 --- a/app/services/mongodb/ElasticsearchQueue.scala +++ b/app/services/mongodb/ElasticsearchQueue.scala @@ -53,6 +53,7 @@ class ElasticsearchQueue @Inject() ( } } case "index_all" => _indexAll() + case "delete_index" => _deleteIndex() case "index_swap" => _swapIndex() case _ => throw new IllegalArgumentException(s"Unrecognized action: ${action.action}") } @@ -63,6 +64,7 @@ class ElasticsearchQueue @Inject() ( case "index_dataset" => throw new IllegalArgumentException(s"No target specified for action ${action.action}") case "index_collection" => throw new IllegalArgumentException(s"No target specified for action ${action.action}") case "index_all" => _indexAll() + case "delete_index" => _deleteIndex() case "index_swap" => _swapIndex() case _ => throw new IllegalArgumentException(s"Unrecognized action: ${action.action}") } @@ -97,6 +99,12 @@ class ElasticsearchQueue @Inject() ( }) } + def _deleteIndex() = { + current.plugin[ElasticsearchPlugin].foreach(p => { + p.deleteAll() + }) + } + // Replace the main index with the newly reindexed temp file def _swapIndex() = { Logger.debug("Swapping temporary 
reindex for main index") diff --git a/conf/routes b/conf/routes index 5718830e6..d7822c627 100644 --- a/conf/routes +++ b/conf/routes @@ -310,6 +310,7 @@ POST /api/admin/users POST /api/sensors/config @api.Admin.sensorsConfig POST /api/changeAppearance @api.Admin.submitAppearance POST /api/reindex @api.Admin.reindex +POST /api/deleteindex @api.Admin.deleteIndex POST /api/admin/configuration @api.Admin.updateConfiguration #---------------------------------------------------------------------- From ff7e9b908a3c7556e5b870a6ba902cbe027438c8 Mon Sep 17 00:00:00 2001 From: Max Burnette Date: Wed, 10 Mar 2021 11:30:08 -0600 Subject: [PATCH 3/7] update swagger and changelog --- CHANGELOG.md | 6 ++++++ app/services/ElasticsearchPlugin.scala | 3 ++- public/swagger.yml | 10 ++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e4190b59..de2bd6ce5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## Unreleased + +### Added +- Added a `sort` and `order` parameter to `/api/search` endpoint that supports date and numeric field sorting. If only order is specified, created date is used. String fields are not currently supported. +- Added a new `/api/deleteindex` admin endpoint that will queue an action to delete an Elasticsearch index (usually prior to a reindex). + ## 1.14.1 - 2021-02-02 - Google will no longer work as login provider, we are working on this issue [#157](https://github.com/clowder-framework/clowder/issues/157). 
diff --git a/app/services/ElasticsearchPlugin.scala b/app/services/ElasticsearchPlugin.scala index 6ea770215..b4df3d79e 100644 --- a/app/services/ElasticsearchPlugin.scala +++ b/app/services/ElasticsearchPlugin.scala @@ -279,9 +279,10 @@ class ElasticsearchPlugin(application: Application) extends Plugin { } // Default to name field if order is provided but no field is sort match { + case Some("name") => responsePrep = responsePrep.addSort("name._sort", searchOrder) case Some(x) => responsePrep = responsePrep.addSort(x, searchOrder) case None => order match { - case Some(o) => responsePrep = responsePrep.addSort("name._sort", searchOrder) + case Some(o) => responsePrep = responsePrep.addSort("created", searchOrder) case None => {} } } diff --git a/public/swagger.yml b/public/swagger.yml index 7afe5e842..db1f48126 100644 --- a/public/swagger.yml +++ b/public/swagger.yml @@ -150,6 +150,16 @@ paths: assuming "size" items per page. schema: type: integer + - name: sort + in: query + description: A date or numeric field to sort by. If order is given but no field specified, created date is used. + schema: + type: string + - name: order + in: query + description: Whether to scored in asc (ascending) or desc (descending) order. If a field is given without an order, asc is used. 
+ schema: + type: string responses: 200: description: OK From 96b8fb240af1e02efddc1074235e34479eb248d7 Mon Sep 17 00:00:00 2001 From: Max Burnette Date: Wed, 10 Mar 2021 11:33:37 -0600 Subject: [PATCH 4/7] comment typo --- app/services/ElasticsearchPlugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/ElasticsearchPlugin.scala b/app/services/ElasticsearchPlugin.scala index b4df3d79e..d0e0dc3ea 100644 --- a/app/services/ElasticsearchPlugin.scala +++ b/app/services/ElasticsearchPlugin.scala @@ -277,7 +277,7 @@ class ElasticsearchPlugin(application: Application) extends Plugin { case Some("DESC") => SortOrder.DESC case _ => SortOrder.ASC } - // Default to name field if order is provided but no field is + // Default to created field if order is provided but no field is sort match { case Some("name") => responsePrep = responsePrep.addSort("name._sort", searchOrder) case Some(x) => responsePrep = responsePrep.addSort(x, searchOrder) From 25a1a19ad5d255fc5dd1d1fb66c8d433fc51a5b1 Mon Sep 17 00:00:00 2001 From: Max Burnette Date: Wed, 10 Mar 2021 11:35:04 -0600 Subject: [PATCH 5/7] remove 6.8 normalizer code --- app/services/ElasticsearchPlugin.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/app/services/ElasticsearchPlugin.scala b/app/services/ElasticsearchPlugin.scala index d0e0dc3ea..f4553bbe4 100644 --- a/app/services/ElasticsearchPlugin.scala +++ b/app/services/ElasticsearchPlugin.scala @@ -315,10 +315,6 @@ class ElasticsearchPlugin(application: Application) extends Plugin { .field("type", "custom") .field("tokenizer", "uax_url_email") .endObject() - .endObject().startObject("normalizer") - .startObject("case_insensitive") - .field("filter", "lowercase") - .endObject() .endObject().endObject() .startObject("index") .startObject("mapping") From cbc06f36b554eb0080f03f24a5e215239d7876b1 Mon Sep 17 00:00:00 2001 From: Max Burnette Date: Tue, 23 Mar 2021 13:35:11 -0500 Subject: [PATCH 6/7] Update public/swagger.yml 
Co-authored-by: Mike Lambert --- public/swagger.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/public/swagger.yml b/public/swagger.yml index db1f48126..3c0ee0a07 100644 --- a/public/swagger.yml +++ b/public/swagger.yml @@ -151,15 +151,17 @@ paths: schema: type: integer - name: sort - in: query - description: A date or numeric field to sort by. If order is given but no field specified, created date is used. - schema: - type: string + in: query + description: A date or numeric field to sort by. If order is given but no field specified, created date is used. + schema: + type: string - name: order - in: query - description: Whether to scored in asc (ascending) or desc (descending) order. If a field is given without an order, asc is used. - schema: - type: string + in: query + description: Whether to sort in asc (ascending) or desc (descending) order. If a field is given without an order, asc is used. + schema: + type: string + enum: [asc, desc] + default: asc responses: 200: description: OK From f1812b38f53bee4da56e67d1bb7a4cc920b50ee6 Mon Sep 17 00:00:00 2001 From: Max Burnette Date: Tue, 23 Mar 2021 13:52:39 -0500 Subject: [PATCH 7/7] dummy out name._sort sorting to avoid errors on old Dbs --- app/services/ElasticsearchPlugin.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/services/ElasticsearchPlugin.scala b/app/services/ElasticsearchPlugin.scala index 34a72b453..25b7c8d9d 100644 --- a/app/services/ElasticsearchPlugin.scala +++ b/app/services/ElasticsearchPlugin.scala @@ -279,7 +279,7 @@ class ElasticsearchPlugin(application: Application) extends Plugin { } // Default to created field if order is provided but no field is sort match { - // case Some("name") => responsePrep = responsePrep.addSort("name._sort", searchOrder) TODO: Not yet supported case Some(x) => responsePrep = responsePrep.addSort(x, 
searchOrder) case None => order match { case Some(o) => responsePrep = responsePrep.addSort("created", searchOrder) @@ -951,7 +951,7 @@ class ElasticsearchPlugin(application: Application) extends Plugin { matches += mat } } - + // If a term is specified that isn't in this list, it's assumed to be a metadata field (for sorting and filtering) val official_terms = List("name", "creator", "created", "email", "resource_type", "in", "contains", "tag", "exists", "missing")