From bc5c32d2e367aafe8fd33a627a8fef96a553dbfc Mon Sep 17 00:00:00 2001 From: n1mus <709030+n1mus@users.noreply.github.com> Date: Fri, 29 Apr 2022 11:43:11 -0700 Subject: [PATCH 01/10] use locale en_US & other cleanup --- .dockerignore | 6 +- CHANGELOG.md | 8 + Makefile | 3 +- client_src/test/test_integration.py | 2 +- .../utils/json_validation.py | 2 +- spec/analyzers/icu_tokenize.json | 2 +- .../generic/fulltext_search.yaml | 94 ------------ .../stored_queries/test_fulltext_search.py | 145 ------------------ 8 files changed, 17 insertions(+), 245 deletions(-) delete mode 100644 spec/stored_queries/generic/fulltext_search.yaml diff --git a/.dockerignore b/.dockerignore index e0862330..e46761e2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -48,4 +48,8 @@ dmypy.json # docker bits Dockerfile* -docker-compose* \ No newline at end of file +docker-compose* + +# Temp files +tmp/ + diff --git a/CHANGELOG.md b/CHANGELOG.md index 3affa19e..f6f252ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Changed +- Use locale en_US instead of c.utf-8 + +### Removed +- Generic fulltext search and tests + ## [0.0.19] - 2022-04-15 ### Added - github actions to build `develop`, `pr-x` and released version (e.g. `1.2.3`) Tags diff --git a/Makefile b/Makefile index e4d49ecd..bfabf26a 100644 --- a/Makefile +++ b/Makefile @@ -13,8 +13,7 @@ shell: docker-compose run re_api sh reset: - docker-compose --rmi all -v - docker-compose build + docker-compose down --rmi all -v full_query_testing: DO_QUERY_TESTING=full time python -m pytest -s $(QUERY_TESTING_FILE) diff --git a/client_src/test/test_integration.py b/client_src/test/test_integration.py index baa17e97..8794b579 100644 --- a/client_src/test/test_integration.py +++ b/client_src/test/test_integration.py @@ -2,7 +2,7 @@ import os from uuid import uuid4 -from relation_engine_client import REClient +from relation_engine_client.main import REClient from relation_engine_client.exceptions import RERequestError, RENotFound _API_URL = os.environ.get("RE_API_URL", "http://localhost:5000") diff --git a/relation_engine_server/utils/json_validation.py b/relation_engine_server/utils/json_validation.py index c44538f5..b95be623 100644 --- a/relation_engine_server/utils/json_validation.py +++ b/relation_engine_server/utils/json_validation.py @@ -159,7 +159,7 @@ def resolve_remote(self, uri): if scheme in self.handlers: result = self.handlers[scheme](uri) - elif scheme in [u"http", u"https"]: + elif scheme in ["http", "https"]: # Requests has support for detecting the correct encoding of # json over http result = requests.get(uri).json() diff --git a/spec/analyzers/icu_tokenize.json b/spec/analyzers/icu_tokenize.json index 3f69a950..7d2d8429 100644 --- a/spec/analyzers/icu_tokenize.json +++ b/spec/analyzers/icu_tokenize.json @@ -2,7 +2,7 @@ "name": "icu_tokenize", "type": "text", "properties": { - "locale": "c.utf-8", + "locale": "en_US", "accent": false, "case": "lower", "stemming": false, diff --git a/spec/stored_queries/generic/fulltext_search.yaml b/spec/stored_queries/generic/fulltext_search.yaml deleted file mode 100644 index 6859add4..00000000 --- a/spec/stored_queries/generic/fulltext_search.yaml +++ /dev/null @@ -1,94 +0,0 @@ -# Should be REVISED or DEPRECATED. -# Is currently unused outside testing. -# -# Search a collection with a fulltext index with an attribute name and search text -# Also supports filtering by outer-level attributes -# Not recommended for fast searching because it can be very slow and even timeout at 60s -name: fulltext_search -params: - type: object - required: ["@coll", search_attrkey, search_text] - additionalProperties: false - properties: - "@coll": - type: string - title: Collection name - examples: [ncbi_taxon, gtdb_taxon] - search_attrkey: - type: string - title: Search attribute key - examples: [scientific_name, name] - search_text: - type: string - title: Search text - examples: [escherichia, es] - description: Text to search on the search attribute values - ts: - type: [integer, "null"] - title: Versioning timestamp - default: null - filter_attr_expr: - type: [array, "null"] - title: Filter by document attribute equality - items: - type: object - maxItems: 50 - examples: [ - [{"rank": "species"}, {"rank": "strain"}, {"strain": true}], - [{"rank": "species", "strain": false}] - ] - default: null - description: | - An array of single-level objects. - In each item object, the key-value pairs would restrict the documents to those containing all the attribute key-value pairs. - But if any item object in the array satisfies the document, the document is filtered into the results. - Basically works like a boolean expression where each key-value pair is a boolean value, each item object is a boolean term, and the array is a sum of boolean terms - Null or empty arrays have no filtering effect. - offset: - type: [integer, "null"] - title: Paging offset - maximum: 100000 - default: 0 - limit: - type: [integer, "null"] - title: Max results to return - default: 20 - maximum: 1000 - select: - type: [string, array, "null"] - items: - type: string - examples: [scientific_name, [scientific_name, id]] - default: null - description: Document attributes to keep in the results -query: | - LET search_text__norm = REGEX_REPLACE(LOWER(TRIM(@search_text)), "\\s+", " ") - LET search_text__first_exact_tok = REGEX_SPLIT(search_text__norm, " ")[0] - LET search_text__icu_toks = TOKENS(@search_text, "icu_tokenize") /* db analyzer icu_tokenize */ - LET search_text__wordboundmod_icu_toks = ( - FOR tok IN search_text__icu_toks - RETURN REGEX_REPLACE(tok, ",.*", "") /* commas cannot be escaped */ - ) - LET search_text__fulltext = CONCAT_SEPARATOR(", ", - FOR tok IN search_text__wordboundmod_icu_toks - RETURN CONCAT("prefix:", tok) - ) - LET filter_attr_expr = @filter_attr_expr ? @filter_attr_expr : [] /* null to [] */ - LET search_text__wildcard = CONCAT("%", CONCAT_SEPARATOR("%", search_text__icu_toks), "%") /* e.g., %tok0%tok1%tokn% */ - FOR doc IN FULLTEXT(@@coll, @search_attrkey, search_text__fulltext) - FILTER @ts ? doc.created <= @ts AND doc.expired >= @ts : true - /* keep doc if any obj in filter_attr_expr is a sub-obj of doc */ - FILTER LENGTH(filter_attr_expr) > 0 ? ( - FOR term IN filter_attr_expr - RETURN MATCHES(doc, term) - ) ANY == true : true - LET attrval__norm = REGEX_REPLACE(LOWER(TRIM(doc.@search_attrkey)), "\\s+", " ") - LET attrval__icu_toks = TOKENS(doc.@search_attrkey, "icu_tokenize") - SORT LIKE(doc.@search_attrkey, search_text__wildcard, true) DESC, /* icu tok ordering */ - /* TODO - icu tok ordering with no insertions? */ - CONTAINS(attrval__icu_toks[0], search_text__icu_toks[0], true) == 0 DESC, /* first icu tok */ - CONTAINS(attrval__norm, search_text__first_exact_tok, true) == 0 DESC, /* first exact tok */ - CONTAINS(attrval__norm, search_text__norm, true) == 0 DESC, /* exact match */ - doc.@search_attrkey /* lexical */ - LIMIT @offset ? @offset : 0, @limit ? @limit : 20 - RETURN @select ? KEEP(doc, @select) : doc diff --git a/spec/test/stored_queries/test_fulltext_search.py b/spec/test/stored_queries/test_fulltext_search.py index 99bd4d44..f7acb620 100644 --- a/spec/test/stored_queries/test_fulltext_search.py +++ b/spec/test/stored_queries/test_fulltext_search.py @@ -1,12 +1,9 @@ """ Tests for stored queries involving a fulltext search: -* Generic fulltext_search (should be used with caution because it can be slow and timeout at 60s) * Taxonomy taxonomy_search_species_strain * Taxonomy taxonomy_search_species_strain_no_sort The latter two are switched between depending on the length of the search text. -These stored query tests are all bundled in one test file because their original purpose is to do a species/strain -name search on the ncbi_taxon collection These tests run within the re_api docker image, and require access to the ArangoDB, auth, and workspace images. """ @@ -251,148 +248,6 @@ def test_prefix_hit(self): ) -class TestFulltextSearchStoredQuery(unittest.TestCase): - @classmethod - def setUpClass(cls): - check_spec_test_env() - create_test_docs("ncbi_taxon", ncbi_taxa) - - def test_ncbi_taxon_scinames(self): - """Happy path""" - for sciname in scinames_test_all: - _fulltext_search_query( - self, - coll="ncbi_taxon", - search_attrkey="scientific_name", - search_text=sciname, - ts=_NOW if sciname in scinames_test_latest else None, - filter_attr_expr=[ - {"rank": "species"}, - {"rank": "strain"}, - {"strain": True}, - ], - offset=None, - limit=LIMIT, - select="scientific_name", - # --- - expect_error=False, - expect_hit=True, - ) - - def test_null_bind_params(self): - """Leave off parameters""" - for sciname in scinames_test_all: - _fulltext_search_query( - self, - coll="ncbi_taxon", - search_attrkey="scientific_name", - search_text=sciname, - ts=None, - filter_attr_expr=None, - offset=None, - limit=None, - select=None, - # --- - expect_error=False, - expect_hit=True, - ) - - def test_fully_specified_bind_params(self): - """Specify all parameters""" - for sciname in scinames_test_all: - _fulltext_search_query( - self, - coll="ncbi_taxon", - search_attrkey="scientific_name", - search_text=sciname, - ts=_NOW if sciname in scinames_test_latest else None, - filter_attr_expr=[ - {"rank": "species"}, - {"rank": "strain"}, - {"strain": True}, - ], - offset=0, - limit=LIMIT, - select=["id", "scientific_name"], - # --- - expect_error=False, - expect_hit=True, - ) - - def test_extra_params(self): - """Extra params not in spec/aql""" - _fulltext_search_query( - self, - coll="ncbi_taxon", - search_attrkey="scientific_name", - search_text="esch", - ts=None, - filter_attr_expr=[ - {"rank": "species"}, - {"rank": "strain"}, - {"strain": True}, - ], - offset=0, - limit=LIMIT, - select=["id", "scientific_name"], - extra_unused_param=42, - # --- - expect_error=("Additional properties are not allowed"), - ) - - def test_validation_fail(self): - _fulltext_search_query( - self, - coll=[], - search_attrkey=42, - search_text={"hi": 1}, - ts=None, - filter_attr_expr=None, - offset=None, - limit=None, - select=None, - # --- - expect_error="[] is not of type 'string'", - ) - - def test_aql_error(self): - for sciname in scinames_test_all: - _fulltext_search_query( - self, - coll="ncbi_taxon", - search_attrkey="fake_attrkey", - search_text=sciname, - ts=None, - filter_attr_expr=None, - offset=None, - limit=None, - select=None, - # --- - expect_error=True, - ) - - def test_no_hit(self): - for sciname in scinames_test_all: - _fulltext_search_query( - self, - coll="ncbi_taxon", - search_attrkey="scientific_name", - search_text=sciname[::-1], - ts=None, - filter_attr_expr=None, - offset=None, - limit=None, - select=None, - # --- - expect_error=False, - expect_hit=False, - expected_hits=[], - ) - - -# --- Test helpers --- - - def _switch_taxonomy_search_species_strain_queries(search_text): return ( "taxonomy_search_species_strain_no_sort" From eadf2ac268c5db9aea886399f4b3525909451148 Mon Sep 17 00:00:00 2001 From: n1mus <709030+n1mus@users.noreply.github.com> Date: Mon, 2 May 2022 12:00:04 -0700 Subject: [PATCH 02/10] use image arangodb:3.9 --- CHANGELOG.md | 1 + docker-compose.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6f252ec..050b0132 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Use locale en_US instead of c.utf-8 +- Upgrade to image arangodb:3.9 ### Removed - Generic fulltext search and tests diff --git a/docker-compose.yaml b/docker-compose.yaml index e1739190..ac4d333d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -46,7 +46,7 @@ services: # Arangodb server in cluster mode arangodb: - image: arangodb:3.5 + image: arangodb:3.9 ports: - "127.0.0.1:8529:8529" command: sh -c "arangodb --starter.local" From 60a716b42b08c1e18f02c31abbebb7d3b4ebb0c6 Mon Sep 17 00:00:00 2001 From: n1mus <709030+n1mus@users.noreply.github.com> Date: Mon, 2 May 2022 12:32:47 -0700 Subject: [PATCH 03/10] update CHANGELOG.md --- CHANGELOG.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 050b0132..3185fb0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased +## [0.0.19] - 2022-05-02 +### Added +- github actions to build `develop`, `pr-x` and released version (e.g. `1.2.3`) Tags +- instructions for how to release to production ### Changed - Use locale en_US instead of c.utf-8 @@ -14,11 +17,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - Generic fulltext search and tests -## [0.0.19] - 2022-04-15 -### Added -- github actions to build `develop`, `pr-x` and released version (e.g. `1.2.3`) Tags -- instructions for how to release to production - ## [0.0.18] - 2022-03-02 ### Added - taxonomy_search_species_strain and taxonomy_search_species_strain_no_sort stored queries From 4d75d6e545ae25f0a506ec460dc825838b724264 Mon Sep 17 00:00:00 2001 From: ialarmedalien Date: Tue, 3 May 2022 14:12:24 -0700 Subject: [PATCH 04/10] Adding a throwaway workflow to demonstrate running workflows from a shared repo --- .github/workflows/run_shared_workflow.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/workflows/run_shared_workflow.yaml diff --git a/.github/workflows/run_shared_workflow.yaml b/.github/workflows/run_shared_workflow.yaml new file mode 100644 index 00000000..4d379fab --- /dev/null +++ b/.github/workflows/run_shared_workflow.yaml @@ -0,0 +1,14 @@ +--- +name: Run shared workflow +on: + pull_request: + branches: + - main + types: + - opened + - synchronize + - ready_for_review + +jobs: + run_important_job: + uses: kbase/.github/.github/workflows/shared_workflow.yaml From 4f9c41b43b5495adc0455c51491495d32533e338 Mon Sep 17 00:00:00 2001 From: ialarmedalien Date: Tue, 3 May 2022 14:17:43 -0700 Subject: [PATCH 05/10] another demo file --- .github/workflows/demo_workflow.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .github/workflows/demo_workflow.yaml diff --git a/.github/workflows/demo_workflow.yaml b/.github/workflows/demo_workflow.yaml new file mode 100644 index 00000000..d49c964f --- /dev/null +++ b/.github/workflows/demo_workflow.yaml @@ -0,0 +1,12 @@ +name: Shared workflow demo + + on: + workflow_call + + jobs: + say_hello: + runs-on: ubuntu-latest + steps: + - + name: say hello + run: echo "hello world" From ed44b5471fe54f6fe028632ee488c56c6fce7ad0 Mon Sep 17 00:00:00 2001 From: ialarmedalien Date: Tue, 3 May 2022 14:24:50 -0700 Subject: [PATCH 06/10] run shared workflow on develop branch --- .github/workflows/run_shared_workflow.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_shared_workflow.yaml b/.github/workflows/run_shared_workflow.yaml index 4d379fab..509e75dc 100644 --- a/.github/workflows/run_shared_workflow.yaml +++ b/.github/workflows/run_shared_workflow.yaml @@ -3,7 +3,7 @@ name: Run shared workflow on: pull_request: branches: - - main + - develop types: - opened - synchronize @@ -11,4 +11,4 @@ on: jobs: run_important_job: - uses: kbase/.github/.github/workflows/shared_workflow.yaml + uses: kbase/relation_engine/.github/workflows/demo_workflow.yaml From b0dcd5fec441dc92ee9de6f43c2cb7c91454305a Mon Sep 17 00:00:00 2001 From: ialarmedalien Date: Tue, 3 May 2022 14:27:05 -0700 Subject: [PATCH 07/10] fix yaml --- .github/workflows/demo_workflow.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/demo_workflow.yaml b/.github/workflows/demo_workflow.yaml index d49c964f..e5634477 100644 --- a/.github/workflows/demo_workflow.yaml +++ b/.github/workflows/demo_workflow.yaml @@ -1,9 +1,10 @@ +--- name: Shared workflow demo - on: +on: workflow_call - jobs: +jobs: say_hello: runs-on: ubuntu-latest steps: From f390f91a8b4a46982baf04223ff43159350bd8b1 Mon Sep 17 00:00:00 2001 From: ialarmedalien Date: Tue, 3 May 2022 14:37:06 -0700 Subject: [PATCH 08/10] add workflow version --- .github/workflows/run_shared_workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_shared_workflow.yaml b/.github/workflows/run_shared_workflow.yaml index 509e75dc..d42a9298 100644 --- a/.github/workflows/run_shared_workflow.yaml +++ b/.github/workflows/run_shared_workflow.yaml @@ -11,4 +11,4 @@ on: jobs: run_important_job: - uses: kbase/relation_engine/.github/workflows/demo_workflow.yaml + uses: kbase/relation_engine/.github/workflows/demo_workflow.yaml@develop From e0256c78977f928b30458c99529faba4177f3e99 Mon Sep 17 00:00:00 2001 From: ialarmedalien Date: Wed, 18 May 2022 06:39:18 -0700 Subject: [PATCH 09/10] Removing workflows added for demo purposes; stop the tests from running on push events --- .github/workflows/demo_workflow.yaml | 13 ------------- .github/workflows/run_shared_workflow.yaml | 14 -------------- .github/workflows/run_tests.yaml | 7 ++++++- 3 files changed, 6 insertions(+), 28 deletions(-) delete mode 100644 .github/workflows/demo_workflow.yaml delete mode 100644 .github/workflows/run_shared_workflow.yaml diff --git a/.github/workflows/demo_workflow.yaml b/.github/workflows/demo_workflow.yaml deleted file mode 100644 index e5634477..00000000 --- a/.github/workflows/demo_workflow.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -name: Shared workflow demo - -on: - workflow_call - -jobs: - say_hello: - runs-on: ubuntu-latest - steps: - - - name: say hello - run: echo "hello world" diff --git a/.github/workflows/run_shared_workflow.yaml b/.github/workflows/run_shared_workflow.yaml deleted file mode 100644 index d42a9298..00000000 --- a/.github/workflows/run_shared_workflow.yaml +++ /dev/null @@ -1,14 +0,0 @@ ---- -name: Run shared workflow -on: - pull_request: - branches: - - develop - types: - - opened - - synchronize - - ready_for_review - -jobs: - run_important_job: - uses: kbase/relation_engine/.github/workflows/demo_workflow.yaml@develop diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 1b864459..e6f56d16 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -1,6 +1,11 @@ name: Relation Engine test and deploy on: - [push, pull_request] + pull_request: + types: + - opened + - synchronize + - ready_for_review + jobs: run_tests: runs-on: ubuntu-latest From 5d19553295bf0c1df84556940b4ba573747ea0b4 Mon Sep 17 00:00:00 2001 From: n1mus <709030+n1mus@users.noreply.github.com> Date: Wed, 18 May 2022 08:37:29 -0700 Subject: [PATCH 10/10] prep release docs (#126) --- CHANGELOG.md | 9 +++++---- VERSION | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3185fb0d..d03fe682 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,15 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.0.20] 2022-05-17 +### Changed +- Use locale en_US instead of c.utf-8 +- Upgrade to image arangodb:3.9 + ## [0.0.19] - 2022-05-02 ### Added - github actions to build `develop`, `pr-x` and released version (e.g. `1.2.3`) Tags - instructions for how to release to production -### Changed -- Use locale en_US instead of c.utf-8 -- Upgrade to image arangodb:3.9 - ### Removed - Generic fulltext search and tests diff --git a/VERSION b/VERSION index 44517d51..fe04e7f6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.19 +0.0.20