diff --git a/README.md b/README.md index 85a97cb8..fa850c76 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ These specifications are used by the [Relation Engine API](relation_engine_serve The relation engine server (`relation_engine_server/`) is a simple API that allows KBase community developers to interact with the Relation Engine graph database. You can run stored queries or do bulk updates on documents. ## Relation Engine Startup -* Docker image is built with environment variable `SPEC_RELEASE_PATH=/opt/spec.tar.gz'. This contains the specs from the repo itself. +* Docker image is built with environment variable `SPEC_RELEASE_PATH=/opt/spec.tar.gz`. This contains the specs from the repo itself. * Wait for response from auth, workspace, and arangodb services, as they are set up * Specs are set up. Either the repo specs or remote specs are loaded into the specs root path * Collections, views, and analyzers from the specs are added to the ArangoDB server. If the collection, view, or analyzer already exists, but in a different configuration, it will _not_ be overwritten. diff --git a/spec/stored_queries/generic/fulltext_search.yaml b/spec/stored_queries/generic/fulltext_search.yaml index b8a31b0a..6859add4 100644 --- a/spec/stored_queries/generic/fulltext_search.yaml +++ b/spec/stored_queries/generic/fulltext_search.yaml @@ -1,3 +1,6 @@ +# Should be REVISED or DEPRECATED. +# Is currently unused outside testing. 
+# # Search a collection with a fulltext index with an attribute name and search text # Also supports filtering by outer-level attributes # Not recommended for fast searching because it can be very slow and even timeout at 60s diff --git a/spec/test/data/ncbi_taxon.json b/spec/test/data/ncbi_taxon.json index 9a4092f8..31866edb 100644 --- a/spec/test/data/ncbi_taxon.json +++ b/spec/test/data/ncbi_taxon.json @@ -2260,5 +2260,167 @@ "expired": 1612915015846, "release_created": 1541030400000, "release_expired": 1612137599999 + }, + { + "_key": "338794_2018-11-01", + "_id": "ncbi_taxon/338794_2018-11-01", + "_rev": "_b2jbO4G--D", + "id": "338794", + "scientific_name": "low G+C Gram-positive bacterium HTA462", + "rank": "species", + "strain": false, + "aliases": [], + "ncbi_taxon_id": 338794, + "gencode": 11, + "first_version": "2018-11-01", + "last_version": "2021-02-01", + "created": 1541030460000, + "expired": 9007199254740991, + "release_created": 1541030400000, + "release_expired": 9007199254740991 + }, + { + "_key": "586732_2018-11-01", + "_id": "ncbi_taxon/586732_2018-11-01", + "_rev": "_b2kB1gK--B", + "id": "586732", + "scientific_name": "Integrating expression vector pJEB403+drrA", + "rank": "species", + "strain": false, + "aliases": [], + "ncbi_taxon_id": 586732, + "gencode": 11, + "first_version": "2018-11-01", + "last_version": "2021-02-01", + "created": 1541030460000, + "expired": 9007199254740991, + "release_created": 1541030400000, + "release_expired": 9007199254740991 + }, + { + "_key": "1127597_2018-11-01", + "_id": "ncbi_taxon/1127597_2018-11-01", + "_rev": "_b2lFmce--B", + "id": "1127597", + "scientific_name": "Fusarium cf. 
solani 3+4-uuu DPGS-2011", + "rank": "species", + "strain": false, + "aliases": [], + "ncbi_taxon_id": 1127597, + "gencode": 1, + "first_version": "2018-11-01", + "last_version": "2021-02-01", + "created": 1541030460000, + "expired": 9007199254740991, + "release_created": 1541030400000, + "release_expired": 9007199254740991 + }, + { + "_key": "1173779_2018-11-01", + "_id": "ncbi_taxon/1173779_2018-11-01", + "_rev": "_b2lOxFa--_", + "id": "1173779", + "scientific_name": "Salmonella enterica subsp. diarizonae serovar 60:r:e,n,x,z15", + "rank": "no rank", + "strain": true, + "aliases": [], + "ncbi_taxon_id": 1173779, + "gencode": 11, + "first_version": "2018-11-01", + "last_version": "2021-02-01", + "created": 1541030460000, + "expired": 9007199254740991, + "release_created": 1541030400000, + "release_expired": 9007199254740991 + }, + { + "_key": "1906029_2018-11-01", + "_id": "ncbi_taxon/1906029_2018-11-01", + "_rev": "_b2nDL5---_", + "id": "1906029", + "scientific_name": "Nostoc sp. 'Peltigera sp. 
\"hawaiensis\" P1236 cyanobiont'", + "rank": "species", + "strain": false, + "aliases": [], + "ncbi_taxon_id": 1906029, + "gencode": 11, + "first_version": "2018-11-01", + "last_version": "2021-02-01", + "created": 1541030460000, + "expired": 9007199254740991, + "release_created": 1541030400000, + "release_expired": 9007199254740991 + }, + { + "_key": "1945188_2018-11-01", + "_id": "ncbi_taxon/1945188_2018-11-01", + "_rev": "_b2nJbF2--_", + "id": "1945188", + "scientific_name": "Reporter vector p1168hIL6mC/EBP-luc+", + "rank": "species", + "strain": false, + "aliases": [], + "ncbi_taxon_id": 1945188, + "gencode": 11, + "first_version": "2018-11-01", + "last_version": "2021-02-01", + "created": 1541030460000, + "expired": 9007199254740991, + "release_created": 1541030400000, + "release_expired": 9007199254740991 + }, + { + "_key": "1945295_2018-11-01", + "_id": "ncbi_taxon/1945295_2018-11-01", + "_rev": "_b2nJbIK--_", + "id": "1945295", + "scientific_name": "Vector pEntry-attR2-IRES-eGFP-luc+-pA-attL3", + "rank": "species", + "strain": false, + "aliases": [], + "ncbi_taxon_id": 1945295, + "gencode": 11, + "first_version": "2018-11-01", + "last_version": "2021-02-01", + "created": 1541030460000, + "expired": 9007199254740991, + "release_created": 1541030400000, + "release_expired": 9007199254740991 + }, + { + "_key": "2727889_2021-02-01", + "_id": "ncbi_taxon/2727889_2021-02-01", + "_rev": "_b2n6us---A", + "id": "2727889", + "scientific_name": "Pleurocapsales cyanobacterium 'Beach rock 4+5\"'", + "rank": "species", + "strain": false, + "aliases": [], + "ncbi_taxon_id": 2727889, + "gencode": 11, + "first_version": "2021-02-01", + "last_version": "2021-02-01", + "created": 1612915015847, + "expired": 9007199254740991, + "release_created": 1612137600000, + "release_expired": 9007199254740991 + }, + { + "_key": "fake_2021-02-01", + "_id": "ncbi_taxon/fake_2021-02-01", + "_rev": "fake", + "id": "fake", + "scientific_name": "|Fake|fake|fake| ||fake||", + "rank": "species", 
+ "strain": false, + "aliases": [], + "ncbi_taxon_id": -1, + "gencode": 11, + "first_version": "2021-02-01", + "last_version": "2021-02-01", + "created": 1612915015847, + "expired": 9007199254740991, + "release_created": 1612137600000, + "release_expired": 9007199254740991 } ] diff --git a/spec/test/stored_queries/test_fulltext_search.py b/spec/test/stored_queries/test_fulltext_search.py index e0340d02..99bd4d44 100644 --- a/spec/test/stored_queries/test_fulltext_search.py +++ b/spec/test/stored_queries/test_fulltext_search.py @@ -35,6 +35,7 @@ ncbi_taxa = json.load(fh) # scinames_test_all are all the test scinames +# These are selected from the ncbi_taxon collection scinames_test_all = [ # --- Token preceded by punctuation --- "Lactobacillus sp. 'thermophilus'", @@ -55,7 +56,18 @@ "Vaccinia virus WR 65-16", "Dengue virus 2 Jamaica/1409/1983", "Dengue virus 2 Thailand/NGS-C/1944", - # --- Dups (techinically only applicable to live data) --- + # --- Escape chars ( ,:+-|"' ) --- + # --- TODO: add sample scinames that use the escape chars in a wider variety of syntaxes --- + "Salmonella enterica subsp. diarizonae serovar 60:r:e,n,x,z15", + "Fusarium cf. solani 3+4-uuu DPGS-2011", + "Integrating expression vector pJEB403+drrA", + "Vector pEntry-attR2-IRES-eGFP-luc+-pA-attL3", + "low G+C Gram-positive bacterium HTA462", + "Reporter vector p1168hIL6mC/EBP-luc+", + "Pleurocapsales cyanobacterium 'Beach rock 4+5\"'", + "Nostoc sp. 'Peltigera sp. \"hawaiensis\" P1236 cyanobiont'", + "|Fake|fake|fake| ||fake||", + # --- Dups (technically only applicable to live data) --- "environmental samples", "Listeria sp. FSL_L7-0091", "Listeria sp. FSL_L7-1519", @@ -64,7 +76,8 @@ "Corticiaceae sp.", "Escherichia coli", ] -# scinames_test_latest are the test scinames that are compatible with a current timestamp +# scinames_test_latest are the test scinames that are not expired and are +# compatible with a current timestamp scinames_test_latest = [ "Lactobacillus sp. 
'thermophilus'", "Rabbit fibroma virus (strain Kasza)", @@ -79,6 +92,15 @@ "Vaccinia virus WR 65-16", "Dengue virus 2 Jamaica/1409/1983", "Dengue virus 2 Thailand/NGS-C/1944", + "Salmonella enterica subsp. diarizonae serovar 60:r:e,n,x,z15", + "Fusarium cf. solani 3+4-uuu DPGS-2011", + "Integrating expression vector pJEB403+drrA", + "Vector pEntry-attR2-IRES-eGFP-luc+-pA-attL3", + "low G+C Gram-positive bacterium HTA462", + "Reporter vector p1168hIL6mC/EBP-luc+", + "Pleurocapsales cyanobacterium 'Beach rock 4+5\"'", + "Nostoc sp. 'Peltigera sp. \"hawaiensis\" P1236 cyanobiont'", + "|Fake|fake|fake| ||fake||", "environmental samples", "Listeria sp. FSL_L7-0091", "Listeria sp. FSL_L7-1519",