Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ coverage_report/

# Spec archives
spec.tar.gz

# Creds
arango_live_server_config.json
Comment thread
This conversation was marked as resolved.
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
.PHONY: test reset
QUERY_TESTING_FILE = spec/test/stored_queries/test_query.py

.PHONY: test reset full_query_testing sampling_query_testing

test:
docker-compose build
Expand All @@ -13,3 +15,9 @@ shell:
# Tear down the compose project, removing its images (--rmi all) and
# volumes (-v), then rebuild the images from scratch.
# --rmi and -v are options of the `down` subcommand, so `down` must be given.
reset:
	docker-compose down --rmi all -v
	docker-compose build

# Run the stored-query test file with DO_QUERY_TESTING set to the mode encoded
# in the target name: the "_query_testing" suffix is stripped from $@, giving
# "full" or "sampling".
full_query_testing sampling_query_testing:
	DO_QUERY_TESTING=$(@:_query_testing=) time python -m pytest -s $(QUERY_TESTING_FILE)
2 changes: 2 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ coverage==5.2.1
typed-ast>=1.4.0
black==20.8b1
pytest==6.2.5
python-arango==5.4.0
numpy==1.21.2
Comment thread
This conversation was marked as resolved.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Flask==1.0.2
itsdangerous==2.0.1
greenlet==0.4.16
gunicorn==19.9.0
gevent==1.3.7
Expand Down
6 changes: 3 additions & 3 deletions scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ sh /app/scripts/start_server.sh &
coverage erase
# spec validation
python -m spec.validate
# run importer/, relation_engine_server/, and spec/ tests
coverage run --branch -m unittest discover -v
# run importer/, relation_engine_server/, and spec/ tests, skip test_query.py
coverage run --branch -m pytest --ignore=spec/test/stored_queries/test_query.py
# RE client tests
PYTHONPATH=client_src python -m unittest discover client_src/test
PYTHONPATH=client_src python -m pytest client_src/test
coverage html --omit=*/test_*
55 changes: 55 additions & 0 deletions spec/stored_queries/taxonomy/taxonomy_ncbi_species.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Search ncbi_taxon collection for species/strains by scientific name
Comment thread
This conversation was marked as resolved.
name: taxonomy_ncbi_species
# Schema of the bind parameters accepted by the query below.
params:
  type: object
  required: [search_text]
  additionalProperties: false
  properties:
    # Text that is tokenized and prefix-matched against scientific_name.
    search_text:
      type: string
      title: Search text
      examples: [escherichia, es]
      description: Text to search on the search attribute values
    # When set, only documents with created <= ts <= expired are returned.
    ts:
      type: [integer, "null"]
      title: Versioning timestamp
      default: null
    # Number of sorted results to skip; the query uses 0 when this is unset.
    offset:
      type: [integer, "null"]
      title: Paging offset
      maximum: 100000
      default: 0
    # Page size; the query uses 20 when this is unset.
    limit:
      type: [integer, "null"]
      title: Max results to return
      default: 20
      maximum: 1000
    # Attribute name(s) handed to KEEP(); when null the whole document is returned.
    select:
      type: [string, array, "null"]
      items:
        type: string
      examples: [scientific_name, [scientific_name, id]]
      default: null
      description: Document attributes to keep in the results
# Query outline:
#   1. Normalize the search text (trim, lowercase, collapse whitespace) for the
#      prefix/exact comparisons used when sorting.
#   2. Tokenize the raw search text with the "icu_tokenize" analyzer, drop
#      everything from the first comma onward in each token, and join the
#      tokens into a comma-separated list of "prefix:<token>" terms.
#   3. Run FULLTEXT() on ncbi_taxon.scientific_name, keep documents valid at @ts
#      whose rank is species/strain (or whose strain attribute is truthy), sort
#      prefix matches first, then exact matches, then by scientific name, and
#      page with @offset/@limit.
query: |
  LET search_text__norm = REGEX_REPLACE(LOWER(TRIM(@search_text)), "\\s+", " ")
  LET search_text__icu_toks = TOKENS(@search_text, "icu_tokenize") // analyzer
  LET search_text__wordboundmod_icu_toks = (
      FOR tok IN search_text__icu_toks
          RETURN REGEX_REPLACE(tok, ",.*", "") // commas cannot be escaped in fulltext search
  )
  LET search_text__fulltext = CONCAT_SEPARATOR(", ", // comma delimit
      FOR tok IN search_text__wordboundmod_icu_toks // prepend "prefix:"
          RETURN CONCAT("prefix:", tok)
  )
  FOR doc IN FULLTEXT(ncbi_taxon, "scientific_name", search_text__fulltext)
      FILTER @ts ? doc.created <= @ts AND doc.expired >= @ts : true
      FILTER doc.rank IN ["species", "strain"] OR doc.strain
      LET doc_sciname__norm = REGEX_REPLACE(LOWER(TRIM(doc.scientific_name)), "\\s+", " ") // for exact matching
      LET contains_ind = CONTAINS(doc_sciname__norm, search_text__norm, true)
      SORT contains_ind == 0 DESC, // prefix match
          doc_sciname__norm == search_text__norm DESC, // exact match
          doc.scientific_name // lexical
      LIMIT @offset ? @offset : 0, @limit ? @limit : 20
      RETURN @select ? KEEP(doc, @select) : doc
50 changes: 50 additions & 0 deletions spec/stored_queries/taxonomy/taxonomy_ncbi_species_no_sort.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Search ncbi_taxon collection for species/strains by scientific name
Comment thread
This conversation was marked as resolved.
# Unlike taxonomy_ncbi_species, the results are not sorted: the first matching documents (up to the limit) are returned as-is.
# Useful for short prefixes (e.g., "s"), whose many matches would be expensive, and not meaningful, to sort.
name: taxonomy_ncbi_species_no_sort
# Bind-parameter schema: only search_text is mandatory and unknown keys are rejected.
params:
  type: object
  required:
    - search_text
  additionalProperties: false
  properties:
    search_text:
      # Raw text; the query tokenizes it and prefix-matches every token.
      type: string
      title: Search text
      examples:
        - escherichia
        - es
      description: Text to search on the search attribute values
    ts:
      # When non-null, restricts results to documents with created <= ts <= expired.
      type:
        - integer
        - "null"
      title: Versioning timestamp
      default: null
    offset:
      # Matches to skip; the query substitutes 0 when this is unset.
      type:
        - integer
        - "null"
      title: Paging offset
      maximum: 100000
      default: 0
    limit:
      # Matches to return; the query substitutes 20 when this is unset.
      type:
        - integer
        - "null"
      title: Max results to return
      default: 20
      maximum: 1000
    select:
      # Attribute name(s) given to KEEP(); null returns whole documents.
      type:
        - string
        - array
        - "null"
      items:
        type: string
      examples:
        - scientific_name
        - - scientific_name
          - id
      default: null
      description: Document attributes to keep in the results
# The query tokenizes the search text, builds a comma-separated list of
# "prefix:<token>" terms for FULLTEXT(), filters by timestamp and rank/strain,
# and pages the unsorted matches with @offset/@limit.
query: |
  LET search_text__icu_toks = TOKENS(@search_text, "icu_tokenize") // analyzer
  LET search_text__wordboundmod_icu_toks = (
      FOR tok IN search_text__icu_toks
          RETURN REGEX_REPLACE(tok, ",.*", "") // commas cannot be escaped in fulltext search
  )
  LET search_text__fulltext = CONCAT_SEPARATOR(", ", // comma delimit
      FOR tok IN search_text__wordboundmod_icu_toks // prepend "prefix:"
          RETURN CONCAT("prefix:", tok)
  )
  FOR doc IN FULLTEXT(ncbi_taxon, "scientific_name", search_text__fulltext)
      FILTER @ts ? doc.created <= @ts AND doc.expired >= @ts : true
      FILTER doc.rank IN ["species", "strain"] OR doc.strain
      LIMIT @offset ? @offset : 0, @limit ? @limit : 20
      RETURN @select ? KEEP(doc, @select) : doc
Loading