Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions relation_engine_server/test/spec_release/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
## Test Spec Release

The file in this directory, `spec.tar.gz` is a cached release of the `relation_engine_spec` repo, found here:
`sample_spec_release`, and the corresponding archive, `spec.tar.gz`, contain a set of sample schema files suitable for use in tests.
Comment thread
jayrbolton marked this conversation as resolved.

https://github.com/kbase/relation_engine_spec
To create a new version of `spec.tar.gz`, you will need to exec into the `re_api` docker image to ensure that the new archive and its contents have the appropriate file owner and permissions (all files must have owner and group `root`/`root`).

It is cached here to avoid Github API usage limits when running tests on Travis.
Example commands:

It is also stored in the docker image for the RE API for use in tests in other codebases that depend on this one.
```
$ docker exec -it relation_engine_re_api_run_1234567890 sh
# # in the docker image
# cd relation_engine_server/test/spec_release
# # ... perform any edits ...
# tar -czvf new_spec.tar.gz sample_spec_release/
# # check the file listing is as expected
# tar -ztvf new_spec.tar.gz
# mv spec.tar.gz old_spec.tar.gz
# mv new_spec.tar.gz spec.tar.gz
# # ensure that the tests pass
# cd /app
# sh scripts/run_tests.sh
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name: ncbi_taxon
type: vertex
delta: true

indexes:
- type: fulltext
fields: [scientific_name]
- type: persistent
fields: [id, expired, created]
- type: persistent
fields: [expired, created, last_version]

schema:
"$schema": http://json-schema.org/draft-07/schema#
type: object
description: Template for a vertex entry in the NCBI taxonomy tree.
required: [id, scientific_name, rank, strain]
properties:
id:
type: string
description: NCBI Taxon id (positive integer)
examples: ['1', '2053699']
scientific_name:
type: string
title: Taxon name.
examples: ['Methylophilus methylotrophus', 'Bacteria', 'Firmicutes']
aliases:
type: array
description: Aliases
examples:
- - category: authority
name: Borreliella burgdorferi (Johnson et al. 1984) Adeolu and Gupta 2015
- category: genbank common name
name: Lyme disease spirochete
- category: synonym
name: Borrelia burgdorferi
- - category: common name
name: E. coli
- category: authority
name: '"Bacterium coli commune" Escherich 1885'
- category: synonym
name: Bacterium coli
items:
type: object
required: ['category', 'name']
properties:
category: {type: string}
name: {type: string}
rank:
type: string
title: Taxonomic rank
examples: ["Domain", "Phylum", "no rank"]
strain:
type: boolean
title: Strain flag
description: Whether this node corresponds to a strain. Strains are considered to be nodes
that have a rank of "no rank" and whose parents' rank is either species or subspecies or
where the parent's strain flag is true.
ncbi_taxon_id:
type: integer
title: The NCBI taxon ID as a number
gencode:
type: integer
title: The numeric ID of the genetic code for this organism.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: test_edge
type: edge
schema:
"$schema": "http://json-schema.org/draft-07/schema#"
type: object
required: [_from, _to]
description: Example edge schema for testing.
properties:
_from: {type: string}
_to: {type: string}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: test_vertex
type: vertex
schema:
"$schema": "http://json-schema.org/draft-07/schema#"
type: object
required: [_key]
description: An example vertex schema for testing
properties:
_key: {type: string}
is_public: {type: boolean}
ws_id: {type: integer}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: ncbi_taxonomy
category: taxonomy
title: NCBI Taxonomy
home_url: https://www.ncbi.nlm.nih.gov/taxonomy
data_url: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/
logo_path: /images/third-party-data-sources/ncbi/logo-51-64.png
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# TODO
# NOTE(review): placeholder test-data file — defines a single dummy value so the
# file is present for path-matching tests; presumably intentionally minimal (confirm).

x = 1  # dummy value; the content itself is never used
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Fetch a taxon document by taxonomy ID
name: ncbi_fetch_taxon
params:
type: object
required: [id, ts]
properties:
id:
type: string
title: NCBI Taxonomy ID
ts:
type: integer
title: Versioning timestamp
query: |
for t in ncbi_taxon
filter t.id == @id
filter t.created <= @ts AND t.expired >= @ts
limit 1
return t
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Test query - fetch a single test vertex by ID
name: fetch_test_vertex
params:
type: object
required: [key]
properties:
key:
type: string
title: _key to match on
query: |
FOR o IN test_vertex
FILTER o._key == @key
RETURN o
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Test query - List all test vertices
# Has some simple auth against ws_ids
name: list_test_vertices
query: |
FOR o IN test_vertex
FILTER o.is_public || o.ws_id IN ws_ids
RETURN o
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"name": "test_vertices",
"type": "arangosearch",
"writebufferIdle": 64,
"writebufferActive": 0,
"primarySort": [],
"writebufferSizeMax": 33554432,
"commitIntervalMsec": 1000,
"consolidationPolicy": {
"type": "bytes_accum",
"threshold": 0.1
},
"cleanupIntervalStep": 10,
"links": {
"test_vertex": {
"analyzers": [
"identity"
],
"fields": {
"_key": {
"analyzers": [
"text_en"
]
},
"is_public": {},
"ws_id": {}
},
"includeAllFields": false,
"storeValues": "none",
"trackListPositions": false
}
},
"consolidationIntervalMsec": 60000
}
Binary file modified relation_engine_server/test/spec_release/spec.tar.gz
Binary file not shown.
80 changes: 61 additions & 19 deletions relation_engine_server/test/test_spec_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,37 @@
from relation_engine_server.utils import spec_loader
from relation_engine_server.utils.spec_loader import SchemaNonexistent
from relation_engine_server.utils.config import get_config
from relation_engine_server.utils.wait_for import wait_for_api

_CONF = get_config()
_TEST_DIR = os_path.join('/app', 'relation_engine_server', 'test', 'data')


class TestSpecLoader(unittest.TestCase):

@classmethod
def setUpClass(cls):
    """
    Wait for the RE API to come up, then redirect every spec_paths config
    entry that points at the live spec repo to the sample_spec_release test
    directory for the duration of this test class.
    """
    wait_for_api()
    cls.test_dir = os_path.join('/app', 'relation_engine_server', 'test')
    cls.test_spec_dir = os_path.join(cls.test_dir, 'spec_release', 'sample_spec_release')

    # fetch the config once; the original assigned cls.config from a first
    # get_config() call and then immediately overwrote it below
    config = get_config()
    cls.repo_path = config['spec_paths']['repo']
    for key, value in config['spec_paths'].items():
        if cls.repo_path in value:
            config['spec_paths'][key] = value.replace(cls.repo_path, cls.test_spec_dir)
    cls.config = config
Comment on lines +19 to +27
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit config settings to point to the sample_spec_release dir for the duration of this test.


@classmethod
def tearDownClass(cls):
    """Undo the config redirection performed in setUpClass, restoring the repo paths."""
    spec_paths = cls.config['spec_paths']
    for key, value in spec_paths.items():
        if cls.test_spec_dir in value:
            spec_paths[key] = value.replace(cls.test_spec_dir, cls.repo_path)

def test_get_names(self, schema_type_names=[], expected=[]):
"""test getting the names of all the schemas of a given type"""

# this method should only be run from another test method
if len(schema_type_names) == 0:
Expand All @@ -32,20 +49,20 @@ def test_get_names(self, schema_type_names=[], expected=[]):
method = getattr(spec_loader, 'get_' + schema_type_singular + '_names')

# save the original value
original_config_dir = _CONF['spec_paths'][schema_type_plural]
original_config_dir = self.config['spec_paths'][schema_type_plural]
# set the config to the test directory
_CONF['spec_paths'][schema_type_plural] = os_path.join(_TEST_DIR, schema_type_plural)
self.config['spec_paths'][schema_type_plural] = os_path.join(self.test_dir, 'data', schema_type_plural)

got_names_method = method()
got_names_singular = spec_loader.get_names(schema_type_singular)
got_names_plural = spec_loader.get_names(schema_type_plural)

_CONF['spec_paths'][schema_type_plural] = os_path.join(_TEST_DIR, 'empty')
self.config['spec_paths'][schema_type_plural] = os_path.join(self.test_dir, 'data', 'empty')
got_names_method_empty = method()
got_names_empty = spec_loader.get_names(schema_type_singular)

# restore the original value
_CONF['spec_paths'][schema_type_plural] = original_config_dir
# restore the original value before running tests
self.config['spec_paths'][schema_type_plural] = original_config_dir

# ensure the results are as expected
# get_collection_names
Expand All @@ -67,15 +84,17 @@ def test_run_spec_loading_tests(self, schema_type_names=[], test_name=None):
self.assertTrue(True)
return

print("running test_run_spec_loading_tests with schema_type " + schema_type_names[0])
method = getattr(spec_loader, 'get_' + schema_type_names[0])
schema_type_singular = schema_type_names[0]
schema_type_plural = schema_type_names[1]
# e.g. 'spec_loader.get_collection'
method = getattr(spec_loader, 'get_' + schema_type_singular)

# get the path of the requested file
result_path = method(test_name, path_only=True)
self.assertIsInstance(result_path, str)
self.assertIn(test_name, result_path)
self.assertIn(
self.config['spec_paths'][schema_type_names[1]],
self.config['spec_paths'][schema_type_plural],
result_path,
)

Expand All @@ -92,18 +111,18 @@ def test_run_spec_loading_tests(self, schema_type_names=[], test_name=None):
self.assertEqual(result_obj['name'], test_name)

# check the contents of the dict when getting a data source
if schema_type_names[0] == 'data_source':
if schema_type_singular == 'data_source':

# logo_url should start with the same base as _CONF['kbase_endpoint']
endpoint = urlparse(_CONF['kbase_endpoint'])
# logo_url should start with the same base as config['kbase_endpoint']
endpoint = urlparse(self.config['kbase_endpoint'])
self.assertIn(endpoint.scheme + '://' + endpoint.netloc, result_obj['logo_url'])

# logo_path is deleted
self.assertNotIn('logo_path', result_obj.keys())

# a nonexistent file raises the appropriate error
fake_name = '../../../../spec/repo/collections/djornl/djornl_edge'
err_msg = schema_type_names[0].capitalize().replace("_", " ") + " '" + fake_name + "' does not exist."
fake_name = 'test/test_node'
err_msg = schema_type_singular.capitalize().replace("_", " ") + " '" + fake_name + "' does not exist."
with self.assertRaisesRegex(SchemaNonexistent, err_msg):
method(fake_name, path_only=True)

Expand All @@ -125,6 +144,10 @@ def test_get_schemas_of_various_types(self):
'schema_type_names': ['stored_query', 'stored_queries'],
'example': 'ncbi_fetch_taxon',
},
{
'schema_type_names': ['view', 'views'],
'example': 'test_vertices',
}
Comment on lines +147 to +150
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add in tests for retrieving views

]

for schema in schema_type_list:
Expand Down Expand Up @@ -162,3 +185,22 @@ def test_get_schema_for_doc(self):
err_msg = f"Collection 'fake_name' does not exist."
with self.assertRaisesRegex(SchemaNonexistent, err_msg):
spec_loader.get_schema_for_doc(fake_name, path_only=True)

def test_prevent_non_spec_dir_access(self):
    """
    Ensure that matching files in directories outside the designated spec repo cannot be retrieved
    """

    # a glob that climbs upward but still resolves to a file inside the spec repo is permitted
    inside_repo = '../../../../../**/fetch_test_vertex'
    found_path = spec_loader.get_schema('stored_queries', inside_repo, path_only=True)
    expected_path = os_path.join(
        self.test_spec_dir, 'stored_queries', 'test', 'fetch_test_vertex.yaml'
    )
    self.assertEqual(found_path, expected_path)

    # a relative path that matches a file in one of the other test data dirs must raise an error
    outside_repo = '../../../../data/collections/test_node'
    err_msg = f"Stored query '{outside_repo}' does not exist"
    with self.assertRaisesRegex(SchemaNonexistent, err_msg):
        spec_loader.get_schema('stored_queries', outside_repo, path_only=True)
24 changes: 21 additions & 3 deletions relation_engine_server/utils/spec_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,19 @@ def get_schema(schema_type, name, path_only=False):

schema_search_type = pluralise_schema_type(schema_type)

try:
path = _find_paths(_CONF['spec_paths'][schema_search_type], name + '.yaml')[0]
except IndexError:
yaml_paths = _find_paths(_CONF['spec_paths'][schema_search_type], f'{name}.yaml')
json_paths = _find_paths(_CONF['spec_paths'][schema_search_type], f'{name}.json')
# ensure we're using the canonical path and that all paths are unique
# we are only interested in paths that are in the designated spec repo
repo_path = os.path.abspath(_CONF['spec_paths']['repo'])
all_paths_set = set(os.path.abspath(path) for path in yaml_paths + json_paths)
all_paths = [p for p in all_paths_set if p.startswith(repo_path)]

if len(all_paths) == 0:
raise SchemaNonexistent(singularise_schema_type(schema_type), name)

# ignore duplicates or multiple results, just go with the first one
path = all_paths[0]
if path_only:
return path

Expand Down Expand Up @@ -112,6 +120,11 @@ def get_stored_query_names():
return get_names('stored_queries')


def get_view_names():
    """Return a list of the base names of every view spec file."""
    return get_names('views')


def get_collection(name, path_only=False):
    """Return the parsed YAML content for a named collection, or just its file path
    when `path_only` is set. Raises SchemaNonexistent if no such collection exists."""
    return get_schema('collection', name, path_only)
Expand All @@ -133,6 +146,11 @@ def get_stored_query(name, path_only=False):
return get_schema('stored_query', name, path_only)


def get_view(name, path_only=False):
    """Return the content of a named view spec, or just its file path when
    `path_only` is set. Raises an error if the view does not exist."""
    return get_schema('view', name, path_only)


def _find_paths(dir_path, file_pattern):
"""
Return all file paths from a filename pattern, starting from a parent
Expand Down