diff --git a/relation_engine_server/test/spec_release/README.md b/relation_engine_server/test/spec_release/README.md index 8419f925..a371c2e3 100644 --- a/relation_engine_server/test/spec_release/README.md +++ b/relation_engine_server/test/spec_release/README.md @@ -1,8 +1,22 @@ +## Test Spec Release -The file in this directory, `spec.tar.gz` is a cached release of the `relation_engine_spec` repo, found here: +`sample_spec_release`, and the corresponding archive, `spec.tar.gz`, contain a set of sample schema files suitable for use in tests. -https://github.com/kbase/relation_engine_spec +To create a new version of `spec.tar.gz`, you will need to exec into the `re_api` docker image to ensure that the new archive and its contents have the appropriate file owner and permissions (all files must have owner and group `root`/`root`). -It is cached here to avoid Github API usage limits when running tests on Travis. +Example commands: -It is also stored in the docker image for the RE API for use in tests in other codebases that depend on this one. +``` +$ docker exec -it relation_engine_re_api_run_1234567890 sh +# # in the docker image +# cd relation_engine_server/test/spec_release +# # ... perform any edits ... +# tar -czvf new_spec.tar.gz sample_spec_release/ +# # check the file listing is as expected +# tar -ztvf new_spec.tar.gz +# mv spec.tar.gz old_spec.tar.gz +# mv new_spec.tar.gz spec.tar.gz +# # ensure that the tests pass +# cd /app +# sh scripts/run_tests.sh +``` diff --git a/relation_engine_server/test/spec_release/sample_spec_release/collections/ncbi/ncbi_taxon.yaml b/relation_engine_server/test/spec_release/sample_spec_release/collections/ncbi/ncbi_taxon.yaml new file mode 100644 index 00000000..39c97168 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/collections/ncbi/ncbi_taxon.yaml @@ -0,0 +1,64 @@ +name: ncbi_taxon +type: vertex +delta: true + +indexes: + - type: fulltext + fields: [scientific_name] + - type: persistent + fields: [id, expired, created] + - type: persistent + fields: [expired, created, last_version] + +schema: + "$schema": http://json-schema.org/draft-07/schema# + type: object + description: Template for a vertex entry in the NCBI taxonomy tree. + required: [id, scientific_name, rank, strain] + properties: + id: + type: string + description: NCBI Taxon id (positive integer) + examples: ['1', '2053699'] + scientific_name: + type: string + title: Taxon name. + examples: ['Methylophilus methylotrophus', 'Bacteria', 'Firmicutes'] + aliases: + type: array + description: Aliases + examples: + - - category: authority + name: Borreliella burgdorferi (Johnson et al. 1984) Adeolu and Gupta 2015 + - category: genbank common name + name: Lyme disease spirochet + - category: synonym + name: Borrelia burgdorferi + - - category: common name + name: E. coli + - category: authority + name: '"Bacterium coli commune" Escherich 1885' + - category: synonym + name: Bacterium coli + items: + type: object + required: ['category', 'name'] + properties: + category: {type: string} + name: {type: string} + rank: + type: string + title: Taxonomic rank + examples: ["Domain", "Phylum", "no rank"] + strain: + type: boolean + title: Strain flag + description: Whether this node corresponds to a strain. Strains are considered to be nodes + that have a rank of "no rank" and whose parents' rank is either species or subspecies or + where the parent's strain flag is true. + ncbi_taxon_id: + type: integer + title: The NCBI taxon ID as a number + gencode: + type: integer + title: The numerc ID of the genetic code for this organism. diff --git a/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_edge.yaml b/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_edge.yaml new file mode 100644 index 00000000..fab7ad6e --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_edge.yaml @@ -0,0 +1,10 @@ +name: test_edge +type: edge +schema: + "$schema": "http://json-schema.org/draft-07/schema#" + type: object + required: [_from, _to] + description: Example edge schema for testing. + properties: + _from: {type: string} + _to: {type: string} diff --git a/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_vertex.yaml b/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_vertex.yaml new file mode 100644 index 00000000..b2d34668 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_vertex.yaml @@ -0,0 +1,11 @@ +name: test_vertex +type: vertex +schema: + "$schema": "http://json-schema.org/draft-07/schema#" + type: object + required: [_key] + description: An example vertex schema for testing + properties: + _key: {type: string} + is_public: {type: boolean} + ws_id: {type: integer} diff --git a/relation_engine_server/test/spec_release/sample_spec_release/data_sources/ncbi_taxonomy.yaml b/relation_engine_server/test/spec_release/sample_spec_release/data_sources/ncbi_taxonomy.yaml new file mode 100644 index 00000000..37a88195 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/data_sources/ncbi_taxonomy.yaml @@ -0,0 +1,6 @@ +name: ncbi_taxonomy +category: taxonomy +title: NCBI Taxonomy +home_url: https://www.ncbi.nlm.nih.gov/taxonomy +data_url: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/ +logo_path: /images/third-party-data-sources/ncbi/logo-51-64.png diff --git a/relation_engine_server/test/spec_release/sample_spec_release/migrations/__init__.py b/relation_engine_server/test/spec_release/sample_spec_release/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/relation_engine_server/test/spec_release/sample_spec_release/migrations/example.py b/relation_engine_server/test/spec_release/sample_spec_release/migrations/example.py new file mode 100644 index 00000000..ce5ce389 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/migrations/example.py @@ -0,0 +1,3 @@ +# TODO + +x = 1 diff --git a/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/ncbi_tax/ncbi_fetch_taxon.yaml b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/ncbi_tax/ncbi_fetch_taxon.yaml new file mode 100644 index 00000000..3a9c4170 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/ncbi_tax/ncbi_fetch_taxon.yaml @@ -0,0 +1,18 @@ +# Fetch a taxon document by taxonomy ID +name: ncbi_fetch_taxon +params: + type: object + required: [id, ts] + properties: + id: + type: string + title: NCBI Taxonomy ID + ts: + type: integer + title: Versioning timestamp +query: | + for t in ncbi_taxon + filter t.id == @id + filter t.created <= @ts AND t.expired >= @ts + limit 1 + return t diff --git a/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/fetch_test_vertex.yaml b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/fetch_test_vertex.yaml new file mode 100644 index 00000000..8845f4a1 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/fetch_test_vertex.yaml @@ -0,0 +1,13 @@ +# Test query - fetch a single test vertex by ID +name: fetch_test_vertex +params: + type: object + required: [key] + properties: + key: + type: string + title: _key to match on +query: | + FOR o IN test_vertex + FILTER o._key == @key + RETURN o diff --git a/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/list_test_vertices.yaml b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/list_test_vertices.yaml new file mode 100644 index 00000000..5d027d78 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/list_test_vertices.yaml @@ -0,0 +1,7 @@ +# Test query - List all test vertices +# Has some simple auth against ws_ids +name: list_test_vertices +query: | + FOR o IN test_vertex + FILTER o.is_public || o.ws_id IN ws_ids + RETURN o diff --git a/relation_engine_server/test/spec_release/sample_spec_release/views/test_vertices.json b/relation_engine_server/test/spec_release/sample_spec_release/views/test_vertices.json new file mode 100644 index 00000000..d45c3731 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/views/test_vertices.json @@ -0,0 +1,34 @@ +{ + "name": "test_vertices", + "type": "arangosearch", + "writebufferIdle": 64, + "writebufferActive": 0, + "primarySort": [], + "writebufferSizeMax": 33554432, + "commitIntervalMsec": 1000, + "consolidationPolicy": { + "type": "bytes_accum", + "threshold": 0.1 + }, + "cleanupIntervalStep": 10, + "links": { + "test_vertex": { + "analyzers": [ + "identity" + ], + "fields": { + "_key": { + "analyzers": [ + "text_en" + ] + }, + "is_public": {}, + "ws_id": {} + }, + "includeAllFields": false, + "storeValues": "none", + "trackListPositions": false + } + }, + "consolidationIntervalMsec": 60000 +} diff --git a/relation_engine_server/test/spec_release/spec.tar.gz b/relation_engine_server/test/spec_release/spec.tar.gz index e654605a..e4c2d7b7 100644 Binary files a/relation_engine_server/test/spec_release/spec.tar.gz and b/relation_engine_server/test/spec_release/spec.tar.gz differ diff --git a/relation_engine_server/test/test_spec_loader.py b/relation_engine_server/test/test_spec_loader.py index d1da1df9..0833e5e9 100644 --- a/relation_engine_server/test/test_spec_loader.py +++ b/relation_engine_server/test/test_spec_loader.py @@ -7,20 +7,37 @@ from relation_engine_server.utils import spec_loader from relation_engine_server.utils.spec_loader import SchemaNonexistent from relation_engine_server.utils.config import get_config -from relation_engine_server.utils.wait_for import wait_for_api - -_CONF = get_config() -_TEST_DIR = os_path.join('/app', 'relation_engine_server', 'test', 'data') class TestSpecLoader(unittest.TestCase): @classmethod def setUpClass(cls): - wait_for_api() - cls.config = get_config() + cls.test_dir = os_path.join('/app', 'relation_engine_server', 'test') + cls.test_spec_dir = os_path.join(cls.test_dir, 'spec_release', 'sample_spec_release') + + config = get_config() + cls.repo_path = config['spec_paths']['repo'] + for key in config['spec_paths'].keys(): + if cls.repo_path in config['spec_paths'][key]: + config['spec_paths'][key] = config['spec_paths'][key].replace( + cls.repo_path, + cls.test_spec_dir + ) + cls.config = config + + @classmethod + def tearDownClass(cls): + # undo all the config changes + for key in cls.config['spec_paths'].keys(): + if cls.test_spec_dir in cls.config['spec_paths'][key]: + cls.config['spec_paths'][key] = cls.config['spec_paths'][key].replace( + cls.test_spec_dir, + cls.repo_path + ) def test_get_names(self, schema_type_names=[], expected=[]): + """test getting the names of all the schemas of a given type""" # this method should only be run from another test method if len(schema_type_names) == 0: @@ -32,20 +49,20 @@ def test_get_names(self, schema_type_names=[], expected=[]): method = getattr(spec_loader, 'get_' + schema_type_singular + '_names') # save the original value - original_config_dir = _CONF['spec_paths'][schema_type_plural] + original_config_dir = self.config['spec_paths'][schema_type_plural] # set the config to the test directory - _CONF['spec_paths'][schema_type_plural] = os_path.join(_TEST_DIR, schema_type_plural) + self.config['spec_paths'][schema_type_plural] = os_path.join(self.test_dir, 'data', schema_type_plural) got_names_method = method() got_names_singular = spec_loader.get_names(schema_type_singular) got_names_plural = spec_loader.get_names(schema_type_plural) - _CONF['spec_paths'][schema_type_plural] = os_path.join(_TEST_DIR, 'empty') + self.config['spec_paths'][schema_type_plural] = os_path.join(self.test_dir, 'data', 'empty') got_names_method_empty = method() got_names_empty = spec_loader.get_names(schema_type_singular) - # restore the original value - _CONF['spec_paths'][schema_type_plural] = original_config_dir + # restore the original value before running tests + self.config['spec_paths'][schema_type_plural] = original_config_dir # ensure the results are as expected # get_collection_names @@ -67,15 +84,17 @@ def test_run_spec_loading_tests(self, schema_type_names=[], test_name=None): self.assertTrue(True) return - print("running test_run_spec_loading_tests with schema_type " + schema_type_names[0]) - method = getattr(spec_loader, 'get_' + schema_type_names[0]) + schema_type_singular = schema_type_names[0] + schema_type_plural = schema_type_names[1] + # e.g. 'spec_loader.get_collection' + method = getattr(spec_loader, 'get_' + schema_type_singular) # get the path of the requested file result_path = method(test_name, path_only=True) self.assertIsInstance(result_path, str) self.assertIn(test_name, result_path) self.assertIn( - self.config['spec_paths'][schema_type_names[1]], + self.config['spec_paths'][schema_type_plural], result_path, ) @@ -92,18 +111,18 @@ def test_run_spec_loading_tests(self, schema_type_names=[], test_name=None): self.assertEqual(result_obj['name'], test_name) # check the contents of the dict when getting a data source - if schema_type_names[0] == 'data_source': + if schema_type_singular == 'data_source': - # logo_url should start with the same base as _CONF['kbase_endpoint'] - endpoint = urlparse(_CONF['kbase_endpoint']) + # logo_url should start with the same base as config['kbase_endpoint'] + endpoint = urlparse(self.config['kbase_endpoint']) self.assertIn(endpoint.scheme + '://' + endpoint.netloc, result_obj['logo_url']) # logo_path is deleted self.assertNotIn('logo_path', result_obj.keys()) # a nonexistent file raises the appropriate error - fake_name = '../../../../spec/repo/collections/djornl/djornl_edge' - err_msg = schema_type_names[0].capitalize().replace("_", " ") + " '" + fake_name + "' does not exist." + fake_name = 'test/test_node' + err_msg = schema_type_singular.capitalize().replace("_", " ") + " '" + fake_name + "' does not exist." with self.assertRaisesRegex(SchemaNonexistent, err_msg): method(fake_name, path_only=True) @@ -125,6 +144,10 @@ def test_get_schemas_of_various_types(self): 'schema_type_names': ['stored_query', 'stored_queries'], 'example': 'ncbi_fetch_taxon', }, + { + 'schema_type_names': ['view', 'views'], + 'example': 'test_vertices', + } ] for schema in schema_type_list: @@ -162,3 +185,22 @@ def test_get_schema_for_doc(self): err_msg = f"Collection 'fake_name' does not exist." with self.assertRaisesRegex(SchemaNonexistent, err_msg): spec_loader.get_schema_for_doc(fake_name, path_only=True) + + def test_prevent_non_spec_dir_access(self): + """ + Ensure that matching files in directories outside the designated spec repo cannot be retrieved + """ + + # this query is OK as the file is still in the spec repo + path_in_spec_repo = '../../../../../**/fetch_test_vertex' + result = spec_loader.get_schema('stored_queries', path_in_spec_repo, path_only=True) + self.assertEqual( + result, + os_path.join(self.test_spec_dir, 'stored_queries', 'test', 'fetch_test_vertex.yaml') + ) + + # this matches a file in one of the other test data dirs => should throw an error + path_outside_spec_repo = '../../../../data/collections/test_node' + err_msg = f"Stored query '{path_outside_spec_repo}' does not exist" + with self.assertRaisesRegex(SchemaNonexistent, err_msg): + spec_loader.get_schema('stored_queries', path_outside_spec_repo, path_only=True) diff --git a/relation_engine_server/utils/spec_loader.py b/relation_engine_server/utils/spec_loader.py index 2cc1ffa8..f91daa2d 100644 --- a/relation_engine_server/utils/spec_loader.py +++ b/relation_engine_server/utils/spec_loader.py @@ -74,11 +74,19 @@ def get_schema(schema_type, name, path_only=False): schema_search_type = pluralise_schema_type(schema_type) - try: - path = _find_paths(_CONF['spec_paths'][schema_search_type], name + '.yaml')[0] - except IndexError: + yaml_paths = _find_paths(_CONF['spec_paths'][schema_search_type], f'{name}.yaml') + json_paths = _find_paths(_CONF['spec_paths'][schema_search_type], f'{name}.json') + # ensure we're using the canonical path and that all paths are unique + # we are only interested in paths that are in the designated spec repo + repo_path = os.path.abspath(_CONF['spec_paths']['repo']) + all_paths_set = set(os.path.abspath(path) for path in yaml_paths + json_paths) + all_paths = [p for p in all_paths_set if p.startswith(repo_path)] + + if len(all_paths) == 0: raise SchemaNonexistent(singularise_schema_type(schema_type), name) + # ignore duplicates or multiple results, just go with the first one + path = all_paths[0] if path_only: return path @@ -112,6 +120,11 @@ def get_stored_query_names(): return get_names('stored_queries') +def get_view_names(): + """Return an array of all view base names.""" + return get_names('views') + + def get_collection(name, path_only=False): """Get YAML content (or file path) for a specific collection. Throws an error if nonexistent.""" return get_schema('collection', name, path_only) @@ -133,6 +146,11 @@ def get_stored_query(name, path_only=False): return get_schema('stored_query', name, path_only) +def get_view(name, path_only=False): + """Get content or file path for a view file. Throws an error if nonexistent.""" + return get_schema('view', name, path_only) + + def _find_paths(dir_path, file_pattern): """ Return all file paths from a filename pattern, starting from a parent