Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions relation_engine_server/test/spec_release/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
## Test Spec Release

The file in this directory, `spec.tar.gz` is a cached release of the `relation_engine_spec` repo, found here:
`sample_spec_release`, and the corresponding archive, `spec.tar.gz`, contain a set of sample schema files suitable for use in tests.
Comment thread
jayrbolton marked this conversation as resolved.

https://github.com/kbase/relation_engine_spec
To create a new version of `spec.tar.gz`, you will need to exec into the `re_api` docker image to ensure that the new archive and its contents have the appropriate file owner and permissions (all files must have owner and group `root`/`root`).

It is cached here to avoid Github API usage limits when running tests on Travis.
Example commands:

It is also stored in the docker image for the RE API for use in tests in other codebases that depend on this one.
```
$ docker exec -it relation_engine_re_api_run_1234567890 sh
# # in the docker image
# cd relation_engine_server/test/spec_release
# # ... perform any edits ...
# tar -czvf new_spec.tar.gz sample_spec_release/
# # check the file listing is as expected
# tar -ztvf new_spec.tar.gz
# mv spec.tar.gz old_spec.tar.gz
# mv new_spec.tar.gz spec.tar.gz
# # ensure that the tests pass
# cd /app
# sh scripts/run_tests.sh
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name: ncbi_taxon
type: vertex
delta: true

indexes:
- type: fulltext
fields: [scientific_name]
- type: persistent
fields: [id, expired, created]
- type: persistent
fields: [expired, created, last_version]

schema:
"$schema": http://json-schema.org/draft-07/schema#
type: object
description: Template for a vertex entry in the NCBI taxonomy tree.
required: [id, scientific_name, rank, strain]
properties:
id:
type: string
description: NCBI Taxon id (positive integer)
examples: ['1', '2053699']
scientific_name:
type: string
title: Taxon name.
examples: ['Methylophilus methylotrophus', 'Bacteria', 'Firmicutes']
aliases:
type: array
description: Aliases
examples:
- - category: authority
name: Borreliella burgdorferi (Johnson et al. 1984) Adeolu and Gupta 2015
- category: genbank common name
name: Lyme disease spirochete
- category: synonym
name: Borrelia burgdorferi
- - category: common name
name: E. coli
- category: authority
name: '"Bacterium coli commune" Escherich 1885'
- category: synonym
name: Bacterium coli
items:
type: object
required: ['category', 'name']
properties:
category: {type: string}
name: {type: string}
rank:
type: string
title: Taxonomic rank
examples: ["Domain", "Phylum", "no rank"]
strain:
type: boolean
title: Strain flag
description: Whether this node corresponds to a strain. Strains are considered to be nodes
that have a rank of "no rank" and whose parents' rank is either species or subspecies or
where the parent's strain flag is true.
ncbi_taxon_id:
type: integer
title: The NCBI taxon ID as a number
gencode:
type: integer
title: The numeric ID of the genetic code for this organism.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: test_edge
type: edge
schema:
"$schema": "http://json-schema.org/draft-07/schema#"
type: object
required: [_from, _to]
description: Example edge schema for testing.
properties:
_from: {type: string}
_to: {type: string}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: test_vertex
type: vertex
schema:
"$schema": "http://json-schema.org/draft-07/schema#"
type: object
required: [_key]
description: An example vertex schema for testing
properties:
_key: {type: string}
is_public: {type: boolean}
ws_id: {type: integer}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: ncbi_taxonomy
category: taxonomy
title: NCBI Taxonomy
home_url: https://www.ncbi.nlm.nih.gov/taxonomy
data_url: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/
logo_path: /images/third-party-data-sources/ncbi/logo-51-64.png
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# TODO
# NOTE(review): placeholder test-data file — defines a single dummy value so the
# file is present for path-matching tests; presumably intentionally minimal (confirm).

x = 1  # dummy value; the content itself is never used
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Fetch a taxon document by taxonomy ID
name: ncbi_fetch_taxon
params:
type: object
required: [id, ts]
properties:
id:
type: string
title: NCBI Taxonomy ID
ts:
type: integer
title: Versioning timestamp
query: |
for t in ncbi_taxon
filter t.id == @id
filter t.created <= @ts AND t.expired >= @ts
limit 1
return t
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Test query - fetch a single test vertex by ID
name: fetch_test_vertex
params:
type: object
required: [key]
properties:
key:
type: string
title: _key to match on
query: |
FOR o IN test_vertex
FILTER o._key == @key
RETURN o
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Test query - List all test vertices
# Has some simple auth against ws_ids
name: list_test_vertices
query: |
FOR o IN test_vertex
FILTER o.is_public || o.ws_id IN ws_ids
RETURN o
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"name": "test_vertices",
"type": "arangosearch",
"writebufferIdle": 64,
"writebufferActive": 0,
"primarySort": [],
"writebufferSizeMax": 33554432,
"commitIntervalMsec": 1000,
"consolidationPolicy": {
"type": "bytes_accum",
"threshold": 0.1
},
"cleanupIntervalStep": 10,
"links": {
"test_vertex": {
"analyzers": [
"identity"
],
"fields": {
"_key": {
"analyzers": [
"text_en"
]
},
"is_public": {},
"ws_id": {}
},
"includeAllFields": false,
"storeValues": "none",
"trackListPositions": false
}
},
"consolidationIntervalMsec": 60000
}
Binary file modified relation_engine_server/test/spec_release/spec.tar.gz
Binary file not shown.
80 changes: 61 additions & 19 deletions relation_engine_server/test/test_spec_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,37 @@
from relation_engine_server.utils import spec_loader
from relation_engine_server.utils.spec_loader import SchemaNonexistent
from relation_engine_server.utils.config import get_config
from relation_engine_server.utils.wait_for import wait_for_api

_CONF = get_config()
_TEST_DIR = os_path.join('/app', 'relation_engine_server', 'test', 'data')


class TestSpecLoader(unittest.TestCase):

@classmethod
def setUpClass(cls):
    """
    Wait for the RE API to come up, then redirect every spec_paths config
    entry that points at the live spec repo to the sample_spec_release test
    directory for the duration of this test class.
    """
    wait_for_api()
    cls.test_dir = os_path.join('/app', 'relation_engine_server', 'test')
    cls.test_spec_dir = os_path.join(cls.test_dir, 'spec_release', 'sample_spec_release')

    # fetch the config once; the original assigned cls.config from a first
    # get_config() call and then immediately overwrote it below
    config = get_config()
    cls.repo_path = config['spec_paths']['repo']
    for key, value in config['spec_paths'].items():
        if cls.repo_path in value:
            config['spec_paths'][key] = value.replace(cls.repo_path, cls.test_spec_dir)
    cls.config = config
Comment on lines +19 to +27
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit config settings to point to the sample_spec_release dir for the duration of this test.


@classmethod
def tearDownClass(cls):
    """Undo the config redirection performed in setUpClass, restoring the repo paths."""
    spec_paths = cls.config['spec_paths']
    for key, value in spec_paths.items():
        if cls.test_spec_dir in value:
            spec_paths[key] = value.replace(cls.test_spec_dir, cls.repo_path)

def test_get_names(self, schema_type_names=[], expected=[]):
"""test getting the names of all the schemas of a given type"""

# this method should only be run from another test method
if len(schema_type_names) == 0:
Expand All @@ -32,20 +49,20 @@ def test_get_names(self, schema_type_names=[], expected=[]):
method = getattr(spec_loader, 'get_' + schema_type_singular + '_names')

# save the original value
original_config_dir = _CONF['spec_paths'][schema_type_plural]
original_config_dir = self.config['spec_paths'][schema_type_plural]
# set the config to the test directory
_CONF['spec_paths'][schema_type_plural] = os_path.join(_TEST_DIR, schema_type_plural)
self.config['spec_paths'][schema_type_plural] = os_path.join(self.test_dir, 'data', schema_type_plural)

got_names_method = method()
got_names_singular = spec_loader.get_names(schema_type_singular)
got_names_plural = spec_loader.get_names(schema_type_plural)

_CONF['spec_paths'][schema_type_plural] = os_path.join(_TEST_DIR, 'empty')
self.config['spec_paths'][schema_type_plural] = os_path.join(self.test_dir, 'data', 'empty')
got_names_method_empty = method()
got_names_empty = spec_loader.get_names(schema_type_singular)

# restore the original value
_CONF['spec_paths'][schema_type_plural] = original_config_dir
# restore the original value before running tests
self.config['spec_paths'][schema_type_plural] = original_config_dir

# ensure the results are as expected
# get_collection_names
Expand All @@ -67,15 +84,17 @@ def test_run_spec_loading_tests(self, schema_type_names=[], test_name=None):
self.assertTrue(True)
return

print("running test_run_spec_loading_tests with schema_type " + schema_type_names[0])
method = getattr(spec_loader, 'get_' + schema_type_names[0])
schema_type_singular = schema_type_names[0]
schema_type_plural = schema_type_names[1]
# e.g. 'spec_loader.get_collection'
method = getattr(spec_loader, 'get_' + schema_type_singular)

# get the path of the requested file
result_path = method(test_name, path_only=True)
self.assertIsInstance(result_path, str)
self.assertIn(test_name, result_path)
self.assertIn(
self.config['spec_paths'][schema_type_names[1]],
self.config['spec_paths'][schema_type_plural],
result_path,
)

Expand All @@ -92,18 +111,18 @@ def test_run_spec_loading_tests(self, schema_type_names=[], test_name=None):
self.assertEqual(result_obj['name'], test_name)

# check the contents of the dict when getting a data source
if schema_type_names[0] == 'data_source':
if schema_type_singular == 'data_source':

# logo_url should start with the same base as _CONF['kbase_endpoint']
endpoint = urlparse(_CONF['kbase_endpoint'])
# logo_url should start with the same base as config['kbase_endpoint']
endpoint = urlparse(self.config['kbase_endpoint'])
self.assertIn(endpoint.scheme + '://' + endpoint.netloc, result_obj['logo_url'])

# logo_path is deleted
self.assertNotIn('logo_path', result_obj.keys())

# a nonexistent file raises the appropriate error
fake_name = '../../../../spec/repo/collections/djornl/djornl_edge'
err_msg = schema_type_names[0].capitalize().replace("_", " ") + " '" + fake_name + "' does not exist."
fake_name = 'test/test_node'
err_msg = schema_type_singular.capitalize().replace("_", " ") + " '" + fake_name + "' does not exist."
with self.assertRaisesRegex(SchemaNonexistent, err_msg):
method(fake_name, path_only=True)

Expand All @@ -125,6 +144,10 @@ def test_get_schemas_of_various_types(self):
'schema_type_names': ['stored_query', 'stored_queries'],
'example': 'ncbi_fetch_taxon',
},
{
'schema_type_names': ['view', 'views'],
'example': 'test_vertices',
}
Comment on lines +147 to +150
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add in tests for retrieving views

]

for schema in schema_type_list:
Expand Down Expand Up @@ -162,3 +185,22 @@ def test_get_schema_for_doc(self):
err_msg = f"Collection 'fake_name' does not exist."
with self.assertRaisesRegex(SchemaNonexistent, err_msg):
spec_loader.get_schema_for_doc(fake_name, path_only=True)

def test_prevent_non_spec_dir_access(self):
    """
    Ensure that matching files in directories outside the designated spec repo cannot be retrieved
    """

    # a glob that climbs upward but still resolves to a file inside the spec repo is permitted
    inside_repo = '../../../../../**/fetch_test_vertex'
    found_path = spec_loader.get_schema('stored_queries', inside_repo, path_only=True)
    expected_path = os_path.join(
        self.test_spec_dir, 'stored_queries', 'test', 'fetch_test_vertex.yaml'
    )
    self.assertEqual(found_path, expected_path)

    # a relative path that matches a file in one of the other test data dirs must raise an error
    outside_repo = '../../../../data/collections/test_node'
    err_msg = f"Stored query '{outside_repo}' does not exist"
    with self.assertRaisesRegex(SchemaNonexistent, err_msg):
        spec_loader.get_schema('stored_queries', outside_repo, path_only=True)
24 changes: 21 additions & 3 deletions relation_engine_server/utils/spec_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,19 @@ def get_schema(schema_type, name, path_only=False):

schema_search_type = pluralise_schema_type(schema_type)

try:
path = _find_paths(_CONF['spec_paths'][schema_search_type], name + '.yaml')[0]
except IndexError:
yaml_paths = _find_paths(_CONF['spec_paths'][schema_search_type], f'{name}.yaml')
json_paths = _find_paths(_CONF['spec_paths'][schema_search_type], f'{name}.json')
# ensure we're using the canonical path and that all paths are unique
# we are only interested in paths that are in the designated spec repo
repo_path = os.path.abspath(_CONF['spec_paths']['repo'])
all_paths_set = set(os.path.abspath(path) for path in yaml_paths + json_paths)
all_paths = [p for p in all_paths_set if p.startswith(repo_path)]

if len(all_paths) == 0:
raise SchemaNonexistent(singularise_schema_type(schema_type), name)

# ignore duplicates or multiple results, just go with the first one
path = all_paths[0]
if path_only:
return path

Expand Down Expand Up @@ -112,6 +120,11 @@ def get_stored_query_names():
return get_names('stored_queries')


def get_view_names():
    """Return a list of the base names of every view spec file."""
    return get_names('views')


def get_collection(name, path_only=False):
    """Return the parsed YAML content for a named collection, or just its file path
    when `path_only` is set. Raises SchemaNonexistent if no such collection exists."""
    return get_schema('collection', name, path_only)
Expand All @@ -133,6 +146,11 @@ def get_stored_query(name, path_only=False):
return get_schema('stored_query', name, path_only)


def get_view(name, path_only=False):
    """Return the content of a named view spec, or just its file path when
    `path_only` is set. Raises an error if the view does not exist."""
    return get_schema('view', name, path_only)


def _find_paths(dir_path, file_pattern):
"""
Return all file paths from a filename pattern, starting from a parent
Expand Down