From 30e25ce035637b04e40a0848e4c2d61ea97a4608 Mon Sep 17 00:00:00 2001 From: ialarmedalien Date: Tue, 8 Sep 2020 09:08:55 -0700 Subject: [PATCH 1/2] Patch security hole in spec_loader.py add sample_spec_release dir and instructions for updating the test spec archive add "get_view" method to spec_loader.py and sample view to the test spec archive --- .../test/spec_release/README.md | 22 ++++- .../collections/ncbi/ncbi_taxon.yaml | 64 ++++++++++++++ .../collections/test/test_edge.yaml | 10 +++ .../collections/test/test_vertex.yaml | 11 +++ .../data_sources/ncbi_taxonomy.yaml | 6 ++ .../migrations/__init__.py | 0 .../sample_spec_release/migrations/example.py | 3 + .../ncbi_tax/ncbi_fetch_taxon.yaml | 18 ++++ .../test/fetch_test_vertex.yaml | 13 +++ .../test/list_test_vertices.yaml | 7 ++ .../views/test_vertices.json | 34 ++++++++ .../test/spec_release/spec.tar.gz | Bin 2838 -> 2197 bytes .../test/test_spec_loader.py | 80 +++++++++++++----- relation_engine_server/utils/spec_loader.py | 23 ++++- 14 files changed, 265 insertions(+), 26 deletions(-) create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/collections/ncbi/ncbi_taxon.yaml create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_edge.yaml create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_vertex.yaml create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/data_sources/ncbi_taxonomy.yaml create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/migrations/__init__.py create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/migrations/example.py create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/stored_queries/ncbi_tax/ncbi_fetch_taxon.yaml create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/fetch_test_vertex.yaml create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/list_test_vertices.yaml create mode 100644 relation_engine_server/test/spec_release/sample_spec_release/views/test_vertices.json diff --git a/relation_engine_server/test/spec_release/README.md b/relation_engine_server/test/spec_release/README.md index 8419f925..a371c2e3 100644 --- a/relation_engine_server/test/spec_release/README.md +++ b/relation_engine_server/test/spec_release/README.md @@ -1,8 +1,22 @@ +## Test Spec Release -The file in this directory, `spec.tar.gz` is a cached release of the `relation_engine_spec` repo, found here: +`sample_spec_release`, and the corresponding archive, `spec.tar.gz`, contain a set of sample schema files suitable for use in tests. -https://github.com/kbase/relation_engine_spec +To create a new version of `spec.tar.gz`, you will need to exec into the `re_api` docker image to ensure that the new archive and its contents have the appropriate file owner and permissions (all files must have owner and group `root`/`root`). -It is cached here to avoid Github API usage limits when running tests on Travis. +Example commands: -It is also stored in the docker image for the RE API for use in tests in other codebases that depend on this one. +``` +$ docker exec -it relation_engine_re_api_run_1234567890 sh +# # in the docker image +# cd relation_engine_server/test/spec_release +# # ... perform any edits ... +# tar -czvf new_spec.tar.gz sample_spec_release/ +# # check the file listing is as expected +# tar -ztvf new_spec.tar.gz +# mv spec.tar.gz old_spec.tar.gz +# mv new_spec.tar.gz spec.tar.gz +# # ensure that the tests pass +# cd /app +# sh scripts/run_tests.sh +``` diff --git a/relation_engine_server/test/spec_release/sample_spec_release/collections/ncbi/ncbi_taxon.yaml b/relation_engine_server/test/spec_release/sample_spec_release/collections/ncbi/ncbi_taxon.yaml new file mode 100644 index 00000000..39c97168 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/collections/ncbi/ncbi_taxon.yaml @@ -0,0 +1,64 @@ +name: ncbi_taxon +type: vertex +delta: true + +indexes: + - type: fulltext + fields: [scientific_name] + - type: persistent + fields: [id, expired, created] + - type: persistent + fields: [expired, created, last_version] + +schema: + "$schema": http://json-schema.org/draft-07/schema# + type: object + description: Template for a vertex entry in the NCBI taxonomy tree. + required: [id, scientific_name, rank, strain] + properties: + id: + type: string + description: NCBI Taxon id (positive integer) + examples: ['1', '2053699'] + scientific_name: + type: string + title: Taxon name. + examples: ['Methylophilus methylotrophus', 'Bacteria', 'Firmicutes'] + aliases: + type: array + description: Aliases + examples: + - - category: authority + name: Borreliella burgdorferi (Johnson et al. 1984) Adeolu and Gupta 2015 + - category: genbank common name + name: Lyme disease spirochet + - category: synonym + name: Borrelia burgdorferi + - - category: common name + name: E. coli + - category: authority + name: '"Bacterium coli commune" Escherich 1885' + - category: synonym + name: Bacterium coli + items: + type: object + required: ['category', 'name'] + properties: + category: {type: string} + name: {type: string} + rank: + type: string + title: Taxonomic rank + examples: ["Domain", "Phylum", "no rank"] + strain: + type: boolean + title: Strain flag + description: Whether this node corresponds to a strain. Strains are considered to be nodes + that have a rank of "no rank" and whose parents' rank is either species or subspecies or + where the parent's strain flag is true. + ncbi_taxon_id: + type: integer + title: The NCBI taxon ID as a number + gencode: + type: integer + title: The numerc ID of the genetic code for this organism. diff --git a/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_edge.yaml b/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_edge.yaml new file mode 100644 index 00000000..fab7ad6e --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_edge.yaml @@ -0,0 +1,10 @@ +name: test_edge +type: edge +schema: + "$schema": "http://json-schema.org/draft-07/schema#" + type: object + required: [_from, _to] + description: Example edge schema for testing. + properties: + _from: {type: string} + _to: {type: string} diff --git a/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_vertex.yaml b/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_vertex.yaml new file mode 100644 index 00000000..b2d34668 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/collections/test/test_vertex.yaml @@ -0,0 +1,11 @@ +name: test_vertex +type: vertex +schema: + "$schema": "http://json-schema.org/draft-07/schema#" + type: object + required: [_key] + description: An example vertex schema for testing + properties: + _key: {type: string} + is_public: {type: boolean} + ws_id: {type: integer} diff --git a/relation_engine_server/test/spec_release/sample_spec_release/data_sources/ncbi_taxonomy.yaml b/relation_engine_server/test/spec_release/sample_spec_release/data_sources/ncbi_taxonomy.yaml new file mode 100644 index 00000000..37a88195 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/data_sources/ncbi_taxonomy.yaml @@ -0,0 +1,6 @@ +name: ncbi_taxonomy +category: taxonomy +title: NCBI Taxonomy +home_url: https://www.ncbi.nlm.nih.gov/taxonomy +data_url: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/ +logo_path: /images/third-party-data-sources/ncbi/logo-51-64.png diff --git a/relation_engine_server/test/spec_release/sample_spec_release/migrations/__init__.py b/relation_engine_server/test/spec_release/sample_spec_release/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/relation_engine_server/test/spec_release/sample_spec_release/migrations/example.py b/relation_engine_server/test/spec_release/sample_spec_release/migrations/example.py new file mode 100644 index 00000000..ce5ce389 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/migrations/example.py @@ -0,0 +1,3 @@ +# TODO + +x = 1 diff --git a/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/ncbi_tax/ncbi_fetch_taxon.yaml b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/ncbi_tax/ncbi_fetch_taxon.yaml new file mode 100644 index 00000000..3a9c4170 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/ncbi_tax/ncbi_fetch_taxon.yaml @@ -0,0 +1,18 @@ +# Fetch a taxon document by taxonomy ID +name: ncbi_fetch_taxon +params: + type: object + required: [id, ts] + properties: + id: + type: string + title: NCBI Taxonomy ID + ts: + type: integer + title: Versioning timestamp +query: | + for t in ncbi_taxon + filter t.id == @id + filter t.created <= @ts AND t.expired >= @ts + limit 1 + return t diff --git a/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/fetch_test_vertex.yaml b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/fetch_test_vertex.yaml new file mode 100644 index 00000000..8845f4a1 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/fetch_test_vertex.yaml @@ -0,0 +1,13 @@ +# Test query - fetch a single test vertex by ID +name: fetch_test_vertex +params: + type: object + required: [key] + properties: + key: + type: string + title: _key to match on +query: | + FOR o IN test_vertex + FILTER o._key == @key + RETURN o diff --git a/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/list_test_vertices.yaml b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/list_test_vertices.yaml new file mode 100644 index 00000000..5d027d78 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/stored_queries/test/list_test_vertices.yaml @@ -0,0 +1,7 @@ +# Test query - List all test vertices +# Has some simple auth against ws_ids +name: list_test_vertices +query: | + FOR o IN test_vertex + FILTER o.is_public || o.ws_id IN ws_ids + RETURN o diff --git a/relation_engine_server/test/spec_release/sample_spec_release/views/test_vertices.json b/relation_engine_server/test/spec_release/sample_spec_release/views/test_vertices.json new file mode 100644 index 00000000..d45c3731 --- /dev/null +++ b/relation_engine_server/test/spec_release/sample_spec_release/views/test_vertices.json @@ -0,0 +1,34 @@ +{ + "name": "test_vertices", + "type": "arangosearch", + "writebufferIdle": 64, + "writebufferActive": 0, + "primarySort": [], + "writebufferSizeMax": 33554432, + "commitIntervalMsec": 1000, + "consolidationPolicy": { + "type": "bytes_accum", + "threshold": 0.1 + }, + "cleanupIntervalStep": 10, + "links": { + "test_vertex": { + "analyzers": [ + "identity" + ], + "fields": { + "_key": { + "analyzers": [ + "text_en" + ] + }, + "is_public": {}, + "ws_id": {} + }, + "includeAllFields": false, + "storeValues": "none", + "trackListPositions": false + } + }, + "consolidationIntervalMsec": 60000 +} diff --git a/relation_engine_server/test/spec_release/spec.tar.gz b/relation_engine_server/test/spec_release/spec.tar.gz index e654605a76d146fd27e1ef4bd9f76d710d169155..e4c2d7b71b6b7a939c4cdbdcc0cf2abcb0aa30b9 100644 GIT binary patch literal 2197 zcmV;G2x|8qiwFP!000021MM4YZ`(LBpZzNa?g7~aY{xIzt#LhEdP(kb=nHLj_roF( zXo@BCGocX?>D4m*_PGXPE)mqY6{d6XE+>jo_s`+Fy_=%G4)(YIVFmo96c~} z5yp;Ve-Vnm_PIA0_nmQX)bEaty1juj>K=jf(1MgfqBM~JM^Xs=Il5i=|5H#U`VZM$ z63s-U9>x?7*S|9!3~T*Y0Xy{Xx-4SabvyCm{$m&gkA}nj_22Jy2ZJN%Rxts!{y(RG zdS~%pr2gId?~MxnKOPT8wf?JsM{xD(>{X+22T!5b_}0?YP&JhKztTcd-~BV8lFOQq#N6f`oj(F?Qwu z;a2@O#0sF!|5X6yf7B@$T`!;wR?R>_v1raI7=1&kZ zAu$4qs9`0Zzy~C|c=ZMZT)u>LflS|v%jZ|;i0jziPoKg+@z-MBoL~L&<|T+9zT(-} z|2e~{&F7bSRP7%CcKLsQJSzJCuv@SHDuIIkKSwnK;e6%!M%9f+@E@W;iI8GsFa`r8 z(HJM@ghfbqt6b)*EH-Rg%3;3Blp7~g&OG?=0bea|(;*wA_TC4DJ^VlNrp(pk?%p-v z!S!DsOM<%os{{_wzkSG3Ke-dpz8PRw{CCD%^`A4U>%U6i5nN;?pH(UN!b?ILX;bdm z77?!1eMJk{tL$Tb3$!Y${FuMH@O$~MXP0obCW*zV&5kV6bWY`F&tIujOhm{LTS81X zXzJ^!XeK0R>Y3OBdlIlc8pud~URp$Rb(Zp3Ek={e>{@iQ-r(d7{R0BKpzj6-rzXKNiYp^T+ z4+i5x{Kw`0u#W##z*<~iD!JidXqyWYCiAAUVB83BJ>VviPax1bR+E#H+uPfY;nj(F z*oj!snTwl~oQbt7OFz>wQse*I)L8rHJx&^2%!L~h9ZcYag=B7)qXCnC8xxOSv<=7h zx}~0&4sEB`9t}G&rjlnZ!Fynu$pEVP<`OP`w)Xqr@y zx%&?#qITNa5psUwOES~#?)W5ad(`~wlF*$=5w^h9!Ym7YsyxYJV{j8VPj8%95g=uj zuGCOOT*`GymXVg*1m3SNB0r`WEwK20PcwYgOM{R#{cf1TgU$@pt%3D8UB6B zHNo^C6WVC7$ftK{#BFDBeU@;Jbh?plj#KU{jO@x|G}3IwJlC+lUh5E>J$a=Oz1hW_ z5oFuvEyy<4_vu&c(*jP?RA#pRuN&nFY|s3)oxV15)fGk8!9i_qBFlV*BFcq1RnP%V z_sjBQ6fG(3Y@dx3rmF>#MDJQAjS&_dd~C8I;%dJRJHbz}P)xHMy3BMxXMM7iYbNx# zcie*Ge%Bd{o;*3ewgd$Mcc0m$BO2OH$p|l~4i;R*0pp2+Fn!bbjX|Q!;LnJssbs`F zUoaUmFTu>8$wD~85LGz-AySe>+4rBO)I}k(_cs1p?RAeJXeYB=X_kNvsL#Ze(87HNI<2N+2z?^gjOhF`kic^hNq+${I%G|>v*-)K~lR_snM3G`XHHV-`Go`kx zS}7$U8Ulg=h*+UP%vK*@J?1tLXy-BVinKaTiBJN{tTd)dL9GDBTa`>#Z}|YXNJVYr zO<5ePOicTshLcHUX;50_JhvE7OOKPUHsU#4o&mv+hA0WA87ca+hu^y=FYHLAXZYYO z7?H6%)u?>KBVF&9$;ZS*BBsL5*IAX6)qgjP-agQu{#iJ@{vVF(`~TIzr}e*i+0)cY z_Z?$L{om`4dRzCu#`XQrO5nYzj7{D$`#KvcZ?+7|1Tljm7>ef@n~C&-6myFipH7n* zmJ^pgH$@WI)+#zl-aGc?A(6+rwjZxmw28@ni(pILzr-#k>>Op+AQf+V^dR1t<{o`Nm& z`%A1ws}xS@9|y0tAq(6p!&SFybhAlX+9FeC3otoy@YitV3X_{Z5?7Z^S5;9y=6$k$&8{( zI6{49D0Oq6J=9iO+2csK)%If(tGB<7z`o@1&U&jqUx!?WH&0hS{DYYDMif|e9oMBg z7Ndf$Q-|-rHbYg>)_3b^wz0JJP>w&2Oq{)2m<^24V!W1U&|*VaRI2fNs&!*UXhmww z)I@Fa6iRuBw(eRsnbin9RM$0UQ1DV{f<3lc?dN)gobmSnSM)vs(35X|WUG*5tr5Io zGkeO)cTOrs#87l52=aL11SSeKwky~Cgxbd)nSO${Ikfk(g&4G-L$qDi-`ZJD)Dg0i zST7e5I$0E2WkOo@k^i1AqF0t9sVDncOuC&A)+B5%kvX=+hM!BryHUI%${b~1ak{C} zvAzjTE5i|b7fI@{Nqa=l=iGA-AQE`Cr2D=>(A5iByium%TLNII)nNNFRY7{8+|HGt zwb5Mf-AIwFe081-u?vQAG1mJL_nxF9aRrc3m=vIZV@S0I`vJ@Wy~#(51OP(5cZ%mU*TdEb2f>VrxbyXzy!fJu2!YWnwPvqvGI!8OA>+7IcuB z#_^5$!$n)xkXEYBIV0-wRv)*qfKO0;V-S`yuF>Y%Hg{u{CM`mnxFNX z`n!H_OI=Isc01}e{i2&toRE};z++=9h|RR>cq+3=E|~|qKNZ%-djNhGexSA@%Fh1) zY3)W7UYg`hXhnj?CqX1DBG!1p+KTX>OFV(xq z(K)eBaLyz#kZ9%B4ob#!L52Av!yNwC75eWwc}>=8bRPs);A4Il9kdXb+swTkXXnlv zxrRjgeVN)?C1Q$(dbcc1Uus>pO=6DYxT9jbZF>gJH+@T7aEqTf%~@n`W}>gp!y~z{ z#j%uP<=X-MCQ{Qk*Xa@8xWjMKS$t1nPaH1`Kl!4;H!1rWCAByAWz_qa?uhajFKvUH z-^_H!6SnMnYf-OdXHvvoWXZ+QYVREs1=)9_9!?^Nz>Mh|rS;(JW2;b_DQpNJgqOfe zUBJnxceqxWZvit}B;-hE9fkpe9v2Dd9{S=un;?7h1ske1d^lGo?m$N%VK588<|emt zeZl6K^5S=;NxyG-aU4&BQql$c{JVY^CAjlO<~g?b4H- zjdO6x-by39sU^$k?*?h1E-E~BE>4Om|E#&{ta#&W5ra^IYma!aR6(-9I#D3_J@ZPr zLgiR>@XWwNK6dZ;%x?*jV>F@pkp7YAiVo_1RgmavyN?>TR>XuIqgS)?LE*9v3{l|PZ|3;F>vjOaP z=N%_uP*CDfu|V_TmoEbNyBGR}Doq!_SozRCAbL3+65`1?aLdOT37ATT$YqP}d^lNK zy|+)I5`?G6%eBKA8*w+d*9u@IVghRo+)%SCp9Z}{*X9Y&MDK=5CI9{cH)Rz%)8#T6 z`>41ZD|}h&T`$8w$g_qB8-WkU8+xMjRK??Onq!6cmXKT-sKU!e@`9}!a*;K6+G*Ai zYq+N!OJKtGaH&1jhiMq6KH9+7I&%*ti1wnOlQ6T!eJb;@yRP(#i9&W+^wq9(RQC2# zR;O*nD!8{JOX7&gfJu&wTJI&Zf{k$=Gj|Ycy|_4Yrx#RAjF3@5jR|CwOJx>qh#I9s zV5)SY`PAAb90xvp3L4;bTN5C!kv1=xn%xo3Progz%$t~6gV2w<#rL=2$ERxykny^wwS6+ve}-aGGcVQ{YrRo$(LB7^M!WQ% z@sv+>;RB_&yV)mY+*NEciI1Am3fIMjZr|dtS-Ggg`mR{4>*Mn(?j?oqYO(IWzkYV; zlIFBJnW-`MEdUiEt*_1b3?J8ee<5vy&7w=bo`UwCw#7QLwV+gI5+Od#CX58lJAySz zUS=38WyIfe7GIO4Iz-9$;0L4>bbw%&xjHRXGtMAug*@~KA0YKl8jkcVG|NcsEG_vu zw~$LV(36GYSa(90Y1v>>PiMqZ-!4IJv|w_}QeV-yBWXu^xqGCIcZ3z06i%~NVs~Qa zexI9xklbB>4P5Mbf_ZxOv_KucM8h(5+#|@td?8WQa;tH?X z2cr!6*66$5_m!Hwpy!T1XxbPeL3d8rjVFGUbq>?2nLm*G^65b;|2}-HAXdXE6MqYP zr!!l$3IcuTRTgwrIq;2VdAiQX8hZZ4J0BBvACK91VV4Gka*N8&?koQv74iH1%Ns7v z9?u;{JsIQ4`jI34YK?<#JEH|%col+^%x|VWzup@L9v!~Uy|&l z0>xt^1!MYIS-HCRl!~q$ThMyiKri{lT$Ffh0q*16<0Si%kzmVD9?~%u&^Rvhs=w(0 zuk9#O`LdaPibLNhd^vnzb-=VZW8OK@jS#TibVE8zh4o&mkA8cOkOoe26^)knM?!sh zk+1VKEZ&CO!zd9j$M8Gp?rnvRR27`jt47c`o2YBGPlds{fv5Tw8RGI}ONaZA$arsb l>mh(8I1i{tL{_9pp+)$>=|fZ>)?V$PH2zLk(GlqA{s+5s-Mjz* diff --git a/relation_engine_server/test/test_spec_loader.py b/relation_engine_server/test/test_spec_loader.py index d1da1df9..0833e5e9 100644 --- a/relation_engine_server/test/test_spec_loader.py +++ b/relation_engine_server/test/test_spec_loader.py @@ -7,20 +7,37 @@ from relation_engine_server.utils import spec_loader from relation_engine_server.utils.spec_loader import SchemaNonexistent from relation_engine_server.utils.config import get_config -from relation_engine_server.utils.wait_for import wait_for_api - -_CONF = get_config() -_TEST_DIR = os_path.join('/app', 'relation_engine_server', 'test', 'data') class TestSpecLoader(unittest.TestCase): @classmethod def setUpClass(cls): - wait_for_api() - cls.config = get_config() + cls.test_dir = os_path.join('/app', 'relation_engine_server', 'test') + cls.test_spec_dir = os_path.join(cls.test_dir, 'spec_release', 'sample_spec_release') + + config = get_config() + cls.repo_path = config['spec_paths']['repo'] + for key in config['spec_paths'].keys(): + if cls.repo_path in config['spec_paths'][key]: + config['spec_paths'][key] = config['spec_paths'][key].replace( + cls.repo_path, + cls.test_spec_dir + ) + cls.config = config + + @classmethod + def tearDownClass(cls): + # undo all the config changes + for key in cls.config['spec_paths'].keys(): + if cls.test_spec_dir in cls.config['spec_paths'][key]: + cls.config['spec_paths'][key] = cls.config['spec_paths'][key].replace( + cls.test_spec_dir, + cls.repo_path + ) def test_get_names(self, schema_type_names=[], expected=[]): + """test getting the names of all the schemas of a given type""" # this method should only be run from another test method if len(schema_type_names) == 0: @@ -32,20 +49,20 @@ def test_get_names(self, schema_type_names=[], expected=[]): method = getattr(spec_loader, 'get_' + schema_type_singular + '_names') # save the original value - original_config_dir = _CONF['spec_paths'][schema_type_plural] + original_config_dir = self.config['spec_paths'][schema_type_plural] # set the config to the test directory - _CONF['spec_paths'][schema_type_plural] = os_path.join(_TEST_DIR, schema_type_plural) + self.config['spec_paths'][schema_type_plural] = os_path.join(self.test_dir, 'data', schema_type_plural) got_names_method = method() got_names_singular = spec_loader.get_names(schema_type_singular) got_names_plural = spec_loader.get_names(schema_type_plural) - _CONF['spec_paths'][schema_type_plural] = os_path.join(_TEST_DIR, 'empty') + self.config['spec_paths'][schema_type_plural] = os_path.join(self.test_dir, 'data', 'empty') got_names_method_empty = method() got_names_empty = spec_loader.get_names(schema_type_singular) - # restore the original value - _CONF['spec_paths'][schema_type_plural] = original_config_dir + # restore the original value before running tests + self.config['spec_paths'][schema_type_plural] = original_config_dir # ensure the results are as expected # get_collection_names @@ -67,15 +84,17 @@ def test_run_spec_loading_tests(self, schema_type_names=[], test_name=None): self.assertTrue(True) return - print("running test_run_spec_loading_tests with schema_type " + schema_type_names[0]) - method = getattr(spec_loader, 'get_' + schema_type_names[0]) + schema_type_singular = schema_type_names[0] + schema_type_plural = schema_type_names[1] + # e.g. 'spec_loader.get_collection' + method = getattr(spec_loader, 'get_' + schema_type_singular) # get the path of the requested file result_path = method(test_name, path_only=True) self.assertIsInstance(result_path, str) self.assertIn(test_name, result_path) self.assertIn( - self.config['spec_paths'][schema_type_names[1]], + self.config['spec_paths'][schema_type_plural], result_path, ) @@ -92,18 +111,18 @@ def test_run_spec_loading_tests(self, schema_type_names=[], test_name=None): self.assertEqual(result_obj['name'], test_name) # check the contents of the dict when getting a data source - if schema_type_names[0] == 'data_source': + if schema_type_singular == 'data_source': - # logo_url should start with the same base as _CONF['kbase_endpoint'] - endpoint = urlparse(_CONF['kbase_endpoint']) + # logo_url should start with the same base as config['kbase_endpoint'] + endpoint = urlparse(self.config['kbase_endpoint']) self.assertIn(endpoint.scheme + '://' + endpoint.netloc, result_obj['logo_url']) # logo_path is deleted self.assertNotIn('logo_path', result_obj.keys()) # a nonexistent file raises the appropriate error - fake_name = '../../../../spec/repo/collections/djornl/djornl_edge' - err_msg = schema_type_names[0].capitalize().replace("_", " ") + " '" + fake_name + "' does not exist." + fake_name = 'test/test_node' + err_msg = schema_type_singular.capitalize().replace("_", " ") + " '" + fake_name + "' does not exist." with self.assertRaisesRegex(SchemaNonexistent, err_msg): method(fake_name, path_only=True) @@ -125,6 +144,10 @@ def test_get_schemas_of_various_types(self): 'schema_type_names': ['stored_query', 'stored_queries'], 'example': 'ncbi_fetch_taxon', }, + { + 'schema_type_names': ['view', 'views'], + 'example': 'test_vertices', + } ] for schema in schema_type_list: @@ -162,3 +185,22 @@ def test_get_schema_for_doc(self): err_msg = f"Collection 'fake_name' does not exist." with self.assertRaisesRegex(SchemaNonexistent, err_msg): spec_loader.get_schema_for_doc(fake_name, path_only=True) + + def test_prevent_non_spec_dir_access(self): + """ + Ensure that matching files in directories outside the designated spec repo cannot be retrieved + """ + + # this query is OK as the file is still in the spec repo + path_in_spec_repo = '../../../../../**/fetch_test_vertex' + result = spec_loader.get_schema('stored_queries', path_in_spec_repo, path_only=True) + self.assertEqual( + result, + os_path.join(self.test_spec_dir, 'stored_queries', 'test', 'fetch_test_vertex.yaml') + ) + + # this matches a file in one of the other test data dirs => should throw an error + path_outside_spec_repo = '../../../../data/collections/test_node' + err_msg = f"Stored query '{path_outside_spec_repo}' does not exist" + with self.assertRaisesRegex(SchemaNonexistent, err_msg): + spec_loader.get_schema('stored_queries', path_outside_spec_repo, path_only=True) diff --git a/relation_engine_server/utils/spec_loader.py b/relation_engine_server/utils/spec_loader.py index 2cc1ffa8..625ebfd9 100644 --- a/relation_engine_server/utils/spec_loader.py +++ b/relation_engine_server/utils/spec_loader.py @@ -74,11 +74,18 @@ def get_schema(schema_type, name, path_only=False): schema_search_type = pluralise_schema_type(schema_type) - try: - path = _find_paths(_CONF['spec_paths'][schema_search_type], name + '.yaml')[0] - except IndexError: + yaml_paths = _find_paths(_CONF['spec_paths'][schema_search_type], f'{name}.yaml') + json_paths = _find_paths(_CONF['spec_paths'][schema_search_type], f'{name}.json') + # ensure we're using the canonical path and that all paths are unique + # we are only interested in paths that are in the designated spec repo + repo_path = os.path.abspath(_CONF['spec_paths']['repo']) + all_paths = [p for p in set(os.path.abspath(path) for path in yaml_paths + json_paths) if repo_path in p] + + if len(all_paths) == 0: raise SchemaNonexistent(singularise_schema_type(schema_type), name) + # ignore duplicates or multiple results, just go with the first one + path = all_paths[0] if path_only: return path @@ -112,6 +119,11 @@ def get_stored_query_names(): return get_names('stored_queries') +def get_view_names(): + """Return an array of all stored queries base names.""" + return get_names('views') + + def get_collection(name, path_only=False): """Get YAML content (or file path) for a specific collection. Throws an error if nonexistent.""" return get_schema('collection', name, path_only) @@ -133,6 +145,11 @@ def get_stored_query(name, path_only=False): return get_schema('stored_query', name, path_only) +def get_view(name, path_only=False): + """Get AQL content or file path for a specific stored query. Throws an error if nonexistent.""" + return get_schema('view', name, path_only) + + def _find_paths(dir_path, file_pattern): """ Return all file paths from a filename pattern, starting from a parent From 4dc2ea32617de8f98856c53e5cfcbaf7ece7a71f Mon Sep 17 00:00:00 2001 From: ialarmedalien Date: Tue, 8 Sep 2020 10:12:10 -0700 Subject: [PATCH 2/2] Fix incorrect comments for get_view and get_view_names Simplify code to reduce array of paths to a unique set --- relation_engine_server/utils/spec_loader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/relation_engine_server/utils/spec_loader.py b/relation_engine_server/utils/spec_loader.py index 625ebfd9..f91daa2d 100644 --- a/relation_engine_server/utils/spec_loader.py +++ b/relation_engine_server/utils/spec_loader.py @@ -79,7 +79,8 @@ def get_schema(schema_type, name, path_only=False): # ensure we're using the canonical path and that all paths are unique # we are only interested in paths that are in the designated spec repo repo_path = os.path.abspath(_CONF['spec_paths']['repo']) - all_paths = [p for p in set(os.path.abspath(path) for path in yaml_paths + json_paths) if repo_path in p] + all_paths_set = set(os.path.abspath(path) for path in yaml_paths + json_paths) + all_paths = [p for p in all_paths_set if p.startswith(repo_path)] if len(all_paths) == 0: raise SchemaNonexistent(singularise_schema_type(schema_type), name) @@ -120,7 +121,7 @@ def get_stored_query_names(): def get_view_names(): - """Return an array of all stored queries base names.""" + """Return an array of all view base names.""" return get_names('views') @@ -146,7 +147,7 @@ def get_stored_query(name, path_only=False): def get_view(name, path_only=False): - """Get AQL content or file path for a specific stored query. Throws an error if nonexistent.""" + """Get content or file path for a view file. Throws an error if nonexistent.""" return get_schema('view', name, path_only)