From 83725d9143fd7333ab8c2e5767a76b79e7350956 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Wed, 21 Aug 2019 13:42:08 -0600 Subject: [PATCH 01/20] add the ability to catch all issues at once for csv files and a file to cause some issues with encoding --- multinet/uploaders/csv.py | 22 ++++++++++++++++++---- test/data/clubs_broken_encoding.csv | 8 ++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 test/data/clubs_broken_encoding.csv diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index f75f5022..448b7d27 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -14,6 +14,8 @@ def validate_csv(rows): """Perform any necessary CSV validation, and return appropriate errors.""" + data_errors = [] + fieldnames = rows[0].keys() if "_key" in fieldnames: # Node Table, check for key uniqueness @@ -26,8 +28,11 @@ def validate_csv(rows): else: uniqueKeys.add(key) + if "?" in uniqueKeys: + data_errors.append({"error": "encoding", "detail": "utf-8"}) + if len(duplicates) > 0: - return {"error": "duplicate", "detail": list(duplicates)} + data_errors.append({"error": "duplicate", "detail": list(duplicates)}) elif "_from" in fieldnames and "_to" in fieldnames: # Edge Table, check that each cell has the correct format valid_cell = re.compile("[^/]+/[^/]+") @@ -46,9 +51,15 @@ def validate_csv(rows): detail.append({"fields": fields, "row": i + 2}) if detail: - return {"error": "syntax", "detail": detail} + data_errors.append({"error": "syntax", "detail": detail}) + else: + # Unsupported Table, error since we don't know what's coming in + data_errors.append({"error": "unsupported"}) - return None + if len(data_errors) > 0: + return {"errors": data_errors} + else: + return None @bp.route("//", methods=["POST"]) @@ -62,14 +73,17 @@ def upload(workspace, table): `_from` and `_to` fields, it will be treated as an edge table. """ app.logger.info("Bulk Loading") + app.logger.info(request.data) + app.logger.info(request.data.decode("utf-8")) # Read the request body into CSV format - body = request.data.decode("utf8") + body = request.data.decode("utf-8") rows = list(csv.DictReader(StringIO(body))) # Perform validation. result = validate_csv(rows) if result: + app.logger.info(result) return (result, "400 CSV Validation Failed") # Set the collection, paying attention to whether the data contains diff --git a/test/data/clubs_broken_encoding.csv b/test/data/clubs_broken_encoding.csv new file mode 100644 index 00000000..5e85ac9d --- /dev/null +++ b/test/data/clubs_broken_encoding.csv @@ -0,0 +1,8 @@ +_key,name +0,St Andrews Lodge +?,Loyal Nine +2,North Caucus +3,Long Room Club +4,? +5,Boston Committee +6,London Enemies From 9350c91d814e502f4656c727c68b31c002b908cc Mon Sep 17 00:00:00 2001 From: JackWilb Date: Wed, 21 Aug 2019 13:50:05 -0600 Subject: [PATCH 02/20] remove testing code --- multinet/uploaders/csv.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index 448b7d27..94d7cab6 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -73,17 +73,14 @@ def upload(workspace, table): `_from` and `_to` fields, it will be treated as an edge table. """ app.logger.info("Bulk Loading") - app.logger.info(request.data) - app.logger.info(request.data.decode("utf-8")) # Read the request body into CSV format - body = request.data.decode("utf-8") + body = request.data.decode("utf8") rows = list(csv.DictReader(StringIO(body))) # Perform validation. result = validate_csv(rows) if result: - app.logger.info(result) return (result, "400 CSV Validation Failed") # Set the collection, paying attention to whether the data contains From d681d93d9a586a59e8dffd86d54315ba823cbea0 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Thu, 22 Aug 2019 09:02:19 -0600 Subject: [PATCH 03/20] move the utf8 decoding check down to where the decoding happens --- multinet/uploaders/csv.py | 10 ++++++---- test/data/clubs_broken_encoding.csv | 8 -------- test/data/clubs_utf16.csv | Bin 0 -> 246 bytes 3 files changed, 6 insertions(+), 12 deletions(-) delete mode 100644 test/data/clubs_broken_encoding.csv create mode 100644 test/data/clubs_utf16.csv diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index 94d7cab6..f1d5011d 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -28,9 +28,6 @@ def validate_csv(rows): else: uniqueKeys.add(key) - if "?" in uniqueKeys: - data_errors.append({"error": "encoding", "detail": "utf-8"}) - if len(duplicates) > 0: data_errors.append({"error": "duplicate", "detail": list(duplicates)}) elif "_from" in fieldnames and "_to" in fieldnames: @@ -75,7 +72,12 @@ def upload(workspace, table): app.logger.info("Bulk Loading") # Read the request body into CSV format - body = request.data.decode("utf8") + try: + body = request.data.decode("utf8") + except UnicodeDecodeError: + response = {"errors": [{"error": "unsupported", "detail": "utf8"}]} + return (response, "400 CSV Decode Failed") + rows = list(csv.DictReader(StringIO(body))) # Perform validation. diff --git a/test/data/clubs_broken_encoding.csv b/test/data/clubs_broken_encoding.csv deleted file mode 100644 index 5e85ac9d..00000000 --- a/test/data/clubs_broken_encoding.csv +++ /dev/null @@ -1,8 +0,0 @@ -_key,name -0,St Andrews Lodge -?,Loyal Nine -2,North Caucus -3,Long Room Club -4,? -5,Boston Committee -6,London Enemies diff --git a/test/data/clubs_utf16.csv b/test/data/clubs_utf16.csv new file mode 100644 index 0000000000000000000000000000000000000000..a2550bb3db723ca97012e9fdc0fb7f261b30bfe1 GIT binary patch literal 246 zcmX|*%L>9U6hvq3SM&p1_};6yD@D}3R2x)m8)zz$pI6TfMM6Vo=FGkMe$INzDQc`% zr5gT6nxKyAm1&Jzsl$CMpmTZjEm2b_(~(|HR_Fn3{TWF?(lO&svW0l*$`?rIL$75{ zrURU3uTeF`1!4hlVp55_=e9W!e~G_=4rtFV>sYs8bI);U`|3Zx8mL`*mr*Xg;0F^G BDkT5_ literal 0 HcmV?d00001 From 27c8332fda3152e895fadef85ffc8556343e18c2 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Wed, 21 Aug 2019 13:42:08 -0600 Subject: [PATCH 04/20] add the ability to catch all issues at once for csv files and a file to cause some issues with encoding --- multinet/uploaders/csv.py | 22 ++++++++++++++++++---- test/data/clubs_broken_encoding.csv | 8 ++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 test/data/clubs_broken_encoding.csv diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index f75f5022..448b7d27 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -14,6 +14,8 @@ def validate_csv(rows): """Perform any necessary CSV validation, and return appropriate errors.""" + data_errors = [] + fieldnames = rows[0].keys() if "_key" in fieldnames: # Node Table, check for key uniqueness @@ -26,8 +28,11 @@ def validate_csv(rows): else: uniqueKeys.add(key) + if "?" in uniqueKeys: + data_errors.append({"error": "encoding", "detail": "utf-8"}) + if len(duplicates) > 0: - return {"error": "duplicate", "detail": list(duplicates)} + data_errors.append({"error": "duplicate", "detail": list(duplicates)}) elif "_from" in fieldnames and "_to" in fieldnames: # Edge Table, check that each cell has the correct format valid_cell = re.compile("[^/]+/[^/]+") @@ -46,9 +51,15 @@ def validate_csv(rows): detail.append({"fields": fields, "row": i + 2}) if detail: - return {"error": "syntax", "detail": detail} + data_errors.append({"error": "syntax", "detail": detail}) + else: + # Unsupported Table, error since we don't know what's coming in + data_errors.append({"error": "unsupported"}) - return None + if len(data_errors) > 0: + return {"errors": data_errors} + else: + return None @bp.route("//
", methods=["POST"]) @@ -62,14 +73,17 @@ def upload(workspace, table): `_from` and `_to` fields, it will be treated as an edge table. """ app.logger.info("Bulk Loading") + app.logger.info(request.data) + app.logger.info(request.data.decode("utf-8")) # Read the request body into CSV format - body = request.data.decode("utf8") + body = request.data.decode("utf-8") rows = list(csv.DictReader(StringIO(body))) # Perform validation. result = validate_csv(rows) if result: + app.logger.info(result) return (result, "400 CSV Validation Failed") # Set the collection, paying attention to whether the data contains diff --git a/test/data/clubs_broken_encoding.csv b/test/data/clubs_broken_encoding.csv new file mode 100644 index 00000000..5e85ac9d --- /dev/null +++ b/test/data/clubs_broken_encoding.csv @@ -0,0 +1,8 @@ +_key,name +0,St Andrews Lodge +?,Loyal Nine +2,North Caucus +3,Long Room Club +4,? +5,Boston Committee +6,London Enemies From 4799d84b39044863b892b465a295114ed468eddf Mon Sep 17 00:00:00 2001 From: JackWilb Date: Wed, 21 Aug 2019 13:50:05 -0600 Subject: [PATCH 05/20] remove testing code --- multinet/uploaders/csv.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index 448b7d27..94d7cab6 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -73,17 +73,14 @@ def upload(workspace, table): `_from` and `_to` fields, it will be treated as an edge table. """ app.logger.info("Bulk Loading") - app.logger.info(request.data) - app.logger.info(request.data.decode("utf-8")) # Read the request body into CSV format - body = request.data.decode("utf-8") + body = request.data.decode("utf8") rows = list(csv.DictReader(StringIO(body))) # Perform validation. result = validate_csv(rows) if result: - app.logger.info(result) return (result, "400 CSV Validation Failed") # Set the collection, paying attention to whether the data contains From 50636e4d1fcc3d6e2eddfd0972bc7b2cc0038d2a Mon Sep 17 00:00:00 2001 From: JackWilb Date: Thu, 22 Aug 2019 09:02:19 -0600 Subject: [PATCH 06/20] move the utf8 decoding check down to where the decoding happens --- multinet/uploaders/csv.py | 10 ++++++---- test/data/clubs_broken_encoding.csv | 8 -------- test/data/clubs_utf16.csv | Bin 0 -> 246 bytes 3 files changed, 6 insertions(+), 12 deletions(-) delete mode 100644 test/data/clubs_broken_encoding.csv create mode 100644 test/data/clubs_utf16.csv diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index 94d7cab6..f1d5011d 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -28,9 +28,6 @@ def validate_csv(rows): else: uniqueKeys.add(key) - if "?" in uniqueKeys: - data_errors.append({"error": "encoding", "detail": "utf-8"}) - if len(duplicates) > 0: data_errors.append({"error": "duplicate", "detail": list(duplicates)}) elif "_from" in fieldnames and "_to" in fieldnames: @@ -75,7 +72,12 @@ def upload(workspace, table): app.logger.info("Bulk Loading") # Read the request body into CSV format - body = request.data.decode("utf8") + try: + body = request.data.decode("utf8") + except UnicodeDecodeError: + response = {"errors": [{"error": "unsupported", "detail": "utf8"}]} + return (response, "400 CSV Decode Failed") + rows = list(csv.DictReader(StringIO(body))) # Perform validation. diff --git a/test/data/clubs_broken_encoding.csv b/test/data/clubs_broken_encoding.csv deleted file mode 100644 index 5e85ac9d..00000000 --- a/test/data/clubs_broken_encoding.csv +++ /dev/null @@ -1,8 +0,0 @@ -_key,name -0,St Andrews Lodge -?,Loyal Nine -2,North Caucus -3,Long Room Club -4,? -5,Boston Committee -6,London Enemies diff --git a/test/data/clubs_utf16.csv b/test/data/clubs_utf16.csv new file mode 100644 index 0000000000000000000000000000000000000000..a2550bb3db723ca97012e9fdc0fb7f261b30bfe1 GIT binary patch literal 246 zcmX|*%L>9U6hvq3SM&p1_};6yD@D}3R2x)m8)zz$pI6TfMM6Vo=FGkMe$INzDQc`% zr5gT6nxKyAm1&Jzsl$CMpmTZjEm2b_(~(|HR_Fn3{TWF?(lO&svW0l*$`?rIL$75{ zrURU3uTeF`1!4hlVp55_=e9W!e~G_=4rtFV>sYs8bI);U`|3Zx8mL`*mr*Xg;0F^G BDkT5_ literal 0 HcmV?d00001 From bce6b262af709fef7c928f62e118b63134ac2fa3 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Thu, 22 Aug 2019 12:00:51 -0600 Subject: [PATCH 07/20] change decode error to not utf --- multinet/uploaders/csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index f1d5011d..a28bbcb4 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -75,7 +75,7 @@ def upload(workspace, table): try: body = request.data.decode("utf8") except UnicodeDecodeError: - response = {"errors": [{"error": "unsupported", "detail": "utf8"}]} + response = {"errors": [{"error": "unsupported", "detail": "not utf8"}]} return (response, "400 CSV Decode Failed") rows = list(csv.DictReader(StringIO(body))) From bdb42a6684358e4e0d677cbef191dab9f4b85511 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Thu, 22 Aug 2019 12:02:16 -0600 Subject: [PATCH 08/20] add files to test newick imports and add validation to newick files similar to the csv validation --- multinet/uploaders/newick.py | 64 ++++++++++++++++++++++++- test/data/basic_newick.tree | 1 + test/data/basic_newick_duplicates.tree | 1 + test/data/basic_newick_utf16.tree | Bin 0 -> 32 bytes 4 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 test/data/basic_newick.tree create mode 100644 test/data/basic_newick_duplicates.tree create mode 100644 test/data/basic_newick_utf16.tree diff --git a/multinet/uploaders/newick.py b/multinet/uploaders/newick.py index b7c134b7..c5cba7a9 100644 --- a/multinet/uploaders/newick.py +++ b/multinet/uploaders/newick.py @@ -11,6 +11,57 @@ bp.before_request(api.require_db) +def validate_newick(tree): + """Validate newick tree.""" + data_errors = [] + unique_keys = [] + duplicate_keys = [] + unique_edges = [] + duplicate_edges = [] + + def read_tree(parent, node): + nonlocal data_errors + nonlocal unique_keys + nonlocal duplicate_keys + nonlocal unique_edges + nonlocal duplicate_edges + + key = node.name or uuid.uuid4().hex + + if key not in unique_keys: + unique_keys.append(key) + elif key not in duplicate_keys: + duplicate_keys.append(key) + + for desc in node.descendants: + read_tree(key, desc) + + if parent: + edge = { + "_from": "table/%s" % (parent), + "_to": "table/%s" % (key), + "length": node.length, + } + + if edge not in unique_edges: + unique_edges.append(edge) + else: + duplicate_edges.append(edge) + + read_tree(None, tree[0]) + + if len(duplicate_keys) > 0: + data_errors.append({"error": "duplicate", "detail": duplicate_keys}) + + if len(duplicate_edges) > 0: + data_errors.append({"error": "duplicate", "detail": duplicate_edges}) + + if len(data_errors) > 0: + return data_errors + else: + return + + @bp.route("//
", methods=["POST"]) def upload(workspace, table): """ @@ -21,7 +72,18 @@ def upload(workspace, table): `data` - the newick data, passed in the request body. """ app.logger.info("newick tree") - tree = newick.loads(request.data.decode("utf8")) + try: + body = request.data.decode("utf8") + except UnicodeDecodeError: + response = {"errors": [{"error": "unsupported", "detail": "not utf8"}]} + return (response, "400 Nested Json Decode Failed") + + tree = newick.loads(body) + + result = validate_newick(tree) + if result: + return ({"errors": result}, "400 Newick Validation Failed") + workspace = db.db(workspace) edgetable_name = "%s_edges" % table nodetable_name = "%s_nodes" % table diff --git a/test/data/basic_newick.tree b/test/data/basic_newick.tree new file mode 100644 index 00000000..e7548cac --- /dev/null +++ b/test/data/basic_newick.tree @@ -0,0 +1 @@ +(B,(A,C,E),D); diff --git a/test/data/basic_newick_duplicates.tree b/test/data/basic_newick_duplicates.tree new file mode 100644 index 00000000..7886bd9c --- /dev/null +++ b/test/data/basic_newick_duplicates.tree @@ -0,0 +1 @@ +(B,(A,C,A),D); diff --git a/test/data/basic_newick_utf16.tree b/test/data/basic_newick_utf16.tree new file mode 100644 index 0000000000000000000000000000000000000000..ae988151e49b3e72ff84a1a06bd74a427e0b4718 GIT binary patch literal 32 hcmezWPlLgUL5D$u!4XJ01F Date: Thu, 22 Aug 2019 12:06:10 -0600 Subject: [PATCH 09/20] fix issue with duplicate edges --- multinet/uploaders/newick.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multinet/uploaders/newick.py b/multinet/uploaders/newick.py index c5cba7a9..1504521b 100644 --- a/multinet/uploaders/newick.py +++ b/multinet/uploaders/newick.py @@ -45,7 +45,7 @@ def read_tree(parent, node): if edge not in unique_edges: unique_edges.append(edge) - else: + elif edge not in duplicate_edges: duplicate_edges.append(edge) read_tree(None, tree[0]) From 71b739b1c5e7137e744a17c43fc5d21b8ef6ffa1 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Fri, 23 Aug 2019 15:32:11 -0600 Subject: [PATCH 10/20] fix testing for the decode function and move the decode function outside of the main flask route --- multinet/uploaders/csv.py | 15 ++++++++++++--- test/test_csv_uploader.py | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index a28bbcb4..25712b4c 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -59,6 +59,16 @@ def validate_csv(rows): return None +def decode_data(input): + """Decode the request data assuming utf8 encoding.""" + try: + body = input.decode("utf8") + except UnicodeDecodeError: + return None + + return body + + @bp.route("//
", methods=["POST"]) def upload(workspace, table): """ @@ -72,9 +82,8 @@ def upload(workspace, table): app.logger.info("Bulk Loading") # Read the request body into CSV format - try: - body = request.data.decode("utf8") - except UnicodeDecodeError: + body = decode_data(request.data) + if not body: response = {"errors": [{"error": "unsupported", "detail": "not utf8"}]} return (response, "400 CSV Decode Failed") diff --git a/test/test_csv_uploader.py b/test/test_csv_uploader.py index f740f4e0..2e66a4f6 100644 --- a/test/test_csv_uploader.py +++ b/test/test_csv_uploader.py @@ -3,7 +3,7 @@ from io import StringIO import os -from multinet.uploaders.csv import validate_csv +from multinet.uploaders.csv import validate_csv, decode_data TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) @@ -24,9 +24,23 @@ def test_validate_csv(): rows = list(csv.DictReader(StringIO(test_file))) validation_resp = validate_csv(rows) - assert "error" in validation_resp.keys() - assert "5" in validation_resp["detail"] - assert "2" in validation_resp["detail"] + assert "errors" in validation_resp.keys() + assert ( + "5" + in [ + error + for error in validation_resp["errors"] + if (error["error"] == "duplicate") + ][0]["detail"] + ) + assert ( + "2" + in [ + error + for error in validation_resp["errors"] + if (error["error"] == "duplicate") + ][0]["detail"] + ) # Test invalid syntax with open(invalid_headers_file_path) as test_file: @@ -34,8 +48,18 @@ def test_validate_csv(): rows = list(csv.DictReader(StringIO(test_file))) validation_resp = validate_csv(rows) - invalid_rows = [x["row"] for x in validation_resp["detail"]] - assert "error" in validation_resp.keys() + invalid_rows = [ + x["row"] + for x in [ + error for error in validation_resp["errors"] if (error["error"] == "syntax") + ][0]["detail"] + ] + assert "errors" in validation_resp.keys() assert 3 in invalid_rows assert 4 in invalid_rows assert 5 in invalid_rows + + # Test unicode decode errors + test_data = b"\xff\xfe_\x00k\x00e\x00y\x00,\x00n\x00a\x00m\x00e\x00\n" + decoded_data = decode_data(test_data) + assert decoded_data is None From 360afd948daaf410a0c08376cefeb37ebffc0543 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Mon, 26 Aug 2019 10:28:07 -0600 Subject: [PATCH 11/20] fix merge conflict from stashed changes and separate out the data decoding --- multinet/uploaders/newick.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/multinet/uploaders/newick.py b/multinet/uploaders/newick.py index 1504521b..f129a244 100644 --- a/multinet/uploaders/newick.py +++ b/multinet/uploaders/newick.py @@ -62,6 +62,16 @@ def read_tree(parent, node): return +def decode_data(input): + """Decode the request data assuming utf8 encoding.""" + try: + body = input.decode("utf8") + except UnicodeDecodeError: + return None + + return body + + @bp.route("//
", methods=["POST"]) def upload(workspace, table): """ @@ -72,11 +82,11 @@ def upload(workspace, table): `data` - the newick data, passed in the request body. """ app.logger.info("newick tree") - try: - body = request.data.decode("utf8") - except UnicodeDecodeError: + + body = decode_data(request.data) + if not body: response = {"errors": [{"error": "unsupported", "detail": "not utf8"}]} - return (response, "400 Nested Json Decode Failed") + return (response, "400 Newick Decode Failed") tree = newick.loads(body) From 8f5b99d28e9b822b6d7e48e3f4a38982e30c447f Mon Sep 17 00:00:00 2001 From: JackWilb Date: Mon, 26 Aug 2019 11:32:53 -0600 Subject: [PATCH 12/20] remove problematic files --- multinet/test/test_db.py | 21 --------------------- multinet/test/test_multinet.py | 0 2 files changed, 21 deletions(-) delete mode 100644 multinet/test/test_db.py delete mode 100644 multinet/test/test_multinet.py diff --git a/multinet/test/test_db.py b/multinet/test/test_db.py deleted file mode 100644 index ec951d83..00000000 --- a/multinet/test/test_db.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Tests the db.py file in the multinet directory""" -import pytest - -from multinet import db - - -# @pytest.fixture -# def db(): -# """Return db instance using multinet.db.db.""" -# return multinet_db('test_db') - - -def test_create_workspace(): - """Test that this workspace exists after function call.""" - name = 'test_workspace_131312' - sys = db.db('_system') - print('sys', sys) - - assert(not sys.has_database(name)) - db.create_workspace(name) - assert(sys.has_database(name)) diff --git a/multinet/test/test_multinet.py b/multinet/test/test_multinet.py deleted file mode 100644 index e69de29b..00000000 From 86a24f0f9bfb8ab23ffc1243258b82ad360bbf53 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Mon, 26 Aug 2019 12:02:52 -0600 Subject: [PATCH 13/20] fix the newick uploader to be consistent with csv (returns) and add a test --- multinet/uploaders/newick.py | 4 ++-- test/test_newick_uploader.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 test/test_newick_uploader.py diff --git a/multinet/uploaders/newick.py b/multinet/uploaders/newick.py index f129a244..d7cf25cf 100644 --- a/multinet/uploaders/newick.py +++ b/multinet/uploaders/newick.py @@ -57,7 +57,7 @@ def read_tree(parent, node): data_errors.append({"error": "duplicate", "detail": duplicate_edges}) if len(data_errors) > 0: - return data_errors + return {"errors": data_errors} else: return @@ -92,7 +92,7 @@ def upload(workspace, table): result = validate_newick(tree) if result: - return ({"errors": result}, "400 Newick Validation Failed") + return (result, "400 Newick Validation Failed") workspace = db.db(workspace) edgetable_name = "%s_edges" % table diff --git a/test/test_newick_uploader.py b/test/test_newick_uploader.py new file mode 100644 index 00000000..8b4eac93 --- /dev/null +++ b/test/test_newick_uploader.py @@ -0,0 +1,30 @@ +"""Tests functions in the Neick Uploader Flask Blueprint.""" +import newick +import os + +from multinet.uploaders.newick import validate_newick, decode_data + +TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) + + +def test_validate_newick(): + """Tests the validate_csv function.""" + duplicate_keys_file_path = os.path.join( + TEST_DATA_DIR, "basic_newick_duplicates.tree" + ) + + # Test duplicate keys + with open(duplicate_keys_file_path) as test_file: + test_file = test_file.read() + + body = newick.loads(test_file) + validation_resp = validate_newick(body) + assert "errors" in validation_resp.keys() + + # Test unicode decode errors + test_data = ( + b"\xff\xfe(\x00B\x00,\x00(\x00A\x00," + b"\x00C\x00,\x00E\x00)\x00,\x00D\x00)\x00;\x00\n\x00" + ) + decoded_data = decode_data(test_data) + assert decoded_data is None From 3ebc27105177d43a590b3a1223b889f8bab4071b Mon Sep 17 00:00:00 2001 From: JackWilb Date: Tue, 27 Aug 2019 14:14:53 -0600 Subject: [PATCH 14/20] fix tests to be more concise --- test/test_csv_uploader.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/test/test_csv_uploader.py b/test/test_csv_uploader.py index 2e66a4f6..dbd2bfeb 100644 --- a/test/test_csv_uploader.py +++ b/test/test_csv_uploader.py @@ -25,22 +25,13 @@ def test_validate_csv(): rows = list(csv.DictReader(StringIO(test_file))) validation_resp = validate_csv(rows) assert "errors" in validation_resp.keys() - assert ( - "5" - in [ - error - for error in validation_resp["errors"] - if (error["error"] == "duplicate") - ][0]["detail"] - ) - assert ( - "2" - in [ - error - for error in validation_resp["errors"] - if (error["error"] == "duplicate") - ][0]["detail"] - ) + duplicate_keys = [ + error["detail"] + for error in validation_resp["errors"] + if (error["error"] == "duplicate") + ][0] + assert "5" in duplicate_keys + assert "2" in duplicate_keys # Test invalid syntax with open(invalid_headers_file_path) as test_file: From 46e2d66982e40a78a992956ce0fba9d7861a61e7 Mon Sep 17 00:00:00 2001 From: JackWilb Date: Tue, 27 Aug 2019 14:30:17 -0600 Subject: [PATCH 15/20] fix error handling in the validation checks --- multinet/errors.py | 12 ++++++++++++ multinet/uploaders/csv.py | 13 ++++--------- multinet/uploaders/newick.py | 12 ++++-------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/multinet/errors.py b/multinet/errors.py index 38a0b51e..fe008be9 100644 --- a/multinet/errors.py +++ b/multinet/errors.py @@ -153,3 +153,15 @@ class DatabaseNotLive(ServerError): def flask_response(self): """Generate a 500 error.""" return ("", "500 Database Not Live") + + +class DecodeFailed(ServerError): + """Exception for reporting decoding errors.""" + + def __init__(self, errors): + """Initialize the exception.""" + self.errors = errors + + def flask_response(self): + """Generate a 400 error.""" + return ({"errors": self.errors}, "400 Validation Failed") diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index c0aed38d..83e6fc2c 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -4,7 +4,7 @@ import re from .. import db, util -from ..errors import ValidationFailed +from ..errors import ValidationFailed, DecodeFailed from flask import Blueprint, request from flask import current_app as app @@ -55,7 +55,7 @@ def validate_csv(rows): data_errors.append({"error": "unsupported"}) if len(data_errors) > 0: - return {"errors": data_errors} + raise ValidationFailed(data_errors) else: return None @@ -65,7 +65,7 @@ def decode_data(input): try: body = input.decode("utf8") except UnicodeDecodeError: - return None + raise DecodeFailed([{"error": "unsupported", "detail": "not utf8"}]) return body @@ -84,16 +84,11 @@ def upload(workspace, table): # Read the request body into CSV format body = decode_data(request.data) - if not body: - response = {"errors": [{"error": "unsupported", "detail": "not utf8"}]} - return (response, "400 CSV Decode Failed") rows = list(csv.DictReader(StringIO(body))) # Perform validation. - result = validate_csv(rows) - if result: - raise ValidationFailed(result) + validate_csv(rows) # Set the collection, paying attention to whether the data contains # _from/_to fields. diff --git a/multinet/uploaders/newick.py b/multinet/uploaders/newick.py index 189677c0..22196945 100644 --- a/multinet/uploaders/newick.py +++ b/multinet/uploaders/newick.py @@ -3,6 +3,7 @@ import newick from .. import db, util +from ..errors import ValidationFailed, DecodeFailed from flask import Blueprint, request from flask import current_app as app @@ -57,7 +58,7 @@ def read_tree(parent, node): data_errors.append({"error": "duplicate", "detail": duplicate_edges}) if len(data_errors) > 0: - return {"errors": data_errors} + raise ValidationFailed(data_errors) else: return @@ -67,7 +68,7 @@ def decode_data(input): try: body = input.decode("utf8") except UnicodeDecodeError: - return None + raise DecodeFailed([{"error": "unsupported", "detail": "not utf8"}]) return body @@ -84,15 +85,10 @@ def upload(workspace, table): app.logger.info("newick tree") body = decode_data(request.data) - if not body: - response = {"errors": [{"error": "unsupported", "detail": "not utf8"}]} - return (response, "400 Newick Decode Failed") tree = newick.loads(body) - result = validate_newick(tree) - if result: - return (result, "400 Newick Validation Failed") + validate_newick(tree) workspace = db.db(workspace) edgetable_name = "%s_edges" % table From b7941f73bf7d8c20f3343f4270d239a18cfb725f Mon Sep 17 00:00:00 2001 From: JackWilb Date: Tue, 27 Aug 2019 14:51:00 -0600 Subject: [PATCH 16/20] fix test to deal with the new error raising --- test/test_csv_uploader.py | 27 +++++---------------------- test/test_newick_uploader.py | 8 ++++---- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/test/test_csv_uploader.py b/test/test_csv_uploader.py index dbd2bfeb..da774e1c 100644 --- a/test/test_csv_uploader.py +++ b/test/test_csv_uploader.py @@ -2,7 +2,9 @@ import csv from io import StringIO import os +import pytest +from multinet.errors import ValidationFailed, DecodeFailed from multinet.uploaders.csv import validate_csv, decode_data TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) @@ -23,34 +25,15 @@ def test_validate_csv(): test_file = test_file.read() rows = list(csv.DictReader(StringIO(test_file))) - validation_resp = validate_csv(rows) - assert "errors" in validation_resp.keys() - duplicate_keys = [ - error["detail"] - for error in validation_resp["errors"] - if (error["error"] == "duplicate") - ][0] - assert "5" in duplicate_keys - assert "2" in duplicate_keys + pytest.raises(ValidationFailed, validate_csv, rows) # Test invalid syntax with open(invalid_headers_file_path) as test_file: test_file = test_file.read() rows = list(csv.DictReader(StringIO(test_file))) - validation_resp = validate_csv(rows) - invalid_rows = [ - x["row"] - for x in [ - error for error in validation_resp["errors"] if (error["error"] == "syntax") - ][0]["detail"] - ] - assert "errors" in validation_resp.keys() - assert 3 in invalid_rows - assert 4 in invalid_rows - assert 5 in invalid_rows + pytest.raises(ValidationFailed, validate_csv, rows) # Test unicode decode errors test_data = b"\xff\xfe_\x00k\x00e\x00y\x00,\x00n\x00a\x00m\x00e\x00\n" - decoded_data = decode_data(test_data) - assert decoded_data is None + pytest.raises(DecodeFailed, decode_data, test_data) diff --git a/test/test_newick_uploader.py b/test/test_newick_uploader.py index 8b4eac93..38f2c717 100644 --- a/test/test_newick_uploader.py +++ b/test/test_newick_uploader.py @@ -1,7 +1,9 @@ """Tests functions in the Neick Uploader Flask Blueprint.""" import newick import os +import pytest +from multinet.errors import ValidationFailed, DecodeFailed from multinet.uploaders.newick import validate_newick, decode_data TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) @@ -18,13 +20,11 @@ def test_validate_newick(): test_file = test_file.read() body = newick.loads(test_file) - validation_resp = validate_newick(body) - assert "errors" in validation_resp.keys() + pytest.raises(ValidationFailed, validate_newick, body) # Test unicode decode errors test_data = ( b"\xff\xfe(\x00B\x00,\x00(\x00A\x00," b"\x00C\x00,\x00E\x00)\x00,\x00D\x00)\x00;\x00\n\x00" ) - decoded_data = decode_data(test_data) - assert decoded_data is None + pytest.raises(DecodeFailed, decode_data, test_data) From fd830edb5762315ebaac0e8e477bbd95d3b5edfb Mon Sep 17 00:00:00 2001 From: JackWilb Date: Tue, 27 Aug 2019 15:10:23 -0600 Subject: [PATCH 17/20] Revert "fix test to deal with the new error raising" This reverts commit b7941f73bf7d8c20f3343f4270d239a18cfb725f. --- test/test_csv_uploader.py | 27 ++++++++++++++++++++++----- test/test_newick_uploader.py | 8 ++++---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/test/test_csv_uploader.py b/test/test_csv_uploader.py index da774e1c..dbd2bfeb 100644 --- a/test/test_csv_uploader.py +++ b/test/test_csv_uploader.py @@ -2,9 +2,7 @@ import csv from io import StringIO import os -import pytest -from multinet.errors import ValidationFailed, DecodeFailed from multinet.uploaders.csv import validate_csv, decode_data TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) @@ -25,15 +23,34 @@ def test_validate_csv(): test_file = test_file.read() rows = list(csv.DictReader(StringIO(test_file))) - pytest.raises(ValidationFailed, validate_csv, rows) + validation_resp = validate_csv(rows) + assert "errors" in validation_resp.keys() + duplicate_keys = [ + error["detail"] + for error in validation_resp["errors"] + if (error["error"] == "duplicate") + ][0] + assert "5" in duplicate_keys + assert "2" in duplicate_keys # Test invalid syntax with open(invalid_headers_file_path) as test_file: test_file = test_file.read() rows = list(csv.DictReader(StringIO(test_file))) - pytest.raises(ValidationFailed, validate_csv, rows) + validation_resp = validate_csv(rows) + invalid_rows = [ + x["row"] + for x in [ + error for error in validation_resp["errors"] if (error["error"] == "syntax") + ][0]["detail"] + ] + assert "errors" in validation_resp.keys() + assert 3 in invalid_rows + assert 4 in invalid_rows + assert 5 in invalid_rows # Test unicode decode errors test_data = b"\xff\xfe_\x00k\x00e\x00y\x00,\x00n\x00a\x00m\x00e\x00\n" - pytest.raises(DecodeFailed, decode_data, test_data) + decoded_data = decode_data(test_data) + assert decoded_data is None diff --git a/test/test_newick_uploader.py b/test/test_newick_uploader.py index 38f2c717..8b4eac93 100644 --- a/test/test_newick_uploader.py +++ b/test/test_newick_uploader.py @@ -1,9 +1,7 @@ """Tests functions in the Neick Uploader Flask Blueprint.""" import newick import os -import pytest -from multinet.errors import ValidationFailed, DecodeFailed from multinet.uploaders.newick import validate_newick, decode_data TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) @@ -20,11 +18,13 @@ def test_validate_newick(): test_file = test_file.read() body = newick.loads(test_file) - pytest.raises(ValidationFailed, validate_newick, body) + validation_resp = validate_newick(body) + assert "errors" in validation_resp.keys() # Test unicode decode errors test_data = ( b"\xff\xfe(\x00B\x00,\x00(\x00A\x00," b"\x00C\x00,\x00E\x00)\x00,\x00D\x00)\x00;\x00\n\x00" ) - pytest.raises(DecodeFailed, decode_data, test_data) + decoded_data = decode_data(test_data) + assert decoded_data is None From 8baf84f626482a0fbccd9973f871ca033d0420eb Mon Sep 17 00:00:00 2001 From: JackWilb Date: Tue, 27 Aug 2019 15:29:09 -0600 Subject: [PATCH 18/20] Fix tests for new methodology after rolling back the past commits --- test/test_csv_uploader.py | 33 ++++++++++++++++----------------- test/test_newick_uploader.py | 13 +++++++++---- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/test/test_csv_uploader.py b/test/test_csv_uploader.py index dbd2bfeb..34e1f873 100644 --- a/test/test_csv_uploader.py +++ b/test/test_csv_uploader.py @@ -2,7 +2,9 @@ import csv from io import StringIO import os +import pytest +from multinet.errors import ValidationFailed, DecodeFailed from multinet.uploaders.csv import validate_csv, decode_data TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) @@ -23,13 +25,13 @@ def test_validate_csv(): test_file = test_file.read() rows = list(csv.DictReader(StringIO(test_file))) - validation_resp = validate_csv(rows) - assert "errors" in validation_resp.keys() - duplicate_keys = [ - error["detail"] - for error in validation_resp["errors"] - if (error["error"] == "duplicate") - ][0] + + with pytest.raises(ValidationFailed) as v_error: + validate_csv(rows) + + validation_resp = v_error.value.errors[0] + assert "error" in validation_resp + duplicate_keys = validation_resp["detail"] assert "5" in duplicate_keys assert "2" in duplicate_keys @@ -38,19 +40,16 @@ def test_validate_csv(): test_file = test_file.read() rows = list(csv.DictReader(StringIO(test_file))) - validation_resp = validate_csv(rows) - invalid_rows = [ - x["row"] - for x in [ - error for error in validation_resp["errors"] if (error["error"] == "syntax") - ][0]["detail"] - ] - assert "errors" in validation_resp.keys() + with pytest.raises(ValidationFailed) as v_error: + validate_csv(rows) + + validation_resp = v_error.value.errors[0] + invalid_rows = [x["row"] for x in validation_resp["detail"]] + assert "error" in validation_resp assert 3 in invalid_rows assert 4 in invalid_rows assert 5 in invalid_rows # Test unicode decode errors test_data = b"\xff\xfe_\x00k\x00e\x00y\x00,\x00n\x00a\x00m\x00e\x00\n" - decoded_data = decode_data(test_data) - assert decoded_data is None + pytest.raises(DecodeFailed, decode_data, test_data) diff --git a/test/test_newick_uploader.py b/test/test_newick_uploader.py index 8b4eac93..25688755 100644 --- a/test/test_newick_uploader.py +++ b/test/test_newick_uploader.py @@ -1,7 +1,9 @@ """Tests functions in the Neick Uploader Flask Blueprint.""" import newick import os +import pytest +from multinet.errors import ValidationFailed, DecodeFailed from multinet.uploaders.newick import validate_newick, decode_data TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) @@ -18,13 +20,16 @@ def test_validate_newick(): test_file = test_file.read() body = newick.loads(test_file) - validation_resp = validate_newick(body) - assert "errors" in validation_resp.keys() + + with pytest.raises(ValidationFailed) as v_error: + validate_newick(body) + + validation_resp = v_error.value.errors[0] + assert "error" in validation_resp.keys() # Test unicode decode errors test_data = ( b"\xff\xfe(\x00B\x00,\x00(\x00A\x00," b"\x00C\x00,\x00E\x00)\x00,\x00D\x00)\x00;\x00\n\x00" ) - decoded_data = decode_data(test_data) - assert decoded_data is None + pytest.raises(DecodeFailed, decode_data, test_data) From 7e2541bef82d60a71dd9421fc227ba345c38441f Mon Sep 17 00:00:00 2001 From: JackWilb Date: Wed, 28 Aug 2019 12:18:10 -0600 Subject: [PATCH 19/20] move decode_data to utils and clean up some unneeded code --- multinet/errors.py | 2 +- multinet/uploaders/csv.py | 13 ++----------- multinet/uploaders/newick.py | 19 ++----------------- multinet/util.py | 12 +++++++++++- 4 files changed, 16 insertions(+), 30 deletions(-) diff --git a/multinet/errors.py b/multinet/errors.py index fe008be9..59ef5ed8 100644 --- a/multinet/errors.py +++ b/multinet/errors.py @@ -164,4 +164,4 @@ def __init__(self, errors): def flask_response(self): """Generate a 400 error.""" - return ({"errors": self.errors}, "400 Validation Failed") + return ({"errors": self.errors}, "400 Decode Failed") diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index 83e6fc2c..090c14fd 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -4,7 +4,8 @@ import re from .. import db, util -from ..errors import ValidationFailed, DecodeFailed +from ..errors import ValidationFailed +from ..util import decode_data from flask import Blueprint, request from flask import current_app as app @@ -60,16 +61,6 @@ def validate_csv(rows): return None -def decode_data(input): - """Decode the request data assuming utf8 encoding.""" - try: - body = input.decode("utf8") - except UnicodeDecodeError: - raise DecodeFailed([{"error": "unsupported", "detail": "not utf8"}]) - - return body - - @bp.route("//
", methods=["POST"]) def upload(workspace, table): """ diff --git a/multinet/uploaders/newick.py b/multinet/uploaders/newick.py index 22196945..1a1d02ff 100644 --- a/multinet/uploaders/newick.py +++ b/multinet/uploaders/newick.py @@ -3,7 +3,8 @@ import newick from .. import db, util -from ..errors import ValidationFailed, DecodeFailed +from ..errors import ValidationFailed +from ..util import decode_data from flask import Blueprint, request from flask import current_app as app @@ -21,12 +22,6 @@ def validate_newick(tree): duplicate_edges = [] def read_tree(parent, node): - nonlocal data_errors - nonlocal unique_keys - nonlocal duplicate_keys - nonlocal unique_edges - nonlocal duplicate_edges - key = node.name or uuid.uuid4().hex if key not in unique_keys: @@ -63,16 +58,6 @@ def read_tree(parent, node): return -def decode_data(input): - """Decode the request data assuming utf8 encoding.""" - try: - body = input.decode("utf8") - except UnicodeDecodeError: - raise DecodeFailed([{"error": "unsupported", "detail": "not utf8"}]) - - return body - - @bp.route("//
", methods=["POST"]) def upload(workspace, table): """ diff --git a/multinet/util.py b/multinet/util.py index 21675b00..389b90f7 100644 --- a/multinet/util.py +++ b/multinet/util.py @@ -4,7 +4,7 @@ from flask import Response from . import db -from .errors import DatabaseNotLive +from .errors import DatabaseNotLive, DecodeFailed def generate(iterator): @@ -28,3 +28,13 @@ def require_db(): """Check if the db is live.""" if not db.check_db(): raise DatabaseNotLive() + + +def decode_data(input): + """Decode the request data assuming utf8 encoding.""" + try: + body = input.decode("utf8") + except UnicodeDecodeError as e: + raise DecodeFailed([{"error": "utf8", "detail": str(e)}]) + + return body From 37298f835f14d87e5edb72e5d8bd21939fd34f4b Mon Sep 17 00:00:00 2001 From: JackWilb Date: Wed, 28 Aug 2019 14:14:10 -0600 Subject: [PATCH 20/20] update the decode error to take only one error, not a list --- multinet/errors.py | 6 +++--- multinet/util.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/multinet/errors.py b/multinet/errors.py index 59ef5ed8..a6329e26 100644 --- a/multinet/errors.py +++ b/multinet/errors.py @@ -158,10 +158,10 @@ def flask_response(self): class DecodeFailed(ServerError): """Exception for reporting decoding errors.""" - def __init__(self, errors): + def __init__(self, error): """Initialize the exception.""" - self.errors = errors + self.error = error def flask_response(self): """Generate a 400 error.""" - return ({"errors": self.errors}, "400 Decode Failed") + return (self.error, "400 Decode Failed") diff --git a/multinet/util.py b/multinet/util.py index 389b90f7..455caf86 100644 --- a/multinet/util.py +++ b/multinet/util.py @@ -35,6 +35,6 @@ def decode_data(input): try: body = input.decode("utf8") except UnicodeDecodeError as e: - raise DecodeFailed([{"error": "utf8", "detail": str(e)}]) + raise DecodeFailed({"error": "utf8", "detail": str(e)}) return body