Skip to content
This repository was archived by the owner on Feb 23, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
83725d9
add the ability to catch all issues at once for csv files and a file …
JackWilb Aug 21, 2019
9350c91
remove testing code
JackWilb Aug 21, 2019
d681d93
move the utf8 decoding check down to where the decoding happens
JackWilb Aug 22, 2019
27c8332
add the ability to catch all issues at once for csv files and a file …
JackWilb Aug 21, 2019
4799d84
remove testing code
JackWilb Aug 21, 2019
50636e4
move the utf8 decoding check down to where the decoding happens
JackWilb Aug 22, 2019
bce6b26
change decode error to not utf
JackWilb Aug 22, 2019
bdb42a6
add files to test newick imports and add validation to newick files s…
JackWilb Aug 22, 2019
eb951d3
fix merge conflicts
JackWilb Aug 22, 2019
b86c4bb
fix issue with duplicate edges
JackWilb Aug 22, 2019
71b739b
fix testing for the decode function and move the decode function outs…
JackWilb Aug 23, 2019
360afd9
fix merge conflict from stashed changes and separate out the data dec…
JackWilb Aug 26, 2019
f75363a
Merge branch 'master' into upload-validation-errors
JackWilb Aug 26, 2019
8f5b99d
remove problematic files
JackWilb Aug 26, 2019
86a24f0
fix the newick uploader to be consistent with csv (returns) and add a…
JackWilb Aug 26, 2019
7bacd3f
Merge branch 'master' into upload-validation-errors
JackWilb Aug 27, 2019
b8137b3
Merge branch 'master' into upload-validation-errors
JackWilb Aug 27, 2019
6a6912e
Merge branch 'master' into upload-validation-errors
JackWilb Aug 27, 2019
3ebc271
fix tests to be more concise
JackWilb Aug 27, 2019
46e2d66
fix error handling in the validation checks
JackWilb Aug 27, 2019
b7941f7
fix test to deal with the new error raising
JackWilb Aug 27, 2019
fd830ed
Revert "fix test to deal with the new error raising"
JackWilb Aug 27, 2019
8baf84f
Fix tests for new methodology after rolling back the past commits
JackWilb Aug 27, 2019
7e37fe0
Merge branch 'master' into upload-validation-errors
JackWilb Aug 28, 2019
7e2541b
move decode_data to utils and clean up some unneeded code
JackWilb Aug 28, 2019
0d56835
Merge branch 'upload-validation-errors' of github.com:multinet-app/mu…
JackWilb Aug 28, 2019
37298f8
update the decode error to take only one error, not a list
JackWilb Aug 28, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions multinet/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,15 @@ class DatabaseNotLive(ServerError):
def flask_response(self):
"""Generate a 500 error."""
return ("", "500 Database Not Live")


class DecodeFailed(ServerError):
    """Raised when a request body cannot be decoded as text."""

    def __init__(self, error):
        """Store the decoding error payload reported to the client."""
        self.error = error

    def flask_response(self):
        """Generate a 400 error."""
        status = "400 Decode Failed"
        return (self.error, status)
22 changes: 15 additions & 7 deletions multinet/uploaders/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from .. import db, util
from ..errors import ValidationFailed
from ..util import decode_data

from flask import Blueprint, request
from flask import current_app as app
Expand All @@ -15,6 +16,8 @@

def validate_csv(rows):
"""Perform any necessary CSV validation, and return appropriate errors."""
data_errors = []

fieldnames = rows[0].keys()
if "_key" in fieldnames:
# Node Table, check for key uniqueness
Expand All @@ -28,7 +31,7 @@ def validate_csv(rows):
unique_keys.add(key)

if len(duplicates) > 0:
return {"error": "duplicate", "detail": list(duplicates)}
data_errors.append({"error": "duplicate", "detail": list(duplicates)})
elif "_from" in fieldnames and "_to" in fieldnames:
# Edge Table, check that each cell has the correct format
valid_cell = re.compile("[^/]+/[^/]+")
Expand All @@ -47,9 +50,15 @@ def validate_csv(rows):
detail.append({"fields": fields, "row": i + 2})

if detail:
return {"error": "syntax", "detail": detail}
data_errors.append({"error": "syntax", "detail": detail})
else:
# Unsupported Table, error since we don't know what's coming in
data_errors.append({"error": "unsupported"})
Comment thread
JackWilb marked this conversation as resolved.

return None
if len(data_errors) > 0:
raise ValidationFailed(data_errors)
else:
return None
Comment thread
JackWilb marked this conversation as resolved.


@bp.route("/<workspace>/<table>", methods=["POST"])
Expand All @@ -65,13 +74,12 @@ def upload(workspace, table):
app.logger.info("Bulk Loading")

# Read the request body into CSV format
body = request.data.decode("utf8")
body = decode_data(request.data)

rows = list(csv.DictReader(StringIO(body)))

# Perform validation.
result = validate_csv(rows)
if result:
raise ValidationFailed(result)
validate_csv(rows)

# Set the collection, paying attention to whether the data contains
# _from/_to fields.
Expand Down
55 changes: 54 additions & 1 deletion multinet/uploaders/newick.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import newick

from .. import db, util
from ..errors import ValidationFailed
from ..util import decode_data

from flask import Blueprint, request
from flask import current_app as app
Expand All @@ -11,6 +13,51 @@
bp.before_request(util.require_db)


def validate_newick(tree):
    """Validate a parsed newick tree, checking for duplicate nodes/edges.

    `tree` is the list of root nodes produced by ``newick.loads``; only
    the first tree is walked, matching the uploader's behavior.

    Raises:
        ValidationFailed: with a list of error dicts if any duplicate
            node keys or duplicate edges are found. Returns None when
            the tree is clean.
    """
    seen_keys = set()
    dup_key_set = set()
    duplicate_keys = []  # order of first-detected duplicates is preserved
    seen_edges = set()
    dup_edge_set = set()
    duplicate_edges = []

    def read_tree(parent, node):
        # Unnamed nodes receive a random surrogate key, so they can never
        # register as duplicates of real, named nodes.
        key = node.name or uuid.uuid4().hex

        # Record a key as duplicate only once, on its second occurrence.
        if key in seen_keys and key not in dup_key_set:
            dup_key_set.add(key)
            duplicate_keys.append(key)
        seen_keys.add(key)

        for child in node.descendants:
            read_tree(key, child)

        if parent:
            # A hashable signature stands in for the edge dict, giving
            # O(1) duplicate detection instead of scanning a list.
            signature = (parent, key, node.length)
            if signature in seen_edges and signature not in dup_edge_set:
                dup_edge_set.add(signature)
                duplicate_edges.append(
                    {
                        "_from": "table/%s" % (parent),
                        "_to": "table/%s" % (key),
                        "length": node.length,
                    }
                )
            seen_edges.add(signature)

    read_tree(None, tree[0])

    data_errors = []
    if duplicate_keys:
        data_errors.append({"error": "duplicate", "detail": duplicate_keys})

    if duplicate_edges:
        data_errors.append({"error": "duplicate", "detail": duplicate_edges})

    if data_errors:
        raise ValidationFailed(data_errors)
Comment thread
JackWilb marked this conversation as resolved.


@bp.route("/<workspace>/<table>", methods=["POST"])
def upload(workspace, table):
"""
Expand All @@ -21,7 +68,13 @@ def upload(workspace, table):
`data` - the newick data, passed in the request body.
"""
app.logger.info("newick tree")
tree = newick.loads(request.data.decode("utf8"))

body = decode_data(request.data)

tree = newick.loads(body)

validate_newick(tree)

workspace = db.db(workspace)
edgetable_name = "%s_edges" % table
nodetable_name = "%s_nodes" % table
Expand Down
12 changes: 11 additions & 1 deletion multinet/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from flask import Response

from . import db
from .errors import DatabaseNotLive
from .errors import DatabaseNotLive, DecodeFailed


def generate(iterator):
Expand All @@ -28,3 +28,13 @@ def require_db():
"""Check if the db is live."""
if not db.check_db():
raise DatabaseNotLive()


def decode_data(input):
    """Decode raw request-body bytes as UTF-8.

    Args:
        input: raw ``bytes`` of a request body.

    Returns:
        The decoded ``str``.

    Raises:
        DecodeFailed: if the data is not valid UTF-8.
    """
    # NOTE: `input` shadows the builtin, but is kept for backward
    # compatibility with any caller passing the argument by keyword.
    try:
        return input.decode("utf8")
    except UnicodeDecodeError as e:
        # Chain the original error so tracebacks show the root cause.
        raise DecodeFailed({"error": "utf8", "detail": str(e)}) from e
1 change: 1 addition & 0 deletions test/data/basic_newick.tree
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(B,(A,C,E),D);
1 change: 1 addition & 0 deletions test/data/basic_newick_duplicates.tree
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(B,(A,C,A),D);
Binary file added test/data/basic_newick_utf16.tree
Binary file not shown.
Binary file added test/data/clubs_utf16.csv
Binary file not shown.
28 changes: 21 additions & 7 deletions test/test_csv_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
import csv
from io import StringIO
import os
import pytest

from multinet.uploaders.csv import validate_csv
from multinet.errors import ValidationFailed, DecodeFailed
from multinet.uploaders.csv import validate_csv, decode_data

TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))

Expand All @@ -23,19 +25,31 @@ def test_validate_csv():
test_file = test_file.read()

rows = list(csv.DictReader(StringIO(test_file)))
validation_resp = validate_csv(rows)
assert "error" in validation_resp.keys()
assert "5" in validation_resp["detail"]
assert "2" in validation_resp["detail"]

with pytest.raises(ValidationFailed) as v_error:
validate_csv(rows)

validation_resp = v_error.value.errors[0]
assert "error" in validation_resp
duplicate_keys = validation_resp["detail"]
assert "5" in duplicate_keys
assert "2" in duplicate_keys

# Test invalid syntax
with open(invalid_headers_file_path) as test_file:
test_file = test_file.read()

rows = list(csv.DictReader(StringIO(test_file)))
validation_resp = validate_csv(rows)
with pytest.raises(ValidationFailed) as v_error:
validate_csv(rows)

validation_resp = v_error.value.errors[0]
invalid_rows = [x["row"] for x in validation_resp["detail"]]
assert "error" in validation_resp.keys()
assert "error" in validation_resp
assert 3 in invalid_rows
assert 4 in invalid_rows
assert 5 in invalid_rows

# Test unicode decode errors
test_data = b"\xff\xfe_\x00k\x00e\x00y\x00,\x00n\x00a\x00m\x00e\x00\n"
pytest.raises(DecodeFailed, decode_data, test_data)
35 changes: 35 additions & 0 deletions test/test_newick_uploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Tests functions in the Neick Uploader Flask Blueprint."""
import newick
import os
import pytest

from multinet.errors import ValidationFailed, DecodeFailed
from multinet.uploaders.newick import validate_newick, decode_data

TEST_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))


def test_validate_newick():
    """Test the validate_newick function."""
    duplicate_keys_file_path = os.path.join(
        TEST_DATA_DIR, "basic_newick_duplicates.tree"
    )

    # Test duplicate keys
    with open(duplicate_keys_file_path) as test_file:
        test_file = test_file.read()

    body = newick.loads(test_file)

    with pytest.raises(ValidationFailed) as v_error:
        validate_newick(body)

    validation_resp = v_error.value.errors[0]
    assert "error" in validation_resp.keys()

    # Test unicode decode errors
    # UTF-16-LE bytes (note the \xff\xfe BOM) for "(B,(A,C,E),D);\n",
    # which is not valid UTF-8 and must raise DecodeFailed.
    test_data = (
        b"\xff\xfe(\x00B\x00,\x00(\x00A\x00,"
        b"\x00C\x00,\x00E\x00)\x00,\x00D\x00)\x00;\x00\n\x00"
    )
    pytest.raises(DecodeFailed, decode_data, test_data)