From a55e94dbfb0b88fc1c6b6a826ff18efd6f84fbdd Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 15 Apr 2020 17:37:13 -0400 Subject: [PATCH 1/7] Do main split of validate_csv --- multinet/uploaders/csv.py | 90 ++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index 3dc50895..8d0c66f3 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -57,16 +57,44 @@ def flask_response(self) -> FlaskTuple: return ("Could not read CSV data", "415 Unsupported Media Type") -def validate_csv( - rows: Sequence[MutableMapping], key_field: str = "_key", overwrite: bool = False -) -> None: - """Perform any necessary CSV validation, and return appropriate errors.""" +def is_edge_table(rows: Sequence[MutableMapping]): + """Determine if this table should be treated as an edge table.""" + fieldnames = rows[0].keys() + return "_from" in fieldnames and "_to" in fieldnames + + +def is_node_table(rows: Sequence[MutableMapping], key_field: str, overwrite: bool): + """Determine if this table should be treated as a node table.""" + fieldnames = rows[0].keys() + return key_field != "_key" or "_key" in fieldnames + + +def validate_edge_table(rows: Sequence[MutableMapping]): + """Validate that the given table is a valid edge table.""" data_errors: List[ValidationFailure] = [] + valid_cell = re.compile("[^/]+/[^/]+") + + for i, row in enumerate(rows): + fields: List[str] = [] + if not valid_cell.match(row["_from"]): + fields.append("_from") + if not valid_cell.match(row["_to"]): + fields.append("_to") + + if fields: + # i+2 -> +1 for index offset, +1 due to header row + data_errors.append(InvalidRow(fields=fields, row=i + 2)) + + if len(data_errors) > 0: + raise ValidationFailed(data_errors) - if not rows: - raise ValidationFailed([MissingBody()]) +def validate_node_table( + rows: Sequence[MutableMapping], key_field: str, overwrite: bool +): + """Validate that the given table is a valid node table.""" fieldnames = rows[0].keys() + data_errors: List[ValidationFailure] = [] if key_field != "_key" and key_field not in fieldnames: data_errors.append(KeyFieldDoesNotExist(key=key_field)) @@ -76,39 +104,33 @@ def validate_csv( data_errors.append(KeyFieldAlreadyExists(key=key_field)) raise ValidationFailed(data_errors) - if key_field in fieldnames: - # Node Table, check for key uniqueness - keys = [row[key_field] for row in rows] - unique_keys: Set[str] = set() - for key in keys: - if key in unique_keys: - data_errors.append(DuplicateKey(key=key)) - else: - unique_keys.add(key) - - elif "_from" in fieldnames and "_to" in fieldnames: - # Edge Table, check that each cell has the correct format - valid_cell = re.compile("[^/]+/[^/]+") - - for i, row in enumerate(rows): - fields: List[str] = [] - if not valid_cell.match(row["_from"]): - fields.append("_from") - if not valid_cell.match(row["_to"]): - fields.append("_to") - - if fields: - # i+2 -> +1 for index offset, +1 due to header row - data_errors.append(InvalidRow(fields=fields, row=i + 2)) - - else: - # Unsupported Table, error since we don't know what's coming in - data_errors.append(UnsupportedTable()) + keys = [row[key_field] for row in rows] + unique_keys: Set[str] = set() + for key in keys: + if key in unique_keys: + data_errors.append(DuplicateKey(key=key)) + else: + unique_keys.add(key) if len(data_errors) > 0: raise ValidationFailed(data_errors) +def validate_csv( + rows: Sequence[MutableMapping], key_field: str, overwrite: bool +) -> None: + """Perform any necessary CSV validation, and return appropriate errors.""" + if not rows: + raise ValidationFailed([MissingBody()]) + + if is_node_table(rows, key_field, overwrite): + validate_node_table(rows, key_field, overwrite) + elif is_edge_table(rows): + validate_edge_table(rows) + else: + raise ValidationFailed([UnsupportedTable()]) + + def set_table_key(rows: List[Dict[str, str]], key: str) -> List[Dict[str, str]]: """Update the _key field in each row.""" new_rows: List[Dict[str, str]] = [] From 429a1054b5a14fd52acfb7fd210561162986bc60 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 15 Apr 2020 17:37:23 -0400 Subject: [PATCH 2/7] Update tests to supply required args to validate_csv --- test/test_csv_uploader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_csv_uploader.py b/test/test_csv_uploader.py index 5be5a3c2..7c30bc4c 100644 --- a/test/test_csv_uploader.py +++ b/test/test_csv_uploader.py @@ -30,7 +30,7 @@ def test_missing_key_field(): correct = UnsupportedTable().asdict() with pytest.raises(ValidationFailed) as v_error: - validate_csv(rows) + validate_csv(rows, "_key", False) validation_resp = v_error.value.errors assert len(validation_resp) == 1 @@ -44,7 +44,7 @@ def test_invalid_key_field(): correct = KeyFieldDoesNotExist(key=invalid_key).asdict() with pytest.raises(ValidationFailed) as v_error: - validate_csv(rows, key_field=invalid_key) + validate_csv(rows, key_field=invalid_key, overwrite=False) validation_resp = v_error.value.errors assert len(validation_resp) == 1 @@ -83,7 +83,7 @@ def test_duplicate_keys(): """Test that duplicate keys are handled properly.""" rows = read_csv("clubs_invalid_duplicate_keys.csv") with pytest.raises(ValidationFailed) as v_error: - validate_csv(rows) + validate_csv(rows, key_field="_key", overwrite=False) validation_resp = v_error.value.errors correct = [err.asdict() for err in [DuplicateKey(key="2"), DuplicateKey(key="5")]] @@ -94,7 +94,7 @@ def test_invalid_headers(): """Test that invalid headers are handled properly.""" rows = read_csv("membership_invalid_syntax.csv") with pytest.raises(ValidationFailed) as v_error: - validate_csv(rows) + validate_csv(rows, key_field="_key", overwrite=False) validation_resp = v_error.value.errors correct = [ From 8ca7da46edaa1d28fd804572c21cff1452952cac Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Mon, 20 Apr 2020 18:51:44 -0400 Subject: [PATCH 3/7] Add missing return type annotations on functions --- multinet/uploaders/csv.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index 8d0c66f3..e4fbfcd6 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -57,19 +57,21 @@ def flask_response(self) -> FlaskTuple: return ("Could not read CSV data", "415 Unsupported Media Type") -def is_edge_table(rows: Sequence[MutableMapping]): +def is_edge_table(rows: Sequence[MutableMapping]) -> bool: """Determine if this table should be treated as an edge table.""" fieldnames = rows[0].keys() return "_from" in fieldnames and "_to" in fieldnames -def is_node_table(rows: Sequence[MutableMapping], key_field: str, overwrite: bool): +def is_node_table( + rows: Sequence[MutableMapping], key_field: str, overwrite: bool +) -> bool: """Determine if this table should be treated as a node table.""" fieldnames = rows[0].keys() return key_field != "_key" or "_key" in fieldnames -def validate_edge_table(rows: Sequence[MutableMapping]): +def validate_edge_table(rows: Sequence[MutableMapping]) -> None: """Validate that the given table is a valid edge table.""" data_errors: List[ValidationFailure] = [] valid_cell = re.compile("[^/]+/[^/]+") @@ -91,7 +93,7 @@ def validate_edge_table(rows: Sequence[MutableMapping]): def validate_node_table( rows: Sequence[MutableMapping], key_field: str, overwrite: bool -): +) -> None: """Validate that the given table is a valid node table.""" fieldnames = rows[0].keys() data_errors: List[ValidationFailure] = [] From ff5bfc0ac21979c9a1bec5e9b631675343600202 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 22 Apr 2020 17:43:06 -0400 Subject: [PATCH 4/7] Update test/test_csv_uploader.py Co-Authored-By: Jack Wilburn --- test/test_csv_uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_csv_uploader.py b/test/test_csv_uploader.py index 7c30bc4c..758cc24e 100644 --- a/test/test_csv_uploader.py +++ b/test/test_csv_uploader.py @@ -30,7 +30,7 @@ def test_missing_key_field(): correct = UnsupportedTable().asdict() with pytest.raises(ValidationFailed) as v_error: - validate_csv(rows, "_key", False) + validate_csv(rows, key_field="_key", overwrite=False) validation_resp = v_error.value.errors assert len(validation_resp) == 1 From a8e15f7ce740f52568b693b090457716b79913e2 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 22 Apr 2020 17:46:47 -0400 Subject: [PATCH 5/7] Replace list comprehension with generator expression Co-Authored-By: Roni Choudhury <2903332+waxlamp@users.noreply.github.com> --- multinet/uploaders/csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index e4fbfcd6..3f5d496b 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -106,7 +106,7 @@ def validate_node_table( data_errors.append(KeyFieldAlreadyExists(key=key_field)) raise ValidationFailed(data_errors) - keys = [row[key_field] for row in rows] + keys = (row[key_field] for row in rows) unique_keys: Set[str] = set() for key in keys: if key in unique_keys: From 617745641657daf1d706f7bf76eee374ecdd29b1 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 22 Apr 2020 17:46:59 -0400 Subject: [PATCH 6/7] Add comment explaining regex --- multinet/uploaders/csv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index e4fbfcd6..181fe312 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -74,6 +74,8 @@ def is_node_table( def validate_edge_table(rows: Sequence[MutableMapping]) -> None: """Validate that the given table is a valid edge table.""" data_errors: List[ValidationFailure] = [] + + # Checks that a cell has the form table_name/key valid_cell = re.compile("[^/]+/[^/]+") for i, row in enumerate(rows): From bbb041170505091de86dfbea4ac5210d323065b7 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Wed, 22 Apr 2020 17:50:00 -0400 Subject: [PATCH 7/7] Remove unused argument to is_node_table --- multinet/uploaders/csv.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/multinet/uploaders/csv.py b/multinet/uploaders/csv.py index d3c29e37..15d41e9c 100644 --- a/multinet/uploaders/csv.py +++ b/multinet/uploaders/csv.py @@ -63,9 +63,7 @@ def is_edge_table(rows: Sequence[MutableMapping]) -> bool: return "_from" in fieldnames and "_to" in fieldnames -def is_node_table( - rows: Sequence[MutableMapping], key_field: str, overwrite: bool -) -> bool: +def is_node_table(rows: Sequence[MutableMapping], key_field: str) -> bool: """Determine if this table should be treated as a node table.""" fieldnames = rows[0].keys() return key_field != "_key" or "_key" in fieldnames @@ -127,7 +125,7 @@ def validate_csv( if not rows: raise ValidationFailed([MissingBody()]) - if is_node_table(rows, key_field, overwrite): + if is_node_table(rows, key_field): validate_node_table(rows, key_field, overwrite) elif is_edge_table(rows): validate_edge_table(rows)