From 9a2773dcf0ef6aef5e27946ac7dea1c8e4dd59b6 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Fri, 7 Jul 2023 16:51:26 +0530 Subject: [PATCH 1/2] fix: initial --- cpp/src/arrow/acero/hash_join_node.cc | 4 ++-- python/pyarrow/tests/test_table.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc index a6179141649..254dad361ff 100644 --- a/cpp/src/arrow/acero/hash_join_node.cc +++ b/cpp/src/arrow/acero/hash_join_node.cc @@ -236,14 +236,14 @@ Status HashJoinSchema::ValidateSchemas(JoinType join_type, const Schema& left_sc const auto& type = *field->type(); if (!IsTypeSupported(type)) { return Status::Invalid("Data type ", type, - " is not supported in join non-key field"); + " is not supported in join non-key field ", field->name()); } } for (const auto& field : right_schema.fields()) { const auto& type = *field->type(); if (!IsTypeSupported(type)) { return Status::Invalid("Data type ", type, - " is not supported in join non-key field"); + " is not supported in join non-key field ", field->name()); } } diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index e66c7a79e87..a5f005fb4eb 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -2422,3 +2422,27 @@ def test_numpy_asarray(constructor): result = np.asarray(table3, dtype="int32") np.testing.assert_allclose(result, expected) assert result.dtype == "int32" + + +def test_invalid_non_join_column(): + NUM_ITEMS = 30 + t1 = pa.Table.from_pydict({ + 'id': [x.to_bytes(4, 'big') for x in range(NUM_ITEMS)], + 'array_column': [[z for z in range(3)] for x in range(NUM_ITEMS)], + }) + t2 = pa.Table.from_pydict({ + 'id': [x.to_bytes(4, 'big') for x in range(NUM_ITEMS)], + 'value': [x for x in range(NUM_ITEMS)] + }) + + # check as left table + with pytest.raises(pa.lib.ArrowInvalid) as excinfo: + t1.join(t2, 'id', 
join_type='inner') + exp_error_msg = "Data type list<item: int64> is not supported " \ + + "in join non-key field array_column" + assert exp_error_msg in str(excinfo.value) + + # check as right table + with pytest.raises(pa.lib.ArrowInvalid) as excinfo: + t2.join(t1, 'id', join_type='inner') + assert exp_error_msg in str(excinfo.value) From 6a21e9492a6b54716357c60d53ad3968bc480f4e Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Wed, 12 Jul 2023 09:40:37 +0530 Subject: [PATCH 2/2] fix: reviews --- python/pyarrow/tests/test_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index a5f005fb4eb..457734bb733 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -2427,11 +2427,11 @@ def test_numpy_asarray(constructor): def test_invalid_non_join_column(): NUM_ITEMS = 30 t1 = pa.Table.from_pydict({ - 'id': [x.to_bytes(4, 'big') for x in range(NUM_ITEMS)], + 'id': range(NUM_ITEMS), 'array_column': [[z for z in range(3)] for x in range(NUM_ITEMS)], }) t2 = pa.Table.from_pydict({ - 'id': [x.to_bytes(4, 'big') for x in range(NUM_ITEMS)], + 'id': range(NUM_ITEMS), 'value': [x for x in range(NUM_ITEMS)] })