From c0c3148043f2359e8b17cc6234f2eb6546168a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 22 May 2025 10:00:34 +0200 Subject: [PATCH 1/2] GH-46373: [Python] Exercise fallback case on tests for parquet.read_table in case dataset is not available --- python/pyarrow/tests/parquet/test_basic.py | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 43fddd413a0..d0153af6e27 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -169,6 +169,30 @@ def test_invalid_source(): pq.ParquetFile(None) +def test_read_table_without_dataset(tempdir): + from unittest import mock + + class MockParquetDataset: + def __init__(self, *args, **kwargs): + raise ImportError("MockParquetDataset") + + path = tempdir / "test.parquet" + table = pa.table({"a": [1, 2, 3]}) + _write_table(table, path) + + with mock.patch('pyarrow.parquet.core.ParquetDataset', new=MockParquetDataset): + with pytest.raises(ValueError, match="the 'filters' keyword"): + pq.read_table(path, filters=[('integer', '=', 1)]) + with pytest.raises(ValueError, match="the 'partitioning' keyword"): + pq.read_table(path, partitioning=['week', 'color']) + with pytest.raises(ValueError, match="the 'schema' argument"): + pq.read_table(path, schema=table.schema) + with pytest.raises(OSError, match="is a directory"): + pq.read_table(tempdir) + result = pq.read_table(path) + assert result == table + + @pytest.mark.slow def test_file_with_over_int16_max_row_groups(): # PARQUET-1857: Parquet encryption support introduced a INT16_MAX upper From eebbb7fa92582f592064c283af1f2ed2512a0d35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 22 May 2025 11:40:20 +0200 Subject: [PATCH 2/2] Do not check error message on directory to avoid OS specifics --- python/pyarrow/tests/parquet/test_basic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index d0153af6e27..67515c5e247 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -187,7 +187,8 @@ def __init__(self, *args, **kwargs): pq.read_table(path, partitioning=['week', 'color']) with pytest.raises(ValueError, match="the 'schema' argument"): pq.read_table(path, schema=table.schema) - with pytest.raises(OSError, match="is a directory"): + # Error message varies depending on OS + with pytest.raises(OSError): pq.read_table(tempdir) result = pq.read_table(path) assert result == table