From 457fb4462d2b4df899d00ac5a417c7c571bfa992 Mon Sep 17 00:00:00 2001
From: Dmitry Mottl <dmitry.mottl@gmail.com>
Date: Tue, 6 May 2025 14:40:32 +0700
Subject: [PATCH 1/6] Fix docstring for pyarrow.parquet.read_table

---
 python/pyarrow/parquet/core.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index f5a472c9a9b..5a36d578bdc 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1626,10 +1626,12 @@ def partitioning(self):
 
 Parameters
 ----------
-source : str, pyarrow.NativeFile, or file-like object
-    If a string passed, can be a single file name or directory name. For
-    file-like objects, only read a single file. Use pyarrow.BufferReader to
-    read a file contained in a bytes or buffer-like object.
+source : str, List[str], pyarrow.NativeFile, or file-like object
+    The source to read data from.
+    If a single string is passed, it can be a single file name or directory name.
+    If a list of strings is passed, each string should be a file name.
+    For file-like objects, only read a single file.
+    Use pyarrow.BufferReader to read a file contained in a bytes or buffer-like object.
 columns : list
     If not None, only these columns will be read from the file. A column
     name may be a prefix of a nested field, e.g. 'a' will select 'a.b',

From 22b4a85606505ec81853b4caec94250b3ae07e4e Mon Sep 17 00:00:00 2001
From: Dmitry Mottl <dmitry.mottl@gmail.com>
Date: Tue, 6 May 2025 19:58:27 +0700
Subject: [PATCH 2/6] Fix docstring

---
 python/pyarrow/parquet/core.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 5a36d578bdc..a658dfaef66 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1626,11 +1626,10 @@ def partitioning(self):
 
 Parameters
 ----------
-source : str, List[str], pyarrow.NativeFile, or file-like object
-    The source to read data from.
-    If a single string is passed, it can be a single file name or directory name.
-    If a list of strings is passed, each string should be a file name.
-    For file-like objects, only read a single file.
+source : str, pyarrow.NativeFile, or file-like object
+    If a string is passed, it should be a single file name.
+    If the pyarrow.dataset module is available, you can also pass a directory
+    name or a list of file names.
     Use pyarrow.BufferReader to read a file contained in a bytes or buffer-like object.
 columns : list
     If not None, only these columns will be read from the file. A column

From db591783513c4eb38c5015c072ffca8ae3af29e0 Mon Sep 17 00:00:00 2001
From: Dmitry Mottl <dmitry.mottl@gmail.com>
Date: Tue, 6 May 2025 20:39:35 +0700
Subject: [PATCH 3/6] Add type checks for source argument in read_table

---
 python/pyarrow/parquet/core.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index a658dfaef66..e91ce07a235 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1826,7 +1826,14 @@ def read_table(source, *, columns=None, use_threads=True,
         filesystem, path = _resolve_filesystem_and_path(source, filesystem)
         if filesystem is not None:
             source = filesystem.open_input_file(path)
-        # TODO test that source is not a directory or a list
+        if not (
+            isinstance(source, str)
+            or isinstance(source, pa.NativeFile)
+            or hasattr(source, "read")
+        ):
+            raise ValueError(
+                "source should be a file name, a pyarrow.NativeFile or a file-like object"
+            )
         dataset = ParquetFile(
             source, read_dictionary=read_dictionary,
             memory_map=memory_map, buffer_size=buffer_size,

From e93d845000ebe14e60df11b016bcd951b3742193 Mon Sep 17 00:00:00 2001
From: Dmitry Mottl <dmitry.mottl@gmail.com>
Date: Fri, 9 May 2025 17:41:07 +0700
Subject: [PATCH 4/6] Update python/pyarrow/parquet/core.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Raúl Cumplido <raulcumplido@gmail.com>
---
 python/pyarrow/parquet/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index e91ce07a235..1c58ce9c007 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1832,7 +1832,8 @@ def read_table(source, *, columns=None, use_threads=True,
             or hasattr(source, "read")
         ):
             raise ValueError(
-                "source should be a file name, a pyarrow.NativeFile or a file-like object"
+                "source should be a file name, a pyarrow.NativeFile or a file-like object "
+                "when the pyarrow.dataset module is not available"
             )
         dataset = ParquetFile(
             source, read_dictionary=read_dictionary,

From 06ba561316c98c0b3ff79b4a768e51018aaf048b Mon Sep 17 00:00:00 2001
From: Dmitry Mottl <dmitry.mottl@gmail.com>
Date: Fri, 9 May 2025 18:17:22 +0700
Subject: [PATCH 5/6] Add directory check

---
 python/pyarrow/parquet/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 1c58ce9c007..0b89948ef36 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1827,7 +1827,7 @@ def read_table(source, *, columns=None, use_threads=True,
         if filesystem is not None:
             source = filesystem.open_input_file(path)
         if not (
-            isinstance(source, str)
+            (isinstance(source, str) and not os.path.isdir(source))
             or isinstance(source, pa.NativeFile)
             or hasattr(source, "read")
         ):

From a7818902ea6e7b46b7faccd4ff3f8477727ec129 Mon Sep 17 00:00:00 2001
From: Dmitry Mottl <dmitry.mottl@gmail.com>
Date: Fri, 9 May 2025 18:32:57 +0700
Subject: [PATCH 6/6] linting

---
 python/pyarrow/parquet/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 0b89948ef36..4c2c8cba0b4 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1832,8 +1832,8 @@ def read_table(source, *, columns=None, use_threads=True,
             or hasattr(source, "read")
         ):
             raise ValueError(
-                "source should be a file name, a pyarrow.NativeFile or a file-like object "
-                "when the pyarrow.dataset module is not available"
+                "source should be a file name, a pyarrow.NativeFile or a file-like "
+                "object when the pyarrow.dataset module is not available"
             )
         dataset = ParquetFile(
             source, read_dictionary=read_dictionary,