diff --git a/python/pyarrow/io-hdfs.pxi b/python/pyarrow/io-hdfs.pxi
index dc6ba23abc9..31c0437e48a 100644
--- a/python/pyarrow/io-hdfs.pxi
+++ b/python/pyarrow/io-hdfs.pxi
@@ -480,7 +480,5 @@ cdef class HdfsFile(NativeFile):
         object mode
         object parent
 
-    cdef object __weakref__
-
     def __dealloc__(self):
         self.parent = None
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 8b364dc7163..0b444cdb98c 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -410,9 +410,27 @@ cdef class PythonFile(NativeFile):
     cdef:
         object handle
 
-    def __cinit__(self, handle, mode='w'):
+    def __cinit__(self, handle, mode=None):
         self.handle = handle
 
+        if mode is None:
+            try:
+                mode = handle.mode
+                if not mode.startswith(('r', 'w')):
+                    # Append/exclusive modes ('ab', 'xb', ...) are not
+                    # understood below; ask the handle itself instead.
+                    mode = None
+            except (AttributeError, TypeError):
+                # No mode attribute (e.g. BytesIO) or a non-string mode
+                # (e.g. GzipFile exposed integers before Python 3.13)
+                mode = None
+            if mode is None:
+                try:
+                    mode = 'w' if handle.writable() else 'r'
+                except AttributeError:
+                    raise ValueError("could not infer open mode for file-like "
+                                     "object %r, please pass it explicitly"
+                                     % (handle,))
         if mode.startswith('w'):
             self.wr_file.reset(new PyOutputStream(handle))
             self.is_writable = True
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 31732a6e077..e4d574f18b3 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -337,6 +337,7 @@ cdef class NativeFile:
         bint is_writable
         readonly bint closed
         bint own_file
+        object __weakref__
 
     # By implementing these "virtual" functions (all functions in Cython
     # extension classes are technically virtual in the C++ sense) we can expose
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 736020f60fd..d269ad0e7cd 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -21,6 +21,7 @@
 import os
 import pytest
 import sys
+import weakref
 
 import numpy as np
 
@@ -124,6 +125,49 @@ def get_buffer():
     assert buf.to_pybytes() == b'sample'
     assert buf.parent is not None
 
+
+def test_python_file_implicit_mode(tmpdir):
+    path = os.path.join(str(tmpdir), 'foo.txt')
+    with open(path, 'wb') as f:
+        pf = pa.PythonFile(f)
+        assert pf.writable()
+        assert not pf.readable()
+        assert not pf.seekable()  # PyOutputStream isn't seekable
+        f.write(b'foobar\n')
+
+    with open(path, 'rb') as f:
+        pf = pa.PythonFile(f)
+        assert pf.readable()
+        assert not pf.writable()
+        assert pf.seekable()
+        assert pf.read() == b'foobar\n'
+
+    bio = BytesIO()
+    pf = pa.PythonFile(bio)
+    assert pf.writable()
+    assert not pf.readable()
+    assert not pf.seekable()
+    pf.write(b'foobar\n')
+    assert bio.getvalue() == b'foobar\n'
+
+    # Append mode is write-only even though it does not start with 'w'
+    with open(path, 'ab') as f:
+        pf = pa.PythonFile(f)
+        assert pf.writable()
+        assert not pf.readable()
+
+
+def test_python_file_closing():
+    bio = BytesIO()
+    pf = pa.PythonFile(bio)
+    wr = weakref.ref(pf)
+    del pf
+    assert wr() is None  # object was destroyed
+    assert not bio.closed
+    pf = pa.PythonFile(bio)
+    pf.close()
+    assert bio.closed
+
+
 # ----------------------------------------------------------------------
 # Buffers
 