From 9eccefd6bed13efee466c2caff25be1db00295da Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Tue, 1 Nov 2016 13:41:41 +0100 Subject: [PATCH] ARROW-357: Use a single RowGroup for Parquet files as default. Change-Id: Ibdbd1db9fcd6c2e6ce588b3f326caf00d38df48a --- python/pyarrow/parquet.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx index 019dd2c1de4..a56c1e1456d 100644 --- a/python/pyarrow/parquet.pyx +++ b/python/pyarrow/parquet.pyx @@ -106,7 +106,8 @@ def write_table(table, filename, chunk_size=None, version=None, table : pyarrow.Table filename : string chunk_size : int - The maximum number of rows in each Parquet RowGroup + The maximum number of rows in each Parquet RowGroup. By default, + a single RowGroup is written per file. version : {"1.0", "2.0"}, default "1.0" The Parquet format version, defaults to 1.0 use_dictionary : bool or list @@ -121,7 +122,7 @@ def write_table(table, filename, chunk_size=None, version=None, cdef WriterProperties.Builder properties_builder cdef int64_t chunk_size_ = 0 if chunk_size is None: - chunk_size_ = min(ctable_.num_rows(), int(2**16)) + chunk_size_ = ctable_.num_rows() else: chunk_size_ = chunk_size