From 8f419e626293ca02869a9817c96eac1e553cfa2d Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Tue, 15 Aug 2023 18:02:59 -0400 Subject: [PATCH 01/19] Add missing docstrings to Cython --- python/pyarrow/_compute.pyx | 9 ++ python/pyarrow/_csv.pyx | 27 ++++++ python/pyarrow/_dataset.pyx | 74 +++++++++++++++ python/pyarrow/_dataset_orc.pyx | 9 ++ python/pyarrow/_dataset_parquet.pyx | 41 +++++++++ python/pyarrow/_fs.pyx | 9 ++ python/pyarrow/_json.pyx | 18 ++++ python/pyarrow/_parquet.pyx | 92 +++++++++++++++++++ python/pyarrow/array.pxi | 33 +++++++ python/pyarrow/gandiva.pyx | 135 ++++++++++++++++++++++++++++ python/pyarrow/io.pxi | 54 +++++++++++ python/pyarrow/scalar.pxi | 9 ++ python/pyarrow/table.pxi | 11 +++ python/pyarrow/tensor.pxi | 53 +++++++++++ python/pyarrow/types.pxi | 45 ++++++++++ 15 files changed, 619 insertions(+) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index bc3b9e8c558..0d2e4ca1bd0 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -2345,6 +2345,15 @@ cdef class Expression(_Weakrefable): return self.expr def equals(self, Expression other): + """ + Parameters + ---------- + other : pyarrow.compute.Expression + + Returns + ------- + bool + """ return self.expr.Equals(other.unwrap()) def __str__(self): diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx index 0b72f5249f9..db301c91cc8 100644 --- a/python/pyarrow/_csv.pyx +++ b/python/pyarrow/_csv.pyx @@ -290,6 +290,15 @@ cdef class ReadOptions(_Weakrefable): check_status(deref(self.options).Validate()) def equals(self, ReadOptions other): + """ + Parameters + ---------- + other : ReadOptions + + Returns + ------- + bool + """ return ( self.use_threads == other.use_threads and self.block_size == other.block_size and @@ -536,6 +545,15 @@ cdef class ParseOptions(_Weakrefable): check_status(deref(self.options).Validate()) def equals(self, ParseOptions other): + """ + Parameters + ---------- + other : ParseOptions + + Returns + ------- + 
bool + """ return ( self.delimiter == other.delimiter and self.quote_char == other.quote_char and @@ -1042,6 +1060,15 @@ cdef class ConvertOptions(_Weakrefable): check_status(deref(self.options).Validate()) def equals(self, ConvertOptions other): + """ + Parameters + ---------- + other : ConvertOptions + + Returns + ------- + bool + """ return ( self.check_utf8 == other.check_utf8 and self.column_types == other.column_types and diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index badf6e4a4c5..948c848a140 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -1992,9 +1992,27 @@ cdef class IpcFileFormat(FileFormat): self.init(shared_ptr[CFileFormat](new CIpcFileFormat())) def equals(self, IpcFileFormat other): + """ + Parameters + ---------- + other : IpcFileFormat + + Returns + ------- + True + """ return True def make_write_options(self, **kwargs): + """ + Parameters + ---------- + **kwargs : dict + + Returns + ------- + pyarrow.dataset.IpcWriteOptions + """ cdef IpcFileWriteOptions opts = \ FileFormat.make_write_options(self) opts.write_options = IpcWriteOptions(**kwargs) @@ -2071,6 +2089,15 @@ cdef class CsvFileFormat(FileFormat): self.csv_format = sp.get() def make_write_options(self, **kwargs): + """ + Parameters + ---------- + **kwargs : dict + + Returns + ------- + pyarrow.dataset.CsvWriteOptions + """ cdef CsvFileWriteOptions opts = \ FileFormat.make_write_options(self) opts.write_options = WriteOptions(**kwargs) @@ -2093,6 +2120,15 @@ cdef class CsvFileFormat(FileFormat): super()._set_default_fragment_scan_options(options) def equals(self, CsvFileFormat other): + """ + Parameters + ---------- + other : CsvFileFormat + + Returns + ------- + bool + """ return ( self.parse_options.equals(other.parse_options) and self.default_fragment_scan_options == @@ -2165,6 +2201,15 @@ cdef class CsvFragmentScanOptions(FragmentScanOptions): make_streamwrap_func(read_options.encoding, 'utf-8')) def equals(self, 
CsvFragmentScanOptions other): + """ + Parameters + ---------- + other : CsvFragmentScanOptions + + Returns + ------- + bool + """ return ( other and self.convert_options.equals(other.convert_options) and @@ -2250,6 +2295,15 @@ cdef class JsonFileFormat(FileFormat): super()._set_default_fragment_scan_options(options) def equals(self, JsonFileFormat other): + """ + Parameters + ---------- + other : JsonFileFormat + + Returns + ------- + bool + """ return (other and self.default_fragment_scan_options == other.default_fragment_scan_options) @@ -2308,6 +2362,15 @@ cdef class JsonFragmentScanOptions(FragmentScanOptions): self.json_options.read_options = read_options.options def equals(self, JsonFragmentScanOptions other): + """ + Parameters + ---------- + other : JsonFragmentScanOptions + + Returns + ------- + bool + """ return ( other and self.read_options.equals(other.read_options) and @@ -2353,6 +2416,17 @@ cdef class Partitioning(_Weakrefable): return False def parse(self, path): + """ + Parse a path into a partition expression. 
+ + Parameters + ---------- + path : str + + Returns + ------- + Expression + """ cdef CResult[CExpression] result result = self.partitioning.Parse(tobytes(path)) return Expression.wrap(GetResultValue(result)) diff --git a/python/pyarrow/_dataset_orc.pyx b/python/pyarrow/_dataset_orc.pyx index 40a21ef5462..19338b240fd 100644 --- a/python/pyarrow/_dataset_orc.pyx +++ b/python/pyarrow/_dataset_orc.pyx @@ -32,6 +32,15 @@ cdef class OrcFileFormat(FileFormat): self.init(shared_ptr[CFileFormat](new COrcFileFormat())) def equals(self, OrcFileFormat other): + """ + Parameters + ---------- + other : OrcFileFormat + + Returns + ------- + True + """ return True @property diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 4ad0caec307..d5f96d3142b 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -178,6 +178,15 @@ cdef class ParquetFileFormat(FileFormat): return parquet_read_options def make_write_options(self, **kwargs): + """ + Parameters + ---------- + **kwargs : dict + + Returns + ------- + pyarrow.dataset.ParquetFileWriteOptions + """ opts = FileFormat.make_write_options(self) ( opts).update(**kwargs) return opts @@ -189,6 +198,15 @@ cdef class ParquetFileFormat(FileFormat): super()._set_default_fragment_scan_options(options) def equals(self, ParquetFileFormat other): + """ + Parameters + ---------- + other : ParquetFileFormat + + Returns + ------- + bool + """ return ( self.read_options.equals(other.read_options) and self.default_fragment_scan_options == @@ -502,6 +520,15 @@ cdef class ParquetReadOptions(_Weakrefable): self._coerce_int96_timestamp_unit = TimeUnit_NANO def equals(self, ParquetReadOptions other): + """ + Parameters + ---------- + other : ParquetReadOptions + + Returns + ------- + bool + """ return (self.dictionary_columns == other.dictionary_columns and self.coerce_int96_timestamp_unit == other.coerce_int96_timestamp_unit) @@ -527,6 +554,11 @@ cdef class 
ParquetFileWriteOptions(FileWriteOptions): object _properties def update(self, **kwargs): + """ + Parameters + ---------- + **kwargs : dict + """ arrow_fields = { "use_deprecated_int96_timestamps", "coerce_timestamps", @@ -720,6 +752,15 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): self.reader_properties().set_thrift_container_size_limit(size) def equals(self, ParquetFragmentScanOptions other): + """ + Parameters + ---------- + other : ParquetFragmentScanOptions + + Returns + ------- + bool + """ attrs = ( self.use_buffered_stream, self.buffer_size, self.pre_buffer, self.thrift_string_size_limit, self.thrift_container_size_limit) diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 69105afc2fc..764a6d070bd 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -505,6 +505,15 @@ cdef class FileSystem(_Weakrefable): return self.wrapped def equals(self, FileSystem other): + """ + Parameters + ---------- + other : FileSystem + + Returns + ------- + bool + """ return self.fs.Equals(other.unwrap()) def __eq__(self, other): diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx index 70cde6e23fe..db1e804001b 100644 --- a/python/pyarrow/_json.pyx +++ b/python/pyarrow/_json.pyx @@ -83,6 +83,15 @@ cdef class ReadOptions(_Weakrefable): ) def equals(self, ReadOptions other): + """ + Parameters + ---------- + other : ReadOptions + + Returns + ------- + bool + """ return ( self.use_threads == other.use_threads and self.block_size == other.block_size @@ -212,6 +221,15 @@ cdef class ParseOptions(_Weakrefable): self.options.unexpected_field_behavior = v def equals(self, ParseOptions other): + """ + Parameters + ---------- + other : ParseOptions + + Returns + ------- + bool + """ return ( self.explicit_schema == other.explicit_schema and self.newlines_in_values == other.newlines_in_values and diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 4448f359ac1..96b95e76da8 100644 --- 
a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1183,6 +1183,22 @@ cdef class ParquetReader(_Weakrefable): FileDecryptionProperties decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None): + """ + Open a parquet file for reading. + + Parameters + ---------- + source : object + use_memory_map : bool, default False + read_dictionary : iterable[int or str], optional + metadata : FileMetaData, optional + buffer_size : int, default 0 + pre_buffer : bool, default False + coerce_int96_timestamp_unit : str, optional + decryption_properties : FileDecryptionProperties, optional + thrift_string_size_limit : int, optional + thrift_container_size_limit : int, optional + """ cdef: shared_ptr[CFileMetaData] c_metadata CReaderProperties properties = default_reader_properties() @@ -1285,13 +1301,35 @@ cdef class ParquetReader(_Weakrefable): return self.reader.get().num_row_groups() def set_use_threads(self, bint use_threads): + """ + Parameters + ---------- + use_threads : bool + """ self.reader.get().set_use_threads(use_threads) def set_batch_size(self, int64_t batch_size): + """ + Parameters + ---------- + batch_size : int64 + """ self.reader.get().set_batch_size(batch_size) def iter_batches(self, int64_t batch_size, row_groups, column_indices=None, bint use_threads=True): + """ + Parameters + ---------- + batch_size : int64 + row_groups : list[int] + column_indices : list[int], optional + use_threads : bool, default True + + Yields + ------ + next : RecordBatch + """ cdef: vector[int] c_row_groups vector[int] c_column_indices @@ -1336,10 +1374,32 @@ cdef class ParquetReader(_Weakrefable): def read_row_group(self, int i, column_indices=None, bint use_threads=True): + """ + Parameters + ---------- + i : int + column_indices : list[int], optional + use_threads : bool, default True + + Returns + ------- + table : pyarrow.Table + """ return self.read_row_groups([i], column_indices, use_threads) def read_row_groups(self, 
row_groups not None, column_indices=None, bint use_threads=True): + """ + Parameters + ---------- + row_groups : list[int] + column_indices : list[int], optional + use_threads : bool, default True + + Returns + ------- + table : pyarrow.Table + """ cdef: shared_ptr[CTable] ctable vector[int] c_row_groups @@ -1366,6 +1426,16 @@ cdef class ParquetReader(_Weakrefable): return pyarrow_wrap_table(ctable) def read_all(self, column_indices=None, bint use_threads=True): + """ + Parameters + ---------- + column_indices : list[int], optional + use_threads : bool, default True + + Returns + ------- + table : pyarrow.Table + """ cdef: shared_ptr[CTable] ctable vector[int] c_column_indices @@ -1387,6 +1457,16 @@ cdef class ParquetReader(_Weakrefable): return pyarrow_wrap_table(ctable) def scan_contents(self, column_indices=None, batch_size=65536): + """ + Parameters + ---------- + column_indices : list[int], optional + batch_size : int32, default 65536 + + Returns + ------- + num_rows : int64 + """ cdef: vector[int] c_column_indices int32_t c_batch_size @@ -1434,6 +1514,18 @@ cdef class ParquetReader(_Weakrefable): return self._column_idx_map[tobytes(column_name)] def read_column(self, int column_index): + """ + Read the column at the specified index. + + Parameters + ---------- + column_index : int + Index of the column. 
+ + Returns + ------- + column : pyarrow.ChunkedArray + """ cdef shared_ptr[CChunkedArray] out with nogil: check_status(self.reader.get() diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 2f8959cd721..9c3f9af8c46 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1265,6 +1265,17 @@ cdef class Array(_PandasConvertible): return frombytes(result, safe=True) def format(self, **kwargs): + """ + DEPRECATED, use Array.to_string + + Parameters + ---------- + **kwargs : dict + + Returns + ------- + str + """ import warnings warnings.warn('Array.format is deprecated, use Array.to_string') return self.to_string(**kwargs) @@ -1281,6 +1292,15 @@ cdef class Array(_PandasConvertible): return NotImplemented def equals(Array self, Array other not None): + """ + Parameters + ---------- + other : Array + + Returns + ------- + bool + """ return self.ap.Equals(deref(other.ap)) def __len__(self): @@ -2336,6 +2356,19 @@ cdef class UnionArray(Array): """ def child(self, int pos): + """ + DEPRECATED, use field instead. + + Parameters + ---------- + pos : int + The physical index of the union child field (not its type code). + + Returns + ------- + field : Array + The given child field. + """ import warnings warnings.warn("child is deprecated, use field", FutureWarning) return self.field(pos) diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index cc46bc760ff..cf3b945534b 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -191,6 +191,19 @@ cdef class Projector(_Weakrefable): return self.projector.get().DumpIR().decode() def evaluate(self, RecordBatch batch, SelectionVector selection=None): + """ + Evaluate the specified record batch and return the arrays at the + filtered positions. 
+ + Parameters + ---------- + batch : RecordBatch + selection : pyarrow.gandiva.SelectionVector + + Returns + ------- + list[Array] + """ cdef vector[shared_ptr[CArray]] results if selection is None: check_status(self.projector.get().Evaluate( @@ -227,6 +240,19 @@ cdef class Filter(_Weakrefable): return self.filter.get().DumpIR().decode() def evaluate(self, RecordBatch batch, MemoryPool pool, dtype='int32'): + """ + Evaluate the specified record batch and return a selection vector. + + Parameters + ---------- + batch : RecordBatch + pool : MemoryPool + dtype : DataType or str, default int32 + + Returns + ------- + pyarrow.gandiva.SelectionVector + """ cdef: DataType type = ensure_type(dtype) shared_ptr[CSelectionVector] selection @@ -252,6 +278,18 @@ cdef class Filter(_Weakrefable): cdef class TreeExprBuilder(_Weakrefable): def make_literal(self, value, dtype): + """ + Create a node from a literal. + + Parameters + ---------- + value : a literal value + dtype : DataType + + Returns + ------- + pyarrow.gandiva.Node + """ cdef: DataType type = ensure_type(dtype) shared_ptr[CNode] r @@ -289,6 +327,19 @@ cdef class TreeExprBuilder(_Weakrefable): def make_expression(self, Node root_node not None, Field return_field not None): + """ + Create an expression with the specified root_node, + and the result written to return_field. + + Parameters + ---------- + root_node : pyarrow.gandiva.Node + return_field : Field + + Returns + ------- + pyarrow.gandiva.Expression + """ cdef shared_ptr[CGandivaExpression] r = TreeExprBuilder_MakeExpression( root_node.node, return_field.sp_field) cdef Expression expression = Expression() @@ -296,6 +347,19 @@ cdef class TreeExprBuilder(_Weakrefable): return expression def make_function(self, name, children, DataType return_type): + """ + Create a node with a function. 
+ + Parameters + ---------- + name : str + children : pyarrow.gandiva.NodeVector + return_type : DataType + + Returns + ------- + pyarrow.gandiva.Node + """ cdef c_vector[shared_ptr[CNode]] c_children cdef Node child for child in children: @@ -307,17 +371,53 @@ cdef class TreeExprBuilder(_Weakrefable): return Node.create(r) def make_field(self, Field field not None): + """ + Create a node with an arrow field. + + Parameters + ---------- + field : Field + + Returns + ------- + pyarrow.gandiva.Node + """ cdef shared_ptr[CNode] r = TreeExprBuilder_MakeField(field.sp_field) return Node.create(r) def make_if(self, Node condition not None, Node this_node not None, Node else_node not None, DataType return_type not None): + """ + Create a node with an if-else expression. + + Parameters + ---------- + condition : pyarrow.gandiva.Node + this_node : pyarrow.gandiva.Node + else_node : pyarrow.gandiva.Node + return_type : DataType + + Returns + ------- + pyarrow.gandiva.Node + """ cdef shared_ptr[CNode] r = TreeExprBuilder_MakeIf( condition.node, this_node.node, else_node.node, return_type.sp_type) return Node.create(r) def make_and(self, children): + """ + Create a Node with a boolean AND expression. + + Parameters + ---------- + children : pyarrow.gandiva.NodeVector + + Returns + ------- + pyarrow.gandiva.Node + """ cdef c_vector[shared_ptr[CNode]] c_children cdef Node child for child in children: @@ -328,6 +428,17 @@ cdef class TreeExprBuilder(_Weakrefable): return Node.create(r) def make_or(self, children): + """ + Create a Node with a boolean OR expression. + + Parameters + ---------- + children : pyarrow.gandiva.NodeVector + + Returns + ------- + pyarrow.gandiva.Node + """ cdef c_vector[shared_ptr[CNode]] c_children cdef Node child for child in children: @@ -420,6 +531,19 @@ cdef class TreeExprBuilder(_Weakrefable): return Node.create(r) def make_in_expression(self, Node node not None, values, dtype): + """ + Create a Node with an IN expression. 
+ + Parameters + ---------- + node : pyarrow.gandiva.Node + values : iterable + dtype : DataType + + Returns + ------- + pyarrow.gandiva.Node + """ cdef DataType type = ensure_type(dtype) if type.id == _Type_INT32: @@ -444,6 +568,17 @@ cdef class TreeExprBuilder(_Weakrefable): raise TypeError("Data type " + str(dtype) + " not supported.") def make_condition(self, Node condition not None): + """ + Create a condition with the specified node. + + Parameters + ---------- + condition : pyarrow.gandiva.Node + + Returns + ------- + pyarrow.gandiva.Condition + """ cdef shared_ptr[CCondition] r = TreeExprBuilder_MakeCondition( condition.node) return Condition.create(r) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 2a78f7e7954..c772fa2e048 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -575,6 +575,14 @@ cdef class NativeFile(_Weakrefable): return line def read_buffer(self, nbytes=None): + """ + Read from buffer. + + Parameters + ---------- + nbytes : int, optional + maximum number of bytes read + """ cdef: int64_t c_nbytes int64_t bytes_read = 0 @@ -602,6 +610,14 @@ cdef class NativeFile(_Weakrefable): raise UnsupportedOperation() def writelines(self, lines): + """ + Write lines to the file. + + Parameters + ---------- + lines : iterable + Iterable of bytes-like objects or exporters of buffer protocol + """ self._assert_writable() for line in lines: @@ -865,12 +881,35 @@ cdef class PythonFile(NativeFile): self.is_writable = True def truncate(self, pos=None): + """ + Parameters + ---------- + pos : int, optional + """ self.handle.truncate(pos) def readline(self, size=None): + """ + Read and return a line of bytes from the file. + + If size is specified, read at most size bytes. + + Parameters + ---------- + size : int + maximum number of bytes read + """ return self.handle.readline(size) def readlines(self, hint=None): + """ + Read lines of the file. 
+ + Parameters + ---------- + hint : int + maximum number of bytes read until we stop + """ return self.handle.readlines(hint) @@ -1146,16 +1185,31 @@ cdef class FixedSizeBufferWriter(NativeFile): self.is_writable = True def set_memcopy_threads(self, int num_threads): + """ + Parameters + ---------- + num_threads : int + """ cdef CFixedSizeBufferWriter* writer = \ self.output_stream.get() writer.set_memcopy_threads(num_threads) def set_memcopy_blocksize(self, int64_t blocksize): + """ + Parameters + ---------- + blocksize : int64 + """ cdef CFixedSizeBufferWriter* writer = \ self.output_stream.get() writer.set_memcopy_blocksize(blocksize) def set_memcopy_threshold(self, int64_t threshold): + """ + Parameters + ---------- + threshold : int64 + """ cdef CFixedSizeBufferWriter* writer = \ self.output_stream.get() writer.set_memcopy_threshold(threshold) diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index aff1c311abb..e19807ba56e 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -123,6 +123,15 @@ cdef class Scalar(_Weakrefable): return str(self.as_py()) def equals(self, Scalar other not None): + """ + Parameters + ---------- + other : pyarrow.Scalar + + Returns + ------- + bool + """ return self.wrapped.get().Equals(other.unwrap().get()[0]) def __eq__(self, other): diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index f08162089b8..3e79286758c 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -160,6 +160,17 @@ cdef class ChunkedArray(_PandasConvertible): return frombytes(result, safe=True) def format(self, **kwargs): + """ + DEPRECATED + + Parameters + ---------- + **kwargs : dict + + Returns + ------- + str + """ import warnings warnings.warn('ChunkedArray.format is deprecated, ' 'use ChunkedArray.to_string') diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index ba732879aad..a39ecbd3ff3 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -316,6 
+316,13 @@ shape: {0.shape}""".format(self) def from_dense_numpy(cls, obj, dim_names=None): """ Convert numpy.ndarray to arrow::SparseCOOTensor + + Parameters + ---------- + obj : numpy.ndarray + Data used to populate the rows. + dim_names : list, optional + Names of the dimensions. """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @@ -549,6 +556,14 @@ shape: {0.shape}""".format(self) return self.stp.size() def dim_name(self, i): + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + """ return frombytes(self.stp.dim_name(i)) @property @@ -781,6 +796,14 @@ shape: {0.shape}""".format(self) return self.stp.size() def dim_name(self, i): + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + """ return frombytes(self.stp.dim_name(i)) @property @@ -816,6 +839,13 @@ shape: {0.shape}""".format(self) def from_dense_numpy(cls, obj, dim_names=None): """ Convert numpy.ndarray to arrow::SparseCSCMatrix + + Parameters + ---------- + obj : numpy.ndarray + Data used to populate the rows. + dim_names : list, optional + Names of the dimensions. """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @@ -997,6 +1027,14 @@ shape: {0.shape}""".format(self) return self.stp.size() def dim_name(self, i): + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + """ return frombytes(self.stp.dim_name(i)) @property @@ -1040,6 +1078,13 @@ shape: {0.shape}""".format(self) def from_dense_numpy(cls, obj, dim_names=None): """ Convert numpy.ndarray to arrow::SparseCSFTensor + + Parameters + ---------- + obj : numpy.ndarray + Data used to populate the rows. + dim_names : list, optional + Names of the dimensions. 
""" return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @@ -1190,6 +1235,14 @@ shape: {0.shape}""".format(self) return self.stp.size() def dim_name(self, i): + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + """ return frombytes(self.stp.dim_name(i)) @property diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 12ad2fc4b6f..016f84166b7 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -199,6 +199,15 @@ cdef class DataType(_Weakrefable): self.pep3118_format = _datatype_to_pep3118(self.type) cpdef Field field(self, i): + """ + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Field + """ if not isinstance(i, int): raise TypeError(f"Expected int index, got type '{type(i)}'") cdef int index = _normalize_index(i, self.type.num_fields()) @@ -1886,6 +1895,15 @@ cdef class KeyValueMetadata(_Metadata, Mapping): return self.wrapped def equals(self, KeyValueMetadata other): + """ + Parameters + ---------- + other : pyarrow.KeyValueMetadata + + Returns + ------- + bool + """ return self.metadata.Equals(deref(other.wrapped)) def __repr__(self): @@ -1925,9 +1943,27 @@ cdef class KeyValueMetadata(_Metadata, Mapping): return KeyValueMetadata, (list(self.items()),) def key(self, i): + """ + Parameters + ---------- + i : int + + Returns + ------- + str + """ return self.metadata.key(i) def value(self, i): + """ + Parameters + ---------- + i : int + + Returns + ------- + Any + """ return self.metadata.value(i) def keys(self): @@ -1943,6 +1979,15 @@ cdef class KeyValueMetadata(_Metadata, Mapping): yield (self.metadata.key(i), self.metadata.value(i)) def get_all(self, key): + """ + Parameters + ---------- + key : str + + Returns + ------- + list + """ key = tobytes(key) return [v for k, v in self.items() if k == key] From dc0dfc87a372a065a9149a83b19be93e3145d400 Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Wed, 16 Aug 
2023 17:51:57 -0400 Subject: [PATCH 02/19] Add import info --- python/pyarrow/_csv.pyx | 6 +++--- python/pyarrow/_dataset.pyx | 12 ++++++------ python/pyarrow/_dataset_orc.pyx | 2 +- python/pyarrow/_dataset_parquet.pyx | 6 +++--- python/pyarrow/_fs.pyx | 2 +- python/pyarrow/_json.pyx | 4 ++-- python/pyarrow/array.pxi | 4 ++-- python/pyarrow/gandiva.pyx | 8 ++++---- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx index db301c91cc8..e532d8d8ab2 100644 --- a/python/pyarrow/_csv.pyx +++ b/python/pyarrow/_csv.pyx @@ -293,7 +293,7 @@ cdef class ReadOptions(_Weakrefable): """ Parameters ---------- - other : ReadOptions + other : pyarrow.csv.ReadOptions Returns ------- @@ -548,7 +548,7 @@ cdef class ParseOptions(_Weakrefable): """ Parameters ---------- - other : ParseOptions + other : pyarrow.csv.ParseOptions Returns ------- @@ -1063,7 +1063,7 @@ cdef class ConvertOptions(_Weakrefable): """ Parameters ---------- - other : ConvertOptions + other : pyarrow.csv.ConvertOptions Returns ------- diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index 948c848a140..f752a27e60a 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -1995,7 +1995,7 @@ cdef class IpcFileFormat(FileFormat): """ Parameters ---------- - other : IpcFileFormat + other : pyarrow.dataset.IpcFileFormat Returns ------- @@ -2123,7 +2123,7 @@ cdef class CsvFileFormat(FileFormat): """ Parameters ---------- - other : CsvFileFormat + other : pyarrow.dataset.CsvFileFormat Returns ------- @@ -2204,7 +2204,7 @@ cdef class CsvFragmentScanOptions(FragmentScanOptions): """ Parameters ---------- - other : CsvFragmentScanOptions + other : pyarrow.dataset.CsvFragmentScanOptions Returns ------- @@ -2298,7 +2298,7 @@ cdef class JsonFileFormat(FileFormat): """ Parameters ---------- - other : JsonFileFormat + other : pyarrow.dataset.JsonFileFormat Returns ------- @@ -2365,7 +2365,7 @@ cdef class 
JsonFragmentScanOptions(FragmentScanOptions): """ Parameters ---------- - other : JsonFragmentScanOptions + other : pyarrow.dataset.JsonFragmentScanOptions Returns ------- @@ -2425,7 +2425,7 @@ cdef class Partitioning(_Weakrefable): Returns ------- - Expression + pyarrow.dataset.Expression """ cdef CResult[CExpression] result result = self.partitioning.Parse(tobytes(path)) diff --git a/python/pyarrow/_dataset_orc.pyx b/python/pyarrow/_dataset_orc.pyx index 19338b240fd..a8cce336222 100644 --- a/python/pyarrow/_dataset_orc.pyx +++ b/python/pyarrow/_dataset_orc.pyx @@ -35,7 +35,7 @@ cdef class OrcFileFormat(FileFormat): """ Parameters ---------- - other : OrcFileFormat + other : pyarrow.dataset.OrcFileFormat Returns ------- diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index d5f96d3142b..875e4e833de 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -201,7 +201,7 @@ cdef class ParquetFileFormat(FileFormat): """ Parameters ---------- - other : ParquetFileFormat + other : pyarrow.dataset.ParquetFileFormat Returns ------- @@ -523,7 +523,7 @@ cdef class ParquetReadOptions(_Weakrefable): """ Parameters ---------- - other : ParquetReadOptions + other : pyarrow.dataset.ParquetReadOptions Returns ------- @@ -755,7 +755,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): """ Parameters ---------- - other : ParquetFragmentScanOptions + other : pyarrow.dataset.ParquetFragmentScanOptions Returns ------- diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 764a6d070bd..dbd7ebe5e4d 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -508,7 +508,7 @@ cdef class FileSystem(_Weakrefable): """ Parameters ---------- - other : FileSystem + other : pyarrow.fs.FileSystem Returns ------- diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx index db1e804001b..d36dad67abb 100644 --- a/python/pyarrow/_json.pyx +++ b/python/pyarrow/_json.pyx @@ -86,7 +86,7 
@@ cdef class ReadOptions(_Weakrefable): """ Parameters ---------- - other : ReadOptions + other : pyarrow.json.ReadOptions Returns ------- @@ -224,7 +224,7 @@ cdef class ParseOptions(_Weakrefable): """ Parameters ---------- - other : ParseOptions + other : pyarrow.json.ParseOptions Returns ------- diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 9c3f9af8c46..8423569c7df 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1295,7 +1295,7 @@ cdef class Array(_PandasConvertible): """ Parameters ---------- - other : Array + other : pyarrow.Array Returns ------- @@ -2366,7 +2366,7 @@ cdef class UnionArray(Array): Returns ------- - field : Array + field : pyarrow.Array The given child field. """ import warnings diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index cf3b945534b..627b67be340 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -197,12 +197,12 @@ cdef class Projector(_Weakrefable): Parameters ---------- - batch : RecordBatch + batch : pyarrow.RecordBatch selection : pyarrow.gandiva.SelectionVector Returns ------- - list[Array] + list[pyarrow.Array] """ cdef vector[shared_ptr[CArray]] results if selection is None: @@ -245,7 +245,7 @@ cdef class Filter(_Weakrefable): Parameters ---------- - batch : RecordBatch + batch : pyarrow.RecordBatch pool : MemoryPool dtype : DataType or str, default int32 @@ -334,7 +334,7 @@ cdef class TreeExprBuilder(_Weakrefable): Parameters ---------- root_node : pyarrow.gandiva.Node - return_field : Field + return_field : pyarrow.Field Returns ------- From 764ed621c5e1c2e102e705cfe65b43a1fba8acf3 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 09:57:50 -0400 Subject: [PATCH 03/19] Update python/pyarrow/array.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/array.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/array.pxi
b/python/pyarrow/array.pxi index 8423569c7df..52ac901ab0b 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1266,7 +1266,7 @@ cdef class Array(_PandasConvertible): def format(self, **kwargs): """ - DEPRECATED, use Array.to_string + DEPRECATED, use pyarrow.Array.to_string Parameters ---------- From a710339e32665abad745d32f0387e64049b4e964 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 09:58:10 -0400 Subject: [PATCH 04/19] Update python/pyarrow/gandiva.pyx Co-authored-by: Sutou Kouhei --- python/pyarrow/gandiva.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index 627b67be340..b796befe299 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -372,7 +372,7 @@ cdef class TreeExprBuilder(_Weakrefable): def make_field(self, Field field not None): """ - Create a node with an arrow field. + Create a node with an Arrow field. 
Parameters ---------- From 020c32a05bb83af53fd5082c76c1d40e424d5ae0 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 09:58:25 -0400 Subject: [PATCH 05/19] Update python/pyarrow/gandiva.pyx Co-authored-by: Sutou Kouhei --- python/pyarrow/gandiva.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index b796befe299..389b97be4cc 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -376,7 +376,7 @@ cdef class TreeExprBuilder(_Weakrefable): Parameters ---------- - field : Field + field : pyarrow.Field Returns ------- From 90fb99d4434f1281e6068c88c8efc7044101dd7f Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 09:59:27 -0400 Subject: [PATCH 06/19] Update python/pyarrow/gandiva.pyx Co-authored-by: Sutou Kouhei --- python/pyarrow/gandiva.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index 389b97be4cc..ba3c4e9e7a2 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -412,7 +412,7 @@ cdef class TreeExprBuilder(_Weakrefable): Parameters ---------- - children : pyarrow.gandiva.NodeVector + children : list[pyarrow.gandiva.Node] Returns ------- From 0637b1ca746fcb0ffcd32dc9c9945e2be6d552f0 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 09:59:47 -0400 Subject: [PATCH 07/19] Update python/pyarrow/types.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/types.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 016f84166b7..5dea66857b9 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1986,7 +1986,7 @@ cdef class KeyValueMetadata(_Metadata, Mapping): Returns ------- - list + list[bytes] """ key =
tobytes(key) return [v for k, v in self.items() if k == key] From 187d73bae3fe6292f7d109bc1a31a51cca4c519e Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:00:11 -0400 Subject: [PATCH 08/19] Update python/pyarrow/types.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/types.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 5dea66857b9..19e369ddb55 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1962,7 +1962,7 @@ cdef class KeyValueMetadata(_Metadata, Mapping): Returns ------- - Any + bytes """ return self.metadata.value(i) From d5527bdcfa277b2c1bfac6d758dbc845188b1747 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:41:12 -0400 Subject: [PATCH 09/19] Update python/pyarrow/gandiva.pyx Co-authored-by: Sutou Kouhei --- python/pyarrow/gandiva.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index ba3c4e9e7a2..35bbf5018f0 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -433,7 +433,7 @@ cdef class TreeExprBuilder(_Weakrefable): Parameters ---------- - children : pyarrow.gandiva.NodeVector + children : list[pyarrow.gandiva.Node] Returns ------- From 64a0d7fe0b1f9586f6c33ae852bf45a0e8547dbb Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:41:31 -0400 Subject: [PATCH 10/19] Update python/pyarrow/io.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/io.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index c772fa2e048..27c8a4a374e 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -897,7 +897,7 @@ cdef class PythonFile(NativeFile): Parameters ---------- size : int - maximum number of bytes
read + Maximum number of bytes read """ return self.handle.readline(size) From 6b6f8d08286ddaae6f45ded0896b4030145049d7 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:41:48 -0400 Subject: [PATCH 11/19] Update python/pyarrow/io.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/io.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 27c8a4a374e..e3018ab4704 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -908,7 +908,7 @@ cdef class PythonFile(NativeFile): Parameters ---------- hint : int - maximum number of bytes read until we stop + Maximum number of bytes read until we stop """ return self.handle.readlines(hint) From d41b20d5d3f5bf9bed59d665ee4f6e5bf4efb9c6 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:42:05 -0400 Subject: [PATCH 12/19] Update python/pyarrow/types.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/types.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 19e369ddb55..f2dd59a0f1a 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1950,7 +1950,7 @@ cdef class KeyValueMetadata(_Metadata, Mapping): Returns ------- - str + bytes """ return self.metadata.key(i) From d07253a156948d3abbf3ef66a50b0b25b4f5e618 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:42:24 -0400 Subject: [PATCH 13/19] Update python/pyarrow/tensor.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/tensor.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index a39ecbd3ff3..ba68193aa42 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -1083,7 +1083,7 @@ shape: {0.shape}""".format(self) ---------- obj :
numpy.ndarray Data used to populate the rows. - dim_names : list, optional + dim_names : list[str], optional Names of the dimensions. """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) From 11fa110a58d45ca83cadede7cfbd5ba1675eba40 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:42:38 -0400 Subject: [PATCH 14/19] Update python/pyarrow/tensor.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/tensor.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index ba68193aa42..ab125ec3400 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -844,7 +844,7 @@ shape: {0.shape}""".format(self) ---------- obj : numpy.ndarray Data used to populate the rows. - dim_names : list, optional + dim_names : list[str], optional Names of the dimensions. """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) From 396700cd68e198550611310d4083dd22ac9727d4 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:43:01 -0400 Subject: [PATCH 15/19] Update python/pyarrow/table.pxi Co-authored-by: Alenka Frim --- python/pyarrow/table.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 3e79286758c..2eae38485dc 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -161,7 +161,7 @@ cdef class ChunkedArray(_PandasConvertible): def format(self, **kwargs): """ - DEPRECATED + DEPRECATED, use pyarrow.ChunkedArray.to_string Parameters ---------- From 0800ded39cb9ef43e30762636c3abf28111bf19a Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:43:15 -0400 Subject: [PATCH 16/19] Update python/pyarrow/tensor.pxi Co-authored-by: Sutou Kouhei --- python/pyarrow/tensor.pxi | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index ab125ec3400..50f474369c9 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -321,7 +321,7 @@ shape: {0.shape}""".format(self) ---------- obj : numpy.ndarray Data used to populate the rows. - dim_names : list, optional + dim_names : list[str], optional Names of the dimensions. """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) From dc9b6149617c27f38cb858c5127e6d683eb25e5d Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Thu, 17 Aug 2023 10:59:07 -0400 Subject: [PATCH 17/19] Add return values to docs --- python/pyarrow/tensor.pxi | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index 50f474369c9..1afce7f4a10 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -323,6 +323,10 @@ shape: {0.shape}""".format(self) Data used to populate the rows. dim_names : list[str], optional Names of the dimensions. + + Returns + ------- + pyarrow.SparseCOOTensor """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @@ -563,6 +567,10 @@ shape: {0.shape}""".format(self) ---------- i : int The physical index of the tensor dimension. + + Returns + ------- + str """ return frombytes(self.stp.dim_name(i)) @@ -616,6 +624,10 @@ shape: {0.shape}""".format(self) The dense numpy array that should be converted. dim_names : list, optional The names of the dimensions. + + Returns + ------- + pyarrow.SparseCSRMatrix """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @@ -803,6 +815,10 @@ shape: {0.shape}""".format(self) ---------- i : int The physical index of the tensor dimension. + + Returns + ------- + str """ return frombytes(self.stp.dim_name(i)) @@ -846,6 +862,10 @@ shape: {0.shape}""".format(self) Data used to populate the rows. dim_names : list[str], optional Names of the dimensions. 
+ + Returns + ------- + pyarrow.SparseCSCMatrix """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @@ -1034,6 +1054,10 @@ shape: {0.shape}""".format(self) ---------- i : int The physical index of the tensor dimension. + + Returns + ------- + str """ return frombytes(self.stp.dim_name(i)) @@ -1085,6 +1109,10 @@ shape: {0.shape}""".format(self) Data used to populate the rows. dim_names : list[str], optional Names of the dimensions. + + Returns + ------- + pyarrow.SparseCSFTensor """ return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names)) @@ -1242,6 +1270,10 @@ shape: {0.shape}""".format(self) ---------- i : int The physical index of the tensor dimension. + + Returns + ------- + str """ return frombytes(self.stp.dim_name(i)) From 88df84cf2882db73861bed991099a44a2ec65c9e Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Thu, 17 Aug 2023 15:10:52 -0400 Subject: [PATCH 18/19] Update python/pyarrow/_parquet.pyx --- python/pyarrow/_parquet.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 96b95e76da8..ea4d5e30a45 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1188,7 +1188,7 @@ cdef class ParquetReader(_Weakrefable): Parameters ---------- - source : object + source : str, pyarrow.Buffer, pyarrow.NativeFile, or file-like Python object use_memory_map : bool, default False read_dictionary : iterable[int or str], optional metadata : FileMetaData, optional From 259dc249e396fc8ab55468bff9097121e703f30f Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Thu, 17 Aug 2023 16:06:51 -0400 Subject: [PATCH 19/19] Fix broken links --- python/pyarrow/_compute.pyx | 2 +- python/pyarrow/_dataset.pyx | 4 ++-- python/pyarrow/_dataset_parquet.pyx | 2 +- python/pyarrow/_parquet.pyx | 2 +- python/pyarrow/array.pxi | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git 
a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 0d2e4ca1bd0..453f487c4de 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -2348,7 +2348,7 @@ cdef class Expression(_Weakrefable): """ Parameters ---------- - other : pyarrow.compute.Expression + other : pyarrow.dataset.Expression Returns ------- diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index f752a27e60a..8f5688de290 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -2011,7 +2011,7 @@ cdef class IpcFileFormat(FileFormat): Returns ------- - pyarrow.dataset.IpcWriteOptions + pyarrow.ipc.IpcWriteOptions """ cdef IpcFileWriteOptions opts = \ FileFormat.make_write_options(self) @@ -2096,7 +2096,7 @@ cdef class CsvFileFormat(FileFormat): Returns ------- - pyarrow.dataset.CsvWriteOptions + pyarrow.csv.WriteOptions """ cdef CsvFileWriteOptions opts = \ FileFormat.make_write_options(self) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 875e4e833de..4de396f4f50 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -185,7 +185,7 @@ cdef class ParquetFileFormat(FileFormat): Returns ------- - pyarrow.dataset.ParquetFileWriteOptions + pyarrow.dataset.FileWriteOptions """ opts = FileFormat.make_write_options(self) ( opts).update(**kwargs) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index ea4d5e30a45..50b4ed8e86e 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1188,7 +1188,7 @@ cdef class ParquetReader(_Weakrefable): Parameters ---------- - source : str, pyarrow.Buffer, pyarrow.NativeFile, or file-like Python object + source : str, pathlib.Path, pyarrow.NativeFile, or file-like object use_memory_map : bool, default False read_dictionary : iterable[int or str], optional metadata : FileMetaData, optional diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 
52ac901ab0b..ce4eafd8e30 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2357,7 +2357,7 @@ cdef class UnionArray(Array): def child(self, int pos): """ - DEPRECATED, use field instead. + DEPRECATED, use field() instead. Parameters ----------