45 changes: 45 additions & 0 deletions python/pyarrow/_compute.pyx
@@ -463,6 +463,11 @@ cdef class FunctionRegistry(_Weakrefable):
def get_function(self, name):
"""
Look up a function by name in the registry.

Parameters
----------
name : str
The name of the function to look up.
"""
cdef:
c_string c_name = tobytes(name)
@@ -485,6 +490,11 @@ def get_function(name):

The function is looked up in the global registry
(as returned by `function_registry()`).

Parameters
----------
name : str
The name of the function to look up.
"""
return _global_func_registry.get_function(name)
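
As a quick, editor-added illustration of the lookup documented above (not part of the diff), assuming only that `min_max` is a registered compute function:

```python
import pyarrow.compute as pc

# Look up a compute function by name in the global registry.
func = pc.get_function("min_max")
print(func.name)   # 'min_max'
print(func.arity)  # number of arguments the function expects
```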

@@ -502,6 +512,17 @@ def call_function(name, args, options=None, memory_pool=None):

The function is looked up in the global registry
(as returned by `function_registry()`).

Parameters
----------
name : str
The name of the function to call.
args : list
The arguments to the function.
options : optional
Options provided to the function.
memory_pool : MemoryPool, optional
Memory pool to use for allocations during function execution.
"""
func = _global_func_registry.get_function(name)
return func.call(args, options=options, memory_pool=memory_pool)
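
For context, a minimal sketch of calling a registered function by name, equivalent to invoking `pc.add` directly:

```python
import pyarrow as pa
import pyarrow.compute as pc

a = pa.array([1, 2, 3])
b = pa.array([10, 20, 30])

# Dispatch by name through the global registry.
result = pc.call_function("add", [a, b])
print(result)  # -> [11, 22, 33]
```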
@@ -524,6 +545,14 @@ cdef class FunctionOptions(_Weakrefable):

@staticmethod
def deserialize(buf):
"""
Deserialize options for a function.

Parameters
----------
buf : Buffer
The buffer containing the data to deserialize.
"""
cdef:
shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
CResult[unique_ptr[CFunctionOptions]] maybe_options = \
@@ -670,13 +699,29 @@ class CastOptions(_CastOptions):

@staticmethod
def safe(target_type=None):
""""
Create a CastOptions for a safe cast.

Parameters
----------
target_type : optional
Target cast type for the safe cast.
"""
self = CastOptions()
self._set_safe()
self._set_type(target_type)
return self

@staticmethod
def unsafe(target_type=None):
""""
Create a CastOptions for an unsafe cast.

Parameters
----------
target_type : optional
Target cast type for the unsafe cast.
"""
self = CastOptions()
self._set_unsafe()
self._set_type(target_type)
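
To illustrate the safe/unsafe distinction these constructors document, here is a hedged sketch that routes the options through `call_function` (`cast` is a regular compute function that accepts a `CastOptions`):

```python
import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1000, 2000])

# A safe cast refuses lossy conversions: 1000 does not fit in int8.
try:
    pc.call_function("cast", [arr], options=pc.CastOptions.safe(pa.int8()))
except pa.ArrowInvalid as e:
    print("safe cast refused:", e)

# An unsafe cast performs the conversion anyway (values are truncated).
print(pc.call_function("cast", [arr], options=pc.CastOptions.unsafe(pa.int8())))
```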
85 changes: 43 additions & 42 deletions python/pyarrow/_csv.pyx
@@ -59,26 +59,26 @@ cdef class ReadOptions(_Weakrefable):
This will determine multi-threading granularity as well as
the size of individual record batches or table chunks.
Minimum valid value for block size is 1
skip_rows: int, optional (default 0)
skip_rows : int, optional (default 0)
Member:
Just for the record, did you have to do these by hand, or is there a mechanical fixer?

Member (Author):
Did those by hand; I wanted to check what issues the docstrings had and thus had to look at them anyway. At that point, fixing them was a minor deal.

If it doesn't already exist, I guess an automation to fix them wouldn't be too hard to build, but given that @kszucs has a PR to prevent broken/missing docstrings from happening again, I hope this is the only time we have to do this work.

Member:
@amol- yes, #7732 should be ready depending on which rules we want to enable at first.
The number of rows to skip before the column names (if any)
and the CSV data.
skip_rows_after_names: int, optional (default 0)
skip_rows_after_names : int, optional (default 0)
The number of rows to skip after the column names.
This number can be larger than the number of rows in one
block, and empty rows are counted.
The order of application is as follows:
- `skip_rows` is applied (if non-zero);
- column names are read (unless `column_names` is set);
- `skip_rows_after_names` is applied (if non-zero).
column_names: list, optional
column_names : list, optional
The column names of the target table. If empty, fall back on
`autogenerate_column_names`.
autogenerate_column_names: bool, optional (default False)
autogenerate_column_names : bool, optional (default False)
Whether to autogenerate column names if `column_names` is empty.
If true, column names will be of the form "f0", "f1"...
If false, column names will be read from the first CSV row
after `skip_rows`.
encoding: str, optional (default 'utf8')
encoding : str, optional (default 'utf8')
The character encoding of the CSV data. Columns that cannot
decode using this encoding can still be read as Binary.
"""
@@ -235,22 +235,22 @@ cdef class ParseOptions(_Weakrefable):

Parameters
----------
delimiter: 1-character string, optional (default ',')
delimiter : 1-character string, optional (default ',')
The character delimiting individual cells in the CSV data.
quote_char: 1-character string or False, optional (default '"')
quote_char : 1-character string or False, optional (default '"')
The character used optionally for quoting CSV values
(False if quoting is not allowed).
double_quote: bool, optional (default True)
double_quote : bool, optional (default True)
Whether two quotes in a quoted CSV value denote a single quote
in the data.
escape_char: 1-character string or False, optional (default False)
escape_char : 1-character string or False, optional (default False)
The character used optionally for escaping special characters
(False if escaping is not allowed).
newlines_in_values: bool, optional (default False)
newlines_in_values : bool, optional (default False)
Whether newline characters are allowed in CSV values.
Setting this to True reduces the performance of multi-threaded
CSV reading.
ignore_empty_lines: bool, optional (default True)
ignore_empty_lines : bool, optional (default True)
Whether empty lines are ignored in CSV input.
If False, an empty line is interpreted as containing a single empty
value (assuming a one-column CSV file).
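
A minimal sketch of parsing semicolon-delimited data with these options (sample data is invented):

```python
import io
import pyarrow.csv as csv

data = b"x;y\n1;hello\n2;world\n"

opts = csv.ParseOptions(delimiter=";")
table = csv.read_csv(io.BytesIO(data), parse_options=opts)
print(table.num_columns)  # 2
```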
@@ -423,53 +423,53 @@ cdef class ConvertOptions(_Weakrefable):
----------
check_utf8 : bool, optional (default True)
Whether to check UTF8 validity of string columns.
column_types: pa.Schema or dict, optional
column_types : pa.Schema or dict, optional
Explicitly map column names to column types. Passing this argument
disables type inference on the defined columns.
null_values: list, optional
null_values : list, optional
A sequence of strings that denote nulls in the data
(defaults are appropriate in most cases). Note that by default,
string columns are not checked for null values. To enable
null checking for those, specify ``strings_can_be_null=True``.
true_values: list, optional
true_values : list, optional
A sequence of strings that denote true booleans in the data
(defaults are appropriate in most cases).
false_values: list, optional
false_values : list, optional
A sequence of strings that denote false booleans in the data
(defaults are appropriate in most cases).
decimal_point: 1-character string, optional (default '.')
decimal_point : 1-character string, optional (default '.')
The character used as decimal point in floating-point and decimal
data.
timestamp_parsers: list, optional
timestamp_parsers : list, optional
A sequence of strptime()-compatible format strings, tried in order
when attempting to infer or convert timestamp values (the special
value ISO8601() can also be given). By default, a fast built-in
ISO-8601 parser is used.
strings_can_be_null: bool, optional (default False)
strings_can_be_null : bool, optional (default False)
Whether string / binary columns can have null values.
If true, then strings in null_values are considered null for
string columns.
If false, then all strings are valid string values.
quoted_strings_can_be_null: bool, optional (default True)
quoted_strings_can_be_null : bool, optional (default True)
Whether quoted values can be null.
If true, then strings in "null_values" are also considered null
when they appear quoted in the CSV file. Otherwise, quoted values
are never considered null.
auto_dict_encode: bool, optional (default False)
auto_dict_encode : bool, optional (default False)
Whether to try to automatically dict-encode string / binary data.
If true, then when type inference detects a string or binary column,
it is dict-encoded up to `auto_dict_max_cardinality` distinct values
(per chunk), after which it switches to regular encoding.
This setting is ignored for non-inferred columns (those in
`column_types`).
auto_dict_max_cardinality: int, optional
auto_dict_max_cardinality : int, optional
The maximum dictionary cardinality for `auto_dict_encode`.
This value is per chunk.
include_columns: list, optional
include_columns : list, optional
The names of columns to include in the Table.
If empty, the Table will include all columns from the CSV file.
If not empty, only these columns will be included, in this order.
include_missing_columns: bool, optional (default False)
include_missing_columns : bool, optional (default False)
If false, columns in `include_columns` but not in the CSV file will
error out.
If true, columns in `include_columns` but not in the CSV file will
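
As a sketch of the conversion knobs above (editor-added, with invented data):

```python
import io
import pyarrow as pa
import pyarrow.csv as csv

data = b"id,flag\n1,Y\n2,N\nNA,Y\n"

opts = csv.ConvertOptions(
    column_types={"id": pa.int64()},      # disable inference for 'id'
    null_values=["NA"],                   # treat 'NA' as null
    true_values=["Y"], false_values=["N"],
)
table = csv.read_csv(io.BytesIO(data), convert_options=opts)
print(table.schema)  # id: int64, flag: bool
```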
@@ -848,20 +848,20 @@ def read_csv(input_file, read_options=None, parse_options=None,

Parameters
----------
input_file: string, path or file-like object
input_file : string, path or file-like object
The location of CSV data. If a string or path, and if it ends
with a recognized compressed file extension (e.g. ".gz" or ".bz2"),
the data is automatically decompressed when reading.
read_options: pyarrow.csv.ReadOptions, optional
read_options : pyarrow.csv.ReadOptions, optional
Options for the CSV reader (see pyarrow.csv.ReadOptions constructor
for defaults)
parse_options: pyarrow.csv.ParseOptions, optional
parse_options : pyarrow.csv.ParseOptions, optional
Options for the CSV parser
(see pyarrow.csv.ParseOptions constructor for defaults)
convert_options: pyarrow.csv.ConvertOptions, optional
convert_options : pyarrow.csv.ConvertOptions, optional
Options for converting CSV data
(see pyarrow.csv.ConvertOptions constructor for defaults)
memory_pool: MemoryPool, optional
memory_pool : MemoryPool, optional
Pool to allocate Table memory from

Returns
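
Typical use of `read_csv` with the options objects above (a sketch; the file name is hypothetical):

```python
import pyarrow.csv as csv

# Compressed input is decompressed automatically based on the extension.
table = csv.read_csv("measurements.csv.gz")  # hypothetical file
print(table.num_rows, table.num_columns)
```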
Expand Down Expand Up @@ -906,20 +906,20 @@ def open_csv(input_file, read_options=None, parse_options=None,

Parameters
----------
input_file: string, path or file-like object
input_file : string, path or file-like object
The location of CSV data. If a string or path, and if it ends
with a recognized compressed file extension (e.g. ".gz" or ".bz2"),
the data is automatically decompressed when reading.
read_options: pyarrow.csv.ReadOptions, optional
read_options : pyarrow.csv.ReadOptions, optional
Options for the CSV reader (see pyarrow.csv.ReadOptions constructor
for defaults)
parse_options: pyarrow.csv.ParseOptions, optional
parse_options : pyarrow.csv.ParseOptions, optional
Options for the CSV parser
(see pyarrow.csv.ParseOptions constructor for defaults)
convert_options: pyarrow.csv.ConvertOptions, optional
convert_options : pyarrow.csv.ConvertOptions, optional
Options for converting CSV data
(see pyarrow.csv.ConvertOptions constructor for defaults)
memory_pool: MemoryPool, optional
memory_pool : MemoryPool, optional
Pool to allocate Table memory from

Returns
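
`open_csv` is the streaming counterpart: instead of a single Table it yields record batches, which keeps memory bounded for large files. A sketch (file name hypothetical):

```python
import pyarrow.csv as csv

reader = csv.open_csv("big_file.csv")  # hypothetical file
for batch in reader:                   # each item is a pyarrow.RecordBatch
    print(batch.num_rows)
```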
Expand Down Expand Up @@ -1014,13 +1014,13 @@ def write_csv(data, output_file, write_options=None,

Parameters
----------
data: pyarrow.RecordBatch or pyarrow.Table
data : pyarrow.RecordBatch or pyarrow.Table
The data to write.
output_file: string, path, pyarrow.NativeFile, or file-like object
output_file : string, path, pyarrow.NativeFile, or file-like object
The location where to write the CSV data.
write_options: pyarrow.csv.WriteOptions
write_options : pyarrow.csv.WriteOptions
Options to configure writing the CSV data.
memory_pool: MemoryPool, optional
memory_pool : MemoryPool, optional
Pool for temporary allocations.
"""
cdef:
@@ -1047,17 +1047,18 @@


cdef class CSVWriter(_CRecordBatchWriter):
"""Writer to create a CSV file.
"""
Writer to create a CSV file.

Parameters
----------
sink: string, path, pyarrow.OutputStream or file-like object
sink : str, path, pyarrow.OutputStream or file-like object
The location where to write the CSV data.
schema: pyarrow.Schema
schema : pyarrow.Schema
The schema of the data to be written.
write_options: pyarrow.csv.WriteOptions
write_options : pyarrow.csv.WriteOptions
Options to configure writing the CSV data.
memory_pool: MemoryPool, optional
memory_pool : MemoryPool, optional
Pool for temporary allocations.
"""

3 changes: 2 additions & 1 deletion python/pyarrow/_cuda.pyx
@@ -187,7 +187,8 @@ cdef class Context(_Weakrefable):
return pyarrow_wrap_cudabuffer(cudabuf)

def foreign_buffer(self, address, size, base=None):
"""Create device buffer from address and size as a view.
"""
Create device buffer from address and size as a view.

The caller is responsible for allocating and freeing the
memory. When `address==size==0` then a new zero-sized buffer
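
A hedged sketch of the view semantics being documented, assuming a CUDA-enabled build; passing the owner as `base` keeps the underlying allocation alive for the lifetime of the view:

```python
import pyarrow.cuda as cuda

ctx = cuda.Context(0)
owned = ctx.new_buffer(64)  # device allocation we are responsible for

# View the same device memory without taking ownership.
view = ctx.foreign_buffer(owned.address, owned.size, base=owned)
print(view.size)  # 64
```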