From cf709ac9b831ceb17a99b82ad5d64477f7dd51b3 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Wed, 12 Nov 2025 23:44:31 +0100 Subject: [PATCH 1/6] initial commit --- docs/source/python/api/compute.rst | 66 +++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 703eaf4ce5d8..8ba41d694b86 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -32,7 +32,11 @@ Aggregations approximate_median count count_distinct + first + first_last index + kurtosis + last max mean min @@ -41,6 +45,7 @@ Aggregations pivot_wider product quantile + skew stddev sum tdigest @@ -69,6 +74,7 @@ throws an ``ArrowInvalid`` exception when overflow is detected. cumulative_prod cumulative_prod_checked cumulative_max + cumulative_mean cumulative_min Arithmetic Functions @@ -126,6 +132,7 @@ representation based on the rounding criterion. ceil floor round + round_binary round_to_multiple trunc @@ -171,6 +178,24 @@ variants which detect domain errors where appropriate. tan tan_checked +Hyperbolic Trigonometric Functions +----------------------- + +Hyperbolic trigonometric functions are also supported, and, where applicable, also offer ``_checked`` +variants which detect domain errors if needed. + +.. autosummary:: + :toctree: ../generated/ + + acosh + acosh_checked + asinh + atanh + atanh_checked + cosh + sinh + tanh + Comparisons ----------- @@ -281,6 +306,7 @@ String Transforms utf8_capitalize utf8_length utf8_lower + utf8_normalize utf8_replace_slice utf8_reverse utf8_swapcase @@ -338,6 +364,7 @@ String Component Extraction :toctree: ../generated/ extract_regex + extract_regex_span String Joining -------------- @@ -369,7 +396,9 @@ Containment Tests find_substring find_substring_regex index_in + index_in_meta_binary is_in + is_in_meta_binary match_like match_substring match_substring_regex @@ -425,10 +454,11 @@ Temporal Component Extraction day_of_week day_of_year hour + is_dst + is_leap_year iso_week iso_year iso_calendar - is_leap_year microsecond millisecond minute @@ -472,12 +502,21 @@ Timezone Handling assume_timezone local_timestamp +Random Number Generation +----------------- + +.. autosummary:: + :toctree: ../generated/ + + random + Associative Transforms ---------------------- .. autosummary:: :toctree: ../generated/ + dictionary_decode dictionary_encode unique value_counts @@ -492,7 +531,9 @@ Selections array_take drop_null filter + inverse_permutation take + scatter Sorts and Partitions -------------------- @@ -501,9 +542,14 @@ Sorts and Partitions :toctree: ../generated/ array_sort_indices + bottom_k_unstable partition_nth_indices + rank + rank_normal + rank_quantile select_k_unstable sort_indices + top_k_unstable Structural Transforms --------------------- @@ -531,6 +577,7 @@ Pairwise Functions :toctree: ../generated/ pairwise_diff + pairwise_diff_checked Compute Options --------------- @@ -542,6 +589,7 @@ Compute Options AssumeTimezoneOptions CastOptions CountOptions + CumulativeOptions CumulativeSumOptions DayOfWeekOptions DictionaryEncodeOptions @@ -550,6 +598,7 @@ Compute Options FilterOptions IndexOptions JoinOptions + ListFlattenOptions ListSliceOptions MakeStructOptions MapLookupOptions @@ -562,8 +611,11 @@ Compute Options PartitionNthOptions PivotWiderOptions QuantileOptions + RandomOptions + RankQuantileOptions ReplaceSliceOptions ReplaceSubstringOptions + RoundBinaryOptions RoundOptions RoundTemporalOptions RoundToMultipleOptions @@ -571,6 +623,7 @@ Compute Options ScalarAggregateOptions SelectKOptions SetLookupOptions + SkewOptions SliceOptions SortOptions SplitOptions @@ -581,8 +634,10 @@ Compute Options TakeOptions TDigestOptions TrimOptions + Utf8NormalizeOptions VarianceOptions WeekOptions + WinsorizeOptions User-Defined Functions ---------------------- @@ -593,6 +648,15 @@ User-Defined Functions register_scalar_function UdfContext +Statistical Functions +---------------------- + +.. autosummary:: + :toctree: ../generated/ + + winsorize + + Expression Functions -------------------- From cb4cb705ef78cbc3a6da5820a786b8c926011749 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 13 Nov 2025 23:24:43 +0100 Subject: [PATCH 2/6] Adding arange asarray repeat infer_type to Arrays doc page --- docs/source/python/api/arrays.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index dc24be8bd06d..148f2fbe8be4 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -29,12 +29,22 @@ These functions create new Arrow arrays: .. autosummary:: :toctree: ../generated/ + arange array + asarray nulls + repeat Array Types ----------- +This function infers Arrow data type: + +.. autosummary:: + :toctree: ../generated/ + + infer_type + An array's Python class depends on its data type. Concrete array classes may expose data type-specific methods or properties. @@ -140,6 +150,8 @@ classes may expose data type-specific methods or properties. TimestampScalar DurationScalar MonthDayNanoIntervalScalar + Decimal32Scalar + Decimal64Scalar Decimal128Scalar Decimal256Scalar DictionaryScalar From 20c0f2944de73913bf26d76d60d74f2b6a53028b Mon Sep 17 00:00:00 2001 From: tadeja Date: Fri, 14 Nov 2025 11:50:42 +0100 Subject: [PATCH 3/6] Apply suggestions from code review Co-authored-by: Alenka Frim --- docs/source/python/api/compute.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 8ba41d694b86..cf5f03c07e1d 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -179,7 +179,7 @@ variants which detect domain errors where appropriate. tan_checked Hyperbolic Trigonometric Functions ------------------------ +---------------------------------- Hyperbolic trigonometric functions are also supported, and, where applicable, also offer ``_checked`` variants which detect domain errors if needed. @@ -503,7 +503,7 @@ Timezone Handling local_timestamp Random Number Generation ------------------ +------------------------ .. autosummary:: :toctree: ../generated/ @@ -649,14 +649,13 @@ User-Defined Functions UdfContext Statistical Functions ----------------------- +--------------------- .. autosummary:: :toctree: ../generated/ winsorize - Expression Functions -------------------- From 56043a161f563f631c1ed6f9433f6f9913d22944 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Fri, 14 Nov 2025 12:55:50 +0100 Subject: [PATCH 4/6] Adding asarray to __init__.py --- python/pyarrow/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index da2fe9664754..641806161bf9 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -193,7 +193,7 @@ def print_entry(label, value): SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix, SparseCSFTensor, infer_type, from_numpy_dtype, - arange, + arange, asarray, NullArray, NumericArray, IntegerArray, FloatingPointArray, BooleanArray, From bd372aa2c7a1b231753485f9e9ad954828f6ad60 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Fri, 14 Nov 2025 22:37:02 +0100 Subject: [PATCH 5/6] Adding more missing functions and options to compute.rst --- docs/source/python/api/compute.rst | 34 +++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index cf5f03c07e1d..e3d0916be2a1 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -93,6 +93,8 @@ throws an ``ArrowInvalid`` exception when overflow is detected. add_checked divide divide_checked + exp + expm1 multiply multiply_checked negate @@ -551,6 +553,14 @@ Sorts and Partitions sort_indices top_k_unstable +Statistical Functions +--------------------- + +.. autosummary:: + :toctree: ../generated/ + + winsorize + Structural Transforms --------------------- @@ -595,6 +605,7 @@ Compute Options DictionaryEncodeOptions ElementWiseAggregateOptions ExtractRegexOptions + ExtractRegexSpanOptions FilterOptions IndexOptions JoinOptions @@ -606,12 +617,12 @@ Compute Options ModeOptions NullOptions PadOptions - ZeroFillOptions PairwiseOptions PartitionNthOptions PivotWiderOptions QuantileOptions RandomOptions + RankOptions RankQuantileOptions ReplaceSliceOptions ReplaceSubstringOptions @@ -638,23 +649,30 @@ Compute Options VarianceOptions WeekOptions WinsorizeOptions + ZeroFillOptions -User-Defined Functions ----------------------- +Functions Registry +------------------ .. autosummary:: :toctree: ../generated/ - register_scalar_function - UdfContext + call_function + call_tabular_function + get_function + list_functions -Statistical Functions ---------------------- +User-Defined Functions +---------------------- .. autosummary:: :toctree: ../generated/ - winsorize + register_aggregate_function + register_scalar_function + register_tabular_function + register_vector_function + UdfContext Expression Functions -------------------- From 80f3b01e634fb43e43a7b6546c55f32daf03fb0b Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Wed, 19 Nov 2025 12:08:54 +0100 Subject: [PATCH 6/6] Removing asarray and *_in_meta_binary for now --- docs/source/python/api/arrays.rst | 1 - docs/source/python/api/compute.rst | 2 -- python/pyarrow/__init__.py | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index 148f2fbe8be4..290ce09befb1 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -31,7 +31,6 @@ These functions create new Arrow arrays: arange array - asarray nulls repeat diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index e3d0916be2a1..b74d674ac613 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -398,9 +398,7 @@ Containment Tests find_substring find_substring_regex index_in - index_in_meta_binary is_in - is_in_meta_binary match_like match_substring match_substring_regex diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 641806161bf9..da2fe9664754 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -193,7 +193,7 @@ def print_entry(label, value): SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix, SparseCSFTensor, infer_type, from_numpy_dtype, - arange, asarray, + arange, NullArray, NumericArray, IntegerArray, FloatingPointArray, BooleanArray,