diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index cc9847e2dce..c4c1e70d089 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -183,6 +183,32 @@ def sum(array):
     return call_function('sum', [array])
 
 
+def mode(array):
+    """
+    Return the mode (most common value) of a passed numerical
+    (chunked) array. If there is more than one such value, only
+    the smallest is returned.
+
+    Parameters
+    ----------
+    array : pyarrow.Array or pyarrow.ChunkedArray
+
+    Returns
+    -------
+    mode : pyarrow.StructScalar
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2])
+    >>> pc.mode(arr)
+    <pyarrow.StructScalar: {'mode': 2, 'count': 5}>
+
+    """
+    return call_function("mode", [array])
+
+
 def filter(data, mask, null_selection_behavior='drop'):
     """
     Select values (or records) from array- or table-like data given boolean
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index bdc057d707b..129c7826759 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -109,6 +109,31 @@ def test_sum_chunked_array(arrow_type):
     assert pc.sum(arr).as_py() is None  # noqa: E711
 
 
+def test_mode_array():
+    # ARROW-9917
+
+    arr = pa.array([1, 1, 3, 4, 3, 5], type='int64')
+    expected = {"mode": 1, "count": 2}
+    assert pc.mode(arr).as_py() == expected
+
+    arr = pa.array([], type='int64')
+    expected = {"mode": None, "count": None}
+    assert pc.mode(arr).as_py() == expected
+
+
+def test_mode_chunked_array():
+    # ARROW-9917
+
+    arr = pa.chunked_array([pa.array([1, 1, 3, 4, 3, 5], type='int64')])
+    expected = {"mode": 1, "count": 2}
+    assert pc.mode(arr).as_py() == expected
+
+    arr = pa.chunked_array((), type='int64')
+    expected = {"mode": None, "count": None}
+    assert arr.num_chunks == 0
+    assert pc.mode(arr).as_py() == expected
+
+
 def test_match_substring():
     arr = pa.array(["ab", "abc", "ba", None])
     result = pc.match_substring(arr, "ab")