From fa7cc524d27f74a89d00cceaa2ad8f0d36b13110 Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 9 Jan 2026 15:21:50 +1100 Subject: [PATCH 1/3] Implement the order() verb for basic sequences, Factors. --- src/biocutils/__init__.py | 2 + src/biocutils/order.py | 145 ++++++++++++++++++++++++++++++++++++++ tests/test_order.py | 52 ++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 src/biocutils/order.py create mode 100644 tests/test_order.py diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py index 81ce3ae..23deebe 100644 --- a/src/biocutils/__init__.py +++ b/src/biocutils/__init__.py @@ -63,3 +63,5 @@ from .biocobject import BiocObject from .table import table + +from .order import order, sort diff --git a/src/biocutils/order.py b/src/biocutils/order.py new file mode 100644 index 0000000..4990e54 --- /dev/null +++ b/src/biocutils/order.py @@ -0,0 +1,145 @@ +from typing import Any, Union, Sequence, Optional +from functools import singledispatch + +import numpy + +from .subset import subset +from .Factor import Factor + + +@singledispatch +def order( + x: Any, + force_last: Union[set, Sequence] = [None, numpy.ma.masked, numpy.nan], + decreasing: bool = False, + dtype: Optional[numpy.dtype] = None +) -> numpy.ndarray: + """ + Obtain an ordering of entries of ``x``. + This ordering should be stable. + + Args: + x: + Values to be ordered. + All values should be comparable aside from those listed in ``force_last``. + + force_last: + Values that are incomparable and will be placed last, i.e., at the end of the ordering. + No attempt is made to order values within ``force_last``. + + decreasing: + Whether to order by decreasing value. + Default is to order by increasing value. + + dtype: + Integer type of the output array. + If ``None``, defaults to the smallest type that can hold the length of ``x``. + + Returns: + Integer NumPy array containing the ordering required to permute ``x`` into a sorted state, + i.e., given an output array ``o``, ``subset(x, o)`` will be sorted. + + Examples: + >>> import biocutils + >>> + >>> x = [15,1,22,3,14] + >>> o = biocutils.order(x) + >>> print(o) + >>> biocutils.subset(x, o) + >>> o = biocutils.order(x, decreasing=True) + >>> print(o) + >>> biocutils.subset(x, o) + >>> + >>> x = ["C", "B", None, "D", "D", None, "A"] + >>> o = biocutils.order(x) + >>> print(o) + >>> biocutils.subset(x, o) + >>> o = biocutils.order(x, force_last=set([None, "A"])) + >>> print(o) + >>> biocutils.subset(x, o) + >>> + >>> # Factor ordering respects the ordering in the levels. + >>> x = biocutils.Factor.from_sequence(["C", "B", "D", "A", "C", "A", "D"], ["D", "C", "B", "A"]) + >>> o = biocutils.order(x) + >>> print(o) + >>> print(biocutils.subset(x, o)) + """ + + collected = [] + forced = [] + if len(force_last) > 0: + for i, y in enumerate(x): + if y not in force_last: + collected.append(i) + else: + forced.append(i) + else: + collected = list(range(len(x))) + + def key(i): + return x[i] + collected.sort(key=key, reverse=decreasing) + + if dtype is None: + dtype = numpy.min_scalar_type(len(x) - 1) + output = numpy.ndarray(len(x), dtype=dtype) + output[:len(collected)] = collected + if len(forced) > 0: + output[len(collected):] = forced + + return output + + +@order.register +def _order_Factor( + x: Factor, + force_last: Union[set, Sequence] = set([None]), + decreasing: bool = False, + dtype: Optional[numpy.dtype] = None +) -> numpy.ndarray: + + new_force_last = set() + for i, lev in enumerate(x.get_levels()): + if lev in force_last: + new_force_last.add(i) + if None in force_last: + new_force_last.add(-1) + + # For consistency with R, we order by codes. + return order.registry[object](x.get_codes(), force_last=new_force_last, decreasing=decreasing, dtype=dtype) + + +@singledispatch +def sort( + x: Any, + force_last: Union[set, Sequence] = [None, numpy.ma.masked], + decreasing: bool = False +) -> Any: + """ + Sort an arbitrary iterable sequence. + + Args: + x: + Values to be sorted. + All values should be comparable aside from those listed in ``force_last``. + + force_last: + Values that are incomparable and will be placed last, i.e., at the end of the ordering. + No attempt is made to order values within ``force_last``. + + decreasing: + Whether to sort by decreasing value. + Default is to sort by increasing value. + + Returns: + Sorted contents of ``x``. + This is usually of the same class as ``x``. + + Examples: + >>> import biocutils + >>> biocutils.sort(range(20, 10, -1)) + >>> biocutils.sort(["A", "B", None, "C", "D"], decreasing=True) + >>> import numpy + >>> biocutils.sort(numpy.random.rand(10)) + """ + return subset(x, order(x, force_last=force_last, decreasing=decreasing)) diff --git a/tests/test_order.py b/tests/test_order.py new file mode 100644 index 0000000..09fa5ab --- /dev/null +++ b/tests/test_order.py @@ -0,0 +1,52 @@ +import biocutils +import numpy + + +def test_order_simple(): + o = biocutils.order(["D", "B", "C", "A"]) + assert list(o) == [3, 1, 2, 0] + assert o.dtype == numpy.dtype("uint8") + + o = biocutils.order(["D", "B", "C", "A"], dtype=numpy.dtype("uint32")) + assert list(o) == [3, 1, 2, 0] + assert o.dtype == numpy.dtype("uint32") + + # Handles ties stably. + o = biocutils.order(["D", "B", "D", "C", "A", "D"]) + assert list(o) == [4, 1, 3, 0, 2, 5] + + # Reverses correctly with stable ties. + o = biocutils.order(["D", "B", "D", "C", "A", "D"], decreasing=True) + assert list(o) == [0, 2, 5, 3, 1, 4] + + # Ignores incomparable values. + o = biocutils.order(["D", "B", None, "C", "A"]) + assert list(o) == [4, 1, 3, 0, 2] + + +def test_order_Factor(): + f = biocutils.Factor.from_sequence(["D", "B", "C", "A"]) + o = biocutils.order(f) + assert list(o) == [3, 1, 2, 0] + o = biocutils.order(f, decreasing=True) + assert list(o) == [0, 2, 1, 3] + + # Respects the level ordering. + o = biocutils.order(biocutils.Factor.from_sequence(["D", "B", "C", "A"], ["D", "C", "B", "A"])) + assert list(o) == [0, 2, 1, 3] + + # Respects various incomparable values. + f = biocutils.Factor.from_sequence(["D", "B", None, "C", "A"]) + o = biocutils.order(f) + assert list(o) == [4, 1, 3, 0, 2] + o = biocutils.order(f, force_last=[None, "A"]) + assert list(o) == [1, 3, 0, 2, 4] + + +def test_order_sort(): + assert biocutils.sort(["A", "B", None, "C", "D"], decreasing=True) == ["D", "C", "B", "A", None] + + x = numpy.random.rand(20) + s = biocutils.sort(x) + assert s.dtype == x.dtype + assert (s == sorted(x)).all() From 852c4fb25cf2ce659b572ea2214862585180c0e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 9 Jan 2026 04:23:18 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/biocutils/__init__.py | 2 +- src/biocutils/order.py | 121 ++++++++++++++++++++++++++++++-------- 2 files changed, 96 insertions(+), 27 deletions(-) diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py index 23deebe..3a3497e 100644 --- a/src/biocutils/__init__.py +++ b/src/biocutils/__init__.py @@ -64,4 +64,4 @@ from .biocobject import BiocObject from .table import table -from .order import order, sort +from .order import order, sort diff --git a/src/biocutils/order.py b/src/biocutils/order.py index 4990e54..dacefca 100644 --- a/src/biocutils/order.py +++ b/src/biocutils/order.py @@ -12,7 +12,7 @@ def order( x: Any, force_last: Union[set, Sequence] = [None, numpy.ma.masked, numpy.nan], decreasing: bool = False, - dtype: Optional[numpy.dtype] = None + dtype: Optional[numpy.dtype] = None, ) -> numpy.ndarray: """ Obtain an ordering of entries of ``x``. @@ -42,27 +42,83 @@ def order( Examples: >>> import biocutils >>> - >>> x = [15,1,22,3,14] - >>> o = biocutils.order(x) + >>> x = [ + ... 15, + ... 1, + ... 22, + ... 3, + ... 14, + ... ] + >>> o = biocutils.order( + ... x + ... ) >>> print(o) - >>> biocutils.subset(x, o) - >>> o = biocutils.order(x, decreasing=True) + >>> biocutils.subset( + ... x, o + ... ) + >>> o = biocutils.order( + ... x, + ... decreasing=True, + ... ) >>> print(o) - >>> biocutils.subset(x, o) + >>> biocutils.subset( + ... x, o + ... ) >>> - >>> x = ["C", "B", None, "D", "D", None, "A"] - >>> o = biocutils.order(x) + >>> x = [ + ... "C", + ... "B", + ... None, + ... "D", + ... "D", + ... None, + ... "A", + ... ] + >>> o = biocutils.order( + ... x + ... ) >>> print(o) - >>> biocutils.subset(x, o) - >>> o = biocutils.order(x, force_last=set([None, "A"])) + >>> biocutils.subset( + ... x, o + ... ) + >>> o = biocutils.order( + ... x, + ... force_last=set( + ... [None, "A"] + ... ), + ... ) >>> print(o) - >>> biocutils.subset(x, o) + >>> biocutils.subset( + ... x, o + ... ) >>> >>> # Factor ordering respects the ordering in the levels. - >>> x = biocutils.Factor.from_sequence(["C", "B", "D", "A", "C", "A", "D"], ["D", "C", "B", "A"]) - >>> o = biocutils.order(x) + >>> x = biocutils.Factor.from_sequence( + ... [ + ... "C", + ... "B", + ... "D", + ... "A", + ... "C", + ... "A", + ... "D", + ... ], + ... [ + ... "D", + ... "C", + ... "B", + ... "A", + ... ], + ... ) + >>> o = biocutils.order( + ... x + ... ) >>> print(o) - >>> print(biocutils.subset(x, o)) + >>> print( + ... biocutils.subset( + ... x, o + ... ) + ... ) """ collected = [] @@ -78,14 +134,15 @@ def order( def key(i): return x[i] + collected.sort(key=key, reverse=decreasing) if dtype is None: dtype = numpy.min_scalar_type(len(x) - 1) output = numpy.ndarray(len(x), dtype=dtype) - output[:len(collected)] = collected + output[: len(collected)] = collected if len(forced) > 0: - output[len(collected):] = forced + output[len(collected) :] = forced return output @@ -95,9 +152,8 @@ def _order_Factor( x: Factor, force_last: Union[set, Sequence] = set([None]), decreasing: bool = False, - dtype: Optional[numpy.dtype] = None + dtype: Optional[numpy.dtype] = None, ) -> numpy.ndarray: - new_force_last = set() for i, lev in enumerate(x.get_levels()): if lev in force_last: @@ -110,11 +166,7 @@ def _order_Factor( @singledispatch -def sort( - x: Any, - force_last: Union[set, Sequence] = [None, numpy.ma.masked], - decreasing: bool = False -) -> Any: +def sort(x: Any, force_last: Union[set, Sequence] = [None, numpy.ma.masked], decreasing: bool = False) -> Any: """ Sort an arbitrary iterable sequence. @@ -137,9 +189,26 @@ def sort( Examples: >>> import biocutils - >>> biocutils.sort(range(20, 10, -1)) - >>> biocutils.sort(["A", "B", None, "C", "D"], decreasing=True) + >>> biocutils.sort( + ... range( + ... 20, 10, -1 + ... ) + ... ) + >>> biocutils.sort( + ... [ + ... "A", + ... "B", + ... None, + ... "C", + ... "D", + ... ], + ... decreasing=True, + ... ) >>> import numpy - >>> biocutils.sort(numpy.random.rand(10)) + >>> biocutils.sort( + ... numpy.random.rand( + ... 10 + ... ) + ... ) """ return subset(x, order(x, force_last=force_last, decreasing=decreasing)) From f99e9fffbd8b0fc6088ba26837451af1e3d5354a Mon Sep 17 00:00:00 2001 From: LTLA Date: Fri, 9 Jan 2026 15:29:07 +1100 Subject: [PATCH 3/3] Get some more coverage. --- tests/test_order.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_order.py b/tests/test_order.py index 09fa5ab..9f4c8df 100644 --- a/tests/test_order.py +++ b/tests/test_order.py @@ -23,6 +23,9 @@ def test_order_simple(): o = biocutils.order(["D", "B", None, "C", "A"]) assert list(o) == [4, 1, 3, 0, 2] + o = biocutils.order(["D", "B", "C", "A"], force_last=[]) # for coverage purposes. + assert list(o) == [3, 1, 2, 0] + def test_order_Factor(): f = biocutils.Factor.from_sequence(["D", "B", "C", "A"]) @@ -31,6 +34,9 @@ def test_order_Factor(): o = biocutils.order(f, decreasing=True) assert list(o) == [0, 2, 1, 3] + o = biocutils.order(f, force_last=[]) # for coverage purposes. + assert list(o) == [3, 1, 2, 0] + # Respects the level ordering. o = biocutils.order(biocutils.Factor.from_sequence(["D", "B", "C", "A"], ["D", "C", "B", "A"])) assert list(o) == [0, 2, 1, 3]