From 06126cf8c936a24d4e692fbf313127b9ad801649 Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Thu, 8 Jan 2026 21:58:35 +1100
Subject: [PATCH 1/6] Added utility to identify duplicates.

---
 src/biocutils/duplicated.py | 59 +++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 src/biocutils/duplicated.py

diff --git a/src/biocutils/duplicated.py b/src/biocutils/duplicated.py
new file mode 100644
index 0000000..fbc8f70
--- /dev/null
+++ b/src/biocutils/duplicated.py
@@ -0,0 +1,59 @@
+import numpy
+
+
+@singledispatch
+def duplicated(x: Any, incomparables: set = set(), from_last: bool = False) -> numpy.ndarray:
+    available = set()
+    output = numpy.ndarray(len(x), dtype=numpy.bool_)
+
+    def process(i, y):
+        if y in incomparables:
+            output[i] = False
+        elif y in available:
+            output[i] = True
+        else:
+            available.add(y)
+            output[i] = False
+
+    if not from_last:
+        for i, y in enumerate(x):
+            process(i, y)
+    else:
+        for i in range(len(x) - 1, -1, -1):
+            process(i, x[i])
+
+    return output
+
+
+@duplicated.register
+def _duplicated_Factor(x: Factor, incomparables: set = set(), from_last: bool = False) -> numpy.ndarray:
+    present = []
+    for lev in x.get_levels():
+        if lev in incomparables:
+            present.append(None)
+        else:
+            present.append(False)
+    
+    def process(i, y):
+        tmp = present[i]
+        if tmp is None:
+            output[i] = False
+        elif tmp:
+            output[i] = True
+        else:
+            present[i] = True
+            output[i] = False
+
+    if not from_last:
+        for i, y in enumerate(x):
+            process(i, y)
+    else:
+        for i in range(len(x) - 1, -1, -1):
+            process(i, x[i])
+
+    return output
+
+
+def unique(x: Any) -> Any:
+    return subset(x, numpy.where(duplicated(x))[0])
+

From e31002cdd900646b90c26d833f7a118f9fc55a17 Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Thu, 8 Jan 2026 23:55:09 +1100
Subject: [PATCH 2/6] Added docstrings and extra arguments to unique().

---
 src/biocutils/duplicated.py | 53 ++++++++++++++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/src/biocutils/duplicated.py b/src/biocutils/duplicated.py
index fbc8f70..6056a16 100644
--- a/src/biocutils/duplicated.py
+++ b/src/biocutils/duplicated.py
@@ -1,8 +1,33 @@
+from typing import Any, Union, Sequence
+
 import numpy
 
+from .Factor import Factor
+
 
 @singledispatch
-def duplicated(x: Any, incomparables: set = set(), from_last: bool = False) -> numpy.ndarray:
+def duplicated(x: Any, incomparables: Union[set, Sequence] = set(), from_last: bool = False) -> numpy.ndarray:
+    """
+    Find duplicated elements of ``x``.
+
+    Args:
+        x:
+            Object to be searched for duplicates.
+            This is usually a sequence that can be iterated over. 
+
+        incomparables:
+            Values of ``x`` that cannot be compared.
+            Any value of ``x`` in ``incomparables`` will never be a duplicate. 
+            Any object that has an ``__in__`` method can be used here.
+
+        from_last:
+            Whether to report the last occurrence as a non-duplicate.
+
+    Returns:
+        NumPy array of length equal to that of ``x``,
+        containing truthy values for only the first occurrence of each value of ``x``.
+        If ``from_last = True``, truthy values are only reported for the last occurrence of each value of ``x``.
+    """
     available = set()
     output = numpy.ndarray(len(x), dtype=numpy.bool_)
 
@@ -26,7 +51,7 @@ def process(i, y):
 
 
 @duplicated.register
-def _duplicated_Factor(x: Factor, incomparables: set = set(), from_last: bool = False) -> numpy.ndarray:
+def _duplicated_Factor(x: Factor, incomparables: Union[set, Sequence] = set(), from_last: bool = False) -> numpy.ndarray:
     present = []
     for lev in x.get_levels():
         if lev in incomparables:
@@ -54,6 +79,26 @@ def process(i, y):
     return output
 
 
-def unique(x: Any) -> Any:
-    return subset(x, numpy.where(duplicated(x))[0])
+def unique(x: Any, incomparables: Union[set, Sequence] = set(), from_last: bool = False) -> Any:
+    """
+    Get all unique values of ``x``.
+
+    Args:
+        x:
+            Object in which to find unique entries.
+            This is usually a sequence that can be iterated over. 
+
+        incomparables:
+            Values of ``x`` that cannot be compared.
+            Any value of ``x`` in ``incomparables`` will never be a duplicate. 
+            Any object that has an ``__in__`` method can be used here.
 
+        from_last:
+            Whether to retain the last occurrence of each value in ``x``. 
+            By default, the first occurrence is retained.
+
+    Returns:
+        An object containing unique values of ``x``.
+        This is usually of the same class as ``x``.
+    """
+    return subset(x, numpy.where(duplicated(x))[0])

From 45fc695e7bfc2a76955bf66ba144fd4a7cea7dde Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Fri, 9 Jan 2026 00:22:40 +1100
Subject: [PATCH 3/6] Added examples and exports, fixed unique().

---
 src/biocutils/__init__.py   |  2 ++
 src/biocutils/duplicated.py | 18 +++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/biocutils/__init__.py b/src/biocutils/__init__.py
index 81ce3ae..51c42a1 100644
--- a/src/biocutils/__init__.py
+++ b/src/biocutils/__init__.py
@@ -63,3 +63,5 @@
 
 from .biocobject import BiocObject
 from .table import table
+
+from .duplicated import duplicated, unique
diff --git a/src/biocutils/duplicated.py b/src/biocutils/duplicated.py
index 6056a16..6b2f6be 100644
--- a/src/biocutils/duplicated.py
+++ b/src/biocutils/duplicated.py
@@ -1,8 +1,10 @@
 from typing import Any, Union, Sequence
+from functools import singledispatch
 
 import numpy
 
 from .Factor import Factor
+from .subset import subset
 
 
 @singledispatch
@@ -27,7 +29,15 @@ def duplicated(x: Any, incomparables: Union[set, Sequence] = set(), from_last: b
         NumPy array of length equal to that of ``x``,
         containing truthy values for only the first occurrence of each value of ``x``.
         If ``from_last = True``, truthy values are only reported for the last occurrence of each value of ``x``.
+
+    Examples:
+        >>> import biocutils
+        >>> biocutils.duplicated([1,2,1,2,3,2])
+        >>> biocutils.duplicated([1,2,1,2,3,2], from_last=True)
+        >>> biocutils.duplicated([1,2,None,None,3,2])
+        >>> biocutils.duplicated([1,2,None,None,3,2], incomparables=set([None]))
     """
+
     available = set()
     output = numpy.ndarray(len(x), dtype=numpy.bool_)
 
@@ -100,5 +110,11 @@ def unique(x: Any, incomparables: Union[set, Sequence] = set(), from_last: bool
     Returns:
         An object containing unique values of ``x``.
         This is usually of the same class as ``x``.
+
+    Examples:
+        >>> import biocutils
+        >>> biocutils.unique([1,2,1,2,3,2])
+        >>> biocutils.unique([1,2,None,None,3,2])
+        >>> biocutils.unique([1,2,None,None,3,2], incomparables=set([None]))
     """
-    return subset(x, numpy.where(duplicated(x))[0])
+    return subset(x, numpy.where(numpy.logical_not(duplicated(x, incomparables=incomparables, from_last=from_last)))[0])

From ad0818f6a6e811d667b988f34354ed1f4d60b93d Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Fri, 9 Jan 2026 00:56:31 +1100
Subject: [PATCH 4/6] Added tests, fixed the Factor method of duplicated().

---
 src/biocutils/duplicated.py | 18 +++++++++++++-----
 tests/test_duplicated.py    | 22 ++++++++++++++++++++++
 2 files changed, 35 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_duplicated.py

diff --git a/src/biocutils/duplicated.py b/src/biocutils/duplicated.py
index 6b2f6be..9d859b1 100644
--- a/src/biocutils/duplicated.py
+++ b/src/biocutils/duplicated.py
@@ -68,23 +68,31 @@ def _duplicated_Factor(x: Factor, incomparables: Union[set, Sequence] = set(), f
             present.append(None)
         else:
             present.append(False)
-    
+
+    # Handling codes of -1, i.e., None.
+    if None in incomparables:
+        present.append(None)
+    else:
+        present.append(False)
+
+    output = numpy.ndarray(len(x), dtype=numpy.bool_)
     def process(i, y):
-        tmp = present[i]
+        tmp = present[y]
         if tmp is None:
             output[i] = False
         elif tmp:
             output[i] = True
         else:
-            present[i] = True
+            present[y] = True
             output[i] = False
 
     if not from_last:
-        for i, y in enumerate(x):
+        for i, y in enumerate(x.get_codes()):
             process(i, y)
     else:
+        codes = x.get_codes()
         for i in range(len(x) - 1, -1, -1):
-            process(i, x[i])
+            process(i, codes[i])
 
     return output
 
diff --git a/tests/test_duplicated.py b/tests/test_duplicated.py
new file mode 100644
index 0000000..b32b9b3
--- /dev/null
+++ b/tests/test_duplicated.py
@@ -0,0 +1,22 @@
+import biocutils
+
+
+def test_duplicated_basic():
+    assert list(biocutils.duplicated([1,2,1,2,3,2])) == [False, False, True, True, False, True] 
+    assert list(biocutils.duplicated([1,2,1,2,3,2], from_last=True)) == [True, True, False, True, False, False] 
+    assert list(biocutils.duplicated([1,2,None,None,3,2,3])) == [False, False, False, True, False, True, True]
+    assert list(biocutils.duplicated([1,2,None,None,3,2,3], incomparables=set([None]))) == [False, False, False, False, False, True, True]
+
+
+def test_duplicated_Factor():
+    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,1,2,3,2]))) == [False, False, True, True, False, True] 
+    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,1,2,3,2]), from_last=True)) == [True, True, False, True, False, False] 
+    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,None,None,3,2,3]))) == [False, False, False, True, False, True, True]
+    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,None,None,3,2,3]), incomparables=set([None]))) == [False, False, False, False, False, True, True]
+
+
+def test_unique():
+    assert biocutils.unique([1,2,1,2,3,2]) == [1,2,3]
+    assert biocutils.unique([1,2,1,2,3,2], from_last=True) == [1,3,2]
+    assert biocutils.unique([1,2,None,None,3,2]) == [1,2,None,3]
+    assert biocutils.unique([1,2,None,None,3,2], incomparables=set([None])) == [1,2,None,None,3]

From a1ceefab0213c5567c722b70b3792119f75438ba Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 8 Jan 2026 13:57:35 +0000
Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/biocutils/duplicated.py | 99 ++++++++++++++++++++++++++++++++-----
 tests/test_duplicated.py    |  8 +--
 2 files changed, 90 insertions(+), 17 deletions(-)

diff --git a/src/biocutils/duplicated.py b/src/biocutils/duplicated.py
index 9d859b1..812e987 100644
--- a/src/biocutils/duplicated.py
+++ b/src/biocutils/duplicated.py
@@ -15,11 +15,11 @@ def duplicated(x: Any, incomparables: Union[set, Sequence] = set(), from_last: b
     Args:
         x:
             Object to be searched for duplicates.
-            This is usually a sequence that can be iterated over. 
+            This is usually a sequence that can be iterated over.
 
         incomparables:
             Values of ``x`` that cannot be compared.
-            Any value of ``x`` in ``incomparables`` will never be a duplicate. 
+            Any value of ``x`` in ``incomparables`` will never be a duplicate.
             Any object that has an ``__in__`` method can be used here.
 
         from_last:
@@ -32,10 +32,50 @@ def duplicated(x: Any, incomparables: Union[set, Sequence] = set(), from_last: b
 
     Examples:
         >>> import biocutils
-        >>> biocutils.duplicated([1,2,1,2,3,2])
-        >>> biocutils.duplicated([1,2,1,2,3,2], from_last=True)
-        >>> biocutils.duplicated([1,2,None,None,3,2])
-        >>> biocutils.duplicated([1,2,None,None,3,2], incomparables=set([None]))
+        >>> biocutils.duplicated(
+        ...     [
+        ...         1,
+        ...         2,
+        ...         1,
+        ...         2,
+        ...         3,
+        ...         2,
+        ...     ]
+        ... )
+        >>> biocutils.duplicated(
+        ...     [
+        ...         1,
+        ...         2,
+        ...         1,
+        ...         2,
+        ...         3,
+        ...         2,
+        ...     ],
+        ...     from_last=True,
+        ... )
+        >>> biocutils.duplicated(
+        ...     [
+        ...         1,
+        ...         2,
+        ...         None,
+        ...         None,
+        ...         3,
+        ...         2,
+        ...     ]
+        ... )
+        >>> biocutils.duplicated(
+        ...     [
+        ...         1,
+        ...         2,
+        ...         None,
+        ...         None,
+        ...         3,
+        ...         2,
+        ...     ],
+        ...     incomparables=set(
+        ...         [None]
+        ...     ),
+        ... )
     """
 
     available = set()
@@ -61,7 +101,9 @@ def process(i, y):
 
 
 @duplicated.register
-def _duplicated_Factor(x: Factor, incomparables: Union[set, Sequence] = set(), from_last: bool = False) -> numpy.ndarray:
+def _duplicated_Factor(
+    x: Factor, incomparables: Union[set, Sequence] = set(), from_last: bool = False
+) -> numpy.ndarray:
     present = []
     for lev in x.get_levels():
         if lev in incomparables:
@@ -76,6 +118,7 @@ def _duplicated_Factor(x: Factor, incomparables: Union[set, Sequence] = set(), f
         present.append(False)
 
     output = numpy.ndarray(len(x), dtype=numpy.bool_)
+
     def process(i, y):
         tmp = present[y]
         if tmp is None:
@@ -104,15 +147,15 @@ def unique(x: Any, incomparables: Union[set, Sequence] = set(), from_last: bool
     Args:
         x:
             Object in which to find unique entries.
-            This is usually a sequence that can be iterated over. 
+            This is usually a sequence that can be iterated over.
 
         incomparables:
             Values of ``x`` that cannot be compared.
-            Any value of ``x`` in ``incomparables`` will never be a duplicate. 
+            Any value of ``x`` in ``incomparables`` will never be a duplicate.
             Any object that has an ``__in__`` method can be used here.
 
         from_last:
-            Whether to retain the last occurrence of each value in ``x``. 
+            Whether to retain the last occurrence of each value in ``x``.
             By default, the first occurrence is retained.
 
     Returns:
@@ -121,8 +164,38 @@ def unique(x: Any, incomparables: Union[set, Sequence] = set(), from_last: bool
 
     Examples:
         >>> import biocutils
-        >>> biocutils.unique([1,2,1,2,3,2])
-        >>> biocutils.unique([1,2,None,None,3,2])
-        >>> biocutils.unique([1,2,None,None,3,2], incomparables=set([None]))
+        >>> biocutils.unique(
+        ...     [
+        ...         1,
+        ...         2,
+        ...         1,
+        ...         2,
+        ...         3,
+        ...         2,
+        ...     ]
+        ... )
+        >>> biocutils.unique(
+        ...     [
+        ...         1,
+        ...         2,
+        ...         None,
+        ...         None,
+        ...         3,
+        ...         2,
+        ...     ]
+        ... )
+        >>> biocutils.unique(
+        ...     [
+        ...         1,
+        ...         2,
+        ...         None,
+        ...         None,
+        ...         3,
+        ...         2,
+        ...     ],
+        ...     incomparables=set(
+        ...         [None]
+        ...     ),
+        ... )
     """
     return subset(x, numpy.where(numpy.logical_not(duplicated(x, incomparables=incomparables, from_last=from_last)))[0])
diff --git a/tests/test_duplicated.py b/tests/test_duplicated.py
index b32b9b3..3e3a5c3 100644
--- a/tests/test_duplicated.py
+++ b/tests/test_duplicated.py
@@ -2,15 +2,15 @@
 
 
 def test_duplicated_basic():
-    assert list(biocutils.duplicated([1,2,1,2,3,2])) == [False, False, True, True, False, True] 
-    assert list(biocutils.duplicated([1,2,1,2,3,2], from_last=True)) == [True, True, False, True, False, False] 
+    assert list(biocutils.duplicated([1,2,1,2,3,2])) == [False, False, True, True, False, True]
+    assert list(biocutils.duplicated([1,2,1,2,3,2], from_last=True)) == [True, True, False, True, False, False]
     assert list(biocutils.duplicated([1,2,None,None,3,2,3])) == [False, False, False, True, False, True, True]
     assert list(biocutils.duplicated([1,2,None,None,3,2,3], incomparables=set([None]))) == [False, False, False, False, False, True, True]
 
 
 def test_duplicated_Factor():
-    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,1,2,3,2]))) == [False, False, True, True, False, True] 
-    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,1,2,3,2]), from_last=True)) == [True, True, False, True, False, False] 
+    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,1,2,3,2]))) == [False, False, True, True, False, True]
+    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,1,2,3,2]), from_last=True)) == [True, True, False, True, False, False]
     assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,None,None,3,2,3]))) == [False, False, False, True, False, True, True]
     assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,None,None,3,2,3]), incomparables=set([None]))) == [False, False, False, False, False, True, True]
 

From 217e03f1ba7f5204e325243c848a108d8ec3b21f Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Fri, 9 Jan 2026 01:00:32 +1100
Subject: [PATCH 6/6] Add coverage when incomparable Factor level is a string.

---
 tests/test_duplicated.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_duplicated.py b/tests/test_duplicated.py
index 3e3a5c3..3edeb6c 100644
--- a/tests/test_duplicated.py
+++ b/tests/test_duplicated.py
@@ -13,6 +13,7 @@ def test_duplicated_Factor():
     assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,1,2,3,2]), from_last=True)) == [True, True, False, True, False, False]
     assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,None,None,3,2,3]))) == [False, False, False, True, False, True, True]
     assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,None,None,3,2,3]), incomparables=set([None]))) == [False, False, False, False, False, True, True]
+    assert list(biocutils.duplicated(biocutils.Factor.from_sequence([1,2,None,None,3,2,3]), incomparables=set(["2"]))) == [False, False, False, True, False, False, True]
 
 
 def test_unique():