diff --git a/src/DataAPI.jl b/src/DataAPI.jl index f723983..5abbf6e 100644 --- a/src/DataAPI.jl +++ b/src/DataAPI.jl @@ -105,29 +105,67 @@ definition. """ function describe end +# Sentinel type needed to make `levels` inferrable +struct _Default end + """ - levels(x) + levels(x; skipmissing=true) + +Return a vector of unique values which occur or could occur in collection `x`. +`missing` values are skipped unless `skipmissing=false` is passed. -Return a vector of unique values which occur or could occur in collection `x`, -omitting `missing` even if present. Values are returned in the preferred order -for the collection, with the result of [`sort`](@ref) as a default. +Values are returned in the preferred order for the collection, +with the result of [`sort`](@ref) as a default. If the collection is not sortable then the order of levels is unspecified. Contrary to [`unique`](@ref), this function may return values which do not actually occur in the data, and does not preserve their order of appearance in `x`. """ -function levels(x) - T = Base.nonmissingtype(eltype(x)) - u = unique(x) - # unique returns its input with copying for ranges - # (and possibly for other types guaranteed to hold unique values) - nmu = (u === x || Base.mightalias(u, x)) ? filter(!ismissing, u) : filter!(!ismissing, u) - levs = convert(AbstractArray{T}, nmu) - try - sort!(levs) - catch +@inline levels(x; skipmissing::Union{Bool, _Default}=_Default()) = + skipmissing isa _Default || skipmissing ? + _levels_skipmissing(x) : _levels_missing(x) + +# The `which` check is here for backward compatibility: +# if a type implements a custom `levels` method but does not support +# keyword arguments, `levels(x, skipmissing=true/false)` will dispatch +# to the fallback methods here, and we take care of calling that custom method +function _levels_skipmissing(x) + if which(DataAPI.levels, Tuple{typeof(x)}) === which(DataAPI.levels, Tuple{Any}) + T = Base.nonmissingtype(eltype(x)) + u = unique(x) + # unique returns its input with copying for ranges + # (and possibly for other types guaranteed to hold unique values) + nmu = (u isa AbstractRange || u === x || Base.mightalias(u, x)) ? + filter(!ismissing, u) : filter!(!ismissing, u) + levs = convert(AbstractArray{T}, nmu) + try + sort!(levs) + catch + end + return levs + else + return levels(x) + end +end + +function _levels_missing(x) + if which(DataAPI.levels, Tuple{typeof(x)}) === which(DataAPI.levels, Tuple{Any}) + u = convert(AbstractArray{eltype(x)}, unique(x)) + # unique returns its input with copying for ranges + # (and possibly for other types guaranteed to hold unique values) + levs = (x isa AbstractRange || u === x || Base.mightalias(u, x)) ? + Base.copymutable(u) : u + try + sort!(levs) + catch + end + return levs + # This is a suboptimal fallback since it does a second pass over the data + elseif any(ismissing, x) + return [levels(x); missing] + else + return convert(AbstractArray{eltype(x)}, levels(x)) end - levs end """ diff --git a/test/runtests.jl b/test/runtests.jl index a086310..9782ac2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,15 @@ using Test, DataAPI +const ≅ = isequal + +# For `levels` tests +struct TestArray{T} <: AbstractVector{T} + x::Vector{T} +end +Base.size(x::TestArray) = size(x.x) +Base.getindex(x::TestArray, i) = x.x[i] +DataAPI.levels(x::TestArray) = reverse(DataAPI.levels(x.x)) + @testset "DataAPI" begin @testset "defaultarray" begin @@ -29,15 +39,15 @@ end @testset "levels" begin - @test DataAPI.levels(1:1) == - DataAPI.levels([1]) == - DataAPI.levels([1, missing]) == - DataAPI.levels([missing, 1]) == + @test @inferred(DataAPI.levels(1:1)) == + @inferred(DataAPI.levels([1])) == + @inferred(DataAPI.levels([1, missing])) == + @inferred(DataAPI.levels([missing, 1])) == [1] - @test DataAPI.levels(2:-1:1) == - DataAPI.levels([2, 1]) == - DataAPI.levels(Any[2, 1]) == - DataAPI.levels([2, missing, 1]) == + @test @inferred(DataAPI.levels(2:-1:1)) == + @inferred(DataAPI.levels([2, 1])) == + @inferred(DataAPI.levels(Any[2, 1])) == + @inferred(DataAPI.levels([2, missing, 1])) == [1, 2] @test DataAPI.levels([missing, "a", "c", missing, "b"]) == ["a", "b", "c"] @test DataAPI.levels([Complex(0, 1), Complex(1, 0), missing]) == @@ -55,6 +65,65 @@ end @test isempty(DataAPI.levels([missing])) @test isempty(DataAPI.levels([])) + levels_missing(x) = DataAPI.levels(x, skipmissing=false) + @test @inferred(levels_missing(1:1)) ≅ + @inferred(levels_missing([1])) ≅ + [1] + if VERSION >= v"1.6.0" + @test @inferred(levels_missing([1, missing])) ≅ + @inferred(levels_missing([missing, 1])) ≅ + [1, missing] + else + @test levels_missing([1, missing]) ≅ + levels_missing([missing, 1]) ≅ + [1, missing] + end + @test @inferred(levels_missing(2:-1:1)) ≅ + @inferred(levels_missing([2, 1])) ≅ + [1, 2] + @test @inferred(levels_missing(Any[2, 1])) ≅ + [1, 2] + @test DataAPI.levels([2, missing, 1], skipmissing=false) ≅ + [1, 2, missing] + @test DataAPI.levels([missing, "a", "c", missing, "b"], skipmissing=false) ≅ + ["a", "b", "c", missing] + @test DataAPI.levels([Complex(0, 1), Complex(1, 0), missing], skipmissing=false) ≅ + [Complex(0, 1), Complex(1, 0), missing] + @test typeof(DataAPI.levels([1], skipmissing=false)) === Vector{Int} + @test typeof(DataAPI.levels([1, missing], skipmissing=false)) === + Vector{Union{Int, Missing}} + @test typeof(DataAPI.levels(["a"], skipmissing=false)) === Vector{String} + @test typeof(DataAPI.levels(["a", missing], skipmissing=false)) === + Vector{Union{String, Missing}} + @test typeof(DataAPI.levels(Real[1], skipmissing=false)) === Vector{Real} + @test typeof(DataAPI.levels(Union{Real,Missing}[1, missing], skipmissing=false)) === + Vector{Union{Real, Missing}} + @test typeof(DataAPI.levels(trues(1), skipmissing=false)) === Vector{Bool} + @test DataAPI.levels([missing], skipmissing=false) ≅ [missing] + @test DataAPI.levels([missing], skipmissing=false) isa Vector{Missing} + @test typeof(DataAPI.levels(Union{Int,Missing}[missing], skipmissing=false)) === + Vector{Union{Int,Missing}} + @test isempty(DataAPI.levels([], skipmissing=false)) + @test typeof(DataAPI.levels(Int[], skipmissing=false)) === Vector{Int} + + # Backward compatibility test: + # check that an array type which implements a `levels` method + # which does not accept keyword arguments works thanks to fallbacks + @test DataAPI.levels(TestArray([1, 2])) == + DataAPI.levels(TestArray([1, 2]), skipmissing=true) == + DataAPI.levels(TestArray([1, 2]), skipmissing=false) == [2, 1] + @test DataAPI.levels(TestArray([1, 2])) isa Vector{Int} + @test DataAPI.levels(TestArray([1, 2]), skipmissing=true) isa Vector{Int} + @test DataAPI.levels(TestArray([1, 2]), skipmissing=false) isa Vector{Int} + @test DataAPI.levels(TestArray([missing, 1, 2])) == + DataAPI.levels(TestArray([missing, 1, 2]), skipmissing=true) == [2, 1] + @test DataAPI.levels(TestArray([missing, 1, 2]), skipmissing=false) ≅ + [2, 1, missing] + @test DataAPI.levels(TestArray([missing, 1, 2])) isa Vector{Int} + @test DataAPI.levels(TestArray([missing, 1, 2]), skipmissing=true) isa + Vector{Int} + @test DataAPI.levels(TestArray([missing, 1, 2]), skipmissing=false) isa + Vector{Union{Int, Missing}} end @testset "Between" begin