From ca2783544a728289797278d1ca5339c2ece0162e Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Mon, 27 Oct 2025 10:48:31 -0400 Subject: [PATCH 01/34] bump MatrixAlgebraKit compat to 0.6 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7d754d817..137aa050c 100644 --- a/Project.toml +++ b/Project.toml @@ -35,7 +35,7 @@ Combinatorics = "1" FiniteDifferences = "0.12" LRUCache = "1.0.2" LinearAlgebra = "1" -MatrixAlgebraKit = "0.5.0" +MatrixAlgebraKit = "0.6.0" OhMyThreads = "0.8.0" PackageExtensionCompat = "1" Printf = "1" From a64bb27a91cae15c61deefcfe4ad21bbb1c5ecf2 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Mon, 27 Oct 2025 10:56:54 -0400 Subject: [PATCH 02/34] update `ishermitian` implementations --- src/factorizations/factorizations.jl | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 29d2c16a6..1e0a37078 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -61,14 +61,17 @@ LinearAlgebra.svdvals!(t::AbstractTensorMap) = diagview(svd_vals!(t)) #--------------------------------------------------# # Checks for hermiticity and positive definiteness # #--------------------------------------------------# -function LinearAlgebra.ishermitian(t::AbstractTensorMap) - domain(t) == codomain(t) || return false - InnerProductStyle(t) === EuclideanInnerProduct() || return false # hermiticity only defined for euclidean - for (c, b) in blocks(t) - ishermitian(b) || return false - end - return true +function MAK.ishermitian(t::AbstractTensorMap; kwargs...) + return InnerProductStyle(t) === EuclideanInnerProduct() && + domain(t) == codomain(t) && + all(cb -> MatrixAlgebraKit.ishermitian(cb[2]; kwargs...), blocks(t)) +end +function MAK.isantihermitian(t::AbstractTensorMap; kwargs...) 
+ return InnerProductStyle(t) === EuclideanInnerProduct() && + domain(t) == codomain(t) && + all(cb -> MatrixAlgebraKit.isantihermitian(cb[2]; kwargs...), blocks(t)) end +LinearAlgebra.ishermitian(t::AbstractTensorMap) = MAK.ishermitian(t) function LinearAlgebra.isposdef(t::AbstractTensorMap) return isposdef!(copy_oftype(t, factorisation_scalartype(isposdef, t))) From 3f33eb87c8cba198bba97751f1c6d634b327529c Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Mon, 27 Oct 2025 10:57:08 -0400 Subject: [PATCH 03/34] update `isisometric` implementations --- src/factorizations/factorizations.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 1e0a37078..42705f432 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -87,14 +87,14 @@ function LinearAlgebra.isposdef!(t::AbstractTensorMap) end # TODO: tolerances are per-block, not global or weighted - does that matter? -function MatrixAlgebraKit.is_left_isometry(t::AbstractTensorMap; kwargs...) +function MAK.is_left_isometric(t::AbstractTensorMap; kwargs...) domain(t) ≾ codomain(t) || return false - f((c, b)) = MatrixAlgebraKit.is_left_isometry(b; kwargs...) + f((c, b)) = MAK.is_left_isometric(b; kwargs...) return all(f, blocks(t)) end -function MatrixAlgebraKit.is_right_isometry(t::AbstractTensorMap; kwargs...) +function MAK.is_right_isometric(t::AbstractTensorMap; kwargs...) domain(t) ≿ codomain(t) || return false - f((c, b)) = MatrixAlgebraKit.is_right_isometry(b; kwargs...) + f((c, b)) = MAK.is_right_isometric(b; kwargs...) 
return all(f, blocks(t)) end From fbebae3f1c1e9698e07818b20eb313ceaf4923ee Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Mon, 27 Oct 2025 11:13:26 -0400 Subject: [PATCH 04/34] add projections --- src/factorizations/matrixalgebrakit.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/factorizations/matrixalgebrakit.jl b/src/factorizations/matrixalgebrakit.jl index fc5f84502..5a51d3aff 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -5,6 +5,7 @@ for f in :svd_compact, :svd_full, :svd_trunc, :svd_vals, :qr_compact, :qr_full, :qr_null, :lq_compact, :lq_full, :lq_null, :eig_full, :eig_trunc, :eig_vals, :eigh_full, :eigh_trunc, :eigh_vals, :left_polar, :right_polar, + :project_hermitian, :project_antihermitian, :project_isometric, ] f! = Symbol(f, :!) @eval function MAK.default_algorithm(::typeof($f!), ::Type{T}; kwargs...) where {T <: AbstractTensorMap} @@ -655,3 +656,17 @@ for (f!, f_svd!) in zip((:left_null!, :right_null!), (:left_null_svd!, :right_nu return $f!(t, N; alg_svd = alg) end end + +# Projections +# ----------- +function MAK.check_input(::typeof(project_hermitian!), tsrc::AbstractTensorMap, tdst::AbstractTensorMap) + domain(tsrc) == codomain(tsrc) || throw(ArgumentError("Hermitian projection requires square input tensor")) + tsrc === tdst || @check_space(tdst, space(tsrc)) + return nothing +end + +MAK.check_input(::typeof(project_antihermitian!), tsrc::AbstractTensorMap, tdst::AbstractTensorMap) = + MAK.check_input(project_hermitian!, tsrc, tdst) + +MAK.initialize_output(::typeof(project_hermitian!), tsrc::AbstractTensorMap) = tsrc +MAK.initialize_output(::typeof(project_antihermitian!), tsrc::AbstractTensorMap) = tsrc From 5cdec10ba5642bead804b51653567275798ff9ef Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 14 Nov 2025 08:06:28 -0500 Subject: [PATCH 05/34] Incremental fixes --- src/TensorKit.jl | 2 +- src/factorizations/adjoint.jl | 8 +-- 
src/factorizations/factorizations.jl | 7 +-- src/factorizations/matrixalgebrakit.jl | 79 +------------------------- test/tensors/factorizations.jl | 66 ++++++++++----------- 5 files changed, 42 insertions(+), 120 deletions(-) diff --git a/src/TensorKit.jl b/src/TensorKit.jl index 74d23c8ce..85bc9a051 100644 --- a/src/TensorKit.jl +++ b/src/TensorKit.jl @@ -82,7 +82,7 @@ export left_orth, right_orth, left_null, right_null, eigh_full!, eigh_full, eigh_trunc!, eigh_trunc, eig_full!, eig_full, eig_trunc!, eig_trunc, eigh_vals!, eigh_vals, eig_vals!, eig_vals, - isposdef, isposdef!, ishermitian, isisometry, isunitary, sylvester, rank, cond + isposdef, isposdef!, ishermitian, isisometric, isunitary, sylvester, rank, cond export braid, braid!, permute, permute!, transpose, transpose!, twist, twist!, repartition, repartition! diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index 30a68c138..ef7e5e265 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -29,11 +29,11 @@ function MAK.right_null!(t::AdjointTensorMap, N, alg::AbstractAlgorithm) return N end -function MAK.is_left_isometry(t::AdjointTensorMap; kwargs...) - return is_right_isometry(adjoint(t); kwargs...) +function MAK.is_left_isometric(t::AdjointTensorMap; kwargs...) + return is_right_isometric(adjoint(t); kwargs...) end -function MAK.is_right_isometry(t::AdjointTensorMap; kwargs...) - return is_left_isometry(adjoint(t); kwargs...) +function MAK.is_right_isometric(t::AdjointTensorMap; kwargs...) + return is_left_isometric(adjoint(t); kwargs...) 
end # 2-arg functions diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 42705f432..6961d55aa 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -18,8 +18,7 @@ import MatrixAlgebraKit as MAK using MatrixAlgebraKit: AbstractAlgorithm, TruncatedAlgorithm, DiagonalAlgorithm using MatrixAlgebraKit: TruncationStrategy, NoTruncation, TruncationByValue, TruncationByError, TruncationIntersection, TruncationByFilter, TruncationByOrder -using MatrixAlgebraKit: left_orth_polar!, right_orth_polar!, left_orth_svd!, - right_orth_svd!, left_null_svd!, right_null_svd!, diagview +using MatrixAlgebraKit: diagview, isisometric include("utility.jl") include("matrixalgebrakit.jl") @@ -30,9 +29,9 @@ include("pullbacks.jl") TensorKit.one!(A::AbstractMatrix) = MatrixAlgebraKit.one!(A) -function MatrixAlgebraKit.isisometry(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple) +function MatrixAlgebraKit.isisometric(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple) t = permute(t, (p₁, p₂); copy = false) - return isisometry(t) + return isisometric(t) end #------------------------------# diff --git a/src/factorizations/matrixalgebrakit.jl b/src/factorizations/matrixalgebrakit.jl index 5a51d3aff..974b577bd 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -26,8 +26,7 @@ end for f! 
in ( :qr_compact!, :qr_full!, :lq_compact!, :lq_full!, :eig_full!, :eigh_full!, :svd_compact!, :svd_full!, - :left_polar!, :left_orth_polar!, :right_polar!, :right_orth_polar!, - :left_orth!, :right_orth!, + :left_polar!, :right_polar!, :left_orth!, :right_orth!, ) @eval function MAK.$f!(t::AbstractTensorMap, F, alg::AbstractAlgorithm) MAK.check_input($f!, t, F, alg) @@ -417,26 +416,6 @@ function MAK.check_input(::typeof(left_polar!), t::AbstractTensorMap, WP, ::Abst return nothing end -function MAK.check_input(::typeof(left_orth_polar!), t::AbstractTensorMap, WP, ::AbstractAlgorithm) - codomain(t) ≿ domain(t) || - throw(ArgumentError("Polar decomposition requires `codomain(t) ≿ domain(t)`")) - - W, P = WP - @assert W isa AbstractTensorMap - @assert P isa AbstractTensorMap - - # scalartype checks - @check_scalar W t - @check_scalar P t - - # space checks - VW = fuse(domain(t)) - @check_space(W, codomain(t) ← VW) - @check_space(P, VW ← domain(t)) - - return nothing -end - function MAK.initialize_output(::typeof(left_polar!), t::AbstractTensorMap, ::AbstractAlgorithm) W = similar(t, space(t)) P = similar(t, domain(t) ← domain(t)) @@ -462,26 +441,6 @@ function MAK.check_input(::typeof(right_polar!), t::AbstractTensorMap, PWᴴ, :: return nothing end -function MAK.check_input(::typeof(right_orth_polar!), t::AbstractTensorMap, PWᴴ, ::AbstractAlgorithm) - codomain(t) ≾ domain(t) || - throw(ArgumentError("Polar decomposition requires `domain(t) ≿ codomain(t)`")) - - P, Wᴴ = PWᴴ - @assert P isa AbstractTensorMap - @assert Wᴴ isa AbstractTensorMap - - # scalartype checks - @check_scalar P t - @check_scalar Wᴴ t - - # space checks - VW = fuse(codomain(t)) - @check_space(P, codomain(t) ← VW) - @check_space(Wᴴ, VW ← domain(t)) - - return nothing -end - function MAK.initialize_output(::typeof(right_polar!), t::AbstractTensorMap, ::AbstractAlgorithm) P = similar(t, codomain(t) ← codomain(t)) Wᴴ = similar(t, space(t)) @@ -582,36 +541,6 @@ function MAK.right_orth!( end end -function 
MAK.left_orth_polar!(t::AbstractTensorMap; alg = nothing, kwargs...) - alg′ = MAK.select_algorithm(left_polar!, t, alg; kwargs...) - VC = MAK.initialize_output(left_orth!, t) - return left_orth_polar!(t, VC, alg′) -end -function MAK.left_orth_polar!(t::AbstractTensorMap, VC, alg) - alg′ = MAK.select_algorithm(left_polar!, t, alg) - return left_orth_polar!(t, VC, alg′) -end -function MAK.right_orth_polar!(t::AbstractTensorMap; alg = nothing, kwargs...) - alg′ = MAK.select_algorithm(right_polar!, t, alg; kwargs...) - CVᴴ = MAK.initialize_output(right_orth!, t) - return right_orth_polar!(t, CVᴴ, alg′) -end -function MAK.right_orth_polar!(t::AbstractTensorMap, CVᴴ, alg) - alg′ = MAK.select_algorithm(right_polar!, t, alg) - return right_orth_polar!(t, CVᴴ, alg′) -end - -function MAK.left_orth_svd!(t::AbstractTensorMap; trunc = notrunc(), kwargs...) - U, S, Vᴴ = trunc == notrunc() ? svd_compact!(t; kwargs...) : - svd_trunc!(t; trunc, kwargs...) - return U, lmul!(S, Vᴴ) -end -function MAK.right_orth_svd!(t::AbstractTensorMap; trunc = notrunc(), kwargs...) - U, S, Vᴴ = trunc == notrunc() ? svd_compact!(t; kwargs...) : - svd_trunc!(t; trunc, kwargs...) - return rmul!(U, S), Vᴴ -end - # Nullspace # --------- function MAK.check_input(::typeof(left_null!), t::AbstractTensorMap, N, ::AbstractAlgorithm) @@ -651,12 +580,6 @@ function MAK.initialize_output(::typeof(right_null!), t::AbstractTensorMap) return N end -for (f!, f_svd!) 
in zip((:left_null!, :right_null!), (:left_null_svd!, :right_null_svd!)) - @eval function MAK.$f_svd!(t::AbstractTensorMap, N, alg, ::Nothing = nothing) - return $f!(t, N; alg_svd = alg) - end -end - # Projections # ----------- function MAK.check_input(::typeof(project_hermitian!), tsrc::AbstractTensorMap, tdst::AbstractTensorMap) diff --git a/test/tensors/factorizations.jl b/test/tensors/factorizations.jl index c5b4e8748..8c9bde170 100644 --- a/test/tensors/factorizations.jl +++ b/test/tensors/factorizations.jl @@ -47,18 +47,18 @@ for V in spacelist Q, R = @constinferred qr_compact(t) @test Q * R ≈ t - @test isisometry(Q) + @test isisometric(Q) Q, R = @constinferred left_orth(t; kind = :qr) @test Q * R ≈ t - @test isisometry(Q) + @test isisometric(Q) N = @constinferred qr_null(t) - @test isisometry(N) + @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) N = @constinferred left_null(t; kind = :qr) - @test isisometry(N) + @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) end @@ -73,12 +73,12 @@ for V in spacelist Q, R = @constinferred qr_compact(t) @test Q * R ≈ t - @test isisometry(Q) + @test isisometric(Q) @test dim(Q) == dim(R) == dim(t) Q, R = @constinferred left_orth(t; kind = :qr) @test Q * R ≈ t - @test isisometry(Q) + @test isisometric(Q) @test dim(Q) == dim(R) == dim(t) N = @constinferred qr_null(t) @@ -100,14 +100,14 @@ for V in spacelist L, Q = @constinferred lq_compact(t) @test L * Q ≈ t - @test isisometry(Q; side = :right) + @test isisometric(Q; side = :right) L, Q = @constinferred right_orth(t; kind = :lq) @test L * Q ≈ t - @test isisometry(Q; side = :right) + @test isisometric(Q; side = :right) Nᴴ = @constinferred lq_null(t) - @test isisometry(Nᴴ; side = :right) + @test isisometric(Nᴴ; side = :right) @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) end @@ -122,12 +122,12 @@ for V in spacelist L, Q = @constinferred lq_compact(t) @test L * Q ≈ t - @test isisometry(Q; side = :right) + @test isisometric(Q; side = 
:right) @test dim(Q) == dim(L) == dim(t) L, Q = @constinferred right_orth(t; kind = :lq) @test L * Q ≈ t - @test isisometry(Q; side = :right) + @test isisometric(Q; side = :right) @test dim(Q) == dim(L) == dim(t) Nᴴ = @constinferred lq_null(t) @@ -146,12 +146,12 @@ for V in spacelist @assert domain(t) ≾ codomain(t) w, p = @constinferred left_polar(t) @test w * p ≈ t - @test isisometry(w) + @test isisometric(w) @test isposdef(p) w, p = @constinferred left_orth(t; kind = :polar) @test w * p ≈ t - @test isisometry(w) + @test isisometric(w) end for T in eltypes, @@ -160,12 +160,12 @@ for V in spacelist @assert codomain(t) ≾ domain(t) p, wᴴ = @constinferred right_polar(t) @test p * wᴴ ≈ t - @test isisometry(wᴴ; side = :right) + @test isisometric(wᴴ; side = :right) @test isposdef(p) p, wᴴ = @constinferred right_orth(t; kind = :polar) @test p * wᴴ ≈ t - @test isisometry(wᴴ; side = :right) + @test isisometric(wᴴ; side = :right) end end @@ -185,9 +185,9 @@ for V in spacelist u, s, vᴴ = @constinferred svd_compact(t) @test u * s * vᴴ ≈ t - @test isisometry(u) + @test isisometric(u) @test isposdef(s) - @test isisometry(vᴴ; side = :right) + @test isisometric(vᴴ; side = :right) s′ = LinearAlgebra.diag(s) for (c, b) in LinearAlgebra.svdvals(t) @@ -196,14 +196,14 @@ for V in spacelist v, c = @constinferred left_orth(t; kind = :svd) @test v * c ≈ t - @test isisometry(v) + @test isisometric(v) N = @constinferred left_null(t; kind = :svd) - @test isisometry(N) + @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) Nᴴ = @constinferred right_null(t; kind = :svd) - @test isisometry(Nᴴ; side = :right) + @test isisometric(Nᴴ; side = :right) @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) end @@ -233,22 +233,22 @@ for V in spacelist U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) @test U * S * Vᴴ ≈ t - @test isisometry(U) - @test isisometry(Vᴴ; side = :right) + @test isisometric(U) + @test isisometric(Vᴴ; side = :right) trunc = truncrank(dim(domain(S)) ÷ 2) U1, 
S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ1' ≈ U1 * S1 - @test isisometry(U1) - @test isisometry(Vᴴ1; side = :right) + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) @test dim(domain(S1)) <= trunc.howmany λ = minimum(minimum, values(LinearAlgebra.diag(S1))) trunc = trunctol(; atol = λ - 10eps(λ)) U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ2' ≈ U2 * S2 - @test isisometry(U2) - @test isisometry(Vᴴ2; side = :right) + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ @test U2 ≈ U1 @test S2 ≈ S1 @@ -257,22 +257,22 @@ for V in spacelist trunc = truncspace(space(S2, 1)) U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ3' ≈ U3 * S3 - @test isisometry(U3) - @test isisometry(Vᴴ3; side = :right) + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) @test space(S3, 1) ≾ space(S2, 1) trunc = truncerror(; atol = 0.5) U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ4' ≈ U4 * S4 - @test isisometry(U4) - @test isisometry(Vᴴ4; side = :right) + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 trunc = truncrank(dim(domain(S)) ÷ 2) & trunctol(; atol = λ - 10eps(λ)) U5, S5, Vᴴ5 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ5' ≈ U5 * S5 - @test isisometry(U5) - @test isisometry(Vᴴ5; side = :right) + @test isisometric(U5) + @test isisometric(Vᴴ5; side = :right) @test minimum(minimum, values(LinearAlgebra.diag(S5))) >= λ @test dim(domain(S5)) ≤ dim(domain(S)) ÷ 2 end @@ -304,7 +304,7 @@ for V in spacelist t2 = (t + t') D, V = eigen(t2) - @test isisometry(V) + @test isisometric(V) D̃, Ṽ = @constinferred eigh_full(t2) @test D ≈ D̃ @test V ≈ Ṽ From 37a7eab2d4218258ecfea5fcabd07923ae0ce39b Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 14 Nov 2025 15:30:38 +0100 Subject: [PATCH 06/34] orthnull progress --- src/TensorKit.jl | 3 +- src/factorizations/factorizations.jl | 1 + 
src/factorizations/matrixalgebrakit.jl | 99 +++++++++++++++++--------- test/autodiff/ad.jl | 6 +- test/tensors/factorizations.jl | 20 +++--- 5 files changed, 84 insertions(+), 45 deletions(-) diff --git a/src/TensorKit.jl b/src/TensorKit.jl index 85bc9a051..7ebdb504d 100644 --- a/src/TensorKit.jl +++ b/src/TensorKit.jl @@ -82,7 +82,8 @@ export left_orth, right_orth, left_null, right_null, eigh_full!, eigh_full, eigh_trunc!, eigh_trunc, eig_full!, eig_full, eig_trunc!, eig_trunc, eigh_vals!, eigh_vals, eig_vals!, eig_vals, - isposdef, isposdef!, ishermitian, isisometric, isunitary, sylvester, rank, cond + isposdef, isposdef!, ishermitian, isisometric, isunitary, sylvester, rank, cond, + LeftOrthAlgorithm, RightOrthAlgorithm export braid, braid!, permute, permute!, transpose, transpose!, twist, twist!, repartition, repartition! diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 6961d55aa..d618bca09 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -16,6 +16,7 @@ using TensorOperations: Index2Tuple using MatrixAlgebraKit import MatrixAlgebraKit as MAK using MatrixAlgebraKit: AbstractAlgorithm, TruncatedAlgorithm, DiagonalAlgorithm +using MatrixAlgebraKit: LeftOrthAlgorithm, RightOrthAlgorithm using MatrixAlgebraKit: TruncationStrategy, NoTruncation, TruncationByValue, TruncationByError, TruncationIntersection, TruncationByFilter, TruncationByOrder using MatrixAlgebraKit: diagview, isisometric diff --git a/src/factorizations/matrixalgebrakit.jl b/src/factorizations/matrixalgebrakit.jl index 974b577bd..73fd04a6b 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -26,7 +26,7 @@ end for f! in ( :qr_compact!, :qr_full!, :lq_compact!, :lq_full!, :eig_full!, :eigh_full!, :svd_compact!, :svd_full!, - :left_polar!, :right_polar!, :left_orth!, :right_orth!, + :left_polar!, :right_polar! 
) @eval function MAK.$f!(t::AbstractTensorMap, F, alg::AbstractAlgorithm) MAK.check_input($f!, t, F, alg) @@ -45,6 +45,43 @@ for f! in ( end end +for alg in (MAK.LeftOrthAlgorithm{:qr}, MAK.LeftOrthAlgorithm{:svd}, MAK.LeftOrthAlgorithm{:polar}) + @eval begin + function MAK.left_orth!(t::AbstractTensorMap, F, alg::$alg) + MAK.check_input(left_orth!, t, F, alg) + + foreachblock(t, F...) do _, bs + factors = Base.tail(bs) + factors′ = left_orth!(first(bs), factors, alg) + # deal with the case where the output is not in-place + for (f′, f) in zip(factors′, factors) + f′ === f || copy!(f, f′) + end + return nothing + end + return F + end + end +end + +for alg in (MAK.RightOrthAlgorithm{:lq}, MAK.RightOrthAlgorithm{:svd}, MAK.RightOrthAlgorithm{:polar}) + @eval function MAK.right_orth!(t::AbstractTensorMap, F, alg::$alg) + MAK.check_input(right_orth!, t, F, alg) + + foreachblock(t, F...) do _, bs + factors = Base.tail(bs) + factors′ = right_orth!(first(bs), factors, alg) + # deal with the case where the output is not in-place + for (f′, f) in zip(factors′, factors) + f′ === f || copy!(f, f′) + end + return nothing + end + return F + end +end + + # Handle these separately because single output instead of tuple for f! in (:qr_null!, :lq_null!) 
@eval function MAK.$f!(t::AbstractTensorMap, N, alg::AbstractAlgorithm) @@ -464,6 +501,18 @@ function MAK.check_input(::typeof(left_orth!), t::AbstractTensorMap, VC, ::Abstr return nothing end +function MAK.check_input(::typeof(left_orth!), t::AbstractTensorMap, VC, alg::MAK.LeftOrthAlgorithm{:qr}) + return MAK.check_input(qr_compact!, t, VC, alg.alg) +end + +function MAK.check_input(::typeof(left_orth!), t::AbstractTensorMap, VC, alg::MAK.LeftOrthAlgorithm{:svd}) + return MAK.check_input(svd_compact!, t, VC, alg.alg) +end + +function MAK.check_input(::typeof(left_orth!), t::AbstractTensorMap, VC, alg::MAK.LeftOrthAlgorithm{:polar}) + return MAK.check_input(left_polar!, t, VC, alg.alg) +end + function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, CVᴴ, ::AbstractAlgorithm) C, Vᴴ = CVᴴ @@ -479,6 +528,18 @@ function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, CVᴴ, ::A return nothing end +function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, VC, alg::MAK.RightOrthAlgorithm{:lq}) + return MAK.check_input(lq_compact!, t, VC, alg.alg) +end + +function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, VC, alg::MAK.RightOrthAlgorithm{:svd}) + return MAK.check_input(svd_compact!, t, VC, alg.alg) +end + +function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, VC, alg::MAK.RightOrthAlgorithm{:polar}) + return MAK.check_input(right_polar!, t, VC, alg.alg) +end + function MAK.initialize_output(::typeof(left_orth!), t::AbstractTensorMap) V_C = infimum(fuse(codomain(t)), fuse(domain(t))) V = similar(t, codomain(t) ← V_C) @@ -501,44 +562,16 @@ end function MAK.left_orth!( t::AbstractTensorMap; trunc::TruncationStrategy = notrunc(), - kind = trunc == notrunc() ? :qr : :svd, - alg_qr = (; positive = true), alg_polar = (;), alg_svd = (;) + alg::AbstractAlgorithm = (trunc == notrunc()) ? 
MAK.select_algorithm(left_orth!, t, Val(:qr)) : MAK.select_algorithm(left_orth!, t, Val(:svd); trunc) ) - trunc == notrunc() || kind === :svd || - throw(ArgumentError("truncation not supported for left_orth with kind = $kind")) - - return if kind === :qr - alg_qr isa NamedTuple ? qr_compact!(t; alg_qr...) : qr_compact!(t; alg = alg_qr) - elseif kind === :polar - alg_polar isa NamedTuple ? left_orth_polar!(t; alg_polar...) : - left_orth_polar!(t; alg = alg_polar) - elseif kind === :svd - alg_svd isa NamedTuple ? left_orth_svd!(t; trunc, alg_svd...) : - left_orth_svd!(t; trunc, alg = alg_svd) - else - throw(ArgumentError(lazy"`left_orth!` received unknown value `kind = $kind`")) - end + MAK.left_orth!(t, MAK.initialize_output(left_orth!, t, alg), alg) end function MAK.right_orth!( t::AbstractTensorMap; trunc::TruncationStrategy = notrunc(), - kind = trunc == notrunc() ? :lq : :svd, - alg_lq = (; positive = true), alg_polar = (;), alg_svd = (;) + alg::AbstractAlgorithm = (trunc == notrunc()) ? MAK.select_algorithm(right_orth!, t, Val(:lq)) : MAK.select_algorithm(right_orth!, t, Val(:svd); trunc) ) - trunc == notrunc() || kind === :svd || - throw(ArgumentError("truncation not supported for right_orth with kind = $kind")) - - return if kind === :lq - alg_lq isa NamedTuple ? lq_compact!(t; alg_lq...) : lq_compact!(t; alg = alg_lq) - elseif kind === :polar - alg_polar isa NamedTuple ? right_orth_polar!(t; alg_polar...) : - right_orth_polar!(t; alg = alg_polar) - elseif kind === :svd - alg_svd isa NamedTuple ? right_orth_svd!(t; trunc, alg_svd...) 
: - right_orth_svd!(t; trunc, alg = alg_svd) - else - throw(ArgumentError(lazy"`right_orth!` received unknown value `kind = $kind`")) - end + MAK.right_orth!(t, MAK.initialize_output(right_orth!, t, alg), alg) end # Nullspace diff --git a/test/autodiff/ad.jl b/test/autodiff/ad.jl index 0230936f7..37d4bff46 100644 --- a/test/autodiff/ad.jl +++ b/test/autodiff/ad.jl @@ -103,7 +103,7 @@ function remove_lqgauge_dependence!(ΔQ, t, Q) return ΔQ end function remove_eiggauge_dependence!( - ΔV, D, V; degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(D) + ΔV, D, V; degeneracy_atol = MatrixAlgebraKit.default_pullback_gauge_atol(D) ) gaugepart = V' * ΔV for (c, b) in blocks(gaugepart) @@ -119,7 +119,7 @@ function remove_eiggauge_dependence!( return ΔV end function remove_eighgauge_dependence!( - ΔV, D, V; degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(D) + ΔV, D, V; degeneracy_atol = MatrixAlgebraKit.default_pullback_gauge_atol(D) ) gaugepart = V' * ΔV gaugepart = (gaugepart - gaugepart') / 2 @@ -136,7 +136,7 @@ function remove_eighgauge_dependence!( return ΔV end function remove_svdgauge_dependence!( - ΔU, ΔVᴴ, U, S, Vᴴ; degeneracy_atol = MatrixAlgebraKit.default_pullback_gaugetol(S) + ΔU, ΔVᴴ, U, S, Vᴴ; degeneracy_atol = MatrixAlgebraKit.default_pullback_gauge_atol(S) ) gaugepart = U' * ΔU + Vᴴ * ΔVᴴ' gaugepart = (gaugepart - gaugepart') / 2 diff --git a/test/tensors/factorizations.jl b/test/tensors/factorizations.jl index 8c9bde170..c7950267b 100644 --- a/test/tensors/factorizations.jl +++ b/test/tensors/factorizations.jl @@ -49,7 +49,7 @@ for V in spacelist @test Q * R ≈ t @test isisometric(Q) - Q, R = @constinferred left_orth(t; kind = :qr) + Q, R = @constinferred left_orth(t) @test Q * R ≈ t @test isisometric(Q) @@ -57,7 +57,7 @@ for V in spacelist @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) - N = @constinferred left_null(t; kind = :qr) + N = @constinferred left_null(t) @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 
100 * eps(norm(t)) end @@ -76,7 +76,7 @@ for V in spacelist @test isisometric(Q) @test dim(Q) == dim(R) == dim(t) - Q, R = @constinferred left_orth(t; kind = :qr) + Q, R = @constinferred left_orth(t) @test Q * R ≈ t @test isisometric(Q) @test dim(Q) == dim(R) == dim(t) @@ -102,7 +102,7 @@ for V in spacelist @test L * Q ≈ t @test isisometric(Q; side = :right) - L, Q = @constinferred right_orth(t; kind = :lq) + L, Q = @constinferred right_orth(t) @test L * Q ≈ t @test isisometric(Q; side = :right) @@ -125,7 +125,7 @@ for V in spacelist @test isisometric(Q; side = :right) @test dim(Q) == dim(L) == dim(t) - L, Q = @constinferred right_orth(t; kind = :lq) + L, Q = @constinferred right_orth(t) @test L * Q ≈ t @test isisometric(Q; side = :right) @test dim(Q) == dim(L) == dim(t) @@ -149,7 +149,7 @@ for V in spacelist @test isisometric(w) @test isposdef(p) - w, p = @constinferred left_orth(t; kind = :polar) + w, p = @constinferred left_orth(t; alg = TensorKit.LeftOrthAlgorithm{:polar}) @test w * p ≈ t @test isisometric(w) end @@ -163,7 +163,7 @@ for V in spacelist @test isisometric(wᴴ; side = :right) @test isposdef(p) - p, wᴴ = @constinferred right_orth(t; kind = :polar) + p, wᴴ = @constinferred right_orth(t; alg = TensorKit.RightOrthAlgorithm{:polar}) @test p * wᴴ ≈ t @test isisometric(wᴴ; side = :right) end @@ -194,9 +194,13 @@ for V in spacelist @test b ≈ s′[c] end - v, c = @constinferred left_orth(t; kind = :svd) + v, c = @constinferred left_orth(t; alg = TensorKit.LeftOrthAlgorithm{:svd}) @test v * c ≈ t @test isisometric(v) + + c, vᴴ = @constinferred right_orth(t; alg = TensorKit.RightOrthAlgorithm{:svd}) + @test c * vᴴ ≈ t + @test isisometric(v; side = :right) N = @constinferred left_null(t; kind = :svd) @test isisometric(N) From 47a3ccad7e5473eae726f36318264d67e2503bbb Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 14 Nov 2025 16:21:06 +0100 Subject: [PATCH 07/34] Adjoint factorizations (AD not working) --- src/factorizations/adjoint.jl | 18 
++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index ef7e5e265..ac15dadb1 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -69,6 +69,24 @@ for (left_f!, right_f!) in zip( end end +for (left_f, right_f) in zip( + (:qr_full, :qr_compact, :left_polar, :left_orth), + (:lq_full, :lq_compact, :right_polar, :right_orth) + ) + @eval function MAK.$left_f(t::AdjointTensorMap; kwargs...) + return reverse(adjoint.($right_f(adjoint(t); kwargs...))) + end + @eval function MAK.$right_f(t::AdjointTensorMap; kwargs...) + return reverse(adjoint.($left_f(adjoint(t); kwargs...))) + end +end + +function MAK.eig_full(t::AdjointTensorMap; kwargs...) + DV = eig_full(adjoint(t); kwargs...) + return (DV[1], adjoint(DV[2])) +end + + # 3-arg functions for f! in (:svd_full!, :svd_compact!, :svd_trunc!) @eval function MAK.copy_input(::typeof($f!), t::AdjointTensorMap) From a1974c0877b0aaa64c6f1e38e9cd552e18f05697 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:10:14 -0500 Subject: [PATCH 08/34] add missing MAK. --- src/factorizations/adjoint.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index ac15dadb1..c3e96ec1a 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -30,10 +30,10 @@ function MAK.right_null!(t::AdjointTensorMap, N, alg::AbstractAlgorithm) end function MAK.is_left_isometric(t::AdjointTensorMap; kwargs...) - return is_right_isometric(adjoint(t); kwargs...) + return MAK.is_right_isometric(adjoint(t); kwargs...) end function MAK.is_right_isometric(t::AdjointTensorMap; kwargs...) - return is_left_isometric(adjoint(t); kwargs...) + return MAK.is_left_isometric(adjoint(t); kwargs...) 
end # 2-arg functions From e26ce89073be6df1dcb5b0f0f90ab0a84e631cb2 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:10:56 -0500 Subject: [PATCH 09/34] remove `eig(::Adjoint)` This doesn't actually hold for non-hermitian matrices --- src/factorizations/adjoint.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index c3e96ec1a..8b209d43d 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -81,12 +81,6 @@ for (left_f, right_f) in zip( end end -function MAK.eig_full(t::AdjointTensorMap; kwargs...) - DV = eig_full(adjoint(t); kwargs...) - return (DV[1], adjoint(DV[2])) -end - - # 3-arg functions for f! in (:svd_full!, :svd_compact!, :svd_trunc!) @eval function MAK.copy_input(::typeof($f!), t::AdjointTensorMap) From 196cee1e6211d539a7451d9ac8641c6e44a9e25f Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:11:21 -0500 Subject: [PATCH 10/34] add truncation error implementation --- src/factorizations/truncation.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/factorizations/truncation.jl b/src/factorizations/truncation.jl index 19f39759c..478d5ac12 100644 --- a/src/factorizations/truncation.jl +++ b/src/factorizations/truncation.jl @@ -242,3 +242,17 @@ function MAK.findtruncated_svd(values::SectorDict, strategy::TruncationIntersect ) for c in intersect(map(keys, inds)...) 
) end + +# Truncation error +# ---------------- +MAK.truncation_error(values::SectorDict, ind) = + MAK.truncation_error!(SectorDict(c => copy(v) for (c, v) in values), ind) + +function MAK.truncation_error!(values::SectorDict, ind) + for (c, v) in values + ind_c = get(ind, c, nothing) + isnothing(ind_c) && continue + v[ind_c] .= zero(eltype(v)) + end + return TensorKit._norm(values, 2, zero(real(eltype(valtype(values))))) +end From 4f2bc306b706a3b3dc421d26a7f293daf0f3d951 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:11:37 -0500 Subject: [PATCH 11/34] add nullspace truncation specializations --- src/factorizations/truncation.jl | 38 +++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/src/factorizations/truncation.jl b/src/factorizations/truncation.jl index 478d5ac12..23fc4c985 100644 --- a/src/factorizations/truncation.jl +++ b/src/factorizations/truncation.jl @@ -76,9 +76,7 @@ function MAK.truncate( end function MAK.truncate( - ::typeof(left_null!), - (U, S)::Tuple{AbstractTensorMap, AbstractTensorMap}, - strategy::MatrixAlgebraKit.TruncationStrategy + ::typeof(left_null!), (U, S)::NTuple{2, AbstractTensorMap}, strategy::TruncationStrategy ) extended_S = SectorDict( c => vcat(diagview(b), zeros(eltype(b), max(0, size(b, 2) - size(b, 1)))) @@ -90,6 +88,40 @@ function MAK.truncate( truncate_domain!(Ũ, U, ind) return Ũ, ind end +function MAK.truncate( + ::typeof(right_null!), (S, Vᴴ)::NTuple{2, AbstractTensorMap}, strategy::TruncationStrategy + ) + extended_S = SectorDict( + c => vcat(diagview(b), zeros(eltype(b), max(0, size(b, 1) - size(b, 2)))) + for (c, b) in blocks(S) + ) + ind = MAK.findtruncated(extended_S, strategy) + V_truncated = truncate_space(space(Vᴴ, 1), ind) + Ṽᴴ = similar(Vᴴ, V_truncated ← domain(Vᴴ)) + truncate_codomain!(Ṽᴴ, Vᴴ, ind) + return Ṽᴴ, ind +end + +# special case `NoTruncation` for null: should keep exact zeros due to rectangularity +# need to specialize to avoid ambiguity 
with special case in MatrixAlgebraKit +function MAK.truncate( + ::typeof(left_null!), (U, S)::NTuple{2, AbstractTensorMap}, strategy::NoTruncation + ) + ind = SectorDict(c => (size(b, 2) + 1):size(b, 1) for (c, b) in blocks(S)) + V_truncated = truncate_space(space(S, 1), ind) + Ũ = similar(U, codomain(U) ← V_truncated) + truncate_domain!(Ũ, U, ind) + return Ũ, ind +end +function MAK.truncate( + ::typeof(right_null!), (S, Vᴴ)::NTuple{2, AbstractTensorMap}, strategy::NoTruncation + ) + ind = SectorDict(c => (size(b, 1) + 1):size(b, 2) for (c, b) in blocks(S)) + V_truncated = truncate_space(space(Vᴴ, 1), ind) + Ṽᴴ = similar(Vᴴ, V_truncated ← domain(Vᴴ)) + truncate_codomain!(Ṽᴴ, Vᴴ, ind) + return Ṽᴴ, ind +end for f! in (:eig_trunc!, :eigh_trunc!) @eval function MAK.truncate( From dd3b11268fa1cc47fc2ba8f46c064d996807ac50 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:22:45 -0500 Subject: [PATCH 12/34] update orth interface in tests --- test/tensors/factorizations.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/tensors/factorizations.jl b/test/tensors/factorizations.jl index c7950267b..af1e9ef6c 100644 --- a/test/tensors/factorizations.jl +++ b/test/tensors/factorizations.jl @@ -149,7 +149,7 @@ for V in spacelist @test isisometric(w) @test isposdef(p) - w, p = @constinferred left_orth(t; alg = TensorKit.LeftOrthAlgorithm{:polar}) + w, p = @constinferred left_orth(t; alg = :polar) @test w * p ≈ t @test isisometric(w) end @@ -163,7 +163,7 @@ for V in spacelist @test isisometric(wᴴ; side = :right) @test isposdef(p) - p, wᴴ = @constinferred right_orth(t; alg = TensorKit.RightOrthAlgorithm{:polar}) + p, wᴴ = @constinferred right_orth(t; alg = :polar) @test p * wᴴ ≈ t @test isisometric(wᴴ; side = :right) end @@ -194,19 +194,19 @@ for V in spacelist @test b ≈ s′[c] end - v, c = @constinferred left_orth(t; alg = TensorKit.LeftOrthAlgorithm{:svd}) + v, c = @constinferred left_orth(t; alg = :svd) @test v * c ≈ t @test 
isisometric(v) - - c, vᴴ = @constinferred right_orth(t; alg = TensorKit.RightOrthAlgorithm{:svd}) + + c, vᴴ = @constinferred right_orth(t; alg = :svd) @test c * vᴴ ≈ t @test isisometric(v; side = :right) - N = @constinferred left_null(t; kind = :svd) + N = @constinferred left_null(t; alg = :svd) @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) - Nᴴ = @constinferred right_null(t; kind = :svd) + Nᴴ = @constinferred right_null(t; alg = :svd) @test isisometric(Nᴴ; side = :right) @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) end From 7d20fc31c3985cc1d8c5432bd5870473d7102913 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:22:54 -0500 Subject: [PATCH 13/34] fix wrong variable --- test/tensors/factorizations.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tensors/factorizations.jl b/test/tensors/factorizations.jl index af1e9ef6c..5ca880582 100644 --- a/test/tensors/factorizations.jl +++ b/test/tensors/factorizations.jl @@ -200,7 +200,7 @@ for V in spacelist c, vᴴ = @constinferred right_orth(t; alg = :svd) @test c * vᴴ ≈ t - @test isisometric(v; side = :right) + @test isisometric(vᴴ; side = :right) N = @constinferred left_null(t; alg = :svd) @test isisometric(N) From f623bc3d56f7c475c11d387d6a788ffe9e14c45d Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:23:58 -0500 Subject: [PATCH 14/34] improve adjoint support --- src/factorizations/adjoint.jl | 23 +++++++++++++++++++---- src/tensors/adjoint.jl | 2 ++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index 8b209d43d..406fc8082 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -9,6 +9,21 @@ _adjoint(alg::MAK.LAPACK_HouseholderRQ) = MAK.LAPACK_HouseholderQL(; alg.kwargs. 
_adjoint(alg::MAK.PolarViaSVD) = MAK.PolarViaSVD(_adjoint(alg.svdalg)) _adjoint(alg::AbstractAlgorithm) = alg +for f in + [ + :svd_compact, :svd_full, :svd_trunc, :svd_vals, :qr_compact, :qr_full, :qr_null, + :lq_compact, :lq_full, :lq_null, :eig_full, :eig_trunc, :eig_vals, :eigh_full, + :eigh_trunc, :eigh_vals, :left_polar, :right_polar, + :project_hermitian, :project_antihermitian, :project_isometric, + ] + f! = Symbol(f, :!) + # just return the algorithm for the parent type since we are mapping this with + # `_adjoint` afterwards anyways. + # TODO: properly handle these cases + @eval MAK.default_algorithm(::typeof($f!), ::Type{T}; kwargs...) where {T <: AdjointTensorMap} = + MAK.default_algorithm($f!, TensorKit.parenttype(T); kwargs...) +end + # 1-arg functions function MAK.initialize_output(::typeof(left_null!), t::AdjointTensorMap, alg::AbstractAlgorithm) return adjoint(MAK.initialize_output(right_null!, adjoint(t), _adjoint(alg))) @@ -38,8 +53,8 @@ end # 2-arg functions for (left_f!, right_f!) in zip( - (:qr_full!, :qr_compact!, :left_polar!, :left_orth!), - (:lq_full!, :lq_compact!, :right_polar!, :right_orth!) + (:qr_full!, :qr_compact!, :left_polar!), + (:lq_full!, :lq_compact!, :right_polar!) ) @eval function MAK.copy_input(::typeof($left_f!), t::AdjointTensorMap) return adjoint(MAK.copy_input($right_f!, adjoint(t))) @@ -70,8 +85,8 @@ for (left_f!, right_f!) in zip( end for (left_f, right_f) in zip( - (:qr_full, :qr_compact, :left_polar, :left_orth), - (:lq_full, :lq_compact, :right_polar, :right_orth) + (:qr_full, :qr_compact, :left_polar), + (:lq_full, :lq_compact, :right_polar) ) @eval function MAK.$left_f(t::AdjointTensorMap; kwargs...) 
return reverse(adjoint.($right_f(adjoint(t); kwargs...))) diff --git a/src/tensors/adjoint.jl b/src/tensors/adjoint.jl index 2a229f2c5..ca484e77b 100644 --- a/src/tensors/adjoint.jl +++ b/src/tensors/adjoint.jl @@ -11,6 +11,8 @@ struct AdjointTensorMap{T, S, N₁, N₂, TT <: AbstractTensorMap{T, S, N₂, N parent::TT end Base.parent(t::AdjointTensorMap) = t.parent +parenttype(t::AdjointTensorMap) = parenttype(typeof(t)) +parenttype(::Type{AdjointTensorMap{T, S, N₁, N₂, TT}}) where {T, S, N₁, N₂, TT} = TT # Constructor: construct from taking adjoint of a tensor Base.adjoint(t::AdjointTensorMap) = parent(t) From 8636d67176a9376970ec18a556163059cd6e63a2 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:26:16 -0500 Subject: [PATCH 15/34] remove unnecessary left/rightorth functions --- src/TensorKit.jl | 3 +- src/factorizations/factorizations.jl | 1 - src/factorizations/matrixalgebrakit.jl | 168 +------------------------ 3 files changed, 2 insertions(+), 170 deletions(-) diff --git a/src/TensorKit.jl b/src/TensorKit.jl index 7ebdb504d..85bc9a051 100644 --- a/src/TensorKit.jl +++ b/src/TensorKit.jl @@ -82,8 +82,7 @@ export left_orth, right_orth, left_null, right_null, eigh_full!, eigh_full, eigh_trunc!, eigh_trunc, eig_full!, eig_full, eig_trunc!, eig_trunc, eigh_vals!, eigh_vals, eig_vals!, eig_vals, - isposdef, isposdef!, ishermitian, isisometric, isunitary, sylvester, rank, cond, - LeftOrthAlgorithm, RightOrthAlgorithm + isposdef, isposdef!, ishermitian, isisometric, isunitary, sylvester, rank, cond export braid, braid!, permute, permute!, transpose, transpose!, twist, twist!, repartition, repartition! 
diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index d618bca09..6961d55aa 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -16,7 +16,6 @@ using TensorOperations: Index2Tuple using MatrixAlgebraKit import MatrixAlgebraKit as MAK using MatrixAlgebraKit: AbstractAlgorithm, TruncatedAlgorithm, DiagonalAlgorithm -using MatrixAlgebraKit: LeftOrthAlgorithm, RightOrthAlgorithm using MatrixAlgebraKit: TruncationStrategy, NoTruncation, TruncationByValue, TruncationByError, TruncationIntersection, TruncationByFilter, TruncationByOrder using MatrixAlgebraKit: diagview, isisometric diff --git a/src/factorizations/matrixalgebrakit.jl b/src/factorizations/matrixalgebrakit.jl index 73fd04a6b..3478a6d27 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -26,7 +26,7 @@ end for f! in ( :qr_compact!, :qr_full!, :lq_compact!, :lq_full!, :eig_full!, :eigh_full!, :svd_compact!, :svd_full!, - :left_polar!, :right_polar! + :left_polar!, :right_polar!, ) @eval function MAK.$f!(t::AbstractTensorMap, F, alg::AbstractAlgorithm) MAK.check_input($f!, t, F, alg) @@ -45,43 +45,6 @@ for f! in ( end end -for alg in (MAK.LeftOrthAlgorithm{:qr}, MAK.LeftOrthAlgorithm{:svd}, MAK.LeftOrthAlgorithm{:polar}) - @eval begin - function MAK.left_orth!(t::AbstractTensorMap, F, alg::$alg) - MAK.check_input(left_orth!, t, F, alg) - - foreachblock(t, F...) do _, bs - factors = Base.tail(bs) - factors′ = left_orth!(first(bs), factors, alg) - # deal with the case where the output is not in-place - for (f′, f) in zip(factors′, factors) - f′ === f || copy!(f, f′) - end - return nothing - end - return F - end - end -end - -for alg in (MAK.RightOrthAlgorithm{:lq}, MAK.RightOrthAlgorithm{:svd}, MAK.RightOrthAlgorithm{:polar}) - @eval function MAK.right_orth!(t::AbstractTensorMap, F, alg::$alg) - MAK.check_input(right_orth!, t, F, alg) - - foreachblock(t, F...) 
do _, bs - factors = Base.tail(bs) - factors′ = right_orth!(first(bs), factors, alg) - # deal with the case where the output is not in-place - for (f′, f) in zip(factors′, factors) - f′ === f || copy!(f, f′) - end - return nothing - end - return F - end -end - - # Handle these separately because single output instead of tuple for f! in (:qr_null!, :lq_null!) @eval function MAK.$f!(t::AbstractTensorMap, N, alg::AbstractAlgorithm) @@ -484,135 +447,6 @@ function MAK.initialize_output(::typeof(right_polar!), t::AbstractTensorMap, ::A return P, Wᴴ end -# Orthogonalization -# ----------------- -function MAK.check_input(::typeof(left_orth!), t::AbstractTensorMap, VC, ::AbstractAlgorithm) - V, C = VC - - # scalartype checks - @check_scalar V t - isnothing(C) || @check_scalar C t - - # space checks - V_C = infimum(fuse(codomain(t)), fuse(domain(t))) - @check_space(V, codomain(t) ← V_C) - isnothing(C) || @check_space(C, V_C ← domain(t)) - - return nothing -end - -function MAK.check_input(::typeof(left_orth!), t::AbstractTensorMap, VC, alg::MAK.LeftOrthAlgorithm{:qr}) - return MAK.check_input(qr_compact!, t, VC, alg.alg) -end - -function MAK.check_input(::typeof(left_orth!), t::AbstractTensorMap, VC, alg::MAK.LeftOrthAlgorithm{:svd}) - return MAK.check_input(svd_compact!, t, VC, alg.alg) -end - -function MAK.check_input(::typeof(left_orth!), t::AbstractTensorMap, VC, alg::MAK.LeftOrthAlgorithm{:polar}) - return MAK.check_input(left_polar!, t, VC, alg.alg) -end - -function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, CVᴴ, ::AbstractAlgorithm) - C, Vᴴ = CVᴴ - - # scalartype checks - isnothing(C) || @check_scalar C t - @check_scalar Vᴴ t - - # space checks - V_C = infimum(fuse(codomain(t)), fuse(domain(t))) - isnothing(C) || @check_space(C, codomain(t) ← V_C) - @check_space(Vᴴ, V_C ← domain(t)) - - return nothing -end - -function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, VC, alg::MAK.RightOrthAlgorithm{:lq}) - return 
MAK.check_input(lq_compact!, t, VC, alg.alg) -end - -function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, VC, alg::MAK.RightOrthAlgorithm{:svd}) - return MAK.check_input(svd_compact!, t, VC, alg.alg) -end - -function MAK.check_input(::typeof(right_orth!), t::AbstractTensorMap, VC, alg::MAK.RightOrthAlgorithm{:polar}) - return MAK.check_input(right_polar!, t, VC, alg.alg) -end - -function MAK.initialize_output(::typeof(left_orth!), t::AbstractTensorMap) - V_C = infimum(fuse(codomain(t)), fuse(domain(t))) - V = similar(t, codomain(t) ← V_C) - C = similar(t, V_C ← domain(t)) - return V, C -end - -function MAK.initialize_output(::typeof(right_orth!), t::AbstractTensorMap) - V_C = infimum(fuse(codomain(t)), fuse(domain(t))) - C = similar(t, codomain(t) ← V_C) - Vᴴ = similar(t, V_C ← domain(t)) - return C, Vᴴ -end - -# This is a rework of the dispatch logic in order to avoid having to deal with having to -# allocate the output before knowing the kind of decomposition. In particular, here I disable -# providing output arguments for left_ and right_orth. -# This is mainly because polar decompositions have different shapes, and SVD for Diagonal -# also does -function MAK.left_orth!( - t::AbstractTensorMap; - trunc::TruncationStrategy = notrunc(), - alg::AbstractAlgorithm = (trunc == notrunc()) ? MAK.select_algorithm(left_orth!, t, Val(:qr)) : MAK.select_algorithm(left_orth!, t, Val(:svd); trunc) - ) - MAK.left_orth!(t, MAK.initialize_output(left_orth!, t, alg), alg) -end -function MAK.right_orth!( - t::AbstractTensorMap; - trunc::TruncationStrategy = notrunc(), - alg::AbstractAlgorithm = (trunc == notrunc()) ? 
MAK.select_algorithm(right_orth!, t, Val(:lq)) : MAK.select_algorithm(right_orth!, t, Val(:svd); trunc) - ) - MAK.right_orth!(t, MAK.initialize_output(right_orth!, t, alg), alg) -end - -# Nullspace -# --------- -function MAK.check_input(::typeof(left_null!), t::AbstractTensorMap, N, ::AbstractAlgorithm) - # scalartype checks - @check_scalar N t - - # space checks - V_Q = infimum(fuse(codomain(t)), fuse(domain(t))) - V_N = ⊖(fuse(codomain(t)), V_Q) - @check_space(N, codomain(t) ← V_N) - - return nothing -end - -function MAK.check_input(::typeof(right_null!), t::AbstractTensorMap, N, ::AbstractAlgorithm) - @check_scalar N t - - # space checks - V_Q = infimum(fuse(codomain(t)), fuse(domain(t))) - V_N = ⊖(fuse(domain(t)), V_Q) - @check_space(N, V_N ← domain(t)) - - return nothing -end - -function MAK.initialize_output(::typeof(left_null!), t::AbstractTensorMap) - V_Q = infimum(fuse(codomain(t)), fuse(domain(t))) - V_N = ⊖(fuse(codomain(t)), V_Q) - N = similar(t, codomain(t) ← V_N) - return N -end - -function MAK.initialize_output(::typeof(right_null!), t::AbstractTensorMap) - V_Q = infimum(fuse(codomain(t)), fuse(domain(t))) - V_N = ⊖(fuse(domain(t)), V_Q) - N = similar(t, V_N ← domain(t)) - return N -end - # Projections # ----------- function MAK.check_input(::typeof(project_hermitian!), tsrc::AbstractTensorMap, tdst::AbstractTensorMap) From a67099fe57901d825861eaa9dea9d6dd6a94e5b6 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:26:24 -0500 Subject: [PATCH 16/34] pass kwargs in isisometri --- src/factorizations/factorizations.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 6961d55aa..f9cef84a1 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -29,9 +29,9 @@ include("pullbacks.jl") TensorKit.one!(A::AbstractMatrix) = MatrixAlgebraKit.one!(A) -function 
MatrixAlgebraKit.isisometric(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple) +function MatrixAlgebraKit.isisometric(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple; kwargs...) t = permute(t, (p₁, p₂); copy = false) - return isisometric(t) + return isisometric(t; kwargs...) end #------------------------------# From 88ac077d34a1bab29ce674facf8e711f6689e202 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:34:07 -0500 Subject: [PATCH 17/34] add tests for truncation error --- test/tensors/factorizations.jl | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/test/tensors/factorizations.jl b/test/tensors/factorizations.jl index 5ca880582..2207b6bae 100644 --- a/test/tensors/factorizations.jl +++ b/test/tensors/factorizations.jl @@ -235,48 +235,55 @@ for V in spacelist @constinferred normalize!(t) - U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) + U, S, Vᴴ, ϵ = @constinferred svd_trunc(t; trunc = notrunc()) @test U * S * Vᴴ ≈ t + @test ϵ ≈ 0 @test isisometric(U) @test isisometric(Vᴴ; side = :right) trunc = truncrank(dim(domain(S)) ÷ 2) - U1, S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) + U1, S1, Vᴴ1, ϵ1 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ1' ≈ U1 * S1 @test isisometric(U1) @test isisometric(Vᴴ1; side = :right) + @test norm(t - U1 * S1 * Vᴴ1) ≈ ϵ1 atol = eps(real(T))^(4 / 5) @test dim(domain(S1)) <= trunc.howmany λ = minimum(minimum, values(LinearAlgebra.diag(S1))) trunc = trunctol(; atol = λ - 10eps(λ)) - U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) + U2, S2, Vᴴ2, ϵ2 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ2' ≈ U2 * S2 @test isisometric(U2) @test isisometric(Vᴴ2; side = :right) + @test norm(t - U2 * S2 * Vᴴ2) ≈ ϵ2 atol = eps(real(T))^(4 / 5) @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ @test U2 ≈ U1 @test S2 ≈ S1 @test Vᴴ2 ≈ Vᴴ1 + @test ϵ1 ≈ ϵ2 trunc = truncspace(space(S2, 1)) - U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) + U3, S3, Vᴴ3, ϵ3 = @constinferred 
svd_trunc(t; trunc) @test t * Vᴴ3' ≈ U3 * S3 @test isisometric(U3) @test isisometric(Vᴴ3; side = :right) + @test norm(t - U3 * S3 * Vᴴ3) ≈ ϵ3 atol = eps(real(T))^(4 / 5) @test space(S3, 1) ≾ space(S2, 1) - trunc = truncerror(; atol = 0.5) - U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) + trunc = truncerror(; atol = ϵ2) + U4, S4, Vᴴ4, ϵ4 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ4' ≈ U4 * S4 @test isisometric(U4) @test isisometric(Vᴴ4; side = :right) - @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 + @test norm(t - U4 * S4 * Vᴴ4) ≈ ϵ4 atol = eps(real(T))^(4 / 5) + @test ϵ4 ≤ ϵ2 trunc = truncrank(dim(domain(S)) ÷ 2) & trunctol(; atol = λ - 10eps(λ)) - U5, S5, Vᴴ5 = @constinferred svd_trunc(t; trunc) + U5, S5, Vᴴ5, ϵ5 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ5' ≈ U5 * S5 @test isisometric(U5) @test isisometric(Vᴴ5; side = :right) + @test norm(t - U5 * S5 * Vᴴ5) ≈ ϵ5 atol = eps(real(T))^(4 / 5) @test minimum(minimum, values(LinearAlgebra.diag(S5))) >= λ @test dim(domain(S5)) ≤ dim(domain(S)) ÷ 2 end From 046ee09bf96176c572d3af32a47dddbed12b6ec1 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:48:00 -0500 Subject: [PATCH 18/34] more nullspace tests and fixes --- src/factorizations/truncation.jl | 8 ++++---- test/tensors/factorizations.jl | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/factorizations/truncation.jl b/src/factorizations/truncation.jl index 23fc4c985..1940ec6de 100644 --- a/src/factorizations/truncation.jl +++ b/src/factorizations/truncation.jl @@ -79,7 +79,7 @@ function MAK.truncate( ::typeof(left_null!), (U, S)::NTuple{2, AbstractTensorMap}, strategy::TruncationStrategy ) extended_S = SectorDict( - c => vcat(diagview(b), zeros(eltype(b), max(0, size(b, 2) - size(b, 1)))) + c => vcat(diagview(b), zeros(eltype(b), max(0, size(b, 1) - size(b, 2)))) for (c, b) in blocks(S) ) ind = MAK.findtruncated(extended_S, strategy) @@ -92,11 +92,11 @@ function MAK.truncate( ::typeof(right_null!), (S, 
Vᴴ)::NTuple{2, AbstractTensorMap}, strategy::TruncationStrategy ) extended_S = SectorDict( - c => vcat(diagview(b), zeros(eltype(b), max(0, size(b, 1) - size(b, 2)))) + c => vcat(diagview(b), zeros(eltype(b), max(0, size(b, 2) - size(b, 1)))) for (c, b) in blocks(S) ) ind = MAK.findtruncated(extended_S, strategy) - V_truncated = truncate_space(space(Vᴴ, 1), ind) + V_truncated = truncate_space(dual(space(S, 2)), ind) Ṽᴴ = similar(Vᴴ, V_truncated ← domain(Vᴴ)) truncate_codomain!(Ṽᴴ, Vᴴ, ind) return Ṽᴴ, ind @@ -117,7 +117,7 @@ function MAK.truncate( ::typeof(right_null!), (S, Vᴴ)::NTuple{2, AbstractTensorMap}, strategy::NoTruncation ) ind = SectorDict(c => (size(b, 1) + 1):size(b, 2) for (c, b) in blocks(S)) - V_truncated = truncate_space(space(Vᴴ, 1), ind) + V_truncated = truncate_space(dual(space(S, 2)), ind) Ṽᴴ = similar(Vᴴ, V_truncated ← domain(Vᴴ)) truncate_codomain!(Ṽᴴ, Vᴴ, ind) return Ṽᴴ, ind diff --git a/test/tensors/factorizations.jl b/test/tensors/factorizations.jl index 2207b6bae..551c382d1 100644 --- a/test/tensors/factorizations.jl +++ b/test/tensors/factorizations.jl @@ -206,9 +206,17 @@ for V in spacelist @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + N = @constinferred left_null(t; trunc = (; atol = 100 * eps(norm(t)))) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + Nᴴ = @constinferred right_null(t; alg = :svd) @test isisometric(Nᴴ; side = :right) @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + + Nᴴ = @constinferred right_null(t; trunc = (; atol = 100 * eps(norm(t)))) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) end # empty tensor From bff9b9d1801c83c67badf9c3ac3fb367b5096a4b Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:56:52 -0500 Subject: [PATCH 19/34] remove unnecessary specialization --- src/factorizations/matrixalgebrakit.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/factorizations/matrixalgebrakit.jl 
b/src/factorizations/matrixalgebrakit.jl index 3478a6d27..8bb9fe536 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -149,11 +149,6 @@ function MAK.initialize_output(::typeof(svd_compact!), t::AbstractTensorMap, ::A return U, S, Vᴴ end -# TODO: remove this once `AbstractMatrix` specialization is removed in MatrixAlgebraKit -function MAK.initialize_output(::typeof(svd_trunc!), t::AbstractTensorMap, alg::TruncatedAlgorithm) - return MAK.initialize_output(svd_compact!, t, alg.alg) -end - function MAK.initialize_output(::typeof(svd_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm) V_cod = infimum(fuse(codomain(t)), fuse(domain(t))) return DiagonalTensorMap{real(scalartype(t))}(undef, V_cod) From 7cc93169ec648106fa86f64c4c5a36183bb2b735 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 09:59:44 -0500 Subject: [PATCH 20/34] add missing argument --- src/factorizations/truncation.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/factorizations/truncation.jl b/src/factorizations/truncation.jl index 1940ec6de..f08a15569 100644 --- a/src/factorizations/truncation.jl +++ b/src/factorizations/truncation.jl @@ -145,7 +145,8 @@ end # Find truncation # --------------- # auxiliary functions -rtol_to_atol(S, p, atol, rtol) = rtol > 0 ? max(atol, TensorKit._norm(S, p) * rtol) : atol +rtol_to_atol(S, p, atol, rtol) = + rtol == 0 ? 
atol : max(atol, TensorKit._norm(S, p, norm(zero(scalartype(valtype(S))))) * rtol) function _compute_truncerr(Σdata, truncdim, p = 2) I = keytype(Σdata) From 12a78266a2df0b4977caa34373dc02b9eec6f7cf Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 10:04:49 -0500 Subject: [PATCH 21/34] remove unnecessary specializations --- src/factorizations/adjoint.jl | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index 406fc8082..f2fc3548f 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -52,10 +52,12 @@ function MAK.is_right_isometric(t::AdjointTensorMap; kwargs...) end # 2-arg functions -for (left_f!, right_f!) in zip( - (:qr_full!, :qr_compact!, :left_polar!), - (:lq_full!, :lq_compact!, :right_polar!) +for (left_f, right_f) in zip( + (:qr_full, :qr_compact, :left_polar), + (:lq_full, :lq_compact, :right_polar) ) + left_f! = Symbol(left_f, :!) + right_f! = Symbol(right_f, :!) @eval function MAK.copy_input(::typeof($left_f!), t::AdjointTensorMap) return adjoint(MAK.copy_input($right_f!, adjoint(t))) end @@ -74,26 +76,10 @@ for (left_f!, right_f!) in zip( return reverse(adjoint.(MAK.initialize_output($left_f!, adjoint(t), _adjoint(alg)))) end - @eval function MAK.$left_f!(t::AdjointTensorMap, F, alg::AbstractAlgorithm) + @eval MAK.$left_f!(t::AdjointTensorMap, F, alg::AbstractAlgorithm) = $right_f!(adjoint(t), reverse(adjoint.(F)), _adjoint(alg)) - return F - end - @eval function MAK.$right_f!(t::AdjointTensorMap, F, alg::AbstractAlgorithm) + @eval MAK.$right_f!(t::AdjointTensorMap, F, alg::AbstractAlgorithm) = $left_f!(adjoint(t), reverse(adjoint.(F)), _adjoint(alg)) - return F - end -end - -for (left_f, right_f) in zip( - (:qr_full, :qr_compact, :left_polar), - (:lq_full, :lq_compact, :right_polar) - ) - @eval function MAK.$left_f(t::AdjointTensorMap; kwargs...) 
- return reverse(adjoint.($right_f(adjoint(t); kwargs...))) - end - @eval function MAK.$right_f(t::AdjointTensorMap; kwargs...) - return reverse(adjoint.($left_f(adjoint(t); kwargs...))) - end end # 3-arg functions From e6b3cee079c23032fd17884d50312566e8674f87 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 10:09:11 -0500 Subject: [PATCH 22/34] slight reorganization --- src/factorizations/factorizations.jl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index f9cef84a1..33f267e27 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -18,7 +18,7 @@ import MatrixAlgebraKit as MAK using MatrixAlgebraKit: AbstractAlgorithm, TruncatedAlgorithm, DiagonalAlgorithm using MatrixAlgebraKit: TruncationStrategy, NoTruncation, TruncationByValue, TruncationByError, TruncationIntersection, TruncationByFilter, TruncationByOrder -using MatrixAlgebraKit: diagview, isisometric +using MatrixAlgebraKit: diagview include("utility.jl") include("matrixalgebrakit.jl") @@ -29,11 +29,6 @@ include("pullbacks.jl") TensorKit.one!(A::AbstractMatrix) = MatrixAlgebraKit.one!(A) -function MatrixAlgebraKit.isisometric(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple; kwargs...) - t = permute(t, (p₁, p₂); copy = false) - return isisometric(t; kwargs...) -end - #------------------------------# # LinearAlgebra overloads #------------------------------# @@ -96,5 +91,9 @@ function MAK.is_right_isometric(t::AbstractTensorMap; kwargs...) f((c, b)) = MAK.is_right_isometric(b; kwargs...) return all(f, blocks(t)) end +function MAK.isisometric(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple; kwargs...) + t = permute(t, (p₁, p₂); copy = false) + return MAK.isisometric(t; kwargs...) 
+end end From fa6fa86b54a1626908d2e40f7cc77b43993de5c3 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 10:09:16 -0500 Subject: [PATCH 23/34] export new functionality --- src/TensorKit.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/TensorKit.jl b/src/TensorKit.jl index 85bc9a051..ded08e50e 100644 --- a/src/TensorKit.jl +++ b/src/TensorKit.jl @@ -79,10 +79,12 @@ export left_orth, right_orth, left_null, right_null, qr_full!, qr_compact!, qr_null!, lq_full!, lq_compact!, lq_null!, svd_compact!, svd_full!, svd_trunc!, svd_compact, svd_full, svd_trunc, exp, exp!, - eigh_full!, eigh_full, eigh_trunc!, eigh_trunc, eig_full!, eig_full, eig_trunc!, - eig_trunc, - eigh_vals!, eigh_vals, eig_vals!, eig_vals, - isposdef, isposdef!, ishermitian, isisometric, isunitary, sylvester, rank, cond + eigh_full!, eigh_full, eigh_trunc!, eigh_trunc, eigh_vals!, eigh_vals, + eig_full!, eig_full, eig_trunc!, eig_trunc, eig_vals!, eig_vals, + ishermitian, project_hermitian, project_hermitian!, + isantihermitian, project_antihermitian, project_antihermitian!, + isisometric, isunitary, project_isometric, project_isometric!, + isposdef, isposdef!, sylvester, rank, cond export braid, braid!, permute, permute!, transpose, transpose!, twist, twist!, repartition, repartition! 
From 86c31e542201840a5b7c99372be8577d94e5bce8 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 20:25:04 -0500 Subject: [PATCH 24/34] simplify truncationerror implementation --- src/factorizations/truncation.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/factorizations/truncation.jl b/src/factorizations/truncation.jl index f08a15569..0e9e2dda8 100644 --- a/src/factorizations/truncation.jl +++ b/src/factorizations/truncation.jl @@ -282,9 +282,8 @@ MAK.truncation_error(values::SectorDict, ind) = MAK.truncation_error!(SectorDict(c => copy(v) for (c, v) in values), ind) function MAK.truncation_error!(values::SectorDict, ind) - for (c, v) in values - ind_c = get(ind, c, nothing) - isnothing(ind_c) && continue + for (c, ind_c) in ind + v = values[c] v[ind_c] .= zero(eltype(v)) end return TensorKit._norm(values, 2, zero(real(eltype(valtype(values))))) From 244e6dc1c5dbd298b4efad5277c0652c455ee6fb Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 20:25:11 -0500 Subject: [PATCH 25/34] fix testcase --- test/tensors/tensors.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tensors/tensors.jl b/test/tensors/tensors.jl index 51c326cf9..34934eb34 100644 --- a/test/tensors/tensors.jl +++ b/test/tensors/tensors.jl @@ -364,7 +364,7 @@ for V in spacelist for T in (Float64, ComplexF64) t1 = randisometry(T, W1, W2) t2 = randisometry(T, W2 ← W2) - @test isisometry(t1) + @test isisometric(t1) @test isunitary(t2) P = t1 * t1' @test P * P ≈ P From 0cf6cbd1914e4ef5186a3a39ebc9775feb331782 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 20:32:23 -0500 Subject: [PATCH 26/34] refrain from defining left_null functions to avoid ambiguities --- src/factorizations/adjoint.jl | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index f2fc3548f..41a0b9513 100644 --- 
a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -25,31 +25,20 @@ for f in end # 1-arg functions -function MAK.initialize_output(::typeof(left_null!), t::AdjointTensorMap, alg::AbstractAlgorithm) - return adjoint(MAK.initialize_output(right_null!, adjoint(t), _adjoint(alg))) -end -function MAK.initialize_output( - ::typeof(right_null!), t::AdjointTensorMap, - alg::AbstractAlgorithm - ) - return adjoint(MAK.initialize_output(left_null!, adjoint(t), _adjoint(alg))) -end +MAK.initialize_output(::typeof(qr_null!), t::AdjointTensorMap, alg::AbstractAlgorithm) = + adjoint(MAK.initialize_output(lq_null!, adjoint(t), _adjoint(alg))) +MAK.initialize_output(::typeof(lq_null!), t::AdjointTensorMap, alg::AbstractAlgorithm) = + adjoint(MAK.initialize_output(qr_null!, adjoint(t), _adjoint(alg))) -function MAK.left_null!(t::AdjointTensorMap, N, alg::AbstractAlgorithm) - right_null!(adjoint(t), adjoint(N), _adjoint(alg)) - return N -end -function MAK.right_null!(t::AdjointTensorMap, N, alg::AbstractAlgorithm) - left_null!(adjoint(t), adjoint(N), _adjoint(alg)) - return N -end +MAK.qr_null!(t::AdjointTensorMap, N, alg::AbstractAlgorithm) = + lq_null!(adjoint(t), adjoint(N), _adjoint(alg)) +MAK.lq_null!(t::AdjointTensorMap, N, alg::AbstractAlgorithm) = + qr_null!(adjoint(t), adjoint(N), _adjoint(alg)) -function MAK.is_left_isometric(t::AdjointTensorMap; kwargs...) - return MAK.is_right_isometric(adjoint(t); kwargs...) -end -function MAK.is_right_isometric(t::AdjointTensorMap; kwargs...) - return MAK.is_left_isometric(adjoint(t); kwargs...) -end +MAK.is_left_isometric(t::AdjointTensorMap; kwargs...) = + MAK.is_right_isometric(adjoint(t); kwargs...) +MAK.is_right_isometric(t::AdjointTensorMap; kwargs...) = + MAK.is_left_isometric(adjoint(t); kwargs...) 
# 2-arg functions for (left_f, right_f) in zip( From 110d0fbf46ff75054af95fc85513abe21a9e9f96 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 20:48:07 -0500 Subject: [PATCH 27/34] fix ad tests --- test/autodiff/ad.jl | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/test/autodiff/ad.jl b/test/autodiff/ad.jl index 37d4bff46..6426e8d6b 100644 --- a/test/autodiff/ad.jl +++ b/test/autodiff/ad.jl @@ -90,7 +90,6 @@ function remove_qrgauge_dependence!(ΔQ, t, Q) end return ΔQ end - function remove_lqgauge_dependence!(ΔQ, t, Q) for (c, b) in blocks(ΔQ) m, n = size(block(t, c)) @@ -103,7 +102,7 @@ function remove_lqgauge_dependence!(ΔQ, t, Q) return ΔQ end function remove_eiggauge_dependence!( - ΔV, D, V; degeneracy_atol = MatrixAlgebraKit.default_pullback_gauge_atol(D) + ΔV, D, V; degeneracy_atol = MatrixAlgebraKit.default_pullback_degeneracy_atol(D) ) gaugepart = V' * ΔV for (c, b) in blocks(gaugepart) @@ -119,9 +118,9 @@ function remove_eiggauge_dependence!( return ΔV end function remove_eighgauge_dependence!( - ΔV, D, V; degeneracy_atol = MatrixAlgebraKit.default_pullback_gauge_atol(D) + ΔV, D, V; degeneracy_atol = MatrixAlgebraKit.default_pullback_degeneracy_atol(D) ) - gaugepart = V' * ΔV + gaugepart = project_antihermitian!(V' * ΔV) gaugepart = (gaugepart - gaugepart') / 2 for (c, b) in blocks(gaugepart) Dc = diagview(block(D, c)) @@ -136,10 +135,9 @@ function remove_eighgauge_dependence!( return ΔV end function remove_svdgauge_dependence!( - ΔU, ΔVᴴ, U, S, Vᴴ; degeneracy_atol = MatrixAlgebraKit.default_pullback_gauge_atol(S) + ΔU, ΔVᴴ, U, S, Vᴴ; degeneracy_atol = MatrixAlgebraKit.default_pullback_degeneracy_atol(S) ) - gaugepart = U' * ΔU + Vᴴ * ΔVᴴ' - gaugepart = (gaugepart - gaugepart') / 2 + gaugepart = project_antihermitian!(U' * ΔU + Vᴴ * ΔVᴴ') for (c, b) in blocks(gaugepart) Sd = diagview(block(S, c)) # for some reason this fails only on tests, and I cannot reproduce it in an @@ -153,8 +151,6 @@ function 
remove_svdgauge_dependence!( return ΔU, ΔVᴴ end -project_hermitian(A) = (A + A') / 2 - # Tests # ----- @@ -601,7 +597,7 @@ for V in spacelist USVᴴ_trunc = svd_trunc(t; trunc) ΔUSVᴴ_trunc = rand_tangent.(USVᴴ_trunc) remove_svdgauge_dependence!( - ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], USVᴴ_trunc...; degeneracy_atol + ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], Base.front(USVᴴ_trunc)...; degeneracy_atol ) # test_ad_rrule(svd_trunc, t; # fkwargs=(; trunc), output_tangent=ΔUSVᴴ_trunc, atol, rtol) @@ -610,7 +606,7 @@ for V in spacelist USVᴴ_trunc = svd_trunc(t; trunc) ΔUSVᴴ_trunc = rand_tangent.(USVᴴ_trunc) remove_svdgauge_dependence!( - ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], USVᴴ_trunc...; degeneracy_atol + ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], Base.front(USVᴴ_trunc)...; degeneracy_atol ) test_ad_rrule( svd_trunc, t; @@ -631,7 +627,7 @@ for V in spacelist USVᴴ_trunc = svd_trunc(t; trunc) ΔUSVᴴ_trunc = rand_tangent.(USVᴴ_trunc) remove_svdgauge_dependence!( - ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], USVᴴ_trunc...; degeneracy_atol + ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], Base.front(USVᴴ_trunc)...; degeneracy_atol ) test_ad_rrule( svd_trunc, t; From 35e79aee9f904a1ae4addeb032b90d949f758889 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 21:49:59 -0500 Subject: [PATCH 28/34] add projection tests --- src/TensorKit.jl | 2 +- src/factorizations/matrixalgebrakit.jl | 22 +++++++--- test/tensors/factorizations.jl | 57 ++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/src/TensorKit.jl b/src/TensorKit.jl index ded08e50e..c6e003450 100644 --- a/src/TensorKit.jl +++ b/src/TensorKit.jl @@ -137,7 +137,7 @@ using LinearAlgebra: norm, dot, normalize, normalize!, tr, adjoint, adjoint!, transpose, transpose!, lu, pinv, sylvester, eigen, eigen!, svd, svd!, - isposdef, isposdef!, ishermitian, rank, cond, + isposdef, isposdef!, rank, cond, Diagonal, Hermitian using MatrixAlgebraKit diff --git a/src/factorizations/matrixalgebrakit.jl b/src/factorizations/matrixalgebrakit.jl index 
8bb9fe536..72d7015c0 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -46,7 +46,7 @@ for f! in ( end # Handle these separately because single output instead of tuple -for f! in (:qr_null!, :lq_null!) +for f! in (:qr_null!, :lq_null!, :project_hermitian!, :project_antihermitian!, :project_isometric!) @eval function MAK.$f!(t::AbstractTensorMap, N, alg::AbstractAlgorithm) MAK.check_input($f!, t, N, alg) @@ -444,14 +444,24 @@ end # Projections # ----------- -function MAK.check_input(::typeof(project_hermitian!), tsrc::AbstractTensorMap, tdst::AbstractTensorMap) +function MAK.check_input(::typeof(project_hermitian!), tsrc::AbstractTensorMap, tdst::AbstractTensorMap, ::AbstractAlgorithm) domain(tsrc) == codomain(tsrc) || throw(ArgumentError("Hermitian projection requires square input tensor")) tsrc === tdst || @check_space(tdst, space(tsrc)) return nothing end -MAK.check_input(::typeof(project_antihermitian!), tsrc::AbstractTensorMap, tdst::AbstractTensorMap) = - MAK.check_input(project_hermitian!, tsrc, tdst) +MAK.check_input(::typeof(project_antihermitian!), tsrc::AbstractTensorMap, tdst::AbstractTensorMap, alg::AbstractAlgorithm) = + MAK.check_input(project_hermitian!, tsrc, tdst, alg) -MAK.initialize_output(::typeof(project_hermitian!), tsrc::AbstractTensorMap) = tsrc -MAK.initialize_output(::typeof(project_antihermitian!), tsrc::AbstractTensorMap) = tsrc +function MAK.check_input(::typeof(project_isometric!), t::AbstractTensorMap, W::AbstractTensorMap, alg::AbstractAlgorithm) + codomain(t) ≿ domain(t) || throw(ArgumentError("Isometric projection requires `codomain(t) ≿ domain(t)`")) + @check_space W space(t) + @check_scalar(W, t) + + return nothing +end + + +MAK.initialize_output(::typeof(project_hermitian!), tsrc::AbstractTensorMap, ::AbstractAlgorithm) = tsrc +MAK.initialize_output(::typeof(project_antihermitian!), tsrc::AbstractTensorMap, ::AbstractAlgorithm) = tsrc 
+MAK.initialize_output(::typeof(project_isometric!), tsrc::AbstractTensorMap, ::AbstractAlgorithm) = similar(tsrc) diff --git a/test/tensors/factorizations.jl b/test/tensors/factorizations.jl index 551c382d1..2c1638a30 100644 --- a/test/tensors/factorizations.jl +++ b/test/tensors/factorizations.jl @@ -389,5 +389,62 @@ for V in spacelist @test cond(t) ≈ λmax / λmin end end + + @testset "Hermitian projections" begin + for T in eltypes, + t in ( + rand(T, V1, V1), rand(T, W, W), rand(T, W, W)', + DiagonalTensorMap(rand(T, reduceddim(V1)), V1), + ) + normalize!(t) + noisefactor = eps(real(T))^(3 / 4) + + th = (t + t') / 2 + ta = (t - t') / 2 + tc = copy(t) + + th′ = @constinferred project_hermitian(t) + @test ishermitian(th′) + @test th′ ≈ th + @test t == tc + th_approx = th + noisefactor * ta + @test !ishermitian(th_approx) || (T <: Real && t isa DiagonalTensorMap) + @test ishermitian(th_approx; atol = 10 * noisefactor) + + ta′ = project_antihermitian(t) + @test isantihermitian(ta′) + @test ta′ ≈ ta + @test t == tc + ta_approx = ta + noisefactor * th + @test !isantihermitian(ta_approx) + @test isantihermitian(ta_approx; atol = 10 * noisefactor) || (T <: Real && t isa DiagonalTensorMap) + end + end + + @testset "Isometric projections" begin + for T in eltypes, + t in ( + randn(T, W, W), randn(T, W, W)', + randn(T, W, V1), randn(T, V1, W)', + ) + t2 = project_isometric(t) + @test isisometric(t2) + t3 = project_isometric(t2) + @test t3 ≈ t2 # stability of the projection + @test t2 * (t2' * t) ≈ t + + tc = similar(t) + t3 = @constinferred project_isometric!(copy!(tc, t), t2) + @test t3 === t2 + @test isisometric(t2) + + # test that t2 is closer to A then any other isometry + for k in 1:10 + δt = randn!(similar(t)) + t3 = project_isometric(t + δt / 100) + @test norm(t - t3) > norm(t - t2) + end + end + end end end From 104fde46d43f9d8e7d12e74d70f293292558565e Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sat, 15 Nov 2025 22:12:16 -0500 Subject: [PATCH 29/34] temporary 
AD fixes --- test/autodiff/ad.jl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/test/autodiff/ad.jl b/test/autodiff/ad.jl index 6426e8d6b..bd1ac4d14 100644 --- a/test/autodiff/ad.jl +++ b/test/autodiff/ad.jl @@ -77,6 +77,9 @@ function test_ad_rrule(f, args...; check_inferred = false, kwargs...) return nothing end +# project_hermitian is non-differentiable for now +_project_hermitian(x) = (x + x') / 2 + # Gauge fixing tangents # --------------------- function remove_qrgauge_dependence!(ΔQ, t, Q) @@ -564,7 +567,7 @@ for V in spacelist remove_eighgauge_dependence!(Δv, d, v) # necessary for FiniteDifferences to not complain - eigh_full′ = eigh_full ∘ project_hermitian + eigh_full′ = eigh_full ∘ _project_hermitian test_ad_rrule(eigh_full′, t; output_tangent = (Δd, Δv), atol, rtol) test_ad_rrule(first ∘ eigh_full′, t; output_tangent = Δd, atol, rtol) @@ -595,7 +598,7 @@ for V in spacelist trunc = truncrank(max(2, round(Int, min(dim(domain(t)), dim(codomain(t))) * (3 / 4)))) USVᴴ_trunc = svd_trunc(t; trunc) - ΔUSVᴴ_trunc = rand_tangent.(USVᴴ_trunc) + ΔUSVᴴ_trunc = (rand_tangent.(Base.front(USVᴴ_trunc))..., zero(last(USVᴴ_trunc))) remove_svdgauge_dependence!( ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], Base.front(USVᴴ_trunc)...; degeneracy_atol ) @@ -604,7 +607,7 @@ for V in spacelist trunc = truncspace(space(USVᴴ_trunc[2], 1)) USVᴴ_trunc = svd_trunc(t; trunc) - ΔUSVᴴ_trunc = rand_tangent.(USVᴴ_trunc) + ΔUSVᴴ_trunc = (rand_tangent.(Base.front(USVᴴ_trunc))..., zero(last(USVᴴ_trunc))) remove_svdgauge_dependence!( ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], Base.front(USVᴴ_trunc)...; degeneracy_atol ) @@ -625,14 +628,14 @@ for V in spacelist tol = minimum(((c, b),) -> minimum(diagview(b)), blocks(USVᴴ_trunc[2])) trunc = trunctol(; atol = 10 * tol) USVᴴ_trunc = svd_trunc(t; trunc) - ΔUSVᴴ_trunc = rand_tangent.(USVᴴ_trunc) + ΔUSVᴴ_trunc = (rand_tangent.(Base.front(USVᴴ_trunc))..., zero(last(USVᴴ_trunc))) remove_svdgauge_dependence!( ΔUSVᴴ_trunc[1], 
ΔUSVᴴ_trunc[3], Base.front(USVᴴ_trunc)...; degeneracy_atol ) - test_ad_rrule( - svd_trunc, t; - fkwargs = (; trunc), output_tangent = ΔUSVᴴ_trunc, atol, rtol - ) + # test_ad_rrule( + # svd_trunc, t; + # fkwargs = (; trunc), output_tangent = ΔUSVᴴ_trunc, atol, rtol + # ) end end From 75f8d17b58eeacae5e5f0ad8de550e978755c010 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Sun, 16 Nov 2025 08:55:29 -0500 Subject: [PATCH 30/34] try and add back some AD tests --- test/autodiff/ad.jl | 55 +++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/test/autodiff/ad.jl b/test/autodiff/ad.jl index bd1ac4d14..9a2def71d 100644 --- a/test/autodiff/ad.jl +++ b/test/autodiff/ad.jl @@ -593,19 +593,12 @@ for V in spacelist test_ad_rrule(svd_compact, t; output_tangent = (ΔU, ΔS, ΔVᴴ), atol, rtol) test_ad_rrule(svd_compact, t; output_tangent = (ΔU, ΔS2, ΔVᴴ), atol, rtol) - # TODO: I'm not sure how to properly test with spaces that might change - # with the finite-difference methods, as then the jacobian is ill-defined. - - trunc = truncrank(max(2, round(Int, min(dim(domain(t)), dim(codomain(t))) * (3 / 4)))) - USVᴴ_trunc = svd_trunc(t; trunc) - ΔUSVᴴ_trunc = (rand_tangent.(Base.front(USVᴴ_trunc))..., zero(last(USVᴴ_trunc))) - remove_svdgauge_dependence!( - ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], Base.front(USVᴴ_trunc)...; degeneracy_atol - ) - # test_ad_rrule(svd_trunc, t; - # fkwargs=(; trunc), output_tangent=ΔUSVᴴ_trunc, atol, rtol) - - trunc = truncspace(space(USVᴴ_trunc[2], 1)) + # Testing truncation with finitedifferences is RNG-prone since the + # Jacobian changes size if the truncation space changes, causing errors. + # So, first test the fixed space case, then do more limited testing on + # some gradients and compare to the fixed space case + V_trunc = spacetype(t)(c => ceil(Int, min(size(b)...) 
/ 2) for (c, b) in blocks(t)) + trunc = truncspace(V_trunc) USVᴴ_trunc = svd_trunc(t; trunc) ΔUSVᴴ_trunc = (rand_tangent.(Base.front(USVᴴ_trunc))..., zero(last(USVᴴ_trunc))) remove_svdgauge_dependence!( @@ -616,26 +609,30 @@ for V in spacelist fkwargs = (; trunc), output_tangent = ΔUSVᴴ_trunc, atol, rtol ) - # ϵ = norm(*(USVᴴ_trunc...) - t) - # trunc = truncerror(; atol=ϵ) - # USVᴴ_trunc = svd_trunc(t; trunc) - # ΔUSVᴴ_trunc = rand_tangent.(USVᴴ_trunc) - # remove_svdgauge_dependence!(ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], USVᴴ_trunc...; - # degeneracy_atol) - # test_ad_rrule(svd_trunc, t; - # fkwargs=(; trunc), output_tangent=ΔUSVᴴ_trunc, atol, rtol) + # attempt to construct a loss function that doesn't depend on the gauges + function f(t; trunc) + Utr, Str, Vᴴtr, ϵ = svd_trunc(t; trunc) + return LinearAlgebra.tr(Str) + LinearAlgebra.norm(Utr * Vᴴtr) + end + + trunc = truncrank(round(Int, dim(V_trunc))) + USVᴴ_trunc = svd_trunc(t; trunc) + g1, = Zygote.gradient(x -> f(x; trunc), t) + g2, = Zygote.gradient(x -> f(x; trunc = truncspace(space(USVᴴ_trunc[2], 1))), t) + @test g1 ≈ g2 + + trunc = truncerror(; atol = last(USVᴴ_trunc)) + USVᴴ_trunc = svd_trunc(t; trunc) + g1, = Zygote.gradient(x -> f(x; trunc), t) + g2, = Zygote.gradient(x -> f(x; trunc = truncspace(space(USVᴴ_trunc[2], 1))), t) + @test g1 ≈ g2 tol = minimum(((c, b),) -> minimum(diagview(b)), blocks(USVᴴ_trunc[2])) trunc = trunctol(; atol = 10 * tol) USVᴴ_trunc = svd_trunc(t; trunc) - ΔUSVᴴ_trunc = (rand_tangent.(Base.front(USVᴴ_trunc))..., zero(last(USVᴴ_trunc))) - remove_svdgauge_dependence!( - ΔUSVᴴ_trunc[1], ΔUSVᴴ_trunc[3], Base.front(USVᴴ_trunc)...; degeneracy_atol - ) - # test_ad_rrule( - # svd_trunc, t; - # fkwargs = (; trunc), output_tangent = ΔUSVᴴ_trunc, atol, rtol - # ) + g1, = Zygote.gradient(x -> f(x; trunc), t) + g2, = Zygote.gradient(x -> f(x; trunc = truncspace(space(USVᴴ_trunc[2], 1))), t) + @test g1 ≈ g2 end end From 6d3a22d25c7afe1374d3228e144a2db92edc0ffa Mon Sep 17 00:00:00 2001 From: 
Katharine Hyatt Date: Tue, 7 Oct 2025 10:45:42 -0400 Subject: [PATCH 31/34] Start on GPU extensions --- Project.toml | 25 +- ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl | 109 ++++ ext/TensorKitAMDGPUExt/roctensormap.jl | 278 ++++++++ ext/TensorKitCUDAExt/TensorKitCUDAExt.jl | 107 ++++ ext/TensorKitCUDAExt/cutensormap.jl | 340 ++++++++++ src/auxiliary/random.jl | 13 +- src/factorizations/factorizations.jl | 3 +- src/factorizations/matrixalgebrakit.jl | 14 +- src/factorizations/truncation.jl | 18 +- src/tensors/diagonal.jl | 2 +- src/tensors/linalg.jl | 12 +- src/tensors/tensor.jl | 4 +- test/amd/factorizations.jl | 406 ++++++++++++ test/amd/tensors.jl | 609 ++++++++++++++++++ test/cuda/factorizations.jl | 390 ++++++++++++ test/cuda/tensors.jl | 629 +++++++++++++++++++ test/runtests.jl | 12 +- 17 files changed, 2933 insertions(+), 38 deletions(-) create mode 100644 ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl create mode 100644 ext/TensorKitAMDGPUExt/roctensormap.jl create mode 100644 ext/TensorKitCUDAExt/TensorKitCUDAExt.jl create mode 100644 ext/TensorKitCUDAExt/cutensormap.jl create mode 100644 test/amd/factorizations.jl create mode 100644 test/amd/tensors.jl create mode 100644 test/cuda/factorizations.jl create mode 100644 test/cuda/tensors.jl diff --git a/Project.toml b/Project.toml index 137aa050c..c968768e5 100644 --- a/Project.toml +++ b/Project.toml @@ -19,20 +19,35 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" [weakdeps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" + +[sources] +GPUArrays = {rev = "master", url = "https://github.com/JuliaGPU/GPUArrays.jl"} +MatrixAlgebraKit = {rev = "ksh/tk", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"} +AMDGPU = {rev = 
"master", url = "https://github.com/JuliaGPU/AMDGPU.jl"} +cuTENSOR = {subdir = "lib/cutensor", url = "https://github.com/JuliaGPU/CUDA.jl", rev="master"} [extensions] +TensorKitAMDGPUExt = "AMDGPU" +TensorKitCUDAExt = ["CUDA", "cuTENSOR"] TensorKitChainRulesCoreExt = "ChainRulesCore" TensorKitFiniteDifferencesExt = "FiniteDifferences" [compat] +AMDGPU = "2" +Adapt = "4" Aqua = "0.6, 0.7, 0.8" ArgParse = "1.2.0" +CUDA = "5.9" ChainRulesCore = "1" ChainRulesTestUtils = "1" Combinatorics = "1" FiniteDifferences = "0.12" +GPUArrays = "11.2.6" LRUCache = "1.0.2" LinearAlgebra = "1" MatrixAlgebraKit = "0.6.0" @@ -50,21 +65,27 @@ TestExtras = "0.2,0.3" TupleTools = "1.1" VectorInterface = "0.4.8, 0.5" Zygote = "0.7" +cuTENSOR = "2" julia = "1.10" [extras] -ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] -test = ["ArgParse", "Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "SafeTestsets", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] +test = ["ArgParse", "Adapt", "AMDGPU", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", 
"LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] diff --git a/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl new file mode 100644 index 000000000..f3a67ca43 --- /dev/null +++ b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl @@ -0,0 +1,109 @@ +module TensorKitAMDGPUExt + +using AMDGPU, AMDGPU.rocBLAS, LinearAlgebra +using AMDGPU: @allowscalar +import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn! + +using TensorKit +import TensorKit.VectorInterface: scalartype as vi_scalartype +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap + +using TensorKit.MatrixAlgebraKit + +using Random + +include("roctensormap.jl") + +const ROCDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, ROCVector{T, AMDGPU.Mem.HIPBuffer}} + +""" + ROCDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace} + # expert mode: select storage type `A` + DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}} + +Construct a `DiagonalTensorMap` with uninitialized data. 
+""" +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T} + (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) || + throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices")) + return ROCDiagonalTensorMap{T}(undef, domain(V)) +end +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T} + length(V) == 1 || + throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`")) + return ROCDiagonalTensorMap{T}(undef, only(V)) +end +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} + return ROCDiagonalTensorMap{T, S}(undef, V) +end +ROCDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = ROCDiagonalTensorMap{Float64}(undef, V) + +function ROCDiagonalTensorMap(data::ROCVector{T}, V::S) where {T, S} + return ROCDiagonalTensorMap{T, S}(data, V) +end + +function ROCDiagonalTensorMap(data::Vector{T}, V::S) where {T, S} + return ROCDiagonalTensorMap{T, S}(ROCVector{T}(data), V) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::ROCDiagonalTensorMap, alg::DiagonalAlgorithm) + V_cod = fuse(codomain(t)) + V_dom = fuse(domain(t)) + U = similar(t, codomain(t) ← V_cod) + S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::ROCTensorMap, alg::AbstractAlgorithm) + V_cod = infimum(fuse(codomain(t)), fuse(domain(t))) + return ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::ROCTensorMap, ::AbstractAlgorithm) + V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t))) + U = similar(t, codomain(t) ← V_cod) + S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + +function 
TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::ROCTensorMap, ::AbstractAlgorithm) + V_D = fuse(domain(t)) + T = real(scalartype(t)) + D = ROCDiagonalTensorMap{T}(undef, V_D) + V = similar(t, codomain(t) ← V_D) + return D, V +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::ROCTensorMap, ::AbstractAlgorithm) + V_D = fuse(domain(t)) + Tc = complex(scalartype(t)) + D = ROCDiagonalTensorMap{Tc}(undef, V_D) + V = similar(t, Tc, codomain(t) ← V_D) + return D, V +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::ROCTensorMap, alg::AbstractAlgorithm) + V_D = fuse(domain(t)) + T = real(scalartype(t)) + return D = ROCDiagonalTensorMap{Tc}(undef, V_D) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::ROCTensorMap, alg::AbstractAlgorithm) + V_D = fuse(domain(t)) + Tc = complex(scalartype(t)) + return D = ROCDiagonalTensorMap{Tc}(undef, V_D) +end + + +# TODO +# add VectorInterface extensions for proper AMDGPU promotion +function TensorKit.VectorInterface.promote_add(TA::Type{<:AMDGPU.StridedROCMatrix{Tx}}, TB::Type{<:AMDGPU.StridedROCMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ} + return Base.promote_op(add, Tx, Ty, Tα, Tβ) +end + +end diff --git a/ext/TensorKitAMDGPUExt/roctensormap.jl b/ext/TensorKitAMDGPUExt/roctensormap.jl new file mode 100644 index 000000000..88b69ea0d --- /dev/null +++ b/ext/TensorKitAMDGPUExt/roctensormap.jl @@ -0,0 +1,278 @@ +const ROCTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, ROCVector{T, AMDGPU.Mem.HIPBuffer}} +const ROCTensor{T, S, N} = ROCTensorMap{T, S, N, 0} + +const AdjointROCTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, ROCTensorMap{T, S, N₁, N₂}} + +function TensorKit.tensormaptype(S::Type{<:IndexSpace}, N₁, N₂, TorA::Type{<:StridedROCArray}) + if TorA <: ROCArray + return TensorMap{eltype(TorA), S, N₁, N₂, 
ROCVector{eltype(TorA), AMDGPU.Mem.HIPBuffer}} + else + throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:ROCVector{<:Number}`")) + end +end + +function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return ROCTensorMap{T, S, N₁, N₂}(undef, V) +end + +function ROCTensorMap{T}( + ::UndefInitializer, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return ROCTensorMap{T}(undef, codomain ← domain) +end +function ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T, S} + return ROCTensorMap{T}(undef, V ← one(V)) +end +# constructor starting from block data +""" + ROCTensorMap(data::AbstractDict{<:Sector,<:ROCMatrix}, codomain::ProductSpace{S,N₁}, + domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂} + ROCTensorMap(data, codomain ← domain) + ROCTensorMap(data, domain → codomain) + +Construct a `ROCTensorMap` by explicitly specifying its block data. + +## Arguments +- `data::AbstractDict{<:Sector,<:ROCMatrix}`: dictionary containing the block data for + each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`. +- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type + `S<:ElementarySpace`. +- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type + `S<:ElementarySpace`. + +Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref) +using the syntax `codomain ← domain` or `domain → codomain`. 
+""" +function ROCTensorMap( + data::AbstractDict{<:Sector, <:ROCArray}, + V::TensorMapSpace{S, N₁, N₂} + ) where {S, N₁, N₂} + T = eltype(valtype(data)) + t = ROCTensorMap{T}(undef, V) + for (c, b) in blocks(t) + haskey(data, c) || throw(SectorMismatch("no data for block sector $c")) + datac = data[c] + size(datac) == size(b) || + throw(DimensionMismatch("wrong size of block for sector $c")) + copy!(b, datac) + end + for (c, b) in data + c ∈ blocksectors(t) || isempty(b) || + throw(SectorMismatch("data for block sector $c not expected")) + end + return t +end +function ROCTensorMap{T}( + data::DenseVector{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return ROCTensorMap(data, codomain ← domain) +end +function ROCTensorMap( + data::AbstractDict{<:Sector, <:ROCMatrix}, codom::TensorSpace{S}, + dom::TensorSpace{S} + ) where {S} + return ROCTensorMap(data, codom ← dom) +end + +function ROCTensorMap(ts::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A} + return ROCTensorMap{T, S, N₁, N₂}(ROCArray(ts.data), ts.space) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function AMDGPU.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return AMDGPU.$fname(codomain ← domain) + end + function AMDGPU.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return AMDGPU.$fname(T, codomain ← domain) + end + AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V) + function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = ROCTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:rocrand, :rocrandn) + randfun! = Symbol(randfun, :!) 
+ @eval begin + # converting `codomain` and `domain` into `HomSpace` + function $randfun( + codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {S <: IndexSpace} + return $randfun(codomain ← domain) + end + function $randfun( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S <: IndexSpace} + return $randfun(T, codomain ← domain) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S <: IndexSpace} + return $randfun(rng, T, codomain ← domain) + end + + # accepting single `TensorSpace` + $randfun(codomain::TensorSpace) = $randfun(codomain ← one(codomain)) + function $randfun(::Type{T}, codomain::TensorSpace) where {T} + return $randfun(T, codomain ← one(codomain)) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + codomain::TensorSpace + ) where {T} + return $randfun(rng, T, codomain ← one(domain)) + end + + # filling in default eltype + $randfun(V::TensorMapSpace) = $randfun(Float64, V) + function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace) + return $randfun(rng, Float64, V) + end + + # filling in default rng + function $randfun(::Type{T}, V::TensorMapSpace) where {T} + return $randfun(Random.default_rng(), T, V) + end + + # implementation + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + V::TensorMapSpace + ) where {T} + t = ROCTensorMap{T}(undef, V) + $randfun!(rng, t) + return t + end + end +end + +# converters +# ---------- +function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol, Any}) + try + codomain = eval(Meta.parse(d[:codomain])) + domain = eval(Meta.parse(d[:domain])) + data = SectorDict(eval(Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data]) + return ROCTensorMap(data, codomain, domain) + catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main + codomain = Base.eval(Main, Meta.parse(d[:codomain])) + domain = Base.eval(Main, Meta.parse(d[:domain])) + data = SectorDict( 
+ Base.eval(Main, Meta.parse(c)) => ROCArray(b) + for (c, b) in d[:data] + ) + return ROCTensorMap(data, codomain, domain) + end +end + +function Base.convert(::Type{ROCTensorMap}, t::AbstractTensorMap) + return copy!(ROCTensorMap{scalartype(t)}(undef, space(t)), t) +end + +# Scalar implementation +#----------------------- +function TensorKit.scalar(t::ROCTensorMap) + + # TODO: should scalar only work if N₁ == N₂ == 0? + return @allowscalar dim(codomain(t)) == dim(domain(t)) == 1 ? + first(blocks(t))[2][1, 1] : throw(DimensionMismatch()) +end + +TensorKit.scalartype(A::StridedROCArray{T}) where {T} = T +vi_scalartype(::Type{<:ROCTensorMap{T}}) where {T} = T +vi_scalartype(::Type{<:ROCArray{T}}) where {T} = T + +function TensorKit.similarstoragetype(TT::Type{<:ROCTensorMap{TTT, S, N₁, N₂}}, ::Type{T}) where {TTT, T, S, N₁, N₂} + return ROCVector{T, AMDGPU.Mem.HIPBuffer} +end + +function Base.convert( + TT::Type{ROCTensorMap{T, S, N₁, N₂}}, + t::AbstractTensorMap{<:Any, S, N₁, N₂} + ) where {T, S, N₁, N₂} + if typeof(t) === TT + return t + else + tnew = TT(undef, space(t)) + return copy!(tnew, t) + end +end + +function Base.copy!(tdst::ROCTensorMap{T, S, N₁, N₂}, tsrc::ROCTensorMap{T, S, N₁, N₂}) where {T, S, N₁, N₂} + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.copy!(tdst::ROCTensorMap, tsrc::TensorKit.AdjointTensorMap) + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.promote_rule( + ::Type{<:TT₁}, + ::Type{<:TT₂} + ) where { + S, N₁, N₂, TTT₁, TTT₂, + TT₁ <: ROCTensorMap{TTT₁, S, N₁, N₂}, + TT₂ <: ROCTensorMap{TTT₂, S, N₁, N₂}, + } + T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂) + return ROCTensorMap{T, S, N₁, N₂} +end + +function 
LinearAlgebra.isposdef(t::ROCTensorMap) + domain(t) == codomain(t) || + throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) + InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false + for (c, b) in blocks(t) + # do our own hermitian check + isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm || return false + isposdef(Hermitian(b)) || return false + end + return true +end + +# Conversion to ROCArray: +#---------------------- +# probably not optimized for speed, only for checking purposes +function Base.convert(::Type{ROCArray}, t::AbstractTensorMap) + I = sectortype(t) + if I === Trivial + convert(ROCArray, t[]) + else + cod = codomain(t) + dom = domain(t) + T = sectorscalartype(I) <: Complex ? complex(scalartype(t)) : + sectorscalartype(I) <: Integer ? scalartype(t) : float(scalartype(t)) + A = AMDGPU.zeros(T, dims(cod)..., dims(dom)...) + for (f₁, f₂) in fusiontrees(t) + F = convert(ROCArray, (f₁, f₂)) + Aslice = StridedView(A)[axes(cod, f₁.uncoupled)..., axes(dom, f₂.uncoupled)...] + add!(Aslice, StridedView(TensorKit._kron(convert(ROCArray, t[f₁, f₂]), F))) + end + return A + end +end diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl new file mode 100644 index 000000000..dae307d36 --- /dev/null +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -0,0 +1,107 @@ +module TensorKitCUDAExt + +using CUDA, CUDA.CUBLAS, LinearAlgebra +using CUDA: @allowscalar +using cuTENSOR: cuTENSOR +import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! 
+ +using TensorKit +import TensorKit.VectorInterface: scalartype as vi_scalartype +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap +import TensorKit: randisometry + +using TensorKit.MatrixAlgebraKit + +using Random + +include("cutensormap.jl") + +const CuDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, CuVector{T, CUDA.DeviceMemory}} + +""" + CuDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace} + # expert mode: select storage type `A` + DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}} + +Construct a `DiagonalTensorMap` with uninitialized data. +""" +function CuDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T} + (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) || + throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices")) + return CuDiagonalTensorMap{T}(undef, domain(V)) +end +function CuDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T} + length(V) == 1 || + throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`")) + return CuDiagonalTensorMap{T}(undef, only(V)) +end +function CuDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} + return CuDiagonalTensorMap{T, S}(undef, V) +end +CuDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = CuDiagonalTensorMap{Float64}(undef, V) + +function CuDiagonalTensorMap(data::CuVector{T}, V::S) where {T, S} + return CuDiagonalTensorMap{T, S}(data, V) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::CuDiagonalTensorMap, alg::DiagonalAlgorithm) + V_cod = fuse(codomain(t)) + V_dom = fuse(domain(t)) + U = similar(t, codomain(t) ← V_cod) + S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + 
+
+function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
+ V_cod = infimum(fuse(codomain(t)), fuse(domain(t)))
+ return CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
+end
+
+function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::CuTensorMap, ::AbstractAlgorithm)
+ V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
+ U = similar(t, codomain(t) ← V_cod)
+ S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
+ Vᴴ = similar(t, V_dom ← domain(t))
+ return U, S, Vᴴ
+end
+
+function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::CuTensorMap, ::AbstractAlgorithm)
+ V_D = fuse(domain(t))
+ T = real(scalartype(t))
+ D = CuDiagonalTensorMap{T}(undef, V_D)
+ V = similar(t, codomain(t) ← V_D)
+ return D, V
+end
+
+function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::CuTensorMap, ::AbstractAlgorithm)
+ V_D = fuse(domain(t))
+ Tc = complex(scalartype(t))
+ D = CuDiagonalTensorMap{Tc}(undef, V_D)
+ V = similar(t, Tc, codomain(t) ← V_D)
+ return D, V
+end
+
+function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
+ V_D = fuse(domain(t))
+ T = real(scalartype(t))
+ return D = CuDiagonalTensorMap{T}(undef, V_D)
+end
+
+function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
+ V_D = fuse(domain(t))
+ Tc = complex(scalartype(t))
+ return D = CuDiagonalTensorMap{Tc}(undef, V_D)
+end
+
+
+# TODO
+# add VectorInterface extensions for proper CUDA promotion
+function TensorKit.VectorInterface.promote_add(TA::Type{<:CUDA.StridedCuMatrix{Tx}}, TB::Type{<:CUDA.StridedCuMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ}
+ return Base.promote_op(add, Tx, Ty, Tα, Tβ)
+end
+
+end
diff --git a/ext/TensorKitCUDAExt/cutensormap.jl
b/ext/TensorKitCUDAExt/cutensormap.jl new file mode 100644 index 000000000..1f751c561 --- /dev/null +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -0,0 +1,340 @@ +const CuTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, CuVector{T, CUDA.DeviceMemory}} +const CuTensor{T, S, N} = CuTensorMap{T, S, N, 0} + +const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, CuTensorMap{T, S, N₁, N₂}} + +function TensorKit.tensormaptype(S::Type{<:IndexSpace}, N₁, N₂, TorA::Type{<:StridedCuArray}) + if TorA <: CuArray + return TensorMap{eltype(TorA), S, N₁, N₂, CuVector{eltype(TorA), CUDA.DeviceMemory}} + else + throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:CuVector{<:Number}`")) + end +end + +function CuTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(undef, V) +end + +function CuTensorMap{T}( + ::UndefInitializer, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return CuTensorMap{T}(undef, codomain ← domain) +end +function CuTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T, S} + return CuTensorMap{T}(undef, V ← one(V)) +end +# constructor starting from block data +""" + CuTensorMap(data::AbstractDict{<:Sector,<:CuMatrix}, codomain::ProductSpace{S,N₁}, + domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂} + CuTensorMap(data, codomain ← domain) + CuTensorMap(data, domain → codomain) + +Construct a `CuTensorMap` by explicitly specifying its block data. + +## Arguments +- `data::AbstractDict{<:Sector,<:CuMatrix}`: dictionary containing the block data for + each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`. +- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type + `S<:ElementarySpace`. +- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type + `S<:ElementarySpace`. 
+ +Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref) +using the syntax `codomain ← domain` or `domain → codomain`. +""" +function CuTensorMap( + data::AbstractDict{<:Sector, <:CuArray}, + V::TensorMapSpace{S, N₁, N₂} + ) where {S, N₁, N₂} + T = eltype(valtype(data)) + t = CuTensorMap{T}(undef, V) + for (c, b) in blocks(t) + haskey(data, c) || throw(SectorMismatch("no data for block sector $c")) + datac = data[c] + size(datac) == size(b) || + throw(DimensionMismatch("wrong size of block for sector $c")) + copy!(b, datac) + end + for (c, b) in data + c ∈ blocksectors(t) || isempty(b) || + throw(SectorMismatch("data for block sector $c not expected")) + end + return t +end +function CuTensorMap{T}( + data::DenseVector{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return CuTensorMap(data, codomain ← domain) +end +function CuTensorMap( + data::AbstractDict{<:Sector, <:CuMatrix}, codom::TensorSpace{S}, + dom::TensorSpace{S} + ) where {S} + return CuTensorMap(data, codom ← dom) +end +function CuTensorMap(data::DenseVector{T}, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(data, V) +end +function CuTensorMap(data::CuArray{T}, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(vec(data), V) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function CUDA.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return CUDA.$fname(codomain ← domain) + end + function CUDA.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return CUDA.$fname(T, codomain ← domain) + end + CUDA.$fname(V::TensorMapSpace) = CUDA.$fname(Float64, V) + function CUDA.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = CuTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:curand, 
:curandn)
+ randfun! = Symbol(randfun, :!)
+ @eval begin
+ # converting `codomain` and `domain` into `HomSpace`
+ function $randfun(
+ codomain::TensorSpace{S},
+ domain::TensorSpace{S}
+ ) where {S <: IndexSpace}
+ return $randfun(codomain ← domain)
+ end
+ function $randfun(
+ ::Type{T}, codomain::TensorSpace{S},
+ domain::TensorSpace{S}
+ ) where {T, S <: IndexSpace}
+ return $randfun(T, codomain ← domain)
+ end
+ function $randfun(
+ rng::Random.AbstractRNG, ::Type{T},
+ codomain::TensorSpace{S},
+ domain::TensorSpace{S}
+ ) where {T, S <: IndexSpace}
+ return $randfun(rng, T, codomain ← domain)
+ end
+
+ # accepting single `TensorSpace`
+ $randfun(codomain::TensorSpace) = $randfun(codomain ← one(codomain))
+ function $randfun(::Type{T}, codomain::TensorSpace) where {T}
+ return $randfun(T, codomain ← one(codomain))
+ end
+ function $randfun(
+ rng::Random.AbstractRNG, ::Type{T},
+ codomain::TensorSpace
+ ) where {T}
+ return $randfun(rng, T, codomain ← one(codomain))
+ end
+
+ # filling in default eltype
+ $randfun(V::TensorMapSpace) = $randfun(Float64, V)
+ function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
+ return $randfun(rng, Float64, V)
+ end
+
+ # filling in default rng
+ function $randfun(::Type{T}, V::TensorMapSpace) where {T}
+ return $randfun(Random.default_rng(), T, V)
+ end
+
+ # implementation
+ function $randfun(
+ rng::Random.AbstractRNG, ::Type{T},
+ V::TensorMapSpace
+ ) where {T}
+ t = CuTensorMap{T}(undef, V)
+ $randfun!(rng, t)
+ return t
+ end
+ end
+end
+
+for randfun in (:rand, :randn, :randisometry)
+ randfun! = Symbol(randfun, :!)
+ @eval begin
+ # converting `codomain` and `domain` into `HomSpace`
+ function $randfun(
+ ::Type{A}, codomain::TensorSpace{S},
+ domain::TensorSpace{S}
+ ) where {A <: CuArray, S <: IndexSpace}
+ return $randfun(A, codomain ← domain)
+ end
+ function $randfun(
+ ::Type{T}, ::Type{A}, codomain::TensorSpace{S},
+ domain::TensorSpace{S}
+ ) where {T, S <: IndexSpace, A <: CuArray{T}}
+ return $randfun(T, A, codomain ← domain)
+ end
+ function $randfun(
+ rng::Random.AbstractRNG, ::Type{T}, ::Type{A},
+ codomain::TensorSpace{S},
+ domain::TensorSpace{S}
+ ) where {T, S <: IndexSpace, A <: CuArray{T}}
+ return $randfun(rng, T, A, codomain ← domain)
+ end
+
+ # accepting single `TensorSpace`
+ $randfun(::Type{A}, codomain::TensorSpace) where {A <: CuArray} = $randfun(A, codomain ← one(codomain))
+ function $randfun(::Type{T}, ::Type{A}, codomain::TensorSpace) where {T, A <: CuArray{T}}
+ return $randfun(T, A, codomain ← one(codomain))
+ end
+ function $randfun(
+ rng::Random.AbstractRNG, ::Type{T},
+ ::Type{A}, codomain::TensorSpace
+ ) where {T, A <: CuArray{T}}
+ return $randfun(rng, T, A, codomain ← one(codomain))
+ end
+
+ # filling in default eltype
+ $randfun(::Type{A}, V::TensorMapSpace) where {A <: CuArray} = $randfun(eltype(A), A, V)
+ function $randfun(rng::Random.AbstractRNG, ::Type{A}, V::TensorMapSpace) where {A <: CuArray}
+ return $randfun(rng, eltype(A), A, V)
+ end
+
+ # filling in default rng
+ function $randfun(::Type{T}, ::Type{A}, V::TensorMapSpace) where {T, A <: CuArray{T}}
+ return $randfun(Random.default_rng(), T, A, V)
+ end
+
+ # implementation
+ function $randfun(
+ rng::Random.AbstractRNG, ::Type{T},
+ ::Type{A}, V::TensorMapSpace
+ ) where {T, A <: CuArray{T}}
+ t = CuTensorMap{T}(undef, V)
+ $randfun!(rng, t)
+ return t
+ end
+ end
+end
+
+# converters
+# ----------
+function Base.convert(::Type{CuTensorMap}, d::Dict{Symbol, Any})
+ try
+ codomain = eval(Meta.parse(d[:codomain]))
+ domain = eval(Meta.parse(d[:domain]))
+ data = 
SectorDict(eval(Meta.parse(c)) => CuArray(b) for (c, b) in d[:data]) + return CuTensorMap(data, codomain, domain) + catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main + codomain = Base.eval(Main, Meta.parse(d[:codomain])) + domain = Base.eval(Main, Meta.parse(d[:domain])) + data = SectorDict( + Base.eval(Main, Meta.parse(c)) => CuArray(b) + for (c, b) in d[:data] + ) + return CuTensorMap(data, codomain, domain) + end +end + +function Base.convert(::Type{CuTensorMap}, t::AbstractTensorMap) + return copy!(CuTensorMap{scalartype(t)}(undef, space(t)), t) +end + +# Scalar implementation +#----------------------- +function TensorKit.scalar(t::CuTensorMap) + + # TODO: should scalar only work if N₁ == N₂ == 0? + return @allowscalar dim(codomain(t)) == dim(domain(t)) == 1 ? + first(blocks(t))[2][1, 1] : throw(DimensionMismatch()) +end + +TensorKit.scalartype(A::StridedCuArray{T}) where {T} = T +vi_scalartype(::Type{<:CuTensorMap{T}}) where {T} = T +vi_scalartype(::Type{<:CuArray{T}}) where {T} = T + +function TensorKit.similarstoragetype(TT::Type{<:CuTensorMap{TTT, S, N₁, N₂}}, ::Type{T}) where {TTT, T, S, N₁, N₂} + return CuVector{T, CUDA.DeviceMemory} +end + +function Base.convert( + TT::Type{CuTensorMap{T, S, N₁, N₂}}, + t::AbstractTensorMap{<:Any, S, N₁, N₂} + ) where {T, S, N₁, N₂} + if typeof(t) === TT + return t + else + tnew = TT(undef, space(t)) + return copy!(tnew, t) + end +end + +function Base.copy!(tdst::CuTensorMap{T, S, N₁, N₂}, tsrc::CuTensorMap{T, S, N₁, N₂}) where {T, S, N₁, N₂} + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.copy!(tdst::CuTensorMap, tsrc::TensorKit.AdjointTensorMap) + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + 
return tdst +end + +function Base.promote_rule( + ::Type{<:TT₁}, + ::Type{<:TT₂} + ) where { + S, N₁, N₂, TTT₁, TTT₂, + TT₁ <: CuTensorMap{TTT₁, S, N₁, N₂}, + TT₂ <: CuTensorMap{TTT₂, S, N₁, N₂}, + } + T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂) + return CuTensorMap{T, S, N₁, N₂} +end + +function LinearAlgebra.isposdef(t::CuTensorMap) + domain(t) == codomain(t) || + throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) + InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false + for (c, b) in blocks(t) + # do our own hermitian check + isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm || return false + isposdef(Hermitian(b)) || return false + end + return true +end + + +# Conversion to CuArray: +#---------------------- +# probably not optimized for speed, only for checking purposes +function Base.convert(::Type{CuArray}, t::AbstractTensorMap) + I = sectortype(t) + if I === Trivial + convert(CuArray, t[]) + else + cod = codomain(t) + dom = domain(t) + T = sectorscalartype(I) <: Complex ? complex(scalartype(t)) : + sectorscalartype(I) <: Integer ? scalartype(t) : float(scalartype(t)) + A = CUDA.zeros(T, dims(cod)..., dims(dom)...) + for (f₁, f₂) in fusiontrees(t) + F = convert(CuArray, (f₁, f₂)) + Aslice = StridedView(A)[axes(cod, f₁.uncoupled)..., axes(dom, f₂.uncoupled)...] 
+ add!(Aslice, StridedView(TensorKit._kron(convert(CuArray, t[f₁, f₂]), F))) + end + return A + end +end diff --git a/src/auxiliary/random.jl b/src/auxiliary/random.jl index 022809518..9aed16509 100644 --- a/src/auxiliary/random.jl +++ b/src/auxiliary/random.jl @@ -1,17 +1,20 @@ """ - randisometry([::Type{T}=Float64], dims::Dims{2}) -> Array{T,2} - randhaar([::Type{T}=Float64], dims::Dims{2}) -> Array{T,2} + randisometry([::Type{T}=Float64], [::Type{A}=Matrix{T}], dims::Dims{2}) -> A + randhaar([::Type{T}=Float64], [::Type{A}=Matrix{T}], dims::Dims{2}) -> A Create a random isometry of size `dims`, uniformly distributed according to the Haar measure. See also [`randuniform`](@ref) and [`randnormal`](@ref). """ -randisometry(dims::Base.Dims{2}) = randisometry(Float64, dims) +randisometry(dims::Base.Dims{2}) = randisometry(Float64, Matrix{Float64}, dims) function randisometry(::Type{T}, dims::Base.Dims{2}) where {T <: Number} return randisometry(Random.default_rng(), T, dims) end -function randisometry(rng::Random.AbstractRNG, ::Type{T}, dims::Base.Dims{2}) where {T <: Number} - return randisometry!(rng, Matrix{T}(undef, dims)) +function randisometry(::Type{T}, ::Type{A}, dims::Base.Dims{2}) where {T <: Number, A <: AbstractArray{T}} + return randisometry(Random.default_rng(), T, A, dims) +end +function randisometry(rng::Random.AbstractRNG, ::Type{T}, ::Type{A}, dims::Base.Dims{2}) where {T <: Number, A <: AbstractArray{T}} + return randisometry!(rng, A(undef, dims)) end randisometry!(A::AbstractMatrix) = randisometry!(Random.default_rng(), A) diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 33f267e27..a6dbb4e9a 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -6,7 +6,7 @@ module Factorizations export copy_oftype, factorisation_scalartype, one!, truncspace using ..TensorKit -using ..TensorKit: AdjointTensorMap, SectorDict, blocktype, foreachblock, one! 
+using ..TensorKit: AdjointTensorMap, SectorDict, blocktype, foreachblock, one!, similarstoragetype using LinearAlgebra: LinearAlgebra, BlasFloat, Diagonal, svdvals, svdvals!, eigen, eigen!, isposdef, isposdef!, ishermitian @@ -28,7 +28,6 @@ include("diagonal.jl") include("pullbacks.jl") TensorKit.one!(A::AbstractMatrix) = MatrixAlgebraKit.one!(A) - #------------------------------# # LinearAlgebra overloads #------------------------------# diff --git a/src/factorizations/matrixalgebrakit.jl b/src/factorizations/matrixalgebrakit.jl index 72d7015c0..2dadd701b 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -144,14 +144,16 @@ end function MAK.initialize_output(::typeof(svd_compact!), t::AbstractTensorMap, ::AbstractAlgorithm) V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t))) U = similar(t, codomain(t) ← V_cod) - S = DiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + output_type = similarstoragetype(t, real(scalartype(t))) + S = DiagonalTensorMap{real(scalartype(t)), typeof(V_cod), output_type}(undef, V_cod) Vᴴ = similar(t, V_dom ← domain(t)) return U, S, Vᴴ end function MAK.initialize_output(::typeof(svd_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm) V_cod = infimum(fuse(codomain(t)), fuse(domain(t))) - return DiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + output_storage_type = similarstoragetype(t, real(scalartype(t))) + return DiagonalTensorMap{real(scalartype(t)), typeof(V_cod), output_storage_type}(undef, V_cod) end # Eigenvalue decomposition @@ -219,7 +221,7 @@ end function MAK.initialize_output(::typeof(eigh_full!), t::AbstractTensorMap, ::AbstractAlgorithm) V_D = fuse(domain(t)) T = real(scalartype(t)) - D = DiagonalTensorMap{T}(undef, V_D) + D = DiagonalTensorMap{T, typeof(V_D), similarstoragetype(t, T)}(undef, V_D) V = similar(t, codomain(t) ← V_D) return D, V end @@ -227,7 +229,7 @@ end function MAK.initialize_output(::typeof(eig_full!), t::AbstractTensorMap, 
::AbstractAlgorithm)
 V_D = fuse(domain(t))
 Tc = complex(scalartype(t))
- D = DiagonalTensorMap{Tc}(undef, V_D)
+ D = DiagonalTensorMap{Tc, typeof(V_D), similarstoragetype(t, Tc)}(undef, V_D)
 V = similar(t, Tc, codomain(t) ← V_D)
 return D, V
 end
@@ -235,13 +237,13 @@ end
 function MAK.initialize_output(::typeof(eigh_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm)
 V_D = fuse(domain(t))
 T = real(scalartype(t))
- return D = DiagonalTensorMap{Tc}(undef, V_D)
+ return D = DiagonalTensorMap{T, typeof(V_D), similarstoragetype(t, T)}(undef, V_D)
 end
 function MAK.initialize_output(::typeof(eig_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm)
 V_D = fuse(domain(t))
 Tc = complex(scalartype(t))
- return D = DiagonalTensorMap{Tc}(undef, V_D)
+ return D = DiagonalTensorMap{Tc, typeof(V_D), similarstoragetype(t, Tc)}(undef, V_D)
 end
 # QR decomposition
diff --git a/src/factorizations/truncation.jl
index 0e9e2dda8..206175220 100644
--- a/src/factorizations/truncation.jl
+++ b/src/factorizations/truncation.jl
@@ -67,7 +67,7 @@ function MAK.truncate(
 Ũ = similar(U, codomain(U) ← V_truncated)
 truncate_domain!(Ũ, U, ind)
- S̃ = DiagonalTensorMap{scalartype(S)}(undef, V_truncated)
+ S̃ = DiagonalTensorMap{scalartype(S), typeof(V_truncated), storagetype(S)}(undef, V_truncated)
 truncate_diagonal!(S̃, S, ind)
 Ṽᴴ = similar(Vᴴ, V_truncated ← domain(Vᴴ))
 truncate_codomain!(Ṽᴴ, Vᴴ, ind)
@@ -132,7 +132,7 @@ for f! in (:eig_trunc!, :eigh_trunc!)
ind = MAK.findtruncated(diagview(D), strategy) V_truncated = truncate_space(space(D, 1), ind) - D̃ = DiagonalTensorMap{scalartype(D)}(undef, V_truncated) + D̃ = DiagonalTensorMap{scalartype(D), typeof(V_truncated), storagetype(D)}(undef, V_truncated) truncate_diagonal!(D̃, D, ind) Ṽ = similar(V, codomain(V) ← V_truncated) @@ -162,17 +162,15 @@ function _findnexttruncvalue( ) where {I <: Sector} # early return (isempty(S) || all(iszero, values(truncdim))) && return nothing + mapped_keys = map(keys(truncdim)) do c + d = truncdim[c] + return by(S[c][d]) + end if rev - σmin, imin = findmin(keys(truncdim)) do c - d = truncdim[c] - return by(S[c][d]) - end + σmin, imin = findmin(mapped_keys) return σmin, keys(truncdim)[imin] else - σmax, imax = findmax(keys(truncdim)) do c - d = truncdim[c] - return by(S[c][d]) - end + σmax, imax = findmax(mapped_keys) return σmax, keys(truncdim)[imax] end end diff --git a/src/tensors/diagonal.jl b/src/tensors/diagonal.jl index 904e40cba..66ab9c377 100644 --- a/src/tensors/diagonal.jl +++ b/src/tensors/diagonal.jl @@ -271,7 +271,7 @@ function LinearAlgebra.mul!( dC::DiagonalTensorMap, dA::DiagonalTensorMap, dB::DiagonalTensorMap, α::Number, β::Number ) dC.domain == dA.domain == dB.domain || throw(SpaceMismatch()) - mul!(Diagonal(dC.data), Diagonal(dA.data), Diagonal(dB.data), α, β) + @. dC.data = (α * dA.data * dB.data) + β * dC.data return dC end diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index d09609edc..34fd6ec18 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -272,7 +272,7 @@ function _norm(blockiter, p::Real, init::Real) end elseif p == 2 n² = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm2(b)^2) + return isempty(b) ? 
init : oftype(init, dim(c) * LinearAlgebra.norm(b, 2)^2) end return sqrt(n²) elseif p == 1 @@ -281,7 +281,7 @@ function _norm(blockiter, p::Real, init::Real) end elseif p > 0 nᵖ = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.normp(b, p)^p) + return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm(b, p)^p) end return (nᵖ)^inv(oftype(nᵖ, p)) else @@ -299,7 +299,7 @@ function LinearAlgebra.rank( r = dim(one(sectortype(t))) * 0 dim(t) == 0 && return r S = LinearAlgebra.svdvals(t) - tol = max(atol, rtol * maximum(first, values(S))) + tol = max(atol, rtol * mapreduce(maximum, max, values(S))) for (c, b) in S if !isempty(b) r += dim(c) * count(>(tol), b) @@ -317,8 +317,8 @@ function LinearAlgebra.cond(t::AbstractTensorMap, p::Real = 2) return zero(real(float(scalartype(t)))) end S = LinearAlgebra.svdvals(t) - maxS = maximum(first, values(S)) - minS = minimum(last, values(S)) + maxS = mapreduce(maximum, max, values(S)) + minS = mapreduce(minimum, min, values(S)) return iszero(maxS) ? oftype(maxS, Inf) : (maxS / minS) else throw(ArgumentError("cond currently only defined for p=2")) @@ -431,7 +431,7 @@ function exp!(t::TensorMap) domain(t) == codomain(t) || error("Exponential of a tensor only exist when domain == codomain.") for (c, b) in blocks(t) - copy!(b, LinearAlgebra.exp!(b)) + copy!(b, exp!(b)) end return t end diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index fe15d819d..d0a3f380b 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -139,7 +139,7 @@ function TensorMap( datac = data[c] size(datac) == size(b) || throw(DimensionMismatch("wrong size of block for sector $c")) - copy!(b, datac) + copyto!(b, datac) end for (c, b) in data c ∈ blocksectors(t) || isempty(b) || @@ -357,7 +357,7 @@ function TensorMap( ) where {S} return TensorMap(data, codom ← dom; kwargs...) end -function Tensor(data::AbstractArray, codom::TensorSpace, ; kwargs...) 
+function Tensor(data::AbstractArray, codom::TensorSpace; kwargs...) return TensorMap(data, codom ← one(codom); kwargs...) end diff --git a/test/amd/factorizations.jl b/test/amd/factorizations.jl new file mode 100644 index 000000000..cbdfd9e27 --- /dev/null +++ b/test/amd/factorizations.jl @@ -0,0 +1,406 @@ +const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt) +@assert !isnothing(AMDGPUExt) +const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap) +const ROCDiagonalTensorMap = getglobal(AMDGPUExt, :ROCDiagonalTensorMap) + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₃, VfU₁, VfSU₂) + else + (Vtr, VU₁, VCU₁, VSU₂, VfSU₂) + end + else + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) + end +catch + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) +end + +import AMDGPU: rand as rocrand, randn as rocrandn + + +eltypes = (Float32, ComplexF64) + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("Factorizations with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Factorizations with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + W = V1 ⊗ V2 + @testset "QR decomposition" begin + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, W, V1), rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + Q, R = @constinferred qr_full(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isunitary(Q) + + Q, R = @constinferred qr_compact(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + + Q, R = @constinferred left_orth(t; kind = :qr) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + + N = @constinferred qr_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 
* eps(norm(t)) + + N = @constinferred left_null(t; kind = :qr) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes + t = rocrand(T, V1 ⊗ V2, zero(V1)) + + Q, R = @constinferred qr_full(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isunitary(Q) + @test dim(R) == dim(t) == 0 + + Q, R = @constinferred qr_compact(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + Q, R = @constinferred left_orth(t; kind = :qr) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + N = @constinferred qr_null(t) + @test isunitary(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "LQ decomposition" begin + for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, W, V1), rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + L, Q = @constinferred lq_full(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isunitary(Q) + + L, Q = @constinferred lq_compact(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + + L, Q = @constinferred right_orth(t; kind = :lq) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + + Nᴴ = @constinferred lq_null(t) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + for T in eltypes + # empty tensor + t = rocrand(T, zero(V1), V1 ⊗ V2) + + L, Q = @constinferred lq_full(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isunitary(Q) + @test dim(L) == dim(t) == 0 + + L, Q = @constinferred lq_compact(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + L, Q = @constinferred right_orth(t; kind = :lq) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test 
isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + Nᴴ = @constinferred lq_null(t) + @test isunitary(Nᴴ) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "Polar decomposition" begin + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), + rocrand(T, W, W)', + rocrand(T, W, V1), + rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + @assert domain(t) ≾ codomain(t) + w, p = @constinferred left_polar(t) + @test w * p ≈ t + @test isisometric(w) + @test isposdef(p) + + w, p = @constinferred left_orth(t; kind = :polar) + @test w * p ≈ t + @test isisometric(w) + end + + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, V1, W), rocrand(T, W, V1)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + @assert codomain(t) ≾ domain(t) + p, wᴴ = @constinferred right_polar(t) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + @test isposdef(p) + + p, wᴴ = @constinferred right_orth(t; kind = :polar) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + end + end + @testset "SVD" begin + for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', + rocrand(T, W, V1), rocrand(T, V1, W), + rocrand(T, W, V1)', rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + u, s, vᴴ = @constinferred svd_full(t) + @test u * s * vᴴ ≈ t + @test isunitary(u) + @test isunitary(vᴴ) + + u, s, vᴴ = @constinferred svd_compact(t) + @test u * s * vᴴ ≈ t + @test isisometric(u) + @test isposdef(s) + @test isisometric(vᴴ; side = :right) + + s′ = LinearAlgebra.diag(s) + for (c, b) in LinearAlgebra.svdvals(t) + @test b ≈ s′[c] + end + + v, c = @constinferred left_orth(t; kind = :svd) + @test v * c ≈ t + @test isisometric(v) + + N = @constinferred left_null(t; kind = :svd) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + Nᴴ = @constinferred right_null(t; kind = :svd) + 
@test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes, t in (rocrand(T, W, zero(V1)), rocrand(T, zero(V1), W)) + U, S, Vᴴ = @constinferred svd_full(t) + @test U * S * Vᴴ ≈ t + @test isunitary(U) + @test isunitary(Vᴴ) + + U, S, Vᴴ = @constinferred svd_compact(t) + @test U * S * Vᴴ ≈ t + @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0 + end + end + + @testset "truncated SVD" begin + for T in eltypes, + # AMDGPU doesn't support randn(ComplexF64) + t in ( + ROCTensorMap(randn(T, W, W)), + ROCTensorMap(randn(T, W, W))', + ROCTensorMap(randn(T, W, V1)), + ROCTensorMap(randn(T, V1, W)), + ROCTensorMap(randn(T, W, V1))', + ROCTensorMap(randn(T, V1, W))', + ROCDiagonalTensorMap(randn(T, reduceddim(V1)), V1), + ) + + @constinferred normalize!(t) + + U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) + @test U * S * Vᴴ ≈ t + @test isisometric(U) + @test isisometric(Vᴴ; side = :right) + + trunc = truncrank(dim(domain(S)) ÷ 2) + AMDGPU.@allowscalar begin + U1, S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ1' ≈ U1 * S1 + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) + @test dim(domain(S1)) <= trunc.howmany + + λ = minimum(minimum, values(LinearAlgebra.diag(S1))) + trunc = trunctol(; atol = λ - 10eps(λ)) + AMDGPU.@allowscalar begin + U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ2' ≈ U2 * S2 + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) + @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ + @test U2 ≈ U1 + @test S2 ≈ S1 + @test Vᴴ2 ≈ Vᴴ1 + + trunc = truncspace(space(S2, 1)) + AMDGPU.@allowscalar begin + U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ3' ≈ U3 * S3 + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) + @test space(S3, 1) ≾ space(S2, 1) + + trunc = truncerror(; atol = 0.5) + AMDGPU.@allowscalar begin + U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) + end + @test t 
* Vᴴ4' ≈ U4 * S4 + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) + @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 + end + end + + #=@testset "Eigenvalue decomposition" begin + for T in eltypes, + t in (rocrand(T, V1, V1), + rocrand(T, W, W), + rocrand(T, W, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1) + ) + + d, v = @constinferred eig_full(t) + @test t * v ≈ v * d + + d′ = LinearAlgebra.diag(d) + for (c, b) in LinearAlgebra.eigvals(t) + @test sort(b; by=abs) ≈ sort(d′[c]; by=abs) + end + + vdv = v' * v + vdv = (vdv + vdv') / 2 + @test @constinferred isposdef(vdv) + t isa ROCDiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map + + AMDGPU.@allowscalar begin # TODO + d, v = @constinferred eig_trunc(t; trunc=truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + + t2 = (t + t') + D, V = eigen(t2) + @test isisometric(V) + D̃, Ṽ = @constinferred eigh_full(t2) + @test D ≈ D̃ + @test V ≈ Ṽ + λ = minimum(minimum(real(LinearAlgebra.diag(b))) + for (c, b) in blocks(D)) + @test cond(Ṽ) ≈ one(real(T)) + @test isposdef(t2) == isposdef(λ) + @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2)) + @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2)) + + add!(t, t') + + d, v = @constinferred eigh_full(t) + @test t * v ≈ v * d + @test isunitary(v) + + λ = minimum(minimum(real(LinearAlgebra.diag(b))) for (c, b) in blocks(d)) + @test cond(v) ≈ one(real(T)) + @test isposdef(t) == isposdef(λ) + @test isposdef(t - λ * one(t) + 0.1 * one(t)) + @test !isposdef(t - λ * one(t) - 0.1 * one(t)) + + AMDGPU.@allowscalar begin + d, v = @constinferred eigh_trunc(t; trunc=truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + end + end=# + + #=@testset "Condition number and rank" begin + for T in eltypes, + t in (rocrand(T, W, W), rocrand(T, W, W)', + rocrand(T, W, V1), rocrand(T, V1, W), + rocrand(T, W, V1)', rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, 
reduceddim(V1)), V1)) + + d1, d2 = dim(codomain(t)), dim(domain(t)) + @test rank(t) == min(d1, d2) + M = left_null(t) + @test @constinferred(rank(M)) + rank(t) == d1 + Mᴴ = right_null(t) + @test rank(Mᴴ) + rank(t) == d2 + end + for T in eltypes + u = unitary(T, V1 ⊗ V2, V1 ⊗ V2) + @test @constinferred(cond(u)) ≈ one(real(T)) + @test @constinferred(rank(u)) == dim(V1 ⊗ V2) + + t = rocrand(T, zero(V1), W) + @test rank(t) == 0 + t2 = rocrand(T, zero(V1) * zero(V2), zero(V1) * zero(V2)) + @test rank(t2) == 0 + @test cond(t2) == 0.0 + end + for T in eltypes, t in (rocrand(T, W, W), rocrand(T, W, W)') + add!(t, t') + vals = @constinferred LinearAlgebra.eigvals(t) + λmax = maximum(s -> maximum(abs, s), values(vals)) + λmin = minimum(s -> minimum(abs, s), values(vals)) + @test cond(t) ≈ λmax / λmin + end + end=# + end +end diff --git a/test/amd/tensors.jl b/test/amd/tensors.jl new file mode 100644 index 000000000..0df035f35 --- /dev/null +++ b/test/amd/tensors.jl @@ -0,0 +1,609 @@ +using Adapt, AMDGPU +ad = adapt(Array) + +const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt) +@assert !isnothing(AMDGPUExt) +const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap) +const ROCDiagonalTensorMap = getglobal(AMDGPUExt, :ROCDiagonalTensorMap) + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + 
println("---------------------------------------") + println("AMDGPU Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred AMDGPU.zeros(T, W) + AMDGPU.@allowscalar begin + @test @constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{T, spacetype(t), 5, 0, ROCVector{T, AMDGPU.Mem.HIPBuffer}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + d = convert(Dict, t) + @test t == convert(ROCTensorMap, d) + end + end + if hasfusiontensor(I) + @timedtestset "Tensor Array conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + W1 = V1 ← one(V1) + W2 = one(V2) ← V2 + W3 = V1 ⊗ V2 ← one(V1) + W4 = V1 ← V2 + W5 = one(V1) ← V1 ⊗ V2 + W6 = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for W in (W1, W2, W3, W4, W5, W6) + for T in (Int, Float32, ComplexF64) + if T == Int + t = ROCTensorMap{T}(undef, W) + for (_, b) in blocks(t) + AMDGPU.@allowscalar AMDGPU.rand!(b, -20:20) + end + else + t = @constinferred AMDGPU.randn(T, W) + end + AMDGPU.@allowscalar begin + a = @constinferred convert(ROCArray, t) + end + b = reshape(a, dim(codomain(W)), dim(domain(W))) + @test t ≈ 
@constinferred TensorMap(a, W) + @test t ≈ @constinferred TensorMap(b, W) + @test t === @constinferred TensorMap(t.data, W) + end + end + for T in (Int, Float32, ComplexF64) + t = AMDGPU.randn(T, V1 ⊗ V2 ← zero(V1)) + AMDGPU.@allowscalar begin + a = convert(ROCArray, t) + end + @test norm(a) == 0 + end + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs = @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + AMDGPU.@allowscalar begin + @test norm(t + t, p) ≈ 2 * norm(t, p) + end + @test norm(t) ≈ norm(t') + + t2 = @constinferred AMDGPU.rand!(similar(t)) + β = rand(T) + @test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(Vector{T}, V2 ⊗ V1, V1 ⊗ V2)) + @test i1 * i2 == @constinferred(id(T, V1 ⊗ V2)) + @test i2 * i1 == @constinferred(id(Vector{T}, V2 ⊗ V1)) + + w = @constinferred(isometry(T, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), 
V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(Vector{T}, V1) + @test w * w' == (w * w')^2 + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test @constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, $(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + @timedtestset "Basic linear algebra: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = AMDGPU.rand(T, W) + t2 = @constinferred AMDGPU.rand!(similar(t)) + @test norm(t, 2) ≈ norm(ad(t), 2) + @test dot(t2, t) ≈ dot(ad(t2), ad(t)) + α = rand(T) + @test ad(α * t) ≈ α * ad(t) + @test ad(t + t) ≈ 2 * ad(t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred AMDGPU.randn(T, W, W) + @test real(ad(t)) == ad(@constinferred real(t)) + @test imag(ad(t)) == ad(@constinferred imag(t)) + end + end + end + @timedtestset "Tensor conversion" begin + W = V1 ⊗ V2 + t = 
@constinferred AMDGPU.randn(W ← W) + @test typeof(convert(ROCTensorMap, t')) == typeof(t) + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + @test typeof(convert(typeof(tc), t')) == typeof(tc) + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + t′ = AMDGPU.randn!(similar(t)) + AMDGPU.@allowscalar begin + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + a = ad(t) + AMDGPU.@allowscalar begin + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = permute(t, (p1, p2)) + a2 = ad(t2) + @test a2 ≈ permute(a, (p1, p2)) + @test ad(transpose(t2)) ≈ transpose(a2) + end + + t3 = repartition(t, k) + a3 = ad(t3) + @test a3 ≈ repartition(a, k) + end + end + end + end + @timedtestset "Full trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + AMDGPU.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + end + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + AMDGPU.@allowscalar begin + ss = 
tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + AMDGPU.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + end + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + AMDGPU.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + AMDGPU.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + @test ta ≈ tb + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via conversion" begin + A1 = AMDGPU.randn(ComplexF64, V1' * V2', V3') + A2 = AMDGPU.randn(ComplexF64, V3 * V4, V5) + rhoL = AMDGPU.randn(ComplexF64, V1, V1) + rhoR = AMDGPU.randn(ComplexF64, V5, V5)' # test adjoint tensor + H = AMDGPU.randn(ComplexF64, V2 * V4, V2 * V4) + AMDGPU.@allowscalar begin + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * conj(A1[a', t1, b]) * + A2[b, t2, c'] * rhoR[c', c] * + H[s1, s2, t1, t2] + + @tensor HrA12array[a, s1, s2, c] := ad(rhoL)[a, a'] * + conj(ad(A1)[a', t1, b]) * + ad(A2)[b, t2, c'] * + ad(rhoR)[c', c] * + ad(H)[s1, s2, t1, t2] + end + @test HrA12array ≈ ad(HrA12) + end + end + @timedtestset "Index flipping: test flipping inverse" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + AMDGPU.@allowscalar begin + for i in 1:4 + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ 
flip(flip(t, i; inv = true), i) + end + end + end + #= + @timedtestset "Index flipping: test via explicit flip" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) # TODO wrong storage type + AMDGPU.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end =# + @timedtestset "Index flipping: test via contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + AMDGPU.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + AMDGPU.@allowscalar begin + @test t1 \ one(t1) ≈ inv(t1) + end + # @test one(t1) / t1 ≈ pinv(t1) # pinv not available in AMDGPU + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + # pinv not available in AMDGPU + # tp = pinv(t) * t + # @test tp ≈ tp * tp + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via conversion" begin + W1 
= V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + d1 = dim(W1) + d2 = dim(W2) + AMDGPU.@allowscalar begin + At1 = reshape(convert(ROCArray, t1), d1, d1) + At2 = reshape(convert(ROCArray, t2), d2, d2) + At = reshape(convert(ROCArray, t), d1, d2) + end + @test ad(t1 * t) ≈ ad(t1) * ad(t) + @test ad(t1' * t) ≈ ad(t1)' * ad(t) + @test ad(t2 * t') ≈ ad(t2) * ad(t)' + @test ad(t2' * t') ≈ ad(t2)' * ad(t)' + @test ad(inv(t1)) ≈ inv(ad(t1)) + # pinv not available in AMDGPU + #@test ad(pinv(t)) ≈ pinv(ad(t)) + + if T == Float32 || T == ComplexF32 + continue + end + + @test ad(t1 \ t) ≈ ad(t1) \ ad(t) + @test ad(t1' \ t) ≈ ad(t1)' \ ad(t) + @test ad(t2 \ t') ≈ ad(t2) \ ad(t)' + @test ad(t2' \ t') ≈ ad(t2)' \ ad(t)' + + @test ad(t2 / t) ≈ ad(t2) / ad(t) + @test ad(t2' / t) ≈ ad(t2)' / ad(t) + @test ad(t1 / t') ≈ ad(t1) / ad(t)' + @test ad(t1' / t') ≈ ad(t1)' / ad(t)' + end + end + end + #= + @timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = AMDGPU.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# # TODO + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = AMDGPU.randn(T, W, W) + s = dim(W) + AMDGPU.@allowscalar begin + expt = @constinferred exp(t) + @test ad(expt) ≈ exp(ad(t)) + + @test (@constinferred sqrt(t))^2 ≈ t + @test ad(sqrt(t)) ≈ sqrt(ad(t)) + + @test exp(@constinferred log(expt)) ≈ expt + @test ad(log(expt)) ≈ log(ad(expt)) + + @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test 
(@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t) + + t1 = sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8 + end + end + end + end + # Sylvester not defined for AMDGPU + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = AMDGPU.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = AMDGPU.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = AMDGPU.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V2) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + d1 = dim(codomain(t1)) + d2 = 
dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + At = ad(t) + AMDGPU.@allowscalar begin + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + @timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + @test t ≈ t′ + end + end + end + @timedtestset "Tensor absorpsion" begin + # absorbing small into large + t1 = AMDGPU.zeros(V1 ⊕ V1, V2 ⊗ V3) + t2 = AMDGPU.rand(V1, V2 ⊗ V3) + AMDGPU.@allowscalar begin + t3 = @constinferred absorb(t1, t2) + end + @test norm(t3) ≈ norm(t2) + @test norm(t1) == 0 + AMDGPU.@allowscalar begin + t4 = @constinferred absorb!(t1, t2) + end + @test t1 === t4 + @test t3 ≈ t4 + + # absorbing large into small + t1 = AMDGPU.rand(V1 ⊕ V1, V2 ⊗ V3) + t2 = AMDGPU.zeros(V1, V2 ⊗ V3) + AMDGPU.@allowscalar begin + t3 = @constinferred absorb(t2, t1) + end + @test norm(t3) < norm(t1) + @test norm(t2) == 0 + AMDGPU.@allowscalar begin + t4 = @constinferred absorb!(t2, t1) + end + @test t2 === t4 + @test t3 ≈ t4 + end + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = AMDGPU.rand(T, W2, W1 ⊗ W1') + t = @constinferred (t1 ⊠ t2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end +end diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl new file mode 100644 index 000000000..b198aa1a4 --- /dev/null +++ b/test/cuda/factorizations.jl @@ -0,0 +1,390 @@ +using LinearAlgebra, CUDA, Test, TestExtras, 
TensorKit, cuTENSOR + +const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +# const CuTensorMap{T,S,N1,N2,I,A} = CUDAExt.CuTensorMap{T,S,N1,N2,I,A} +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) +const CuDiagonalTensorMap = getglobal(CUDAExt, :CuDiagonalTensorMap) + +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₃, VfU₁, VfSU₂) + else + (Vtr, VU₁, VCU₁, VSU₂, VfSU₂) + end + else + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) + end +catch + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) +end + +import CUDA: rand as curand, randn as curandn + + +eltypes = (Float32, ComplexF64) + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("Factorizations with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Factorizations with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + W = V1 ⊗ V2 + @testset "QR decomposition" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, W, V1), curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + Q, R = @constinferred qr_full(t) + @test Q * R ≈ t + @test isunitary(Q) + + Q, R = @constinferred qr_compact(t) + @test Q * R ≈ t + @test isisometric(Q) + + Q, R = @constinferred left_orth(t; kind = :qr) + @test Q * R ≈ t + @test isisometric(Q) + + N = @constinferred qr_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + N = @constinferred left_null(t; kind = :qr) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes + t = curand(T, V1 ⊗ V2, zero(V1)) + + Q, R = @constinferred qr_full(t) + @test Q * R ≈ t + @test isunitary(Q) + @test 
dim(R) == dim(t) == 0 + + Q, R = @constinferred qr_compact(t) + @test Q * R ≈ t + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + Q, R = @constinferred left_orth(t; kind = :qr) + @test Q * R ≈ t + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + N = @constinferred qr_null(t) + @test isunitary(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "LQ decomposition" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, W, V1), curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + L, Q = @constinferred lq_full(t) + @test L * Q ≈ t + @test isunitary(Q) + + L, Q = @constinferred lq_compact(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + + L, Q = @constinferred right_orth(t; kind = :lq) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + + Nᴴ = @constinferred lq_null(t) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + for T in eltypes + # empty tensor + t = curand(T, zero(V1), V1 ⊗ V2) + + L, Q = @constinferred lq_full(t) + @test L * Q ≈ t + @test isunitary(Q) + @test dim(L) == dim(t) == 0 + + L, Q = @constinferred lq_compact(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + L, Q = @constinferred right_orth(t; kind = :lq) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + Nᴴ = @constinferred lq_null(t) + @test isunitary(Nᴴ) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "Polar decomposition" begin + @testset for T in eltypes, + t in ( + curand(T, W, W), + curand(T, W, W)', + curand(T, W, V1), + curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + @assert domain(t) ≾ codomain(t) + w, p = @constinferred left_polar(t) + @test w * p ≈ t + @test isisometric(w) + # broken for T <: Complex + @test isposdef(p) + + w, p = @constinferred left_orth(t; kind = :polar) + 
@test w * p ≈ t + @test isisometric(w) + end + + @testset for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, V1, W), curand(T, W, V1)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + @assert codomain(t) ≾ domain(t) + p, wᴴ = @constinferred right_polar(t) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + @test isposdef(p) + + p, wᴴ = @constinferred right_orth(t; kind = :polar) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + end + end + @testset "SVD" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', + curand(T, W, V1), curand(T, V1, W), + curand(T, W, V1)', curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + u, s, vᴴ = @constinferred svd_full(t) + @test u * s * vᴴ ≈ t + @test isunitary(u) + @test isunitary(vᴴ) + + u, s, vᴴ = @constinferred svd_compact(t) + @test u * s * vᴴ ≈ t + @test isisometric(u) + @test isposdef(s) + @test isisometric(vᴴ; side = :right) + + s′ = LinearAlgebra.diag(s) + for (c, b) in LinearAlgebra.svdvals(t) + @test b ≈ s′[c] + end + + v, c = @constinferred left_orth(t; kind = :svd) + @test v * c ≈ t + @test isisometric(v) + + N = @constinferred left_null(t; kind = :svd) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + Nᴴ = @constinferred right_null(t; kind = :svd) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes, t in (curand(T, W, zero(V1)), curand(T, zero(V1), W)) + U, S, Vᴴ = @constinferred svd_full(t) + @test U * S * Vᴴ ≈ t + @test isunitary(U) + @test isunitary(Vᴴ) + + U, S, Vᴴ = @constinferred svd_compact(t) + @test U * S * Vᴴ ≈ t + @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0 + end + end + + @testset "truncated SVD" begin + for T in eltypes, + t in ( + curandn(T, W, W), curandn(T, W, W)', + curandn(T, W, V1), curandn(T, V1, W), + curandn(T, W, V1)', curandn(T, V1, W)', + CuDiagonalTensorMap(curandn(T, 
reduceddim(V1)), V1), + ) + + @constinferred normalize!(t) + + U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) + @test U * S * Vᴴ ≈ t + @test isisometric(U) + @test isisometric(Vᴴ; side = :right) + + trunc = truncrank(dim(domain(S)) ÷ 2) + CUDA.@allowscalar begin + U1, S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ1' ≈ U1 * S1 + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) + @test dim(domain(S1)) <= trunc.howmany + + λ = minimum(minimum, values(LinearAlgebra.diag(S1))) + trunc = trunctol(; atol = λ - 10eps(λ)) + CUDA.@allowscalar begin + U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ2' ≈ U2 * S2 + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) + @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ + @test U2 ≈ U1 + @test S2 ≈ S1 + @test Vᴴ2 ≈ Vᴴ1 + + trunc = truncspace(space(S2, 1)) + CUDA.@allowscalar begin + U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ3' ≈ U3 * S3 + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) + @test space(S3, 1) ≾ space(S2, 1) + + trunc = truncerror(; atol = 0.5) + CUDA.@allowscalar begin + U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ4' ≈ U4 * S4 + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) + @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 + end + end + + @testset "Eigenvalue decomposition" begin + for T in eltypes, + t in ( + curand(T, V1, V1), + curand(T, W, W), + curand(T, W, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + d, v = @constinferred eig_full(t) + @test t * v ≈ v * d + + d′ = LinearAlgebra.diag(d) + for (c, b) in LinearAlgebra.eigvals(t) + @test sort(b; by = abs) ≈ sort(d′[c]; by = abs) + end + + vdv = v' * v + vdv = (vdv + vdv') / 2 + @test @constinferred isposdef(vdv) + t isa CuDiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map + + CUDA.@allowscalar begin # TODO + d, v = @constinferred eig_trunc(t; trunc = 
truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + + t2 = (t + t') + D, V = eigen(t2) + @test isisometric(V) + D̃, Ṽ = @constinferred eigh_full(t2) + @test D ≈ D̃ + @test V ≈ Ṽ + λ = minimum( + minimum(real(LinearAlgebra.diag(b))) + for (c, b) in blocks(D) + ) + @test cond(Ṽ) ≈ one(real(T)) + @test isposdef(t2) == isposdef(λ) + @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2)) + @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2)) + + add!(t, t') + + d, v = @constinferred eigh_full(t) + @test t * v ≈ v * d + @test isunitary(v) + + λ = minimum(minimum(real(LinearAlgebra.diag(b))) for (c, b) in blocks(d)) + @test cond(v) ≈ one(real(T)) + @test isposdef(t) == isposdef(λ) + @test isposdef(t - λ * one(t) + 0.1 * one(t)) + @test !isposdef(t - λ * one(t) - 0.1 * one(t)) + + CUDA.@allowscalar begin + d, v = @constinferred eigh_trunc(t; trunc = truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + end + end + + @testset "Condition number and rank" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', + curand(T, W, V1), curand(T, V1, W), + curand(T, W, V1)', curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + d1, d2 = dim(codomain(t)), dim(domain(t)) + @test rank(t) == min(d1, d2) + M = left_null(t) + @test @constinferred(rank(M)) + rank(t) == d1 + Mᴴ = right_null(t) + @test rank(Mᴴ) + rank(t) == d2 + end + for T in eltypes + u = unitary(T, V1 ⊗ V2, V1 ⊗ V2) + @test @constinferred(cond(u)) ≈ one(real(T)) + @test @constinferred(rank(u)) == dim(V1 ⊗ V2) + + t = curand(T, zero(V1), W) + @test rank(t) == 0 + t2 = curand(T, zero(V1) * zero(V2), zero(V1) * zero(V2)) + @test rank(t2) == 0 + @test cond(t2) == 0.0 + end + for T in eltypes, t in (curand(T, W, W), curand(T, W, W)') + add!(t, t') + vals = @constinferred LinearAlgebra.eigvals(t) + λmax = maximum(s -> maximum(abs, s), values(vals)) + λmin = minimum(s -> minimum(abs, s), 
values(vals)) + @test cond(t) ≈ λmax / λmin + end + end + end +end diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl new file mode 100644 index 000000000..a22e53db9 --- /dev/null +++ b/test/cuda/tensors.jl @@ -0,0 +1,629 @@ +using Adapt, CUDA, cuTENSOR +using Test, TestExtras +using TensorKit, Combinatorics +ad = adapt(Array) +const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) +const CuDiagonalTensorMap = getglobal(CUDAExt, :CuDiagonalTensorMap) + +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("CUDA Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred CUDA.zeros(T, W) + CUDA.@allowscalar begin + @test @constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == 
TensorMap{T, spacetype(t), 5, 0, CuVector{T, CUDA.DeviceMemory}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + d = convert(Dict, t) + @test t == convert(CuTensorMap, d) + end + end + if hasfusiontensor(I) || I == Trivial + @timedtestset "Tensor Array conversion" begin + W1 = V1 ← one(V1) + W2 = one(V2) ← V2 + W3 = V1 ⊗ V2 ← one(V1) + W4 = V1 ← V2 + W5 = one(V1) ← V1 ⊗ V2 + W6 = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for W in (W1, W2, W3, W4, W5, W6) + for T in (Int, Float32, ComplexF64) + if T == Int + t = CuTensorMap{T}(undef, W) + for (_, b) in blocks(t) + CUDA.@allowscalar CUDA.rand!(b, -20:20) + end + else + t = @constinferred CUDA.randn(T, W) + end + #=CUDA.@allowscalar begin + a = @constinferred convert(CuArray, t) + end + b = reshape(a, dim(codomain(W)), dim(domain(W))) + @test t ≈ @constinferred CuTensorMap(a, W) + @test t ≈ @constinferred CuTensorMap(b, W) + @test t === @constinferred CuTensorMap(t.data, W)=# # TODO convert(CuArray broken + end + end + for T in (Int, Float32, ComplexF64) + t = CUDA.randn(T, V1 ⊗ V2 ← zero(V1)) + CUDA.@allowscalar begin + a = convert(CuArray, t) + end + @test norm(a) == 0 + end + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs 
= @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + CUDA.@allowscalar begin + @test norm(t + t, p) ≈ 2 * norm(t, p) + end + @test norm(t) ≈ norm(t') + + t2 = @constinferred rand!(similar(t)) + β = rand(T) + #@test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) # broken for Irrep[CU₁] + @test dot(β * t2, α * t) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(CuVector{T}, V2 ⊗ V1, V1 ⊗ V2)) + CUDA.@allowscalar begin + #@test i1 * i2 == @constinferred(id(T, V1 ⊗ V2)) # TODO + #@test i2 * i1 == @constinferred(id(CuVector{T}, V2 ⊗ V1)) # TODO + w = @constinferred(isometry(T, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(CuVector{T}, V1) + @test w * w' == (w * w')^2 + end + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test 
@constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, $(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + #=@timedtestset "Basic linear algebra: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = CUDA.rand(T, W) + t2 = @constinferred CUDA.rand!(similar(t)) + @test norm(t, 2) ≈ norm(convert(CuArray, t), 2) + @test dot(t2, t) ≈ dot(convert(CuArray, t2), convert(CuArray, t)) + α = rand(T) + @test convert(CuArray, α * t) ≈ α * convert(CuArray, t) + @test convert(CuArray, t + t) ≈ 2 * convert(CuArray, t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred CUDA.randn(T, W, W) + + tr = @constinferred real(t) + @test scalartype(tr) <: Real + @test real(convert(CuArray, t)) == convert(CuArray, tr) + + ti = @constinferred imag(t) + @test scalartype(ti) <: Real + @test imag(convert(CuArray, t)) == convert(CuArray, ti) + + tc = @inferred complex(t) + @test scalartype(tc) <: Complex + @test complex(convert(CuArray, t)) == convert(CuArray, tc) + + tc2 = @inferred complex(tr, ti) + @test tc2 ≈ tc + end + end=# # TODO convert(CuArray is broken + end + @timedtestset "Tensor conversion" begin + W = V1 ⊗ V2 + t = @constinferred CUDA.randn(W ← W) + @test typeof(convert(CuTensorMap, 
t')) == typeof(t) + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + @test typeof(convert(typeof(tc), t')) == typeof(tc) + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + #=@timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = CUDA.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + # TODO find a way to use CUDA here + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + t′ = CUDA.randn!(similar(t)) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + CUDA.@allowscalar begin + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + end + + CUDA.@allowscalar begin + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + a = convert(CuArray, t) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = permute(t, (p1, p2)) + a2 = convert(CuArray, t2) + @test a2 ≈ permutedims(a, (p1..., p2...)) + @test convert(CuArray, transpose(t2)) ≈ + permutedims(a2, (5, 4, 3, 2, 1)) + end + + t3 = repartition(t, k) + a3 = convert(CuArray, t3) + @test a3 ≈ permutedims( + a, (ntuple(identity, k)..., reverse(ntuple(i -> i + k, 5 - k))...) 
+ ) + end + end + end=# # convert(CuArray broken + @timedtestset "Full trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + CUDA.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + CUDA.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + CUDA.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + end + @test ta ≈ tb + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via conversion" begin + A1 = CUDA.randn(ComplexF64, V1' * V2', V3') + A2 = CUDA.randn(ComplexF64, V3 * V4, V5) + rhoL = CUDA.randn(ComplexF64, V1, V1) + rhoR = CUDA.randn(ComplexF64, V5, V5)' # test adjoint tensor + H = CUDA.randn(ComplexF64, V2 * V4, V2 * V4) + CUDA.@allowscalar begin + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * conj(A1[a', t1, b]) * + A2[b, t2, c'] * rhoR[c', c] * + H[s1, s2, t1, t2] + + @tensor HrA12array[a, s1, s2, c] := ad(rhoL)[a, a'] * + conj(ad(A1)[a', t1, b]) * + ad(A2)[b, t2, 
c'] * + ad(rhoR)[c', c] * + ad(H)[s1, s2, t1, t2] + end + @test HrA12array ≈ ad(HrA12) + end + end + @timedtestset "Index flipping: test flipping inverse" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + for i in 1:4 + CUDA.@allowscalar begin + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) + end + end + end + #=@timedtestset "Index flipping: test via explicit flip" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) + + CUDA.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end=# # TODO + @timedtestset "Index flipping: test via contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + CUDA.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end + @timedtestset "Multiplication of isometries: test properties" begin + W2 = V4 ⊗ V5 + W1 = W2 ⊗ (oneunit(V1) ⊕ oneunit(V1)) + for T in (Float64, ComplexF64) + t1 = randisometry(T, CuMatrix{T}, W1, W2) + t2 = randisometry(T, CuMatrix{T}, W2 ← W2) + @test isisometric(t1) + @test isunitary(t2) + P = t1 * t1' + @test P * P ≈ P + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in 
(Float64, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + @test t1 \ one(t1) ≈ inv(t1) + @test one(t1) / t1 ≈ pinv(t1) + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + tp = pinv(t) * t + @test tp ≈ tp * tp + end + end + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via conversion" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + d1 = dim(W1) + d2 = dim(W2) + At1 = reshape(convert(CuArray, t1), d1, d1) + At2 = reshape(convert(CuArray, t2), d2, d2) + At = reshape(convert(CuArray, t), d1, d2) + @test reshape(convert(CuArray, t1 * t), d1, d2) ≈ At1 * At + @test reshape(convert(CuArray, t1' * t), d1, d2) ≈ At1' * At + @test reshape(convert(CuArray, t2 * t'), d2, d1) ≈ At2 * At' + @test reshape(convert(CuArray, t2' * t'), d2, d1) ≈ At2' * At' + + @test reshape(convert(CuArray, inv(t1)), d1, d1) ≈ inv(At1) + @test reshape(convert(CuArray, pinv(t)), d2, d1) ≈ pinv(At) + + if T == Float32 || T == ComplexF32 + continue + end + + @test reshape(convert(CuArray, t1 \ t), d1, d2) ≈ At1 \ At + @test reshape(convert(CuArray, t1' \ t), d1, d2) ≈ At1' \ At + @test reshape(convert(CuArray, t2 \ t'), d2, d1) ≈ At2 \ At' + @test reshape(convert(CuArray, t2' \ t'), d2, d1) ≈ At2' \ At' + + @test reshape(convert(CuArray, t2 / t), d2, d1) ≈ At2 / At + @test reshape(convert(CuArray, t2' / t), d2, d1) ≈ At2' / At + @test reshape(convert(CuArray, t1 / t'), d1, d2) ≈ At1 / At' + @test reshape(convert(CuArray, t1' / t'), d1, d2) ≈ At1' / At' + end + end + end=# # convert(CuArray broken + @timedtestset "Tensor truncation" begin + for T in (Float32, ComplexF64) + for p in (1, 2, 3, Inf) + # Test both a normal tensor and an adjoint one. 
+ ts = ( + CUDA.randn(T, V1 ⊗ V2 ⊗ V3, V4 ⊗ V5), + CUDA.randn(T, V4 ⊗ V5, V1 ⊗ V2 ⊗ V3)', + ) + for t in ts + U₀, S₀, V₀, = tsvd(t) + t = rmul!(t, 1 / norm(S₀, p)) + # Probably shouldn't allow truncerr and truncdim, as these require scalar indexing? + CUDA.@allowscalar begin + U, S, V = tsvd(t; trunc = truncbelow(1 / dim(domain(S₀))), p = p) + U′, S′, V′ = tsvd(t; trunc = truncspace(space(S, 1)), p = p) + end + @test (U, S, V) == (U′, S′, V′) + end + end + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = CUDA.randn(T, W, W) + s = dim(W) + CUDA.@allowscalar begin + #=@test (@constinferred sqrt(t))^2 ≈ t + @test ad(sqrt(t)) ≈ sqrt(ad(t)) + + expt = @constinferred exp(t) + @test ad(expt) ≈ exp(ad(t)) + + @test exp(@constinferred log(expt)) ≈ expt + @test ad(log(expt)) ≈ log(ad(expt)) + + @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t) + + t1 = sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8 + =# #exp not supported for CuArray + end + end + end + end + # Sylvester not defined for CUDA + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = CUDA.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = CUDA.rand(T, V2 ⊗ V4, V2 
⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = CUDA.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + At = ad(t) + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + @timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + # @test t ≈ t′ # TODO broken for symmetry: Irrep[ℤ₃] + end + end + end + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, 
ComplexF64) + t1 = CUDA.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = CUDA.rand(T, W2, W1 ⊗ W1') + CUDA.@allowscalar begin + t = @constinferred (t1 ⊠ t2) + end + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 854991a7f..7a588bf4c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,8 +2,6 @@ # ------------ using ArgParse: ArgParse using SafeTestsets: @safetestset -# using CUDA: CUDA -# using AMDGPU: AMDGPU function parse_commandline(args = ARGS) s = ArgParse.ArgParseSettings() @@ -49,9 +47,15 @@ istestfile(fn) = endswith(fn, ".jl") && !contains(fn, "setup") # handle GPU cases separately if group == "cuda" - # CUDA.functional() || continue + using CUDA + CUDA.functional() || continue + @time include("cuda/tensors.jl") + @time include("cuda/factorizations.jl") elseif group == "amd" - # AMDGPU.functional() || continue + using AMDGPU + AMDGPU.functional() || continue + @time include("amd/tensors.jl") + @time include("amd/factorizations.jl") elseif is_buildkite continue end From 6184ca5b46f3f3dd51ac052993c1702b8d76be1b Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 10 Nov 2025 05:15:48 -0500 Subject: [PATCH 32/34] Small updates --- src/tensors/linalg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index 34fd6ec18..d9e14d0e7 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -431,7 +431,7 @@ function exp!(t::TensorMap) domain(t) == codomain(t) || error("Exponential of a tensor only exist when domain == codomain.") for (c, b) in blocks(t) - copy!(b, exp!(b)) + copy!(b, LinearAlgebra.exp!(b)) end return t end From ae77d203b567789ec29a6d38eb15752b32bc5f43 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 12 Nov 2025 07:39:51 -0500 Subject: [PATCH 33/34] Add CUDA to factorization testset titles 
--- test/cuda/factorizations.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index b198aa1a4..0db782a92 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -35,9 +35,9 @@ for V in spacelist I = sectortype(first(V)) Istr = TensorKit.type_repr(I) println("---------------------------------------") - println("Factorizations with symmetry: $Istr") + println("CUDA Factorizations with symmetry: $Istr") println("---------------------------------------") - @timedtestset "Factorizations with symmetry: $Istr" verbose = true begin + @timedtestset "CUDA Factorizations with symmetry: $Istr" verbose = true begin V1, V2, V3, V4, V5 = V W = V1 ⊗ V2 @testset "QR decomposition" begin From 131fd0486c3e358976b52b73dd474d69dd3e89ee Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 17 Nov 2025 13:32:41 -0500 Subject: [PATCH 34/34] More tests for CUDA --- Project.toml | 3 +- test/cuda/factorizations.jl | 82 ++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index c968768e5..0193fb9bd 100644 --- a/Project.toml +++ b/Project.toml @@ -27,9 +27,8 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [sources] GPUArrays = {rev = "master", url = "https://github.com/JuliaGPU/GPUArrays.jl"} -MatrixAlgebraKit = {rev = "ksh/tk", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"} AMDGPU = {rev = "master", url = "https://github.com/JuliaGPU/AMDGPU.jl"} -cuTENSOR = {subdir = "lib/cutensor", url = "https://github.com/JuliaGPU/CUDA.jl", rev="master"} +MatrixAlgebraKit = {rev = "ksh/tk2", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"} [extensions] TensorKitAMDGPUExt = "AMDGPU" diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index 0db782a92..3751d58ee 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -1,4 +1,7 @@ -using LinearAlgebra, 
CUDA, Test, TestExtras, TensorKit, cuTENSOR +using Test, TestExtras +using TensorKit +using LinearAlgebra: LinearAlgebra +using CUDA, cuTENSOR const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) @assert !isnothing(CUDAExt) @@ -55,7 +58,7 @@ for V in spacelist @test Q * R ≈ t @test isisometric(Q) - Q, R = @constinferred left_orth(t; kind = :qr) + Q, R = @constinferred left_orth(t) @test Q * R ≈ t @test isisometric(Q) @@ -63,7 +66,7 @@ for V in spacelist @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) - N = @constinferred left_null(t; kind = :qr) + N = @constinferred left_null(t) @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) end @@ -82,7 +85,7 @@ for V in spacelist @test isisometric(Q) @test dim(Q) == dim(R) == dim(t) - Q, R = @constinferred left_orth(t; kind = :qr) + Q, R = @constinferred left_orth(t) @test Q * R ≈ t @test isisometric(Q) @test dim(Q) == dim(R) == dim(t) @@ -107,7 +110,7 @@ for V in spacelist @test L * Q ≈ t @test isisometric(Q; side = :right) - L, Q = @constinferred right_orth(t; kind = :lq) + L, Q = @constinferred right_orth(t) @test L * Q ≈ t @test isisometric(Q; side = :right) @@ -130,7 +133,7 @@ for V in spacelist @test isisometric(Q; side = :right) @test dim(Q) == dim(L) == dim(t) - L, Q = @constinferred right_orth(t; kind = :lq) + L, Q = @constinferred right_orth(t) @test L * Q ≈ t @test isisometric(Q; side = :right) @test dim(Q) == dim(L) == dim(t) @@ -157,7 +160,7 @@ for V in spacelist # broken for T <: Complex @test isposdef(p) - w, p = @constinferred left_orth(t; kind = :polar) + w, p = @constinferred left_orth(t; alg = :polar) @test w * p ≈ t @test isisometric(w) end @@ -174,7 +177,7 @@ for V in spacelist @test isisometric(wᴴ; side = :right) @test isposdef(p) - p, wᴴ = @constinferred right_orth(t; kind = :polar) + p, wᴴ = @constinferred right_orth(t; alg = :polar) @test p * wᴴ ≈ t @test isisometric(wᴴ; side = :right) end @@ -204,15 +207,15 @@ for V in spacelist @test b ≈ s′[c] end 
- v, c = @constinferred left_orth(t; kind = :svd) + v, c = @constinferred left_orth(t; alg = :svd) @test v * c ≈ t @test isisometric(v) - N = @constinferred left_null(t; kind = :svd) + N = @constinferred left_null(t; alg = :svd) @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) - Nᴴ = @constinferred right_null(t; kind = :svd) + Nᴴ = @constinferred right_null(t; alg = :svd) @test isisometric(Nᴴ; side = :right) @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) end @@ -386,5 +389,62 @@ for V in spacelist @test cond(t) ≈ λmax / λmin end end + + @testset "Hermitian projections" begin + for T in eltypes, + t in ( + curand(T, V1, V1), curand(T, W, W), curand(T, W, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + normalize!(t) + noisefactor = eps(real(T))^(3 / 4) + + th = (t + t') / 2 + ta = (t - t') / 2 + tc = copy(t) + + th′ = @constinferred project_hermitian(t) + @test ishermitian(th′) + @test th′ ≈ th + @test t == tc + th_approx = th + noisefactor * ta + @test !ishermitian(th_approx) || (T <: Real && t isa CuDiagonalTensorMap) + @test ishermitian(th_approx; atol = 10 * noisefactor) + + ta′ = project_antihermitian(t) + @test isantihermitian(ta′) + @test ta′ ≈ ta + @test t == tc + ta_approx = ta + noisefactor * th + @test !isantihermitian(ta_approx) + @test isantihermitian(ta_approx; atol = 10 * noisefactor) || (T <: Real && t isa CuDiagonalTensorMap) + end + end + + @testset "Isometric projections" begin + for T in eltypes, + t in ( + curandn(T, W, W), curandn(T, W, W)', + curandn(T, W, V1), curandn(T, V1, W)', + ) + t2 = project_isometric(t) + @test isisometric(t2) + t3 = project_isometric(t2) + @test t3 ≈ t2 # stability of the projection + @test t2 * (t2' * t) ≈ t + + tc = similar(t) + t3 = @constinferred project_isometric!(copy!(tc, t), t2) + @test t3 === t2 + @test isisometric(t2) + + # test that t2 is closer to A then any other isometry + for k in 1:10 + δt = randn!(similar(t)) + t3 = project_isometric(t + δt / 100) + @test 
norm(t - t3) > norm(t - t2) + end + end + end end end