From dd742de1bfe21bf5f2ee979713dc9b69ac28963e Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Sat, 22 Feb 2025 20:44:18 -0300
Subject: [PATCH 01/22] prepare more tests

---
 test/Project.toml    | 1 +
 test/jump_wrapper.jl | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index e7c5a5b8d..a9cc61b04 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -10,6 +10,7 @@ JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
+ParametricOptInterface = "0ce4ce61-57bf-432b-a095-efac525d185e"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SCS = "c946c3f1-0d1f-5ce8-9dea-7daa1f7e2d13"
diff --git a/test/jump_wrapper.jl b/test/jump_wrapper.jl
index dae907c3d..2a2c63110 100644
--- a/test/jump_wrapper.jl
+++ b/test/jump_wrapper.jl
@@ -11,6 +11,7 @@ import DiffOpt
 import HiGHS
 import Ipopt
 import SCS
+import ParametricOptInterface as POI
 import MathOptInterface as MOI
 
 const ATOL = 1e-3
@@ -36,8 +37,8 @@ function test_jump_api()
             # (DiffOpt.conic_diff_model, HiGHS.Optimizer),
             # (DiffOpt.conic_diff_model, SCS.Optimizer), # conicmodel has a issue with sign
             # (DiffOpt.conic_diff_model, Ipopt.Optimizer),
-            # (DiffOpt.nonlinear_diff_model, HiGHS.Optimizer), #  SQF ctr not supported?
-            # (DiffOpt.nonlinear_diff_model, SCS.Optimizer), # returns zero for sensitivity
+            # (DiffOpt.nonlinear_diff_model, ()->POI.Optimizer(HiGHS.Optimizer())), # wrong results
+            # (DiffOpt.nonlinear_diff_model, ()->POI.Optimizer(SCS.Optimizer())), # needs cache
             (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer),
         ],
         ineq in [true, false],

From 60d3d8ed58434d96c2b81184ba62c53863af8dc9 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 23 Jun 2025 08:01:56 -0300
Subject: [PATCH 02/22] sketch

---
 src/NonLinearProgram/NonLinearProgram.jl |  4 +-
 src/QuadraticProgram/QuadraticProgram.jl |  3 ++
 src/copy_dual.jl                         | 10 +++-
 src/diff_opt.jl                          | 50 +++++++++++++++++---
 src/moi_wrapper.jl                       | 60 ++++++++++++++++++------
 src/parameters.jl                        | 10 ----
 test/jump_wrapper.jl                     | 20 ++++----
 7 files changed, 114 insertions(+), 43 deletions(-)

diff --git a/src/NonLinearProgram/NonLinearProgram.jl b/src/NonLinearProgram/NonLinearProgram.jl
index e09d4cf40..dbbefc3d1 100644
--- a/src/NonLinearProgram/NonLinearProgram.jl
+++ b/src/NonLinearProgram/NonLinearProgram.jl
@@ -513,8 +513,8 @@ function DiffOpt.forward_differentiate!(model::Model; tol = 1e-6)
         Δp = zeros(length(cache.params))
         for (i, var_idx) in enumerate(cache.params)
             ky = form.var2ci[var_idx]
-            if haskey(model.input_cache.dp, ky) # only for set sensitivities
-                Δp[i] = model.input_cache.dp[ky]
+            if haskey(model.input_cache.parameter_constraints, ky) # only for set sensitivities
+                Δp[i] = model.input_cache.parameter_constraints[ky]
             end
         end
 
diff --git a/src/QuadraticProgram/QuadraticProgram.jl b/src/QuadraticProgram/QuadraticProgram.jl
index b3c06536c..568e31be6 100644
--- a/src/QuadraticProgram/QuadraticProgram.jl
+++ b/src/QuadraticProgram/QuadraticProgram.jl
@@ -172,6 +172,9 @@ end
 
 function MOI.set(model::Model, ::MOI.ConstraintDualStart, ci::LE, value)
     MOI.throw_if_not_valid(model, ci)
+    @show ci
+    @show value
+    @show model
     return DiffOpt._enlarge_set(
         model.λ,
         MOI.Utilities.rows(_inequalities(model), ci),
diff --git a/src/copy_dual.jl b/src/copy_dual.jl
index a7c24d23b..b28b11c36 100644
--- a/src/copy_dual.jl
+++ b/src/copy_dual.jl
@@ -80,6 +80,7 @@ end
 
 function _copy_dual(dest::MOI.ModelLike, src::MOI.ModelLike, index_map)
     vis_src = MOI.get(src, MOI.ListOfVariableIndices())
+    # TODO: in POI varible primal must be the same as variable value
     MOI.set(
         dest,
         MOI.VariablePrimalStart(),
@@ -87,6 +88,13 @@ function _copy_dual(dest::MOI.ModelLike, src::MOI.ModelLike, index_map)
         MOI.get(src, MOI.VariablePrimal(), vis_src),
     )
     for (F, S) in MOI.get(dest, MOI.ListOfConstraintTypesPresent())
+        @show F, S
+        @show dest
+        @show dest.optimizer
+        if F<:MOI.VariableIndex && S<:MOI.Parameter
+            @show "Skipping constraint type $F $S"
+            continue
+        end
         _copy_constraint_start(
             dest,
             src,
@@ -116,7 +124,7 @@ function _copy_constraint_start(
     src_attr,
 ) where {F,S}
     for ci in MOI.get(src, MOI.ListOfConstraintIndices{F,S}())
-        value = MOI.get(src, src_attr, ci)
+        @show value = MOI.get(src, src_attr, ci)
         MOI.set(dest, dest_attr, index_map[ci], value)
     end
 end
diff --git a/src/diff_opt.jl b/src/diff_opt.jl
index b850e72aa..0cd0dff9a 100644
--- a/src/diff_opt.jl
+++ b/src/diff_opt.jl
@@ -13,7 +13,6 @@ const MOIDD = MOI.Utilities.DoubleDicts
 
 Base.@kwdef mutable struct InputCache
     dx::Dict{MOI.VariableIndex,Float64} = Dict{MOI.VariableIndex,Float64}()# dz for QP
-    dp::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}() # Specifically for NonLinearProgram
     dy::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}()
     # Dual sensitivity currently only works for NonLinearProgram
     # ds
@@ -23,6 +22,7 @@ Base.@kwdef mutable struct InputCache
     #       concrete value types.
     # `scalar_constraints` and `vector_constraints` includes `A` and `b` for CPs
     # or `G` and `h` for QPs
+    parameter_constraints::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}() # Specifically for NonLinearProgram
     scalar_constraints::MOIDD.DoubleDict{MOI.ScalarAffineFunction{Float64}} =
         MOIDD.DoubleDict{MOI.ScalarAffineFunction{Float64}}() # also includes G for QPs
     vector_constraints::MOIDD.DoubleDict{MOI.VectorAffineFunction{Float64}} =
@@ -33,8 +33,8 @@ end
 
 function Base.empty!(cache::InputCache)
     empty!(cache.dx)
-    empty!(cache.dp)
     empty!(cache.dy)
+    empty!(cache.parameter_constraints)
     empty!(cache.scalar_constraints)
     empty!(cache.vector_constraints)
     cache.objective = nothing
@@ -136,6 +136,17 @@ ConstraintFunction so we consider the expression `θ * (x + 2y - 5)`.
 """
 struct ForwardConstraintFunction <: MOI.AbstractConstraintAttribute end
 
+
+"""
+    ForwardConstraintSet <: MOI.AbstractConstraintAttribute
+
+A `MOI.AbstractConstraintAttribute` to set input data to forward differentiation, that
+is, problem input data.
+
+Currently, this only works for the set `MOI.Parameter`.
+"""
+struct ForwardConstraintSet <: MOI.AbstractConstraintAttribute end
+
 """
     ForwardVariablePrimal <: MOI.AbstractVariableAttribute
 
@@ -167,10 +178,6 @@ MOI.set(model, DiffOpt.ReverseVariablePrimal(), x)
 """
 struct ReverseVariablePrimal <: MOI.AbstractVariableAttribute end
 
-struct ForwardConstraintSet <: MOI.AbstractConstraintAttribute end
-
-struct ReverseConstraintSet <: MOI.AbstractConstraintAttribute end
-
 """
     ReverseConstraintDual <: MOI.AbstractConstraintAttribute
 
@@ -253,6 +260,18 @@ struct ReverseConstraintFunction <: MOI.AbstractConstraintAttribute end
 
 MOI.is_set_by_optimize(::ReverseConstraintFunction) = true
 
+"""
+    ReverseConstraintSet
+
+An `MOI.AbstractConstraintAttribute` to get output data to reverse differentiation, that
+is, problem input data.
+
+Currently, this only works for the set `MOI.Parameter`.
+"""
+struct ReverseConstraintSet <: MOI.AbstractConstraintAttribute end
+
+MOI.is_set_by_optimize(::ReverseConstraintSet) = true
+
 """
     DifferentiateTimeSec()
 
@@ -409,6 +428,9 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.ScalarAffineFunction{T},S},
     func::MOI.ScalarAffineFunction{T},
 ) where {T,S}
+    if MOI.supports_constraint(model.model, MOI.VariableIndex, MOI.Parameter{T})
+        error("The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.")
+    end
     model.input_cache.scalar_constraints[ci] = func
     return
 end
@@ -419,10 +441,26 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.VectorAffineFunction{T},S},
     func::MOI.VectorAffineFunction{T},
 ) where {T,S}
+    if MOI.supports_constraint(model.model, MOI.VariableIndex, MOI.Parameter{T})
+        error("The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.")
+    end
     model.input_cache.vector_constraints[ci] = func
     return
 end
 
+function MOI.set(
+    model::AbstractModel,
+    ::ForwardConstraintSet,
+    ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}},
+    set::MOI.Parameter{T},
+) where {T}
+    if !MOI.supports_constraint(model.model, MOI.VariableIndex, MOI.Parameter{T})
+        error("The model with type $(typeof(model)) does not support Parameters")
+    end
+    model.input_cache.parameter_constraints[ci] = set.value
+    return
+end
+
 function lazy_combination(op::F, α, a, β, b) where {F<:Function}
     return LazyArrays.ApplyArray(
         op,
diff --git a/src/moi_wrapper.jl b/src/moi_wrapper.jl
index 35fe763b8..b70663328 100644
--- a/src/moi_wrapper.jl
+++ b/src/moi_wrapper.jl
@@ -39,16 +39,12 @@ function diff_optimizer(
             MOI.Utilities.UniversalFallback(
                 MOI.Utilities.Model{with_bridge_type}(),
             ),
-            optimizer,
+            with_parametric_opt_interface ? POI.Optimizer(optimizer) : optimizer,
         )
     else
-        optimizer
-    end
-    if with_parametric_opt_interface
-        return POI.Optimizer(Optimizer(caching_opt))
-    else
-        return Optimizer(caching_opt)
+        with_parametric_opt_interface ? POI.Optimizer(optimizer) : optimizer
     end
+    return Optimizer(caching_opt)
 end
 
 mutable struct Optimizer{OT<:MOI.ModelLike} <: MOI.AbstractOptimizer
@@ -560,6 +556,20 @@ function forward_differentiate!(model::Optimizer)
         NonLinearKKTJacobianFactorization(),
         model.input_cache.factorization,
     )
+    T = Float64
+    if MOI.supports_constraint(model, MOI.VariableIndex, MOI.Parameter{T})
+        @show "param mode"
+        for (vi, value) in model.input_cache.parameter_constraints
+            MOI.set(
+                diff,
+                ForwardConstraintSet(),
+                model.index_map[vi],
+                MOI.Parameter(value),
+            )
+        end
+        return forward_differentiate!(diff)
+    end
+    @show "func mode"
     if model.input_cache.objective !== nothing
         MOI.set(
             diff,
@@ -586,11 +596,6 @@ function forward_differentiate!(model::Optimizer)
             model.input_cache.vector_constraints[F, S],
         )
     end
-    if model.input_cache.dp !== nothing
-        for (vi, value) in model.input_cache.dp
-            diff.model.input_cache.dp[model.index_map[vi]] = value
-        end
-    end
     return forward_differentiate!(diff)
 end
 
@@ -623,7 +628,7 @@ function _diff(model::Optimizer)
         if isnothing(model_constructor)
             model.diff = nothing
             for constructor in model.model_constructors
-                model.diff = _instantiate_with_bridges(constructor)
+                model.diff = POI.Optimizer(_instantiate_with_bridges(constructor))
                 try
                     model.index_map = MOI.copy_to(model.diff, model.optimizer)
                 catch err
@@ -648,7 +653,7 @@ function _diff(model::Optimizer)
                 )
             end
         else
-            model.diff = _instantiate_with_bridges(model_constructor)
+            model.diff = POI.Optimizer(_instantiate_with_bridges(model_constructor))
             model.index_map = MOI.copy_to(model.diff, model.optimizer)
         end
         _copy_dual(model.diff, model.optimizer, model.index_map)
@@ -858,6 +863,33 @@ function MOI.get(
     end
 end
 
+function MOI.supports(
+    ::Optimizer,
+    ::ForwardConstraintSet,
+    ::Type{MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}},
+) where {T}
+    return true
+end
+
+function MOI.get(
+    model::Optimizer,
+    ::ForwardConstraintSet,
+    ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}},
+) where {T}
+    set = get(model.input_cache.parameter_constraints, ci, zero(T))
+    return MOI.Parameter{T}(set)
+end
+
+function MOI.set(
+    model::Optimizer,
+    ::ForwardConstraintSet,
+    ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}},
+    set::MOI.Parameter,
+) where {T}
+    model.input_cache.parameter_constraints[ci] = set.value
+    return
+end
+
 function MOI.get(model::Optimizer, attr::DifferentiateTimeSec)
     return MOI.get(model.diff, attr)
 end
diff --git a/src/parameters.jl b/src/parameters.jl
index 68ff54a49..f9529816b 100644
--- a/src/parameters.jl
+++ b/src/parameters.jl
@@ -315,16 +315,6 @@ function MOI.set(
     return
 end
 
-function MOI.set(
-    model::Optimizer,
-    ::ForwardConstraintSet,
-    ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}},
-    set::MOI.Parameter,
-) where {T}
-    model.input_cache.dp[ci] = set.value
-    return
-end
-
 function MOI.get(
     model::POI.Optimizer,
     attr::ForwardVariablePrimal,
diff --git a/test/jump_wrapper.jl b/test/jump_wrapper.jl
index 1720246e4..dbbd2cf70 100644
--- a/test/jump_wrapper.jl
+++ b/test/jump_wrapper.jl
@@ -29,20 +29,20 @@ end
 
 function test_jump_api()
     for (MODEL, SOLVER) in [
-            (DiffOpt.diff_model, Ipopt.Optimizer),
+            # (DiffOpt.diff_model, Ipopt.Optimizer),
             (DiffOpt.quadratic_diff_model, HiGHS.Optimizer),
-            (DiffOpt.quadratic_diff_model, SCS.Optimizer),
-            (DiffOpt.quadratic_diff_model, Ipopt.Optimizer),
-            (DiffOpt.conic_diff_model, HiGHS.Optimizer),
-            (DiffOpt.conic_diff_model, SCS.Optimizer),
-            (DiffOpt.conic_diff_model, Ipopt.Optimizer),
+            # (DiffOpt.quadratic_diff_model, SCS.Optimizer),
+            # (DiffOpt.quadratic_diff_model, Ipopt.Optimizer),
+            # (DiffOpt.conic_diff_model, HiGHS.Optimizer),
+            # (DiffOpt.conic_diff_model, SCS.Optimizer),
+            # (DiffOpt.conic_diff_model, Ipopt.Optimizer),
             # (DiffOpt.nonlinear_diff_model, HiGHS.Optimizer), #  SQF ctr not supported?
             # (DiffOpt.nonlinear_diff_model, SCS.Optimizer), # returns zero for sensitivity
-            (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer),
+            # (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer),
         ],
-        ineq in [true, false],
-        min in [true, false],
-        flip in [true, false]
+        ineq in [true],#, false],
+        min in [true],#, false],
+        flip in [true]#, false]
 
         @testset "$(MODEL) with: $(SOLVER), $(ineq ? "ineqs" : "eqs"), $(min ? "Min" : "Max"), $(flip ? "geq" : "leq")" begin
             model = MODEL(SOLVER)

From eb76a98efee35d0543023814e2434e393865b1fd Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 23 Jun 2025 09:54:12 -0300
Subject: [PATCH 03/22] temp examples

---
 ...Thermal_Generation_Dispatch_Example_new.jl | 191 +++++++++++++++
 docs/src/examples/autotuning-ridge_new.jl     | 222 ++++++++++++++++++
 docs/src/examples/custom-relu_new.jl          | 122 ++++++++++
 .../examples/matrix-inversion-manual_new.jl   | 150 ++++++++++++
 docs/src/examples/nearest_correlation_new.jl  |  66 ++++++
 docs/src/examples/polyhedral_project_new.jl   | 169 +++++++++++++
 .../sensitivity-analysis-ridge_new.jl         | 170 ++++++++++++++
 .../examples/sensitivity-analysis-svm_new.jl  | 144 ++++++++++++
 src/jump_wrapper.jl                           |   7 +-
 src/moi_wrapper.jl                            |   8 +-
 test/jump_wrapper.jl                          |  46 +++-
 11 files changed, 1277 insertions(+), 18 deletions(-)
 create mode 100644 docs/src/examples/Thermal_Generation_Dispatch_Example_new.jl
 create mode 100644 docs/src/examples/autotuning-ridge_new.jl
 create mode 100644 docs/src/examples/custom-relu_new.jl
 create mode 100644 docs/src/examples/matrix-inversion-manual_new.jl
 create mode 100644 docs/src/examples/nearest_correlation_new.jl
 create mode 100644 docs/src/examples/polyhedral_project_new.jl
 create mode 100644 docs/src/examples/sensitivity-analysis-ridge_new.jl
 create mode 100644 docs/src/examples/sensitivity-analysis-svm_new.jl

diff --git a/docs/src/examples/Thermal_Generation_Dispatch_Example_new.jl b/docs/src/examples/Thermal_Generation_Dispatch_Example_new.jl
new file mode 100644
index 000000000..de8134a28
--- /dev/null
+++ b/docs/src/examples/Thermal_Generation_Dispatch_Example_new.jl
@@ -0,0 +1,191 @@
+# # Thermal Generation Dispatch Example
+
+#md # [![](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)](@__REPO_ROOT_URL__/examples/Thermal_Generation_Dispatch_Example.jl)
+
+# This example illustrates the sensitivity analysis of thermal generation dispatch problem.
+
+# This problem can be described as the choice of thermal generation `g` given a demand `d`, a price for thermal generation `c` and a penalty price `c_{ϕ}` for any demand not attended ϕ.
+
+# ```math
+# \begin{split}
+# \begin{array} {ll}
+# \mbox{minimize} & \sum_{i=1}^{N} c_{i} g_{i} + c_{\phi} \phi \\
+# \mbox{s.t.} & g_{i} \ge 0 \quad i=1..N  \\
+#             & g_{i} \le G_{i} \quad i=1..N  \\
+#             & \sum_{i=1}^{N} g_{i} + \phi = d\\
+# \end{array}
+# \end{split}
+# ```
+# where
+# - `G_{i}` is the maximum possible generation for a thermal generator `i`
+
+# ## Define and solve the Thermal Dispatch Problem
+
+# First, import the libraries.
+
+using Test
+using JuMP
+import DiffOpt
+import LinearAlgebra: dot
+import HiGHS
+import MathOptInterface as MOI
+import Plots
+
+# Define the model that will be construct given a set of parameters.
+
+function generate_model(
+    d_data::Float64;
+    g_sup::Vector{Float64},
+    c_g::Vector{Float64},
+    c_ϕ::Float64,
+)
+    ## Creation of the Model and Parameters
+    model = DiffOpt.quadratic_diff_model(HiGHS.Optimizer)
+    set_silent(model)
+    I = length(g_sup)
+
+    ## Variables
+    @variable(model, g[i in 1:I] >= 0.0)
+    @variable(model, ϕ >= 0.0)
+
+    ## Parameters
+    @variable(model, d in Parameter(d_data))
+
+    ## Constraints
+    @constraint(model, limit_constraints_sup[i in 1:I], g[i] <= g_sup[i])
+    @constraint(model, demand_constraint, sum(g) + ϕ == d)
+
+    ## Objectives
+    @objective(model, Min, dot(c_g, g) + c_ϕ * ϕ)
+
+    ## Solve the model
+    optimize!(model)
+
+    ## Return the solved model
+    return model
+end
+
+# Define the functions that will get the primal values `g` and `\phi` and sensitivity analysis of the demand `dg/dd` and `d\phi/dd` from a optimized model.
+
+function diff_forward(model::Model, ϵ::Float64 = 1.0)
+    ## Initialization of parameters and references to simplify the notation
+    vect_ref = [model[:g]; model[:ϕ]]
+
+    ## Get the primal solution of the model
+    vect = value.(vect_ref)
+
+    ## Reset the sensitivities of the model
+    DiffOpt.empty_input_sensitivities(model)
+
+    ## Pass the perturbation to the DiffOpt Framework
+    DiffOpt.set_forward_parameter(model, model[:d], ϵ)
+
+    ## Compute the derivatives with the Forward Mode
+    DiffOpt.forward_differentiate!(model)
+
+    ## Get the derivative of the model
+    dvect = DiffOpt.get_forward_variable.(model, vect_ref)
+
+    ## Return the values as a vector
+    return [vect; dvect]
+end
+
+function diff_reverse(model::Model, ϵ::Float64 = 1.0)
+    ## Initialization of parameters and references to simplify the notation
+    vect_ref = [model[:g]; model[:ϕ]]
+
+    ## Get the primal solution of the model
+    vect = value.(vect_ref)
+
+    ## Set variables needed for the DiffOpt Backward Framework
+    dvect = Array{Float64,1}(undef, length(vect_ref))
+
+    ## Loop for each primal variable
+    for i in 1:I+1
+        ## Reset the sensitivities of the model
+        DiffOpt.empty_input_sensitivities(model)
+
+        ## Pass the perturbation to the DiffOpt Framework
+        DiffOpt.set_reverse_variable.(model, vect_ref[i], ϵ)
+
+        ## Compute the derivatives with the Forward Mode
+        DiffOpt.reverse_differentiate!(model)
+
+        ## Get the derivative of the model
+        dvect[i] = DiffOpt.get_reverse_parameter(model, model[:d])
+    end
+
+    ## Return the values as a vector
+    return [vect; dvect]
+end
+
+# Initialize of Parameters
+
+g_sup = [10.0, 20.0, 30.0]
+I = length(g_sup)
+d = 0.0:0.1:80
+d_size = length(d)
+c_g = [1.0, 3.0, 5.0]
+c_ϕ = 10.0;
+
+# Generate models for each demand `d`
+models = generate_model.(d; g_sup = g_sup, c_g = c_g, c_ϕ = c_ϕ);
+
+# Get the results of models with the DiffOpt Forward and Backward context
+
+result_forward = diff_forward.(models)
+optimize!.(models)
+result_reverse = diff_reverse.(models);
+
+# Organization of results to plot
+# Initialize data_results that will contain every result
+data_results = Array{Float64,3}(undef, 2, d_size, 2 * (I + 1));
+
+# Populate the data_results array
+for k in 1:d_size
+    data_results[1, k, :] = result_forward[k]
+    data_results[2, k, :] = result_reverse[k]
+end
+
+# ## Results with Plot graphs
+# ### Results for the forward context
+# Result Primal Values:
+Plots.plot(
+    d,
+    data_results[1, :, 1:I+1];
+    title = "Generation by Demand",
+    label = ["Thermal Generation 1" "Thermal Generation 2" "Thermal Generation 3" "Generation Deficit"],
+    xlabel = "Demand [unit]",
+    ylabel = "Generation [unit]",
+)
+
+# Result Sensitivity Analysis:
+Plots.plot(
+    d,
+    data_results[1, :, I+2:2*(I+1)];
+    title = "Sensitivity of Generation by Demand",
+    label = ["T. Gen. 1 Sensitivity" "T. Gen. 2 Sensitivity" "T. Gen. 3 Sensitivity" "Gen. Deficit Sensitivity"],
+    xlabel = "Demand [unit]",
+    ylabel = "Sensitivity [-]",
+)
+
+# ### Results for the reverse context
+# Result Primal Values:
+Plots.plot(
+    d,
+    data_results[2, :, 1:I+1];
+    title = "Generation by Demand",
+    label = ["Thermal Generation 1" "Thermal Generation 2" "Thermal Generation 3" "Generation Deficit"],
+    xlabel = "Demand [unit]",
+    ylabel = "Generation [unit]",
+)
+
+# Result Sensitivity Analysis:
+Plots.plot(
+    d,
+    data_results[2, :, I+2:2*(I+1)];
+    title = "Sensitivity of Generation by Demand",
+    label = ["T. Gen. 1 Sensitivity" "T. Gen. 2 Sensitivity" "T. Gen. 3 Sensitivity" "Gen. Deficit Sensitivity"],
+    xlabel = "Demand [unit]",
+    ylabel = "Sensitivity [-]",
+)
diff --git a/docs/src/examples/autotuning-ridge_new.jl b/docs/src/examples/autotuning-ridge_new.jl
new file mode 100644
index 000000000..1bf24ef3c
--- /dev/null
+++ b/docs/src/examples/autotuning-ridge_new.jl
@@ -0,0 +1,222 @@
+# # Auto-tuning Hyperparameters (JuMP API)
+
+#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/autotuning-ridge.jl)
+
+# This example shows how to learn a hyperparameter in Ridge Regression using a gradient descent routine.
+# Let the regularized regression problem be formulated as:
+
+# ```math
+# \begin{equation}
+# \min_{w} \quad \frac{1}{2nd} \sum_{i=1}^{n} (w^T x_{i} - y_i)^2 + \frac{\alpha}{2d} \| w \|_2^2
+# \end{equation}
+# ```
+
+# where
+# - `x`, `y` are the data points
+# - `w` are the learned weights
+# - `α` is the hyperparameter acting on regularization.
+
+# The main optimization model will be formulated with JuMP.
+# Using the gradient of the optimal weights with respect to the regularization parameters
+# computed with DiffOpt, we can perform a gradient descent on top of the inner model
+# to minimize the test loss.
+
+# This tutorial uses the following packages
+
+using JuMP     # The mathematical programming modelling language
+import DiffOpt # JuMP extension for differentiable optimization
+import Ipopt   # Optimization solver that handles quadratic programs
+import LinearAlgebra
+import Plots
+import Random
+
+# ## Generating a noisy regression dataset
+
+Random.seed!(42)
+
+N = 100
+D = 20
+noise = 5
+
+w_real = 10 * randn(D)
+X = 10 * randn(N, D)
+y = X * w_real + noise * randn(N)
+l = N ÷ 2  # test train split
+
+X_train = X[1:l, :]
+X_test = X[l+1:N, :]
+y_train = y[1:l]
+y_test = y[l+1:N];
+
+# ## Defining the regression problem
+
+# We implement the regularized regression problem as a function taking the problem data,
+# building a JuMP model and solving it.
+
+function build_fit_ridge(X, y, α_val = 1.0)
+    model = DiffOpt.nonlinear_diff_optimizer(Ipopt.Optimizer)
+    set_silent(model)
+    N, D = size(X)
+    @variable(model, w[1:D])
+    @variable(model, α in Parameter(α_val))
+    @expression(model, err_term, X * w - y)
+    @objective(
+        model,
+        Min,
+        LinearAlgebra.dot(err_term, err_term) / (2 * N * D) +
+        α * LinearAlgebra.dot(w, w) / (2 * D),
+    )
+    return model
+end
+
+function optimize_fit_ridge!(model, α_val)
+    set_parameter_value(model[:α], α_val)
+    optimize!(model)
+    return value.(model[:w])
+end
+
+# We can solve the problem for several values of α
+# to visualize the effect of regularization on the testing and training loss.
+
+αs = 0.00:0.01:0.50
+mse_test = Float64[]
+mse_train = Float64[]
+model = build_fit_ridge(X, y)
+(Ntest, D) = size(X_test)
+(Ntrain, D) = size(X_train)
+for α in αs
+    ŵ = optimize_fit_ridge!(model, α)
+    ŷ_test = X_test * ŵ
+    ŷ_train = X_train * ŵ
+    push!(mse_test, LinearAlgebra.norm(ŷ_test - y_test)^2 / (2 * Ntest * D))
+    push!(
+        mse_train,
+        LinearAlgebra.norm(ŷ_train - y_train)^2 / (2 * Ntrain * D),
+    )
+end
+
+# Visualize the Mean Score Error metric
+
+Plots.plot(
+    αs,
+    mse_test ./ sum(mse_test);
+    label = "MSE test",
+    xaxis = "α",
+    yaxis = "MSE",
+    legend = (0.8, 0.2),
+    width = 3,
+)
+Plots.plot!(
+    αs,
+    mse_train ./ sum(mse_train);
+    label = "MSE train",
+    linestyle = :dash,
+    width = 3,
+)
+Plots.title!("Normalized MSE on training and testing sets")
+
+# ## Leveraging differentiable optimization: computing the derivative of the solution
+
+# Using DiffOpt, we can compute `∂w_i/∂α`, the derivative of the learned solution `̂w`
+# w.r.t. the regularization parameter.
+
+function compute_dw_dα(model, w)
+    D = length(w)
+    dw_dα = zeros(D)
+    DiffOpt.set_forward_parameter(model, model[:α], 1.0)
+    DiffOpt.forward_differentiate!(model)
+    for i in 1:D
+        dw_dα[i] = DiffOpt.get_forward_variable(model, w[i])
+    end
+    return dw_dα
+end
+
+# Using `∂w_i/∂α` computed with `compute_dw_dα`,
+# we can compute the derivative of the test loss w.r.t. the parameter α
+# by composing derivatives.
+
+function d_testloss_dα(model, X_test, y_test, ŵ)
+    N, D = size(X_test)
+    dw_dα = compute_dw_dα(model, model[:w])
+    err_term = X_test * ŵ - y_test
+    return sum(eachindex(err_term)) do i
+        return LinearAlgebra.dot(X_test[i, :], dw_dα) * err_term[i]
+    end / (N * D)
+end
+
+# We can define a meta-optimizer function performing gradient descent
+# on the test loss w.r.t. the regularization parameter.
+
+function descent(α0, max_iters = 100; fixed_step = 0.01, grad_tol = 1e-3)
+    α_s = Float64[]
+    ∂α_s = Float64[]
+    test_loss = Float64[]
+    α = α0
+    N, D = size(X_test)
+    model = build_fit_ridge(X_train, y_train)
+    for iter in 1:max_iters
+        ŵ = optimize_fit_ridge!(model, α)
+        err_term = X_test * ŵ - y_test
+        ∂α = d_testloss_dα(model, X_test, y_test, ŵ)
+        push!(α_s, α)
+        push!(∂α_s, ∂α)
+        push!(test_loss, LinearAlgebra.norm(err_term)^2 / (2 * N * D))
+        α -= fixed_step * ∂α
+        if abs(∂α) ≤ grad_tol
+            break
+        end
+    end
+    return α_s, ∂α_s, test_loss
+end
+
+ᾱ, ∂ᾱ, msē = descent(0.10, 500)
+iters = 1:length(ᾱ);
+
+# Visualize gradient descent and convergence
+
+Plots.plot(
+    αs,
+    mse_test;
+    label = "MSE test",
+    xaxis = ("α"),
+    legend = :topleft,
+    width = 2,
+)
+Plots.plot!(ᾱ, msē; label = "learned α", width = 5, style = :dot)
+Plots.title!("Regularizer learning")
+
+# Visualize the convergence of α to its optimal value
+
+Plots.plot(
+    iters,
+    ᾱ;
+    label = nothing,
+    color = :blue,
+    xaxis = ("Iterations"),
+    legend = :bottom,
+    title = "Convergence of α",
+)
+
+# Visualize the convergence of the objective function
+
+Plots.plot(
+    iters,
+    msē;
+    label = nothing,
+    color = :red,
+    xaxis = ("Iterations"),
+    legend = :bottom,
+    title = "Convergence of MSE",
+)
+
+# Visualize the convergence of the derivative to zero
+
+Plots.plot(
+    iters,
+    ∂ᾱ;
+    label = nothing,
+    color = :green,
+    xaxis = ("Iterations"),
+    legend = :bottom,
+    title = "Convergence of ∂α",
+)
diff --git a/docs/src/examples/custom-relu_new.jl b/docs/src/examples/custom-relu_new.jl
new file mode 100644
index 000000000..fef842286
--- /dev/null
+++ b/docs/src/examples/custom-relu_new.jl
@@ -0,0 +1,122 @@
+# # Custom ReLU layer
+
+#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/custom-relu.jl)
+
+# We demonstrate how DiffOpt can be used to generate a simple neural network
+# unit - the ReLU layer. A neural network is created using Flux.jl and
+# trained on the MNIST dataset.
+
+# This tutorial uses the following packages
+
+using JuMP
+import DiffOpt
+import Ipopt
+import ChainRulesCore
+import Flux
+import MLDatasets
+import Statistics
+import Base.Iterators: repeated
+using LinearAlgebra
+
+# ## The ReLU and its derivative
+
+# Define a relu through an optimization problem solved by a quadratic solver.
+# Return the solution of the problem.
+function matrix_relu(
+    y_data::Matrix;
+    model = DiffOpt.nonlinear_diff_optimizer(Ipopt.Optimizer),
+)
+    layer_size, batch_size = size(y)
+    empty!(model)
+    set_silent(model)
+    @variable(model, x[1:layer_size, 1:batch_size] >= 0)
+    @variable(model, y[1:layer_size, 1:batch_size] in Parameter.(y_data))
+    @objective(model, Min, x[:]'x[:] - 2y[:]'x[:])
+    optimize!(model)
+    return Float32.(value.(x))
+end
+
+# Define the reverse differentiation rule, for the function we defined above.
+function ChainRulesCore.rrule(::typeof(matrix_relu), y::Matrix{T}) where {T}
+    model = DiffOpt.nonlinear_diff_optimizer(Ipopt.Optimizer)
+    pv = matrix_relu(y; model = model)
+    function pullback_matrix_relu(dl_dx)
+        ## some value from the backpropagation (e.g., loss) is denoted by `l`
+        ## so `dl_dy` is the derivative of `l` wrt `y`
+        x = model[:x]::Matrix{JuMP.VariableRef} # load decision variable `x` into scope
+        y = model[:y]::Matrix{JuMP.VariableRef} # load parameter variable `y` into scope
+        ## set sensitivities (dl/dx)
+        DiffOpt.set_reverse_variable.(model, x[:], dl_dx[:])
+        ## compute grad (dx/dy)
+        DiffOpt.reverse_differentiate!(model)
+        ## return gradient (dl/dy = dl/dx * dx/dy)
+        dl_dy = DiffOpt.get_reverse_parameter.(model, y[:])
+        return (ChainRulesCore.NoTangent(), dl_dy)
+    end
+    return pv, pullback_matrix_relu
+end
+
+# For more details about backpropagation, visit [Introduction, ChainRulesCore.jl](https://juliadiff.org/ChainRulesCore.jl/dev/).
+
+# ## Define the network
+
+layer_size = 10
+m = Flux.Chain(
+    Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28)
+    matrix_relu,
+    Flux.Dense(layer_size, 10), # 10 being the number of outcomes (0 to 9)
+    Flux.softmax,
+)
+
+# ## Prepare data
+
+N = 1000 # batch size
+## Preprocessing train data
+imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:N]
+labels = MLDatasets.MNIST(; split = :train).targets[1:N]
+train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) # stack images
+train_Y = Flux.onehotbatch(labels, 0:9);
+## Preprocessing test data
+test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:N]
+test_labels = MLDatasets.MNIST(; split = :test).targets[1:N];
+test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), N))
+test_Y = Flux.onehotbatch(test_labels, 0:9);
+
+# Define input data
+# The original data is repeated `epochs` times because `Flux.train!` only
+# loops through the data set once
+
+epochs = 50 # ~1 minute (i7 8th gen with 16gb RAM)
+## epochs = 100 # leads to 77.8% in about 2 minutes
+dataset = repeated((train_X, train_Y), epochs);
+
+# ## Network training
+
+# training loss function, Flux optimizer
+custom_loss(m, x, y) = Flux.crossentropy(m(x), y)
+opt = Flux.setup(Flux.Adam(), m)
+
+# Train to optimize network parameters
+
+@time Flux.train!(custom_loss, m, dataset, opt);
+
+# Although our custom implementation takes time, it is able to reach similar
+# accuracy as the usual ReLU function implementation.
+
+# ## Accuracy results
+
+# Average of correct guesses
+
+accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));
+
+# Training accuracy
+
+accuracy(train_X, train_Y)
+
+# Test accuracy
+
+accuracy(test_X, test_Y)
+
+# Note that the accuracy is low due to simplified training.
+# It is possible to increase the number of samples `N`,
+# the number of epochs `epoch` and the connectivity `inner`.
diff --git a/docs/src/examples/matrix-inversion-manual_new.jl b/docs/src/examples/matrix-inversion-manual_new.jl
new file mode 100644
index 000000000..ab332544b
--- /dev/null
+++ b/docs/src/examples/matrix-inversion-manual_new.jl
@@ -0,0 +1,150 @@
+# # Differentiating a QP wrt a single variable
+
+#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/matrix-inversion-manual.jl)
+
+# Consider the quadratic program
+
+# ```math
+# \begin{split}
+# \begin{array} {ll}
+# \mbox{minimize} & \frac{1}{2} x^T Q x + q^T x \\
+# \mbox{subject to} & G x \leq h, x \in \mathcal{R}^2 \\
+# \end{array}
+# \end{split}
+# ```
+
+# where `Q`, `q`, `G` are fixed and `h` is the single parameter.
+
+# In this example, we'll try to differentiate the QP wrt `h`, by finding its
+# jacobian by hand (using Eqn (6) of [QPTH article](https://arxiv.org/pdf/1703.00443.pdf))
+# and compare the results:
+# - Manual compuation
+# - Using JuMP and DiffOpt
+
+# Assuming 
+# ```
+# Q = [[4, 1], [1, 2]]
+# q = [1, 1]
+# G = [1, 1]
+# ```
+# and begining with a starting value of `h=-1`
+
+# few values just for reference
+
+# | variable | optimal value | note |
+# |----|------|-----|
+# | x* | [-0.25; -0.75] | Primal optimal | 
+# | 𝜆∗ | -0.75 | Dual optimal | 
+
+# ## Finding Jacobian using matrix inversion
+# Lets formulate Eqn (6) of [QPTH article](https://arxiv.org/pdf/1703.00443.pdf) for our QP. If we assume `h` as the only parameter and `Q`,`q`,`G` as fixed problem data - also note that our QP doesn't involves `Ax=b` constraint - then Eqn (6) reduces to 
+# ```math
+# \begin{gather}
+#  \begin{bmatrix} 
+#      Q & G^T \\
+#      \lambda^* G & G x^* - h
+#  \end{bmatrix}
+#  \begin{bmatrix} 
+#      dx \\
+#      d \lambda
+#  \end{bmatrix}
+#  =
+#   \begin{bmatrix}
+#    0 \\
+#    \lambda^* dh
+#    \end{bmatrix}
+# \end{gather}
+# ```
+
+# Now to find the jacobians $$ \frac{\partial x}{\partial h}, \frac{\partial \lambda}{\partial h}$$
+# we substitute `dh = I = [1]` and plug in values of `Q`,`q`,`G` to get
+# ```math
+# \begin{gather}
+#  \begin{bmatrix} 
+#      4 & 1 & 1 \\
+#      1 & 2 & 1 \\
+#      -0.75 & -0.75 & 0
+#  \end{bmatrix}
+#  \begin{bmatrix} 
+#      \frac{\partial x_1}{\partial h} \\
+#      \frac{\partial x_2}{\partial h} \\
+#      \frac{\partial \lambda}{\partial h}
+#  \end{bmatrix}
+#  =
+#   \begin{bmatrix}
+#    0 \\
+#    0 \\
+#    -0.75
+#    \end{bmatrix}
+# \end{gather}
+# ```
+
+# Upon solving using matrix inversion, the jacobian is
+# ```math
+# \frac{\partial x_1}{\partial h} = 0.25, \frac{\partial x_2}{\partial h} = 0.75, \frac{\partial \lambda}{\partial h} = -1.75 
+# ```
+
+# ## Finding Jacobian using JuMP and DiffOpt
+
+using JuMP
+import DiffOpt
+import Ipopt
+
+n = 2 # variable dimension
+m = 1; # no of inequality constraints
+
+Q = [4.0 1.0; 1.0 2.0]
+q = [1.0; 1.0]
+G = [1.0 1.0;]
+h = [-1.0;]   # initial values set
+
+# Initialize empty model
+
+model = DiffOpt.quadratic_diff_optimizer(Ipopt.Optimizer)
+set_silent(model)
+
+# Add the variables
+
+@variable(model, x[1:2])
+
+# Add the parameters
+
+@variable(model, y[1:length(h)] in Parameter.(h))
+
+# Add the constraints.
+
+@constraint(model, cons[j in 1:1], sum(G[j, i] * x[i] for i in 1:2) <= y[j]);
+
+@objective(
+    model,
+    Min,
+    1 / 2 * sum(Q[j, i] * x[i] * x[j] for i in 1:2, j in 1:2) +
+    sum(q[i] * x[i] for i in 1:2)
+)
+
+# Solve problem
+
+optimize!(model)
+
+# primal solution
+
+value.(x)
+
+# dual solution
+
+dual.(cons)
+
+# set sensitivitity
+
+DiffOpt.set_forward_parameter(model, y, 1.0)
+
+# Compute derivatives
+
+DiffOpt.forward_differentiate!(model)
+
+# Query derivative
+
+dx = DiffOpt.get_forward_variable(model, x)
+
+using Test                                  #src
+@test dx ≈ [0.25, 0.75] atol = 1e-4 rtol = 1e-4 #src
diff --git a/docs/src/examples/nearest_correlation_new.jl b/docs/src/examples/nearest_correlation_new.jl
new file mode 100644
index 000000000..d22bdd0e9
--- /dev/null
+++ b/docs/src/examples/nearest_correlation_new.jl
@@ -0,0 +1,66 @@
+# # Nearest correlation
+
+#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/nearest_correlation.jl)
+
+# This example illustrates the sensitivity analysis of the nearest correlation problem studied in [H02].
+#
+# Higham, Nicholas J.
+# *Computing the nearest correlation matrix—a problem from finance.*
+# IMA journal of Numerical Analysis 22.3 (2002): 329-343.
+
+using DiffOpt, JuMP, SCS, LinearAlgebra
+solver = SCS.Optimizer
+
+function proj(A, dH = Diagonal(ones(size(A, 1))), H_data = ones(size(A)))
+    n = LinearAlgebra.checksquare(A)
+    model = Model(() -> DiffOpt.diff_optimizer(solver))
+    @variable(model, X[1:n, 1:n] in PSDCone())
+    @variable(model, H[1:n, 1:n] in Parameter.(H_data))
+    @variable(model, E[1:n, 1:n])
+    @constraint(model, [i in 1:n], X[i, i] == 1)
+    @constraint(model, E .== (H .* (X .- A)))
+    @objective(model, Min, sum(E .^ 2))
+    for i in 1:n
+        DiffOpt.set_forward_parameter(model, H[i, i], dH[i, i])
+    end
+    optimize!(model)
+    DiffOpt.forward_differentiate!(model)
+    dX = DiffOpt.get_forward_variable.(model, X)
+    return value.(X), dX
+end
+
+# Example from [H02, p. 334-335]:
+
+A = LinearAlgebra.Tridiagonal(ones(2), ones(3), ones(2))
+
+# The projection is computed as follows:
+
+X, dX = proj(A)
+nothing # hide
+
+# The projection of `A` is:
+
+X
+
+# The derivative of the projection with respect to a uniform increase of the weights
+# of the diagonal entries is:
+
+dX
+
+# Example from [H02, Section 4, p. 340]:
+
+A = LinearAlgebra.Tridiagonal(-ones(3), 2ones(4), -ones(3))
+
+# The projection is computed as follows:
+
+X, dX = proj(A)
+nothing # hide
+
+# The projection of `A` is:
+
+X
+
+# The derivative of the projection with respect to a uniform increase of the weights
+# of the diagonal entries is:
+
+dX
diff --git a/docs/src/examples/polyhedral_project_new.jl b/docs/src/examples/polyhedral_project_new.jl
new file mode 100644
index 000000000..9117157a2
--- /dev/null
+++ b/docs/src/examples/polyhedral_project_new.jl
@@ -0,0 +1,169 @@
+# # Polyhedral QP layer
+
+#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/polyhedral_project.jl)
+
+# We use DiffOpt to define a custom network layer which, given an input matrix `y`,
+# computes its projection onto a polytope defined by a fixed number of inequalities:
+# `a_i^T x ≥ b_i`.
+# A neural network is created using Flux.jl and trained on the MNIST dataset,
+# integrating this quadratic optimization layer.
+#
+# The QP is solved in the forward pass, and its DiffOpt derivative is used in the backward pass expressed with `ChainRulesCore.rrule`.
+
+# This example is similar to the custom ReLU layer, except that the layer is parameterized
+# by the hyperplanes `(w,b)` and not a simple stateless function.
+# This also means that `ChainRulesCore.rrule` must return the derivatives of the output with respect to the
+# layer parameters to allow for backpropagation.
+
+using JuMP
+import DiffOpt
+import Ipopt
+import ChainRulesCore
+import Flux
+import MLDatasets
+import Statistics
+using Base.Iterators: repeated
+using LinearAlgebra
+using Random
+
+Random.seed!(42)
+
+# ## The Polytope representation and its derivative
+
+struct Polytope{N}
+    w::NTuple{N,Vector{Float64}}
+    b::Vector{Float64}
+end
+
+Polytope(w::NTuple{N}) where {N} = Polytope{N}(w, randn(N))
+
+# We define a "call" operation on the polytope, making it a so-called functor.
+# Calling the polytope with a matrix `y` operates an Euclidean projection of this matrix onto the polytope.
+function (polytope::Polytope{N})(
+    y_data::AbstractMatrix;
+    model = DiffOpt.nonlinear_diff_optimizer(Ipopt.Optimizer),
+) where {N}
+    layer_size, batch_size = size(y)
+    empty!(model)
+    set_silent(model)
+    @variable(model, x[1:layer_size, 1:batch_size])
+    @variable(model, y[1:layer_size, 1:batch_size] in Parameter.(y_data))
+    @variable(model, b[1:N] in Parameter.(polytope.b[idx]))
+    @variable(
+        model,
+        w[idx = 1:N, i = 1:layer_size] in Parameter(polytope.w[idx][i])
+    )
+    @constraint(
+        model,
+        greater_than_cons[idx in 1:N, sample in 1:batch_size],
+        dot(polytope.w[idx], x[:, sample]) ≥ b[idx]
+    )
+    @objective(model, Min, dot(x - y, x - y))
+    optimize!(model)
+    return Float32.(JuMP.value.(x))
+end
+
+# The `@functor` macro from Flux implements auxiliary functions for collecting the parameters of
+# our custom layer and operating backpropagation.
+Flux.@functor Polytope
+
+# Define the reverse differentiation rule, for the function we defined above.
+# Flux uses ChainRules primitives to implement reverse-mode differentiation of the whole network.
+# To learn the current layer (the polytope the layer contains),
+# the gradient is computed with respect to the `Polytope` fields in a ChainRulesCore.Tangent type
+# which is used to represent derivatives with respect to structs.
+# For more details about backpropagation, visit [Introduction, ChainRulesCore.jl](https://juliadiff.org/ChainRulesCore.jl/dev/).
+
+function ChainRulesCore.rrule(
+    polytope::Polytope{N},
+    y_data::AbstractMatrix,
+) where {N}
+    model = DiffOpt.nonlinear_diff_optimizer(Ipopt.Optimizer)
+    xv = polytope(y_data; model = model)
+    function pullback_matrix_projection(dl_dx)
+        dl_dx = ChainRulesCore.unthunk(dl_dx)
+        ##  `dl_dy` is the derivative of `l` wrt `y`
+        x = model[:x]::Matrix{JuMP.VariableRef}
+        y = model[:y]::Matrix{JuMP.VariableRef}
+        w = model[:w]::Matrix{JuMP.VariableRef}
+        b = model[:b]::Vector{JuMP.VariableRef}
+        layer_size, batch_size = size(x)
+        ## set sensitivities
+        DiffOpt.set_reverse_variable.(model, x, dl_dx)
+        ## compute grad
+        DiffOpt.reverse_differentiate!(model)
+        ## compute gradient wrt objective function parameter y
+        dl_dy = DiffOpt.get_reverse_parameter.(model, y)
+        ## compute gradient wrt objective function parameter w and b
+        _dl_dw = DiffOpt.get_reverse_parameter.(model, w)
+        dl_dw = zero.(polytope.w)
+        for idx in 1:N
+            dl_dw[idx] = _dl_dw[:, idx]
+        end
+        dl_db = DiffOpt.get_reverse_parameter.(model, b)
+        dself = ChainRulesCore.Tangent{Polytope{N}}(; w = dl_dw, b = dl_db)
+        return (dself, dl_dy)
+    end
+    return xv, pullback_matrix_projection
+end
+
+# ## Define the Network
+
+layer_size = 20
+m = Flux.Chain(
+    Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28)
+    Polytope((randn(layer_size), randn(layer_size), randn(layer_size))),
+    Flux.Dense(layer_size, 10), # 10 being the number of outcomes (0 to 9)
+    Flux.softmax,
+)
+
+# ## Prepare data
+
+M = 500 # batch size
+## Preprocessing train data
+imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:M]
+labels = MLDatasets.MNIST(; split = :train).targets[1:M]
+train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) # stack images
+train_Y = Flux.onehotbatch(labels, 0:9);
+## Preprocessing test data
+test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:M]
+test_labels = MLDatasets.MNIST(; split = :test).targets[1:M]
+test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M))
+test_Y = Flux.onehotbatch(test_labels, 0:9);
+
+# Define input data
+# The original data is repeated `epochs` times because `Flux.train!` only
+# loops through the data set once
+
+epochs = 50
+dataset = repeated((train_X, train_Y), epochs);
+
+# ## Network training
+
+# training loss function, Flux optimizer
+custom_loss(m, x, y) = Flux.crossentropy(m(x), y)
+opt = Flux.setup(Flux.Adam(), m)
+
+# Train to optimize network parameters
+
+@time Flux.train!(custom_loss, m, dataset, opt);
+
+# Although our custom implementation takes time, it is able to reach similar
+# accuracy as the usual ReLU function implementation.
+
+# ## Accuracy results
+
+# Average of correct guesses
+accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));
+
+# Training accuracy
+
+accuracy(train_X, train_Y)
+
+# Test accuracy
+
+accuracy(test_X, test_Y)
+
+# Note that the accuracy is low due to simplified training.
+# It is possible to increase the number of samples `N`,
+# the number of epochs `epoch` and the connectivity `inner`.
diff --git a/docs/src/examples/sensitivity-analysis-ridge_new.jl b/docs/src/examples/sensitivity-analysis-ridge_new.jl
new file mode 100644
index 000000000..7ec3d5394
--- /dev/null
+++ b/docs/src/examples/sensitivity-analysis-ridge_new.jl
@@ -0,0 +1,170 @@
+# # Sensitivity Analysis of Ridge Regression
+
+#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/sensitivity-analysis-ridge.jl)
+
+# This example illustrates the sensitivity analysis of data points in a
+# [Ridge Regression](https://en.wikipedia.org/wiki/Ridge_regression) problem.
+# The general form of the problem is given below:
+
+# ```math
+# \begin{split}
+# \begin{array} {ll}
+# \mbox{minimize} & \sum_{i=1}^{N} (y_{i} - w x_{i} - b)^2 + \alpha (w^2 + b^2) \\
+# \end{array}
+# \end{split}
+# ```
+# where
+# - `w`, `b` are slope and intercept of the regressing line
+# - `x`, `y` are the N data points
+# - `α` is the regularization constant
+#
+# which is equivalent to:
+# ```math
+# \begin{split}
+# \begin{array} {ll}
+# \mbox{minimize} & e^{\top}e + \alpha (w^2) \\
+# \mbox{s.t.} & e_{i} = y_{i} - w x_{i} - b \quad \quad i=1..N  \\
+# \end{array}
+# \end{split}
+# ```
+
+# This tutorial uses the following packages
+
+using JuMP
+import DiffOpt
+import Random
+import Ipopt
+import Plots
+using LinearAlgebra: dot
+
+# ## Define and solve the problem
+
+# Construct a set of noisy (guassian) data points around a line.
+
+Random.seed!(42)
+
+N = 150
+
+w = 2 * abs(randn())
+b = rand()
+X = randn(N)
+Y = w * X .+ b + 0.8 * randn(N);
+
+# The helper method `fit_ridge` defines and solves the corresponding model.
+# The ridge regression is modeled with quadratic programming
+# (quadratic objective and linear constraints) and solved in generic methods
+# of Ipopt. This is not the standard way of solving the ridge regression problem
+# this is done here for didactic purposes.
+
+function build_fit_ridge(X_data, Y_data, alpha = 0.1)
+    N = length(Y)
+    ## Initialize a JuMP Model with Ipopt solver
+    model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    set_silent(model)
+    @variable(model, w) # angular coefficient
+    @variable(model, b) # linear coefficient
+    @variable(model, α in Parameter(alpha)) # regularization parameter
+    @variable(model, X[1:N] in Parameter.(X_data))
+    @variable(model, Y[1:N] in Parameter.(Y_data))
+    ## expression defining approximation error
+    @expression(model, e[i = 1:N], Y[i] - w * X[i] - b)
+    ## objective minimizing squared error and ridge penalty
+    @objective(model, Min, 1 / N * dot(e, e) + α * (w^2))
+    optimize!(model)
+    return model
+end
+
+function optimize_fit_ridge!(model, alpha)
+    set_parameter_value(model[:α], alpha)
+    optimize!(model)
+    return value(model[:w]), value(model[:b])
+end
+
+# Plot the data points and the fitted line for different alpha values
+
+p = Plots.scatter(X, Y; label = nothing, legend = :topleft)
+mi, ma = minimum(X), maximum(X)
+Plots.title!("Fitted lines and points")
+
+model = build_fit_ridge(X, Y)
+for alpha in 0.5:0.5:1.5
+    local w, b = optimize_fit_ridge!(model, alpha)
+    ŵ = value(w)
+    b̂ = value(b)
+    Plots.plot!(
+        p,
+        [mi, ma],
+        [mi * ŵ + b̂, ma * ŵ + b̂];
+        label = "alpha=$alpha",
+        width = 2,
+    )
+end
+p
+
+# ## Differentiate
+
+# Now that we've solved the problem, we can compute the sensitivity of optimal
+# values of the slope `w` with
+# respect to perturbations in the data points (`x`,`y`).
+
+alpha = 0.4
+w, b = optimize_fit_ridge!(model, alpha)
+ŵ = value(w)
+b̂ = value(b)
+
+# Sensitivity with respect to x and y
+
+∇y = zero(X)
+∇x = zero(X)
+for i in 1:N
+    # X[i] sensitivity
+    DiffOpt.empty_input_sensitivities!(model)
+    DiffOpt.set_forward_parameter(model, model[:X][i], 1.0)
+    DiffOpt.forward_differentiate!(model)
+    ∇x[i] = DiffOpt.get_forward_variable(model, w)
+    # Y[i] sensitivity
+    DiffOpt.empty_input_sensitivities!(model)
+    DiffOpt.set_forward_parameter(model, model[:Y][i], 1.0)
+    DiffOpt.forward_differentiate!(model)
+    ∇y[i] = DiffOpt.get_forward_variable(model, w)
+end
+
+# Visualize point sensitivities with respect to regression points.
+
+p = Plots.scatter(
+    X,
+    Y;
+    color = [dw < 0 ? :blue : :red for dw in ∇x],
+    markersize = [5 * abs(dw) + 1.2 for dw in ∇x],
+    label = "",
+)
+mi, ma = minimum(X), maximum(X)
+Plots.plot!(
+    p,
+    [mi, ma],
+    [mi * ŵ + b̂, ma * ŵ + b̂];
+    color = :blue,
+    label = "",
+)
+Plots.title!("Regression slope sensitivity with respect to x")
+
+#
+
+p = Plots.scatter(
+    X,
+    Y;
+    color = [dw < 0 ? :blue : :red for dw in ∇y],
+    markersize = [5 * abs(dw) + 1.2 for dw in ∇y],
+    label = "",
+)
+mi, ma = minimum(X), maximum(X)
+Plots.plot!(
+    p,
+    [mi, ma],
+    [mi * ŵ + b̂, ma * ŵ + b̂];
+    color = :blue,
+    label = "",
+)
+Plots.title!("Regression slope sensitivity with respect to y")
+
+# Note the points with less central `x` values induce a greater y sensitivity of the slope.
diff --git a/docs/src/examples/sensitivity-analysis-svm_new.jl b/docs/src/examples/sensitivity-analysis-svm_new.jl
new file mode 100644
index 000000000..8e1420c61
--- /dev/null
+++ b/docs/src/examples/sensitivity-analysis-svm_new.jl
@@ -0,0 +1,144 @@
+# # Sensitivity Analysis of SVM
+
+#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/sensitivity-analysis-svm.jl)
+
+# This notebook illustrates sensitivity analysis of data points in a [Support Vector Machine](https://en.wikipedia.org/wiki/Support-vector_machine) (inspired from [@matbesancon](http://github.com/matbesancon)'s [SimpleSVMs](http://github.com/matbesancon/SimpleSVMs.jl).)
+
+# For reference, Section 10.1 of https://online.stat.psu.edu/stat508/book/export/html/792 gives an intuitive explanation of what it means to have a sensitive hyperplane or data point. The general form of the SVM training problem is given below (with $\ell_2$ regularization):
+
+# ```math
+# \begin{split}
+# \begin{array} {ll}
+# \mbox{minimize} & \lambda||w||^2 + \sum_{i=1}^{N} \xi_{i} \\
+# \mbox{s.t.} & \xi_{i} \ge 0 \quad \quad i=1..N  \\
+#             & y_{i} (w^T X_{i} + b) \ge 1 - \xi_{i} \quad i=1..N  \\
+# \end{array}
+# \end{split}
+# ```
+# where
+# - `X`, `y` are the `N` data points
+# - `w` is the support vector
+# - `b` determines the offset `b/||w||` of the hyperplane with normal `w`
+# - `ξ` is the soft-margin loss
+# - `λ` is the $\ell_2$ regularization.
+#
+# This tutorial uses the following packages
+
+using JuMP     # The mathematical programming modelling language
+import DiffOpt # JuMP extension for differentiable optimization
+import Ipopt   # Optimization solver that handles quadratic programs
+import LinearAlgebra
+import Plots
+import Random
+
+# ## Define and solve the SVM
+
+# Construct two clusters of data points.
+
+N = 100
+D = 2
+
+Random.seed!(62)
+X_data = vcat(randn(N ÷ 2, D), randn(N ÷ 2, D) .+ [2.0, 2.0]')
+y = append!(ones(N ÷ 2), -ones(N ÷ 2))
+λ = 0.05;
+
+# Let's initialize a special model that can understand sensitivities
+
+model = DiffOpt.quadratic_diff_model(Ipopt.Optimizer)
+set_silent(model)
+
+# Add the variables
+
+@variable(model, ξ[1:N] >= 0)
+@variable(model, w[1:D])
+@variable(model, b);
+
+# Add the parameters to be differentiated
+
+@variable(model, X[1:N, 1:D] in Parameter.(X_data))
+
+# Add the constraints
+
+@constraint(
+    model,
+    con[i in 1:N],
+    y[i] * (LinearAlgebra.dot(X[i, :], w) + b) >= 1 - ξ[i]
+);
+
+# Define the objective and solve
+
+@objective(model, Min, λ * LinearAlgebra.dot(w, w) + sum(ξ))
+
+optimize!(model)
+
+# We can visualize the separating hyperplane.
+
+loss = objective_value(model)
+
+wv = value.(w)
+
+bv = value(b)
+
+svm_x = [-2.0, 4.0] # arbitrary points
+svm_y = (-bv .- wv[1] * svm_x) / wv[2]
+
+p = Plots.scatter(
+    X_data[:, 1],
+    X_data[:, 2];
+    color = [yi > 0 ? :red : :blue for yi in y],
+    label = "",
+)
+Plots.plot!(
+    p,
+    svm_x,
+    svm_y;
+    label = "loss = $(round(loss, digits=2))",
+    width = 3,
+)
+
+# ## Gradient of hyperplane wrt the data point coordinates
+
+# Now that we've solved the SVM, we can compute the sensitivity of optimal
+# values -- the separating hyperplane in our case -- with respect to
+# perturbations of the problem data -- the data points -- using DiffOpt.
+
+# How does a change in coordinates of the data points, `X`,
+# affects the position of the hyperplane?
+# This is achieved by finding gradients of `w` and `b` with respect to `X[i]`.
+
+# Begin differentiating the model.
+# analogous to varying θ in the expression:
+# ```math
+# y_{i} (w^T (X_{i} + \theta) + b) \ge 1 - \xi_{i}
+# ```
+∇ = zeros(N)
+for i in 1:N
+    for j in 1:N
+        if i == j
+            ## we consider identical perturbations on all x_i coordinates
+            DiffOpt.set_forward_parameter.(model, X[i, :], 1.0)
+        else
+            DiffOpt.set_forward_parameter.(model, X[i, :], 0.0)
+        end
+    end
+    DiffOpt.forward_differentiate!(model)
+    dw = DiffOpt.get_forward_variable(model, w)
+    db = DiffOpt.get_forward_variable(model, b)
+    ∇[i] = LinearAlgebra.norm(dw) + LinearAlgebra.norm(db)
+end
+
+# We can visualize the separating hyperplane sensitivity with respect to the data points.
+# Note that all the small numbers were converted into 1/10 of the
+# largest value to show all the points of the set.
+
+p3 = Plots.scatter(
+    X[:, 1],
+    X[:, 2];
+    color = [yi > 0 ? :red : :blue for yi in y],
+    label = "",
+    markersize = 2 * (max.(1.8∇, 0.2 * maximum(∇))),
+)
+Plots.yaxis!(p3, (-2, 4.5))
+Plots.plot!(p3, svm_x, svm_y; label = "", width = 3)
+Plots.title!("Sensitivity of the separator to data point variations")
diff --git a/src/jump_wrapper.jl b/src/jump_wrapper.jl
index 24c4c55cf..7060f074d 100644
--- a/src/jump_wrapper.jl
+++ b/src/jump_wrapper.jl
@@ -32,14 +32,15 @@ See also: [`conic_diff_model`](@ref), [`quadratic_diff_model`](@ref), [`diff_mod
 """
 function nonlinear_diff_model(
     optimizer_constructor;
+    with_parametric_opt_interface = false,
     with_bridge_type = Float64,
     with_cache::Bool = true,
 )
     inner = diff_optimizer(
         optimizer_constructor;
-        with_parametric_opt_interface = false,
-        with_bridge_type = with_bridge_type,
-        with_cache = with_cache,
+        with_parametric_opt_interface,
+        with_bridge_type,
+        with_cache,
     )
     MOI.set(inner, ModelConstructor(), NonLinearProgram.Model)
     return JuMP.direct_model(inner)
diff --git a/src/moi_wrapper.jl b/src/moi_wrapper.jl
index 35fe763b8..364285628 100644
--- a/src/moi_wrapper.jl
+++ b/src/moi_wrapper.jl
@@ -570,7 +570,9 @@ function forward_differentiate!(model::Optimizer)
             ),
         )
     end
-    for (F, S) in keys(model.input_cache.scalar_constraints.dict)
+    for (F, S) in MOI.Utilities.DoubleDicts.nonempty_outer_keys(
+        model.input_cache.scalar_constraints,
+    )
         _copy_forward_in_constraint(
             diff,
             model.index_map,
@@ -578,7 +580,9 @@ function forward_differentiate!(model::Optimizer)
             model.input_cache.scalar_constraints[F, S],
         )
     end
-    for (F, S) in keys(model.input_cache.vector_constraints.dict)
+    for (F, S) in MOI.Utilities.DoubleDicts.nonempty_outer_keys(
+        model.input_cache.vector_constraints,
+    )
         _copy_forward_in_constraint(
             diff,
             model.index_map,
diff --git a/test/jump_wrapper.jl b/test/jump_wrapper.jl
index 2a2c63110..fe9895898 100644
--- a/test/jump_wrapper.jl
+++ b/test/jump_wrapper.jl
@@ -29,24 +29,44 @@ function runtests()
 end
 
 function test_jump_api()
-    for (MODEL, SOLVER) in [
-            (DiffOpt.diff_model, Ipopt.Optimizer),
-            (DiffOpt.quadratic_diff_model, HiGHS.Optimizer),
-            (DiffOpt.quadratic_diff_model, SCS.Optimizer),
-            (DiffOpt.quadratic_diff_model, Ipopt.Optimizer),
-            # (DiffOpt.conic_diff_model, HiGHS.Optimizer),
-            # (DiffOpt.conic_diff_model, SCS.Optimizer), # conicmodel has a issue with sign
-            # (DiffOpt.conic_diff_model, Ipopt.Optimizer),
-            # (DiffOpt.nonlinear_diff_model, ()->POI.Optimizer(HiGHS.Optimizer())), # wrong results
-            # (DiffOpt.nonlinear_diff_model, ()->POI.Optimizer(SCS.Optimizer())), # needs cache
-            (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer),
+    # for (MODEL, SOLVER) in [
+    #         # (DiffOpt.diff_model, Ipopt.Optimizer),
+    #         # (DiffOpt.quadratic_diff_model, HiGHS.Optimizer),
+    #         # (DiffOpt.quadratic_diff_model, SCS.Optimizer),
+    #         # (DiffOpt.quadratic_diff_model, Ipopt.Optimizer),
+    #         # # (DiffOpt.conic_diff_model, HiGHS.Optimizer),
+    #         # # (DiffOpt.conic_diff_model, SCS.Optimizer), # conicmodel has a issue with sign
+    #         # # (DiffOpt.conic_diff_model, Ipopt.Optimizer),
+    #         # # (DiffOpt.nonlinear_diff_model, ()->POI.Optimizer(HiGHS.Optimizer())), # wrong results
+    #         # # (DiffOpt.nonlinear_diff_model, ()->POI.Optimizer(SCS.Optimizer())), # needs cache
+    #         # (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer),
+    #     ],
+    for (MODEL) in [
+            DiffOpt.diff_model(Ipopt.Optimizer),
+            DiffOpt.quadratic_diff_model(HiGHS.Optimizer),
+            DiffOpt.quadratic_diff_model(SCS.Optimizer),
+            DiffOpt.quadratic_diff_model(Ipopt.Optimizer),
+            # (DiffOpt.conic_diff_model(HiGHS.Optimizer),
+            # (DiffOpt.conic_diff_model(SCS.Optimizer), # conicmodel has a issue with sign
+            # (DiffOpt.conic_diff_model(Ipopt.Optimizer),
+            DiffOpt.nonlinear_diff_model(
+                HiGHS.Optimizer;
+                with_parametric_opt_interface = true,
+            ), # wrong results
+            DiffOpt.nonlinear_diff_model(
+                SCS.Optimizer;
+                with_parametric_opt_interface = true,
+            ), # wrong results
+            DiffOpt.nonlinear_diff_model(Ipopt.Optimizer),
         ],
         ineq in [true, false],
         min in [true, false],
         flip in [true, false]
 
-        @testset "$(MODEL) with: $(SOLVER), $(ineq ? "ineqs" : "eqs"), $(min ? "Min" : "Max"), $(flip ? "geq" : "leq")" begin
-            model = MODEL(SOLVER)
+        # @testset "$(MODEL) with: $(SOLVER), $(ineq ? "ineqs" : "eqs"), $(min ? "Min" : "Max"), $(flip ? "geq" : "leq")" begin
+        # model = MODEL(SOLVER)
+        @testset "$(MODEL), $(ineq ? "ineqs" : "eqs"), $(min ? "Min" : "Max"), $(flip ? "geq" : "leq")" begin
+            model = MODEL
             set_silent(model)
 
             p_val = 4.0

From 22d0567bf6b3f19c722f045b3238b0aa22c54d14 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Fri, 4 Jul 2025 11:04:49 -0300
Subject: [PATCH 04/22] remove printing

---
 src/QuadraticProgram/QuadraticProgram.jl | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/QuadraticProgram/QuadraticProgram.jl b/src/QuadraticProgram/QuadraticProgram.jl
index 568e31be6..b3c06536c 100644
--- a/src/QuadraticProgram/QuadraticProgram.jl
+++ b/src/QuadraticProgram/QuadraticProgram.jl
@@ -172,9 +172,6 @@ end
 
 function MOI.set(model::Model, ::MOI.ConstraintDualStart, ci::LE, value)
     MOI.throw_if_not_valid(model, ci)
-    @show ci
-    @show value
-    @show model
     return DiffOpt._enlarge_set(
         model.λ,
         MOI.Utilities.rows(_inequalities(model), ci),

From fe5dcc6d7ec0a480da2fc73c47272cae226380df Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Fri, 4 Jul 2025 11:08:38 -0300
Subject: [PATCH 05/22] remove printing

---
 src/copy_dual.jl | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/copy_dual.jl b/src/copy_dual.jl
index b28b11c36..4b712aa4b 100644
--- a/src/copy_dual.jl
+++ b/src/copy_dual.jl
@@ -80,7 +80,6 @@ end
 
 function _copy_dual(dest::MOI.ModelLike, src::MOI.ModelLike, index_map)
     vis_src = MOI.get(src, MOI.ListOfVariableIndices())
-    # TODO: in POI varible primal must be the same as variable value
     MOI.set(
         dest,
         MOI.VariablePrimalStart(),
@@ -88,11 +87,7 @@ function _copy_dual(dest::MOI.ModelLike, src::MOI.ModelLike, index_map)
         MOI.get(src, MOI.VariablePrimal(), vis_src),
     )
     for (F, S) in MOI.get(dest, MOI.ListOfConstraintTypesPresent())
-        @show F, S
-        @show dest
-        @show dest.optimizer
         if F<:MOI.VariableIndex && S<:MOI.Parameter
-            @show "Skipping constraint type $F $S"
             continue
         end
         _copy_constraint_start(
@@ -124,7 +119,7 @@ function _copy_constraint_start(
     src_attr,
 ) where {F,S}
     for ci in MOI.get(src, MOI.ListOfConstraintIndices{F,S}())
-        @show value = MOI.get(src, src_attr, ci)
+        value = MOI.get(src, src_attr, ci)
         MOI.set(dest, dest_attr, index_map[ci], value)
     end
 end

From b4c492fdceb969e62ad0c14704f2a43fe76bb61e Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Fri, 4 Jul 2025 11:12:26 -0300
Subject: [PATCH 06/22] remove duplicatie def

---
 src/parameters.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/parameters.jl b/src/parameters.jl
index f9529816b..0c4ce11a3 100644
--- a/src/parameters.jl
+++ b/src/parameters.jl
@@ -557,8 +557,6 @@ function MOI.set(
     return
 end
 
-MOI.is_set_by_optimize(::ReverseConstraintSet) = true
-
 function MOI.get(
     model::POI.Optimizer,
     ::ReverseConstraintSet,

From a0142bc0c3749eabebeb40bb574f4b9fed7f3e55 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Fri, 4 Jul 2025 11:14:08 -0300
Subject: [PATCH 07/22] move definition

---
 src/diff_opt.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/diff_opt.jl b/src/diff_opt.jl
index 0cd0dff9a..9f934b73b 100644
--- a/src/diff_opt.jl
+++ b/src/diff_opt.jl
@@ -292,6 +292,11 @@ Model supporting [`forward_differentiate!`](@ref) and
 """
 abstract type AbstractModel <: MOI.ModelLike end
 
+function empty_input_sensitivities!(model::AbstractModel)
+    empty!(model.input_cache)
+    return
+end
+
 MOI.supports_incremental_interface(::AbstractModel) = true
 
 function MOI.is_valid(model::AbstractModel, idx::MOI.Index)

From ad61e35353a958253e06a6fbd784a208737e853c Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 02:33:32 -0300
Subject: [PATCH 08/22] format

---
 src/copy_dual.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/copy_dual.jl b/src/copy_dual.jl
index 4b712aa4b..dbeca2a19 100644
--- a/src/copy_dual.jl
+++ b/src/copy_dual.jl
@@ -87,7 +87,7 @@ function _copy_dual(dest::MOI.ModelLike, src::MOI.ModelLike, index_map)
         MOI.get(src, MOI.VariablePrimal(), vis_src),
     )
     for (F, S) in MOI.get(dest, MOI.ListOfConstraintTypesPresent())
-        if F<:MOI.VariableIndex && S<:MOI.Parameter
+        if F <: MOI.VariableIndex && S <: MOI.Parameter
             continue
         end
         _copy_constraint_start(

From 3606ab581637a66911dce46a9260fdaddc8d0d08 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 02:34:55 -0300
Subject: [PATCH 09/22] format

---
 src/diff_opt.jl | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/diff_opt.jl b/src/diff_opt.jl
index 9f934b73b..086104738 100644
--- a/src/diff_opt.jl
+++ b/src/diff_opt.jl
@@ -22,7 +22,8 @@ Base.@kwdef mutable struct InputCache
     #       concrete value types.
     # `scalar_constraints` and `vector_constraints` includes `A` and `b` for CPs
     # or `G` and `h` for QPs
-    parameter_constraints::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}() # Specifically for NonLinearProgram
+    parameter_constraints::Dict{MOI.ConstraintIndex,Float64} =
+        Dict{MOI.ConstraintIndex,Float64}() # Specifically for NonLinearProgram
     scalar_constraints::MOIDD.DoubleDict{MOI.ScalarAffineFunction{Float64}} =
         MOIDD.DoubleDict{MOI.ScalarAffineFunction{Float64}}() # also includes G for QPs
     vector_constraints::MOIDD.DoubleDict{MOI.VectorAffineFunction{Float64}} =
@@ -136,7 +137,6 @@ ConstraintFunction so we consider the expression `θ * (x + 2y - 5)`.
 """
 struct ForwardConstraintFunction <: MOI.AbstractConstraintAttribute end
 
-
 """
     ForwardConstraintSet <: MOI.AbstractConstraintAttribute
 
@@ -434,7 +434,9 @@ function MOI.set(
     func::MOI.ScalarAffineFunction{T},
 ) where {T,S}
     if MOI.supports_constraint(model.model, MOI.VariableIndex, MOI.Parameter{T})
-        error("The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.")
+        error(
+            "The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.",
+        )
     end
     model.input_cache.scalar_constraints[ci] = func
     return
@@ -447,7 +449,9 @@ function MOI.set(
     func::MOI.VectorAffineFunction{T},
 ) where {T,S}
     if MOI.supports_constraint(model.model, MOI.VariableIndex, MOI.Parameter{T})
-        error("The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.")
+        error(
+            "The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.",
+        )
     end
     model.input_cache.vector_constraints[ci] = func
     return
@@ -459,8 +463,14 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}},
     set::MOI.Parameter{T},
 ) where {T}
-    if !MOI.supports_constraint(model.model, MOI.VariableIndex, MOI.Parameter{T})
-        error("The model with type $(typeof(model)) does not support Parameters")
+    if !MOI.supports_constraint(
+        model.model,
+        MOI.VariableIndex,
+        MOI.Parameter{T},
+    )
+        error(
+            "The model with type $(typeof(model)) does not support Parameters",
+        )
     end
     model.input_cache.parameter_constraints[ci] = set.value
     return

From 1920357dc9ae129f83f1b623965623955203dbd9 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 02:43:09 -0300
Subject: [PATCH 10/22] simplify conic model

---
 src/ConicProgram/ConicProgram.jl | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/ConicProgram/ConicProgram.jl b/src/ConicProgram/ConicProgram.jl
index f94444047..c9f9c2d19 100644
--- a/src/ConicProgram/ConicProgram.jl
+++ b/src/ConicProgram/ConicProgram.jl
@@ -121,7 +121,7 @@ function MOI.empty!(model::Model)
     model.back_grad_cache = nothing
     empty!(model.input_cache)
     empty!(model.x)
-    empty!(model.s)
+    empty!(model.s) # TODO: stop using this
     empty!(model.y)
     model.diff_time = NaN
     return
@@ -189,6 +189,7 @@ function _gradient_cache(model::Model)
         )
     end
 
+    # TODO: remove this
     if any(isnan, model.s) || length(model.s) < length(b)
         error(
             "Some constraints are missing a value for the `ConstraintPrimalStart` attribute.",
@@ -216,10 +217,12 @@ function _gradient_cache(model::Model)
     m = A.m
     n = A.n
     N = m + n + 1
+
+    slack = b - A * model.x
     # NOTE: w = 1.0 systematically since we asserted the primal-dual pair is optimal
     # `inv(M)((x, y, 1), (0, s, 0)) = (x, y, 1) - (0, s, 0)`,
     # see Minty parametrization in https://stanford.edu/~boyd/papers/pdf/cone_prog_refine.pdf
-    (u, v, w) = (model.x, model.y - model.s, 1.0)
+    (u, v, w) = (model.x, model.y - slack, 1.0)
 
     # find gradient of projections on dual of the cones
     Dπv = DiffOpt.Dπ(v, model.model, model.model.constraints.sets)
@@ -260,12 +263,13 @@ function DiffOpt.forward_differentiate!(model::Model)
         M = gradient_cache.M
         vp = gradient_cache.vp
         Dπv = gradient_cache.Dπv
-        x = model.x
-        y = model.y
-        s = model.s
         A = gradient_cache.A
         b = gradient_cache.b
         c = gradient_cache.c
+        x = model.x
+        y = model.y
+        # s = model.s
+        slack = b - A * x
 
         objective_function = DiffOpt._convert(
             MOI.ScalarAffineFunction{Float64},
@@ -309,7 +313,7 @@ function DiffOpt.forward_differentiate!(model::Model)
         n = size(A, 2)
         N = m + n + 1
         # NOTE: w = 1 systematically since we asserted the primal-dual pair is optimal
-        (u, v, w) = (x, y - s, 1.0)
+        (u, v, w) = (x, y - slack, 1.0)
 
         # g = dQ * Π(z/|w|) = dQ * [u, vp, 1.0]
         RHS = [
@@ -340,12 +344,13 @@ function DiffOpt.reverse_differentiate!(model::Model)
         M = gradient_cache.M
         vp = gradient_cache.vp
         Dπv = gradient_cache.Dπv
-        x = model.x
-        y = model.y
-        s = model.s
         A = gradient_cache.A
         b = gradient_cache.b
         c = gradient_cache.c
+        x = model.x
+        y = model.y
+        # s = model.s
+        slack = b - A * x
 
         dx = zeros(length(c))
         for (vi, value) in model.input_cache.dx
@@ -358,13 +363,13 @@ function DiffOpt.reverse_differentiate!(model::Model)
         n = size(A, 2)
         N = m + n + 1
         # NOTE: w = 1 systematically since we asserted the primal-dual pair is optimal
-        (u, v, w) = (x, y - s, 1.0)
+        (u, v, w) = (x, y - slack, 1.0)
 
         # dz = D \phi (z)^T (dx,dy,dz)
         dz = [
             dx
             Dπv' * (dy + ds) - ds
-            -x' * dx - y' * dy - s' * ds
+            -x' * dx - y' * dy - slack' * ds
         ]
 
         g = if LinearAlgebra.norm(dz) <= 1e-4 # TODO: parametrize or remove

From ebbea33c310f267c3f3ee5008ec0758ae63ee34d Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 02:45:44 -0300
Subject: [PATCH 11/22] force nlp on nlp tests

---
 test/data/nlp_problems.jl | 15 +++++++++++++++
 test/nlp_program.jl       |  7 +++++++
 2 files changed, 22 insertions(+)

diff --git a/test/data/nlp_problems.jl b/test/data/nlp_problems.jl
index 923b48c4c..2769ae82c 100644
--- a/test/data/nlp_problems.jl
+++ b/test/data/nlp_problems.jl
@@ -9,6 +9,7 @@ https://github.com/jump-dev/JuMP.jl/blob/301d46e81cb66c74c6e22cd89fb89ced740f157
 ################################################
 function create_nonlinear_jump_model(; ismin = true)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
     @variable(model, p ∈ MOI.Parameter(1.0))
     @variable(model, p2 ∈ MOI.Parameter(2.0))
@@ -33,6 +34,7 @@ From sIpopt paper: https://optimization-online.org/2011/04/3008/
 
 function create_nonlinear_jump_model_sipopt(; ismin = true)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
     @variable(model, p1 ∈ MOI.Parameter(4.5))
     @variable(model, p2 ∈ MOI.Parameter(1.0))
@@ -55,6 +57,7 @@ Simple Problems
 
 function create_jump_model_1(p_val = [1.5])
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -73,6 +76,7 @@ end
 
 function create_jump_model_2(p_val = [1.5])
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -90,6 +94,7 @@ end
 
 function create_jump_model_3(p_val = [-1.5])
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -108,6 +113,7 @@ end
 
 function create_jump_model_4(p_val = [1.5])
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -126,6 +132,7 @@ end
 
 function create_jump_model_5(p_val = [1.5])
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -147,6 +154,7 @@ h(y) = -sum(y .* log.(y))
 softmax(x) = exp.(x) / sum(exp.(x))
 function create_jump_model_6(p_a = collect(1.0:0.1:2.0))
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -167,6 +175,7 @@ end
 
 function create_jump_model_7(p_val = [1.5], g = sin)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -190,6 +199,7 @@ Non Linear Problems
 
 function create_nonlinear_jump_model_1(p_val = [1.0; 2.0; 100]; ismin = true)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -214,6 +224,7 @@ end
 
 function create_nonlinear_jump_model_2(p_val = [3.0; 2.0; 10]; ismin = true)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -238,6 +249,7 @@ end
 
 function create_nonlinear_jump_model_3(p_val = [3.0; 2.0; 10]; ismin = true)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -261,6 +273,7 @@ end
 
 function create_nonlinear_jump_model_4(p_val = [1.0; 2.0; 100]; ismin = true)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -286,6 +299,7 @@ end
 
 function create_nonlinear_jump_model_5(p_val = [1.0; 2.0; 100]; ismin = true)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
@@ -312,6 +326,7 @@ end
 
 function create_nonlinear_jump_model_6(p_val = [100.0; 200.0]; ismin = true)
     model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     set_silent(model)
 
     # Parameters
diff --git a/test/nlp_program.jl b/test/nlp_program.jl
index 862050035..3ed2ff033 100644
--- a/test/nlp_program.jl
+++ b/test/nlp_program.jl
@@ -126,6 +126,7 @@ function test_analytical_simple(; P = 2) # Number of parameters
                 with_parametric_opt_interface = false,
             ),
         )
+        MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
         @variable(m, 0 ≤ x[1:P] ≤ 1)
         @variable(m, p[1:P] ∈ Parameter.(0.5))
@@ -200,6 +201,7 @@ function test_analytical_simple(; P = 2) # Number of parameters
                 with_parametric_opt_interface = false,
             ),
         )
+        MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
         @variable(m, x[1:P])
         @constraint(m, x .≥ 0)
@@ -244,6 +246,7 @@ function test_analytical_simple(; P = 2) # Number of parameters
                 with_parametric_opt_interface = false,
             ),
         )
+        MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
         @variable(m, x[1:P])
         @constraint(m, 0 .≤ x)
@@ -288,6 +291,7 @@ function test_analytical_simple(; P = 2) # Number of parameters
                 with_parametric_opt_interface = false,
             ),
         )
+        MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
         @variable(m, x[1:P])
         @constraint(m, 0 .≤ x)
@@ -664,6 +668,7 @@ function test_differentiating_non_trivial_convex_qp_jump()
     h = vec(h)
     b = vec(b)
     model = JuMP.Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
+    MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
     MOI.set(model, MOI.Silent(), true)
     @variable(model, x[1:nz])
     @variable(model, p_le[1:nineq_le] ∈ MOI.Parameter.(0.0))
@@ -715,6 +720,7 @@ function test_ReverseConstraintDual()
             with_parametric_opt_interface = false,
         ),
     )
+    MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
     @variable(m, x[1:2])
     @variable(m, p[1:2] ∈ Parameter.(0.5))
@@ -802,6 +808,7 @@ function test_changing_factorization()
             with_parametric_opt_interface = false,
         ),
     )
+    MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
     @variable(m, x[1:P])
     @constraint(m, x .≥ 0)

From 0e48201d1c76c98530177774b4001093f004a07e Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 02:46:00 -0300
Subject: [PATCH 12/22] fix param tests

---
 test/parameters.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/parameters.jl b/test/parameters.jl
index b38f84c07..82c3b252b 100644
--- a/test/parameters.jl
+++ b/test/parameters.jl
@@ -610,8 +610,8 @@ function test_quadratic_objective_qp()
 end
 
 function test_diff_errors_POI()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
+    model = direct_model(
+        DiffOpt.diff_optimizer(
             HiGHS.Optimizer;
             with_parametric_opt_interface = true,
         ),
@@ -747,10 +747,10 @@ function test_empty_cache()
 
     function get_sensitivity(m, xᵢ, pᵢ)
         DiffOpt.empty_input_sensitivities!(m)
-        @test is_empty(unsafe_backend(m).optimizer.input_cache)
-        if !isnothing(unsafe_backend(m).optimizer.diff) &&
-           !isnothing(unsafe_backend(m).optimizer.diff.model.input_cache)
-            @test is_empty(unsafe_backend(m).optimizer.diff.model.input_cache)
+        @test is_empty(unsafe_backend(m).input_cache)
+        if !isnothing(unsafe_backend(m).diff) &&
+           !isnothing(unsafe_backend(m).diff.optimizer.model.input_cache)
+            @test is_empty(unsafe_backend(m).diff.optimizer.model.input_cache)
         end
         MOI.set(
             m,

From 68c5dd72a8a8eef6a92d08104612d0ba4f2a53e9 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 02:50:35 -0300
Subject: [PATCH 13/22] cleanup parameter usage

---
 src/moi_wrapper.jl   | 103 ++++++++++++++++++++++++++++++++++++++-----
 test/jump_wrapper.jl |  59 +++++++++++++++++++------
 2 files changed, 137 insertions(+), 25 deletions(-)

diff --git a/src/moi_wrapper.jl b/src/moi_wrapper.jl
index b70663328..8c7260942 100644
--- a/src/moi_wrapper.jl
+++ b/src/moi_wrapper.jl
@@ -39,7 +39,8 @@ function diff_optimizer(
             MOI.Utilities.UniversalFallback(
                 MOI.Utilities.Model{with_bridge_type}(),
             ),
-            with_parametric_opt_interface ? POI.Optimizer(optimizer) : optimizer,
+            with_parametric_opt_interface ? POI.Optimizer(optimizer) :
+            optimizer,
         )
     else
         with_parametric_opt_interface ? POI.Optimizer(optimizer) : optimizer
@@ -48,16 +49,25 @@ function diff_optimizer(
 end
 
 mutable struct Optimizer{OT<:MOI.ModelLike} <: MOI.AbstractOptimizer
+    # main optimizer responsible for caching the optimization problem data
+    # and for solving the optimization problem
     optimizer::OT
 
+    # list of differentiation backends for automatic differentiation
+    # without mode selection
     model_constructors::Vector{Any}
+    # used to select a single differentiation backend
+    # if not `nothing`, it is used to select the model constructor
+    # and the above is ignored
     model_constructor::Any
 
+    # instantiated differentiation backend from the options above
     diff::Any
 
+    # mapping between the `optimizer` and the `diff` models
     index_map::Union{Nothing,MOI.Utilities.IndexMap}
 
-    # sensitivity input cache using MOI like sparse format
+    # sensitivity input cache using MOI-like sparse format
     input_cache::InputCache
 
     function Optimizer(optimizer::OT) where {OT<:MOI.ModelLike}
@@ -557,8 +567,13 @@ function forward_differentiate!(model::Optimizer)
         model.input_cache.factorization,
     )
     T = Float64
-    if MOI.supports_constraint(model, MOI.VariableIndex, MOI.Parameter{T})
-        @show "param mode"
+    list = MOI.get(
+        model,
+        MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{T}}(),
+    )
+    parametric_diff = !isempty(list)
+    if parametric_diff # MOI.supports_constraint(model, MOI.VariableIndex, MOI.Parameter{T})
+        # @show "param mode"
         for (vi, value) in model.input_cache.parameter_constraints
             MOI.set(
                 diff,
@@ -569,7 +584,7 @@ function forward_differentiate!(model::Optimizer)
         end
         return forward_differentiate!(diff)
     end
-    @show "func mode"
+    # @show "func mode"
     if model.input_cache.objective !== nothing
         MOI.set(
             diff,
@@ -602,13 +617,13 @@ end
 function empty_input_sensitivities!(model::Optimizer)
     empty!(model.input_cache)
     if model.diff !== nothing
-        empty!(model.diff.model.input_cache)
+        empty_input_sensitivities!(model.diff)
     end
     return
 end
 
-function _instantiate_with_bridges(model_constructor)
-    model = MOI.Bridges.LazyBridgeOptimizer(MOI.instantiate(model_constructor))
+function _add_bridges(instantiated_model)
+    model = MOI.Bridges.LazyBridgeOptimizer(instantiated_model)
     # We don't add any variable bridge here because:
     # 1) If `ZerosBridge` is used, `MOI.Bridges.unbridged_function` does not work.
     #    This is in fact expected: since `ZerosBridge` drops the variable, we dont
@@ -621,14 +636,38 @@ function _instantiate_with_bridges(model_constructor)
     return model
 end
 
+function _instantiate_diff(model::Optimizer, constructor)
+    # parametric_diff = MOI.supports_constraint(
+    #     model,
+    #     MOI.VariableIndex,
+    #     MOI.Parameter{Float64},
+    # )
+    list = MOI.get(
+        model,
+        MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{Float64}}(),
+    )
+    parametric_diff = !isempty(list)
+    model_instance = MOI.instantiate(constructor)
+    needs_poi =
+        !MOI.supports_constraint(
+            model_instance,
+            MOI.VariableIndex,
+            MOI.Parameter{Float64},
+        )
+    model_bridged = _add_bridges(model_instance)
+    if needs_poi && parametric_diff
+        return POI.Optimizer(model_bridged)
+    end
+    return model_bridged
+end
+
 function _diff(model::Optimizer)
     if model.diff === nothing
         _check_termination_status(model)
         model_constructor = MOI.get(model, ModelConstructor())
         if isnothing(model_constructor)
-            model.diff = nothing
             for constructor in model.model_constructors
-                model.diff = POI.Optimizer(_instantiate_with_bridges(constructor))
+                model.diff = _instantiate_diff(model, constructor)
                 try
                     model.index_map = MOI.copy_to(model.diff, model.optimizer)
                 catch err
@@ -653,7 +692,7 @@ function _diff(model::Optimizer)
                 )
             end
         else
-            model.diff = POI.Optimizer(_instantiate_with_bridges(model_constructor))
+            model.diff = _instantiate_diff(model, model_constructor)
             model.index_map = MOI.copy_to(model.diff, model.optimizer)
         end
         _copy_dual(model.diff, model.optimizer, model.index_map)
@@ -693,6 +732,18 @@ end
 MOI.supports(::Optimizer, ::ForwardObjectiveFunction) = true
 
 function MOI.set(model::Optimizer, ::ForwardObjectiveFunction, objective)
+    T = Float64
+    list = MOI.get(
+        model,
+        MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{T}}(),
+    )
+    parametric_diff = !isempty(list)
+    if parametric_diff
+        error(
+            "Cannot set forward objective function for a model with parameters. " *
+            "Use `MOI.set(model, ForwardConstraintSet(), ParameterRef(vi), Parameter(val))` instead.",
+        )
+    end
     model.input_cache.objective = objective
     return
 end
@@ -751,6 +802,14 @@ function MOI.set(
     vi::MOI.VariableIndex,
     val,
 )
+    T = Float64
+    is_param = MOI.is_valid(
+        model,
+        MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}(vi.value),
+    )
+    if is_param
+        error("Trying to set a backward variable sensitivity for a parameter")
+    end
     model.input_cache.dx[vi] = val
     return
 end
@@ -819,6 +878,17 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.ScalarAffineFunction{T},S},
     func::MOI.ScalarAffineFunction{T},
 ) where {T,S}
+    list = MOI.get(
+        model,
+        MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{T}}(),
+    )
+    parametric_diff = !isempty(list)
+    if parametric_diff
+        error(
+            "Cannot set forward constraint function for a model with parameters. " *
+            "Use `MOI.set(model, ForwardConstraintSet(), ParameterRef(vi), Parameter(val))` instead.",
+        )
+    end
     model.input_cache.scalar_constraints[ci] = func
     return
 end
@@ -829,6 +899,17 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.VectorAffineFunction{T},S},
     func::MOI.VectorAffineFunction{T},
 ) where {T,S}
+    list = MOI.get(
+        model,
+        MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{T}}(),
+    )
+    parametric_diff = !isempty(list)
+    if parametric_diff
+        error(
+            "Cannot set forward constraint function for a model with parameters. " *
+            "Use `MOI.set(model, ForwardConstraintSet(), ParameterRef(vi), Parameter(val))` instead.",
+        )
+    end
     model.input_cache.vector_constraints[ci] = func
     return
 end
diff --git a/test/jump_wrapper.jl b/test/jump_wrapper.jl
index dbbd2cf70..4e11b2df4 100644
--- a/test/jump_wrapper.jl
+++ b/test/jump_wrapper.jl
@@ -12,6 +12,7 @@ import HiGHS
 import Ipopt
 import SCS
 import MathOptInterface as MOI
+import ParametricOptInterface as POI
 
 const ATOL = 1e-3
 const RTOL = 1e-3
@@ -31,20 +32,48 @@ function test_jump_api()
     for (MODEL, SOLVER) in [
             # (DiffOpt.diff_model, Ipopt.Optimizer),
             (DiffOpt.quadratic_diff_model, HiGHS.Optimizer),
-            # (DiffOpt.quadratic_diff_model, SCS.Optimizer),
-            # (DiffOpt.quadratic_diff_model, Ipopt.Optimizer),
-            # (DiffOpt.conic_diff_model, HiGHS.Optimizer),
-            # (DiffOpt.conic_diff_model, SCS.Optimizer),
-            # (DiffOpt.conic_diff_model, Ipopt.Optimizer),
-            # (DiffOpt.nonlinear_diff_model, HiGHS.Optimizer), #  SQF ctr not supported?
-            # (DiffOpt.nonlinear_diff_model, SCS.Optimizer), # returns zero for sensitivity
-            # (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer),
+            (DiffOpt.quadratic_diff_model, SCS.Optimizer),
+            (
+                DiffOpt.quadratic_diff_model,
+                () ->
+                    MOI.instantiate(Ipopt.Optimizer; with_cache_type = Float64),
+            ),
+            (DiffOpt.conic_diff_model, HiGHS.Optimizer),
+            (DiffOpt.conic_diff_model, SCS.Optimizer),
+            (
+                DiffOpt.conic_diff_model,
+                () ->
+                    MOI.instantiate(Ipopt.Optimizer; with_cache_type = Float64),
+            ),
+            (
+                DiffOpt.nonlinear_diff_model,
+                () -> POI.Optimizer(HiGHS.Optimizer()),
+            ), #  SQF ctr not supported?
+            (
+                DiffOpt.nonlinear_diff_model,
+                () -> POI.Optimizer(
+                    MOI.instantiate(
+                        SCS.Optimizer;
+                        with_cache_type = Float64,
+                        with_bridge_type = Float64,
+                    ),
+                ),
+            ), # returns zero for sensitivity
+            (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer),
         ],
-        ineq in [true],#, false],
-        min in [true],#, false],
-        flip in [true]#, false]
+        ineq in [true, false],
+        _min in [true, false],
+        flip in [true, false]
 
-        @testset "$(MODEL) with: $(SOLVER), $(ineq ? "ineqs" : "eqs"), $(min ? "Min" : "Max"), $(flip ? "geq" : "leq")" begin
+        # # MODEL = DiffOpt.nonlinear_diff_model
+        # MODEL = DiffOpt.conic_diff_model
+        # SOLVER = ()->POI.Optimizer(HiGHS.Optimizer())
+        # # SOLVER = Ipopt.Optimizer
+        # ineq = true
+        # _min = true
+        # flip = true
+
+        @testset "$(MODEL) with: $(SOLVER), $(ineq ? "ineqs" : "eqs"), $(_min ? "Min" : "Max"), $(flip ? "geq" : "leq")" begin
             model = MODEL(SOLVER)
             set_silent(model)
 
@@ -60,10 +89,12 @@ function test_jump_api()
                     cons = @constraint(model, pc * x <= 3 * p)
                 end
             else
-                @constraint(model, cons, pc * x == 3 * p)
+                # names are failing!!!!!
+                # @constraint(model, cons, pc * x == 3 * p)
+                cons = @constraint(model, pc * x == 3 * p)
             end
             sign = flip ? -1 : 1
-            if min
+            if _min
                 @objective(model, Min, 2x * sign)
             else
                 @objective(model, Max, -2x * sign)

From a012debdb3df2dd29ab5376809b48a9264df5589 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 02:50:52 -0300
Subject: [PATCH 14/22] remove code

---
 test/jump_wrapper.jl | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/test/jump_wrapper.jl b/test/jump_wrapper.jl
index 4e11b2df4..29b47dbc4 100644
--- a/test/jump_wrapper.jl
+++ b/test/jump_wrapper.jl
@@ -65,14 +65,6 @@ function test_jump_api()
         _min in [true, false],
         flip in [true, false]
 
-        # # MODEL = DiffOpt.nonlinear_diff_model
-        # MODEL = DiffOpt.conic_diff_model
-        # SOLVER = ()->POI.Optimizer(HiGHS.Optimizer())
-        # # SOLVER = Ipopt.Optimizer
-        # ineq = true
-        # _min = true
-        # flip = true
-
         @testset "$(MODEL) with: $(SOLVER), $(ineq ? "ineqs" : "eqs"), $(_min ? "Min" : "Max"), $(flip ? "geq" : "leq")" begin
             model = MODEL(SOLVER)
             set_silent(model)

From 2207ec426c348bd8d0bd04cf80e951c178fb58c3 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 04:07:57 -0300
Subject: [PATCH 15/22] cleanup usage of parameters

---
 src/NonLinearProgram/NonLinearProgram.jl | 15 +++--
 src/diff_opt.jl                          |  9 +--
 src/jump_wrapper.jl                      | 45 +++++++------
 src/moi_wrapper.jl                       | 27 +++++---
 test/jump_wrapper.jl                     | 32 ++-------
 test/moi_wrapper.jl                      |  8 +--
 test/nlp_program.jl                      | 42 ++----------
 test/parameters.jl                       | 84 ++++--------------------
 8 files changed, 87 insertions(+), 175 deletions(-)

diff --git a/src/NonLinearProgram/NonLinearProgram.jl b/src/NonLinearProgram/NonLinearProgram.jl
index dbbefc3d1..973f0786e 100644
--- a/src/NonLinearProgram/NonLinearProgram.jl
+++ b/src/NonLinearProgram/NonLinearProgram.jl
@@ -119,11 +119,11 @@ function MOI.supports_constraint(
     return true
 end
 
-function MOI.supports_constraint(
+
+function MOI.supports_add_constrained_variable(
     ::Form,
-    ::Type{MOI.VariableIndex},
-    ::Type{MOI.Parameter{Float64}},
-)
+    ::Type{MOI.Parameter{T}},
+) where {T}
     return true
 end
 
@@ -300,6 +300,13 @@ function Model()
     )
 end
 
+function MOI.supports_add_constrained_variable(
+    ::Model,
+    ::Type{MOI.Parameter{T}},
+) where {T}
+    return true
+end
+
 _objective_sense(form::Form) = form.sense
 _objective_sense(model::Model) = _objective_sense(model.model)
 
diff --git a/src/diff_opt.jl b/src/diff_opt.jl
index 086104738..7a469c053 100644
--- a/src/diff_opt.jl
+++ b/src/diff_opt.jl
@@ -311,6 +311,8 @@ function MOI.add_variables(model::AbstractModel, n)
     return MOI.add_variables(model.model, n)
 end
 
+# TODO: add support for add_constrained_variable(s) and supports_
+
 function MOI.Utilities.pass_nonvariable_constraints(
     dest::AbstractModel,
     src::MOI.ModelLike,
@@ -433,7 +435,7 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.ScalarAffineFunction{T},S},
     func::MOI.ScalarAffineFunction{T},
 ) where {T,S}
-    if MOI.supports_constraint(model.model, MOI.VariableIndex, MOI.Parameter{T})
+    if MOI.supports_add_constrained_variable(model.model, MOI.Parameter{T})
         error(
             "The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.",
         )
@@ -448,7 +450,7 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.VectorAffineFunction{T},S},
     func::MOI.VectorAffineFunction{T},
 ) where {T,S}
-    if MOI.supports_constraint(model.model, MOI.VariableIndex, MOI.Parameter{T})
+    if MOI.supports_add_constrained_variable(model.model, MOI.Parameter{T})
         error(
             "The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.",
         )
@@ -463,9 +465,8 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}},
     set::MOI.Parameter{T},
 ) where {T}
-    if !MOI.supports_constraint(
+    if !MOI.supports_add_constrained_variable(
         model.model,
-        MOI.VariableIndex,
         MOI.Parameter{T},
     )
         error(
diff --git a/src/jump_wrapper.jl b/src/jump_wrapper.jl
index 24c4c55cf..8b2cb16ce 100644
--- a/src/jump_wrapper.jl
+++ b/src/jump_wrapper.jl
@@ -1,5 +1,5 @@
 """
-    diff_model(optimizer_constructor; with_parametric_opt_interface::Bool = true, with_bridge_type = Float64, with_cache::Bool = true)
+    diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache_type = Float64, with_outer_cache = true)
 
 Create a JuMP model with a differentiable optimizer. The optimizer is created
 using `optimizer_constructor`. This model will try to select the proper
@@ -9,21 +9,21 @@ See also: [`nonlinear_diff_model`](@ref), [`conic_diff_model`](@ref), [`quadrati
 """
 function diff_model(
     optimizer_constructor;
-    with_parametric_opt_interface::Bool = true,
     with_bridge_type = Float64,
-    with_cache::Bool = true,
+    with_cache_type = Float64,
+    with_outer_cache = true,
 )
     inner = diff_optimizer(
         optimizer_constructor;
-        with_parametric_opt_interface = with_parametric_opt_interface,
-        with_bridge_type = with_bridge_type,
-        with_cache = with_cache,
+        with_bridge_type,
+        with_cache_type,
+        with_outer_cache,
     )
     return JuMP.direct_model(inner)
 end
 
 """
-    nonlinear_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache::Bool = true)
+    nonlinear_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache_type = Float64, with_outer_cache = true)
 
 Create a JuMP model with a differentiable optimizer for nonlinear programs.
 The optimizer is created using `optimizer_constructor`.
@@ -33,20 +33,21 @@ See also: [`conic_diff_model`](@ref), [`quadratic_diff_model`](@ref), [`diff_mod
 function nonlinear_diff_model(
     optimizer_constructor;
     with_bridge_type = Float64,
-    with_cache::Bool = true,
+    with_cache_type = Float64,
+    with_outer_cache = true,
 )
     inner = diff_optimizer(
         optimizer_constructor;
-        with_parametric_opt_interface = false,
-        with_bridge_type = with_bridge_type,
-        with_cache = with_cache,
+        with_bridge_type,
+        with_cache_type,
+        with_outer_cache,
     )
     MOI.set(inner, ModelConstructor(), NonLinearProgram.Model)
     return JuMP.direct_model(inner)
 end
 
 """
-    conic_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache::Bool = true)
+    conic_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache_type = Float64, with_outer_cache = true)
 
 Create a JuMP model with a differentiable optimizer for conic programs.
 The optimizer is created using `optimizer_constructor`.
@@ -56,20 +57,21 @@ See also: [`nonlinear_diff_model`](@ref), [`quadratic_diff_model`](@ref), [`diff
 function conic_diff_model(
     optimizer_constructor;
     with_bridge_type = Float64,
-    with_cache::Bool = true,
+    with_cache_type = Float64,
+    with_outer_cache = true,
 )
     inner = diff_optimizer(
         optimizer_constructor;
-        with_parametric_opt_interface = true,
-        with_bridge_type = with_bridge_type,
-        with_cache = with_cache,
+        with_bridge_type,
+        with_cache_type,
+        with_outer_cache,
     )
     MOI.set(inner, ModelConstructor(), ConicProgram.Model)
     return JuMP.direct_model(inner)
 end
 
 """
-    quadratic_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache::Bool = true)
+    quadratic_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache_type = Float64, with_outer_cache = true)
 
 Create a JuMP model with a differentiable optimizer for quadratic programs.
 The optimizer is created using `optimizer_constructor`.
@@ -79,13 +81,14 @@ See also: [`nonlinear_diff_model`](@ref), [`conic_diff_model`](@ref), [`diff_mod
 function quadratic_diff_model(
     optimizer_constructor;
     with_bridge_type = Float64,
-    with_cache::Bool = true,
+    with_cache_type = Float64,
+    with_outer_cache = true,
 )
     inner = diff_optimizer(
         optimizer_constructor;
-        with_parametric_opt_interface = true,
-        with_bridge_type = with_bridge_type,
-        with_cache = with_cache,
+        with_bridge_type,
+        with_cache_type,
+        with_outer_cache,
     )
     MOI.set(inner, ModelConstructor(), QuadraticProgram.Model)
     return JuMP.direct_model(inner)
diff --git a/src/moi_wrapper.jl b/src/moi_wrapper.jl
index 8c7260942..d9fb25fe3 100644
--- a/src/moi_wrapper.jl
+++ b/src/moi_wrapper.jl
@@ -24,26 +24,36 @@ julia> model.add_constraint(model, ...)
 """
 function diff_optimizer(
     optimizer_constructor;
-    with_parametric_opt_interface::Bool = false,
     with_bridge_type = Float64,
-    with_cache::Bool = true,
+    with_cache_type = Float64,
+    with_outer_cache = true,
+    allow_parametric_opt_interface = true,
 )
-    optimizer = MOI.instantiate(optimizer_constructor; with_bridge_type)
+    optimizer = MOI.instantiate(
+        optimizer_constructor;
+        with_bridge_type,
+        with_cache_type,
+    )
+    add_poi =
+        allow_parametric_opt_interface &&
+        !MOI.supports_add_constrained_variable(
+            optimizer.model,
+            MOI.Parameter{Float64},
+        )
     # When we do `MOI.copy_to(diff, optimizer)` we need to efficiently `MOI.get`
     # the model information from `optimizer`. However, 1) `optimizer` may not
     # implement some getters or it may be inefficient and 2) the getters may be
     # unimplemented or inefficient through some bridges.
     # For this reason we add a cache layer, the same cache JuMP adds.
-    caching_opt = if with_cache
+    caching_opt = if with_outer_cache
         MOI.Utilities.CachingOptimizer(
             MOI.Utilities.UniversalFallback(
                 MOI.Utilities.Model{with_bridge_type}(),
             ),
-            with_parametric_opt_interface ? POI.Optimizer(optimizer) :
-            optimizer,
+            add_poi ? POI.Optimizer(optimizer) : optimizer,
         )
     else
-        with_parametric_opt_interface ? POI.Optimizer(optimizer) : optimizer
+        add_poi ? POI.Optimizer(optimizer) : optimizer
     end
     return Optimizer(caching_opt)
 end
@@ -649,9 +659,8 @@ function _instantiate_diff(model::Optimizer, constructor)
     parametric_diff = !isempty(list)
     model_instance = MOI.instantiate(constructor)
     needs_poi =
-        !MOI.supports_constraint(
+        !MOI.supports_add_constrained_variable(
             model_instance,
-            MOI.VariableIndex,
             MOI.Parameter{Float64},
         )
     model_bridged = _add_bridges(model_instance)
diff --git a/test/jump_wrapper.jl b/test/jump_wrapper.jl
index 29b47dbc4..04bae779d 100644
--- a/test/jump_wrapper.jl
+++ b/test/jump_wrapper.jl
@@ -30,35 +30,17 @@ end
 
 function test_jump_api()
     for (MODEL, SOLVER) in [
-            # (DiffOpt.diff_model, Ipopt.Optimizer),
+            (DiffOpt.diff_model, HiGHS.Optimizer),
+            (DiffOpt.diff_model, SCS.Optimizer),
+            (DiffOpt.diff_model, Ipopt.Optimizer),
             (DiffOpt.quadratic_diff_model, HiGHS.Optimizer),
             (DiffOpt.quadratic_diff_model, SCS.Optimizer),
-            (
-                DiffOpt.quadratic_diff_model,
-                () ->
-                    MOI.instantiate(Ipopt.Optimizer; with_cache_type = Float64),
-            ),
+            (DiffOpt.quadratic_diff_model, Ipopt.Optimizer),
             (DiffOpt.conic_diff_model, HiGHS.Optimizer),
             (DiffOpt.conic_diff_model, SCS.Optimizer),
-            (
-                DiffOpt.conic_diff_model,
-                () ->
-                    MOI.instantiate(Ipopt.Optimizer; with_cache_type = Float64),
-            ),
-            (
-                DiffOpt.nonlinear_diff_model,
-                () -> POI.Optimizer(HiGHS.Optimizer()),
-            ), #  SQF ctr not supported?
-            (
-                DiffOpt.nonlinear_diff_model,
-                () -> POI.Optimizer(
-                    MOI.instantiate(
-                        SCS.Optimizer;
-                        with_cache_type = Float64,
-                        with_bridge_type = Float64,
-                    ),
-                ),
-            ), # returns zero for sensitivity
+            (DiffOpt.conic_diff_model, Ipopt.Optimizer),
+            (DiffOpt.nonlinear_diff_model, HiGHS.Optimizer),
+            (DiffOpt.nonlinear_diff_model, SCS.Optimizer),
             (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer),
         ],
         ineq in [true, false],
diff --git a/test/moi_wrapper.jl b/test/moi_wrapper.jl
index e2d547c46..b72ee0399 100644
--- a/test/moi_wrapper.jl
+++ b/test/moi_wrapper.jl
@@ -28,10 +28,10 @@ end
 function test_moi_test_runtests()
     model = DiffOpt.diff_optimizer(HiGHS.Optimizer)
     # `Variable.ZerosBridge` makes dual needed by some tests fail.
-    MOI.Bridges.remove_bridge(
-        model.optimizer.optimizer,
-        MOI.Bridges.Variable.ZerosBridge{Float64},
-    )
+    # MOI.Bridges.remove_bridge(
+    #     model.optimizer.optimizer.optimizer,
+    #     MOI.Bridges.Variable.ZerosBridge{Float64},
+    # )
     MOI.set(model, MOI.Silent(), true)
     config = MOI.Test.Config(; atol = 1e-7)
     MOI.Test.runtests(model, config)
diff --git a/test/nlp_program.jl b/test/nlp_program.jl
index 3ed2ff033..bc54fd163 100644
--- a/test/nlp_program.jl
+++ b/test/nlp_program.jl
@@ -120,12 +120,7 @@ end
 
 function test_analytical_simple(; P = 2) # Number of parameters
     @testset "Bounds Bounds" begin
-        m = Model(
-            () -> DiffOpt.diff_optimizer(
-                Ipopt.Optimizer;
-                with_parametric_opt_interface = false,
-            ),
-        )
+        m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
         MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
         @variable(m, 0 ≤ x[1:P] ≤ 1)
@@ -195,12 +190,7 @@ function test_analytical_simple(; P = 2) # Number of parameters
         )
     end
     @testset "Bounds as RHS constraints" begin
-        m = Model(
-            () -> DiffOpt.diff_optimizer(
-                Ipopt.Optimizer;
-                with_parametric_opt_interface = false,
-            ),
-        )
+        m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
         MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
         @variable(m, x[1:P])
@@ -240,12 +230,7 @@ function test_analytical_simple(; P = 2) # Number of parameters
         )
     end
     @testset "Bounds as Mixed constraints" begin
-        m = Model(
-            () -> DiffOpt.diff_optimizer(
-                Ipopt.Optimizer;
-                with_parametric_opt_interface = false,
-            ),
-        )
+        m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
         MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
         @variable(m, x[1:P])
@@ -285,12 +270,7 @@ function test_analytical_simple(; P = 2) # Number of parameters
         )
     end
     @testset "Bounds as LHS constraints" begin
-        m = Model(
-            () -> DiffOpt.diff_optimizer(
-                Ipopt.Optimizer;
-                with_parametric_opt_interface = false,
-            ),
-        )
+        m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
         MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
         @variable(m, x[1:P])
@@ -714,12 +694,7 @@ function test_differentiating_non_trivial_convex_qp_jump()
 end
 
 function test_ReverseConstraintDual()
-    m = Model(
-        () -> DiffOpt.diff_optimizer(
-            Ipopt.Optimizer;
-            with_parametric_opt_interface = false,
-        ),
-    )
+    m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
     MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
     @variable(m, x[1:2])
@@ -802,12 +777,7 @@ end
 
 function test_changing_factorization()
     P = 2
-    m = Model(
-        () -> DiffOpt.diff_optimizer(
-            Ipopt.Optimizer;
-            with_parametric_opt_interface = false,
-        ),
-    )
+    m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))
     MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model)
 
     @variable(m, x[1:P])
diff --git a/test/parameters.jl b/test/parameters.jl
index 82c3b252b..9b4f1968d 100644
--- a/test/parameters.jl
+++ b/test/parameters.jl
@@ -30,12 +30,7 @@ function runtests()
 end
 
 function test_diff_rhs()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     @variable(model, x)
     @variable(model, p in Parameter(3.0))
@@ -101,12 +96,7 @@ function test_diff_rhs()
 end
 
 function test_diff_vector_rhs()
-    model = direct_model(
-        DiffOpt.diff_optimizer(
-            SCS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = direct_model(DiffOpt.diff_optimizer(SCS.Optimizer))
     set_silent(model)
     @variable(model, x)
     @variable(model, p in Parameter(3.0))
@@ -152,12 +142,7 @@ function test_diff_vector_rhs()
 end
 
 function test_affine_changes()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     p_val = 3.0
     pc_val = 1.0
@@ -257,12 +242,7 @@ function test_affine_changes()
 end
 
 function test_affine_changes_compact()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     p_val = 3.0
     pc_val = 1.0
@@ -315,12 +295,7 @@ function test_affine_changes_compact()
 end
 
 function test_quadratic_rhs_changes()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     p_val = 2.0
     q_val = 2.0
@@ -444,12 +419,7 @@ function test_quadratic_rhs_changes()
 end
 
 function test_affine_changes_compact_max()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     p_val = 3.0
     pc_val = 1.0
@@ -488,12 +458,7 @@ function test_affine_changes_compact_max()
 end
 
 function test_diff_affine_objective()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     p_val = 3.0
     @variable(model, x)
@@ -528,12 +493,7 @@ function test_diff_affine_objective()
 end
 
 function test_diff_quadratic_objective()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     p_val = 3.0
     @variable(model, x)
@@ -568,12 +528,7 @@ function test_diff_quadratic_objective()
 end
 
 function test_quadratic_objective_qp()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     p_val = 3.0
     @variable(model, x)
@@ -610,12 +565,7 @@ function test_quadratic_objective_qp()
 end
 
 function test_diff_errors_POI()
-    model = direct_model(
-        DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    model = direct_model(DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     @variable(model, x)
     @variable(model, p in Parameter(3.0))
@@ -672,12 +622,7 @@ function test_diff_errors_POI()
 end
 
 function test_diff_errors()
-    model = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = false,
-        ),
-    )
+    model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     set_silent(model)
     @variable(model, x)
     @variable(model, p in Parameter(3.0))
@@ -730,12 +675,7 @@ end
 
 # Credit to @klamike
 function test_empty_cache()
-    m = Model(
-        () -> DiffOpt.diff_optimizer(
-            HiGHS.Optimizer;
-            with_parametric_opt_interface = true,
-        ),
-    )
+    m = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
     @variable(m, x)
     @variable(m, p ∈ Parameter(1.0))
     @variable(m, q ∈ Parameter(2.0))

From 241ffe2fd38a6adcaabef758b1456c55c7787a46 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 04:08:25 -0300
Subject: [PATCH 16/22] format

---
 src/NonLinearProgram/NonLinearProgram.jl | 1 -
 src/diff_opt.jl                          | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/NonLinearProgram/NonLinearProgram.jl b/src/NonLinearProgram/NonLinearProgram.jl
index 973f0786e..111794574 100644
--- a/src/NonLinearProgram/NonLinearProgram.jl
+++ b/src/NonLinearProgram/NonLinearProgram.jl
@@ -119,7 +119,6 @@ function MOI.supports_constraint(
     return true
 end
 
-
 function MOI.supports_add_constrained_variable(
     ::Form,
     ::Type{MOI.Parameter{T}},
diff --git a/src/diff_opt.jl b/src/diff_opt.jl
index 7a469c053..70aa3ebb6 100644
--- a/src/diff_opt.jl
+++ b/src/diff_opt.jl
@@ -465,10 +465,7 @@ function MOI.set(
     ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}},
     set::MOI.Parameter{T},
 ) where {T}
-    if !MOI.supports_add_constrained_variable(
-        model.model,
-        MOI.Parameter{T},
-    )
+    if !MOI.supports_add_constrained_variable(model.model, MOI.Parameter{T})
         error(
             "The model with type $(typeof(model)) does not support Parameters",
         )

From c15c94829208f530804a5e9d7a9ae1362e7a2b58 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 04:25:06 -0300
Subject: [PATCH 17/22] add temp dep

---
 test/runtests.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index 70844576e..0517967e1 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,6 +3,9 @@
 # Use of this source code is governed by an MIT-style license that can be found
 # in the LICENSE.md file or at https://opensource.org/licenses/MIT.
 
+import Pkg
+Pkg.add(url="https://github.com/jump-dev/ParametricOptInterface.jl", rev="jg/newdo") 
+
 using Test
 
 @testset "$file" for file in readdir(@__DIR__)

From 94272b4a76bd5d316f8080450433b3a6780b0543 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 04:26:26 -0300
Subject: [PATCH 18/22] format

---
 test/runtests.jl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 0517967e1..d3c23fab8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,7 +4,10 @@
 # in the LICENSE.md file or at https://opensource.org/licenses/MIT.
 
 import Pkg
-Pkg.add(url="https://github.com/jump-dev/ParametricOptInterface.jl", rev="jg/newdo") 
+Pkg.add(;
+    url = "https://github.com/jump-dev/ParametricOptInterface.jl",
+    rev = "jg/newdo",
+)
 
 using Test
 

From 1a1f4e114fcec740bcc0cd19bd5d75433a6ee02c Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 04:30:25 -0300
Subject: [PATCH 19/22] add temp dep

---
 test/Project.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/Project.toml b/test/Project.toml
index e7c5a5b8d..1b1198b89 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -10,6 +10,7 @@ JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SCS = "c946c3f1-0d1f-5ce8-9dea-7daa1f7e2d13"

From 84f9ce9b667104717d7f4bdfd8deec48facb6e86 Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 11:03:47 -0300
Subject: [PATCH 20/22] fix PSDSquare

---
 src/ConicProgram/ConicProgram.jl | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/ConicProgram/ConicProgram.jl b/src/ConicProgram/ConicProgram.jl
index c9f9c2d19..23ec9c49f 100644
--- a/src/ConicProgram/ConicProgram.jl
+++ b/src/ConicProgram/ConicProgram.jl
@@ -141,6 +141,14 @@ function MOI.supports_constraint(
     return MOI.supports_constraint(model.model, F, S)
 end
 
+function MOI.supports_constraint(
+    ::Model,
+    ::Type{MOI.VectorAffineFunction{T}},
+    ::Type{MOI.PositiveSemidefiniteConeSquare},
+) where {T}
+    return false
+end
+
 function MOI.set(
     model::Model,
     ::MOI.ConstraintPrimalStart,

From 49aa65858319c8dbb58bb04d20f738f13473568c Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 7 Jul 2025 11:07:39 -0300
Subject: [PATCH 21/22] temp fix for tests

---
 docs/Project.toml | 1 +
 docs/make.jl      | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/docs/Project.toml b/docs/Project.toml
index c4112c75d..57e7b0c68 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -9,6 +9,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 SCS = "c946c3f1-0d1f-5ce8-9dea-7daa1f7e2d13"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/docs/make.jl b/docs/make.jl
index a31f7e283..74cfc9089 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -3,6 +3,12 @@
 # Use of this source code is governed by an MIT-style license that can be found
 # in the LICENSE.md file or at https://opensource.org/licenses/MIT.
 
+import Pkg
+Pkg.add(;
+    url = "https://github.com/jump-dev/ParametricOptInterface.jl",
+    rev = "jg/newdo",
+)
+
 using Documenter
 using DiffOpt
 using Literate

From c25669e29e4697ad47bb76f628e4205bd63252ab Mon Sep 17 00:00:00 2001
From: joaquimg <joaquimdgarcia@gmail.com>
Date: Mon, 21 Jul 2025 11:57:09 -0300
Subject: [PATCH 22/22] format

---
 docs/src/examples/chainrules_unit_new.jl | 30 +++++-------------------
 docs/src/examples/custom-relu_new.jl     |  5 +++-
 2 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/docs/src/examples/chainrules_unit_new.jl b/docs/src/examples/chainrules_unit_new.jl
index e06eab77e..5b56d0338 100644
--- a/docs/src/examples/chainrules_unit_new.jl
+++ b/docs/src/examples/chainrules_unit_new.jl
@@ -185,26 +185,10 @@ function ChainRulesCore.frule(
         model = model,
     )
     ## Setting perturbations in the parameters
-    DiffOpt.set_forward_parameter.(
-        model,
-        model[:load1_demand],
-        Δload1_demand,
-    )
-    DiffOpt.set_forward_parameter.(
-        model,
-        model[:load2_demand],
-        Δload2_demand,
-    )
-    DiffOpt.set_forward_parameter.(
-        model,
-        model[:Cp],
-        Δgen_costs,
-    )
-    DiffOpt.set_forward_parameter.(
-        model,
-        model[:Cnl],
-        Δnoload_costs,
-    )
+    DiffOpt.set_forward_parameter.(model, model[:load1_demand], Δload1_demand)
+    DiffOpt.set_forward_parameter.(model, model[:load2_demand], Δload2_demand)
+    DiffOpt.set_forward_parameter.(model, model[:Cp], Δgen_costs)
+    DiffOpt.set_forward_parameter.(model, model[:Cnl], Δnoload_costs)
     ## computing the forward differentiation
     DiffOpt.forward_differentiate!(model)
     ## querying the corresponding perturbation of the decision
@@ -276,10 +260,8 @@ function ChainRulesCore.rrule(
             DiffOpt.get_reverse_parameter.(model, model[:load1_demand])
         dload2_demand =
             DiffOpt.get_reverse_parameter.(model, model[:load2_demand])
-        dgen_costs =
-            DiffOpt.get_reverse_parameter.(model, model[:Cp])
-        dnoload_costs =
-            DiffOpt.get_reverse_parameter.(model, model[:Cnl])
+        dgen_costs = DiffOpt.get_reverse_parameter.(model, model[:Cp])
+        dnoload_costs = DiffOpt.get_reverse_parameter.(model, model[:Cnl])
         return (dload1_demand, dload2_demand, dgen_costs, dnoload_costs)
     end
     return (pv, pullback_unit_commitment)
diff --git a/docs/src/examples/custom-relu_new.jl b/docs/src/examples/custom-relu_new.jl
index 2f733c5c8..160bde4f5 100644
--- a/docs/src/examples/custom-relu_new.jl
+++ b/docs/src/examples/custom-relu_new.jl
@@ -38,7 +38,10 @@ function matrix_relu(
 end
 
 # Define the reverse differentiation rule, for the function we defined above.
-function ChainRulesCore.rrule(::typeof(matrix_relu), y_data::Matrix{T}) where {T}
+function ChainRulesCore.rrule(
+    ::typeof(matrix_relu),
+    y_data::Matrix{T},
+) where {T}
     model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer)
     pv = matrix_relu(y_data; model = model)
     function pullback_matrix_relu(dl_dx)