diff --git a/Project.toml b/Project.toml index a3689608f..42b083c15 100644 --- a/Project.toml +++ b/Project.toml @@ -23,5 +23,5 @@ JuMP = "1" LazyArrays = "0.21, 0.22, 1" MathOptInterface = "1.18" MathOptSetDistances = "0.2.9" -ParametricOptInterface = "0.9.0" +ParametricOptInterface = "0.12.1" julia = "1.6" diff --git a/README.md b/README.md index 039894448..5913411db 100644 --- a/README.md +++ b/README.md @@ -33,15 +33,12 @@ examples, tutorials, and an API reference. ### DiffOpt-JuMP API with `Parameters` +Here is an example with a Parametric **Linear Program**: + ```julia using JuMP, DiffOpt, HiGHS -model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), -) +model = DiffOpt.quadratic_diff_model(HiGHS.Optimizer) set_silent(model) p_val = 4.0 @@ -64,9 +61,9 @@ optimize!(model) # differentiate w.r.t. p direction_p = 3.0 -MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(p), Parameter(direction_p)) +DiffOpt.set_forward_parameter(model, p, direction_p) DiffOpt.forward_differentiate!(model) -@show MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) == direction_p * 3 / pc_val +@show DiffOpt.get_forward_variable(model, x) == direction_p * 3 / pc_val # update p and pc p_val = 2.0 @@ -82,45 +79,30 @@ optimize!(model) DiffOpt.empty_input_sensitivities!(model) # differentiate w.r.t. pc direction_pc = 10.0 -MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(pc), Parameter(direction_pc)) +DiffOpt.set_forward_parameter(model, pc, direction_pc) DiffOpt.forward_differentiate!(model) -@show abs(MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) - +@show abs(DiffOpt.get_forward_variable(model, x) - -direction_pc * 3 * p_val / pc_val^2) < 1e-5 # always a good practice to clear previously set sensitivities DiffOpt.empty_input_sensitivities!(model) # Now, reverse model AD direction_x = 10.0 -MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, direction_x) +DiffOpt.set_reverse_variable(model, x, direction_x) DiffOpt.reverse_differentiate!(model) -@show MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(p)) == MOI.Parameter(direction_x * 3 / pc_val) -@show abs(MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(pc)).value - - -direction_x * 3 * p_val / pc_val^2) < 1e-5 +@show DiffOpt.get_reverse_parameter(model, p) == direction_x * 3 / pc_val +@show DiffOpt.get_reverse_parameter(model, pc) == -direction_x * 3 * p_val / pc_val^2 ``` -### Low level DiffOpt-JuMP API: - -A brief example: +Available models: +* `DiffOpt.quadratic_diff_model`: Quadratic Programs (QP) and Linear Programs +(LP) +* `DiffOpt.conic_diff_model`: Conic Programs (CP) and Linear Programs (LP) +* `DiffOpt.nonlinear_diff_model`: Nonlinear Programs (NLP), Quadratic Program +(QP) and Linear Programs (LP) +* `DiffOpt.diff_model`: Nonlinear Programs (NLP), Conic Programs (CP), +Quadratic Programs (QP) and Linear Programs (LP) -```julia -using JuMP, DiffOpt, HiGHS -# Create a model using the wrapper -model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) -# Define your model and solve it -@variable(model, x) -@constraint(model, cons, x >= 3) -@objective(model, Min, 2x) -optimize!(model) -# Choose the problem parameters to differentiate with respect to, and set their -# perturbations. -MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, 1.0) -# Differentiate the model -DiffOpt.reverse_differentiate!(model) -# fetch the gradients -grad_exp = MOI.get(model, DiffOpt.ReverseConstraintFunction(), cons) # -3 x - 1 -constant(grad_exp) # -1 -coefficient(grad_exp, x) # -3 -``` ## Citing DiffOpt.jl diff --git a/docs/Project.toml b/docs/Project.toml index c4112c75d..fa1740b93 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -9,6 +9,8 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +ParametricOptInterface = "0ce4ce61-57bf-432b-a095-efac525d185e" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" SCS = "c946c3f1-0d1f-5ce8-9dea-7daa1f7e2d13" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/docs/src/examples/Thermal_Generation_Dispatch_Example_new.jl b/docs/src/examples/Thermal_Generation_Dispatch_Example_new.jl new file mode 100644 index 000000000..9e83504ed --- /dev/null +++ b/docs/src/examples/Thermal_Generation_Dispatch_Example_new.jl @@ -0,0 +1,191 @@ +# # Thermal Generation Dispatch Example + +#md # [![](https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white)](@__REPO_ROOT_URL__/examples/Thermal_Generation_Dispatch_Example.jl) + +# This example illustrates the sensitivity analysis of thermal generation dispatch problem. + +# This problem can be described as the choice of thermal generation `g` given a demand `d`, a price for thermal generation `c` and a penalty price `c_{ϕ}` for any demand not attended ϕ. + +# ```math +# \begin{split} +# \begin{array} {ll} +# \mbox{minimize} & \sum_{i=1}^{N} c_{i} g_{i} + c_{\phi} \phi \\ +# \mbox{s.t.} & g_{i} \ge 0 \quad i=1..N \\ +# & g_{i} \le G_{i} \quad i=1..N \\ +# & \sum_{i=1}^{N} g_{i} + \phi = d\\ +# \end{array} +# \end{split} +# ``` +# where +# - `G_{i}` is the maximum possible generation for a thermal generator `i` + +# ## Define and solve the Thermal Dispatch Problem + +# First, import the libraries. + +using Test +using JuMP +import DiffOpt +import LinearAlgebra: dot +import HiGHS +import MathOptInterface as MOI +import Plots + +# Define the model that will be construct given a set of parameters. + +function generate_model( + d_data::Float64; + g_sup::Vector{Float64}, + c_g::Vector{Float64}, + c_ϕ::Float64, +) + ## Creation of the Model and Parameters + model = DiffOpt.quadratic_diff_model(HiGHS.Optimizer) + set_silent(model) + I = length(g_sup) + + ## Variables + @variable(model, g[i in 1:I] >= 0.0) + @variable(model, ϕ >= 0.0) + + ## Parameters + @variable(model, d in Parameter(d_data)) + + ## Constraints + @constraint(model, limit_constraints_sup[i in 1:I], g[i] <= g_sup[i]) + @constraint(model, demand_constraint, sum(g) + ϕ == d) + + ## Objectives + @objective(model, Min, dot(c_g, g) + c_ϕ * ϕ) + + ## Solve the model + optimize!(model) + + ## Return the solved model + return model +end + +# Define the functions that will get the primal values `g` and `\phi` and sensitivity analysis of the demand `dg/dd` and `d\phi/dd` from a optimized model. + +function diff_forward(model::Model, ϵ::Float64 = 1.0) + ## Initialization of parameters and references to simplify the notation + vect_ref = [model[:g]; model[:ϕ]] + + ## Get the primal solution of the model + vect = value.(vect_ref) + + ## Reset the sensitivities of the model + DiffOpt.empty_input_sensitivities!(model) + + ## Pass the perturbation to the DiffOpt Framework + DiffOpt.set_forward_parameter(model, model[:d], ϵ) + + ## Compute the derivatives with the Forward Mode + DiffOpt.forward_differentiate!(model) + + ## Get the derivative of the model + dvect = DiffOpt.get_forward_variable.(model, vect_ref) + + ## Return the values as a vector + return [vect; dvect] +end + +function diff_reverse(model::Model, ϵ::Float64 = 1.0) + ## Initialization of parameters and references to simplify the notation + vect_ref = [model[:g]; model[:ϕ]] + + ## Get the primal solution of the model + vect = value.(vect_ref) + + ## Set variables needed for the DiffOpt Backward Framework + dvect = Array{Float64,1}(undef, length(vect_ref)) + + ## Loop for each primal variable + for i in 1:I+1 + ## Reset the sensitivities of the model + DiffOpt.empty_input_sensitivities!(model) + + ## Pass the perturbation to the DiffOpt Framework + DiffOpt.set_reverse_variable.(model, vect_ref[i], ϵ) + + ## Compute the derivatives with the Forward Mode + DiffOpt.reverse_differentiate!(model) + + ## Get the derivative of the model + dvect[i] = DiffOpt.get_reverse_parameter(model, model[:d]) + end + + ## Return the values as a vector + return [vect; dvect] +end + +# Initialize of Parameters + +g_sup = [10.0, 20.0, 30.0] +I = length(g_sup) +d = 0.0:0.1:80 +d_size = length(d) +c_g = [1.0, 3.0, 5.0] +c_ϕ = 10.0; + +# Generate models for each demand `d` +models = generate_model.(d; g_sup = g_sup, c_g = c_g, c_ϕ = c_ϕ); + +# Get the results of models with the DiffOpt Forward and Backward context + +result_forward = diff_forward.(models) +optimize!.(models) +result_reverse = diff_reverse.(models); + +# Organization of results to plot +# Initialize data_results that will contain every result +data_results = Array{Float64,3}(undef, 2, d_size, 2 * (I + 1)); + +# Populate the data_results array +for k in 1:d_size + data_results[1, k, :] = result_forward[k] + data_results[2, k, :] = result_reverse[k] +end + +# ## Results with Plot graphs +# ### Results for the forward context +# Result Primal Values: +Plots.plot( + d, + data_results[1, :, 1:I+1]; + title = "Generation by Demand", + label = ["Thermal Generation 1" "Thermal Generation 2" "Thermal Generation 3" "Generation Deficit"], + xlabel = "Demand [unit]", + ylabel = "Generation [unit]", +) + +# Result Sensitivity Analysis: +Plots.plot( + d, + data_results[1, :, I+2:2*(I+1)]; + title = "Sensitivity of Generation by Demand", + label = ["T. Gen. 1 Sensitivity" "T. Gen. 2 Sensitivity" "T. Gen. 3 Sensitivity" "Gen. Deficit Sensitivity"], + xlabel = "Demand [unit]", + ylabel = "Sensitivity [-]", +) + +# ### Results for the reverse context +# Result Primal Values: +Plots.plot( + d, + data_results[2, :, 1:I+1]; + title = "Generation by Demand", + label = ["Thermal Generation 1" "Thermal Generation 2" "Thermal Generation 3" "Generation Deficit"], + xlabel = "Demand [unit]", + ylabel = "Generation [unit]", +) + +# Result Sensitivity Analysis: +Plots.plot( + d, + data_results[2, :, I+2:2*(I+1)]; + title = "Sensitivity of Generation by Demand", + label = ["T. Gen. 1 Sensitivity" "T. Gen. 2 Sensitivity" "T. Gen. 3 Sensitivity" "Gen. Deficit Sensitivity"], + xlabel = "Demand [unit]", + ylabel = "Sensitivity [-]", +) diff --git a/docs/src/examples/autotuning-ridge_new.jl b/docs/src/examples/autotuning-ridge_new.jl new file mode 100644 index 000000000..b76206fb9 --- /dev/null +++ b/docs/src/examples/autotuning-ridge_new.jl @@ -0,0 +1,226 @@ +# # Auto-tuning Hyperparameters (JuMP API) + +#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/autotuning-ridge.jl) + +# This example shows how to learn a hyperparameter in Ridge Regression using a gradient descent routine. +# Let the regularized regression problem be formulated as: + +# ```math +# \begin{equation} +# \min_{w} \quad \frac{1}{2nd} \sum_{i=1}^{n} (w^T x_{i} - y_i)^2 + \frac{\alpha}{2d} \| w \|_2^2 +# \end{equation} +# ``` + +# where +# - `x`, `y` are the data points +# - `w` are the learned weights +# - `α` is the hyperparameter acting on regularization. + +# The main optimization model will be formulated with JuMP. +# Using the gradient of the optimal weights with respect to the regularization parameters +# computed with DiffOpt, we can perform a gradient descent on top of the inner model +# to minimize the test loss. + +# This tutorial uses the following packages + +using JuMP # The mathematical programming modelling language +import DiffOpt # JuMP extension for differentiable optimization +import Ipopt # Optimization solver that handles quadratic programs +import LinearAlgebra +import Plots +import Random + +# ## Generating a noisy regression dataset + +Random.seed!(42) + +N = 100 +D = 20 +noise = 5 + +w_real = 10 * randn(D) +X = 10 * randn(N, D) +y = X * w_real + noise * randn(N) +l = N ÷ 2 # test train split + +X_train = X[1:l, :] +X_test = X[l+1:N, :] +y_train = y[1:l] +y_test = y[l+1:N]; + +# ## Defining the regression problem + +# We implement the regularized regression problem as a function taking the problem data, +# building a JuMP model and solving it. +# Note the cubic term in the objective function (`α * dot(w, w)`), +# currently this is not handled by ParametricOptInterface smoothly, +# so we use Ipopt as the solver that support parameters as part of +# nonlinear (here only cubic) objective functions. + +function build_fit_ridge(X, y, α_val = 1.0) + model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer) + set_silent(model) + N, D = size(X) + @variable(model, w[1:D]) + @variable(model, α in Parameter(α_val)) + @expression(model, err_term, X * w - y) + @objective( + model, + Min, + LinearAlgebra.dot(err_term, err_term) / (2 * N * D) + + α * LinearAlgebra.dot(w, w) / (2 * D), + ) + return model +end + +function optimize_fit_ridge!(model, α_val) + set_parameter_value(model[:α], α_val) + optimize!(model) + return value.(model[:w]) +end + +# We can solve the problem for several values of α +# to visualize the effect of regularization on the testing and training loss. + +αs = 0.00:0.01:0.50 +mse_test = Float64[] +mse_train = Float64[] +model = build_fit_ridge(X, y) +(Ntest, D) = size(X_test) +(Ntrain, D) = size(X_train) +for α in αs + ŵ = optimize_fit_ridge!(model, α) + ŷ_test = X_test * ŵ + ŷ_train = X_train * ŵ + push!(mse_test, LinearAlgebra.norm(ŷ_test - y_test)^2 / (2 * Ntest * D)) + push!( + mse_train, + LinearAlgebra.norm(ŷ_train - y_train)^2 / (2 * Ntrain * D), + ) +end + +# Visualize the Mean Score Error metric + +Plots.plot( + αs, + mse_test ./ sum(mse_test); + label = "MSE test", + xaxis = "α", + yaxis = "MSE", + legend = (0.8, 0.2), + width = 3, +) +Plots.plot!( + αs, + mse_train ./ sum(mse_train); + label = "MSE train", + linestyle = :dash, + width = 3, +) +Plots.title!("Normalized MSE on training and testing sets") + +# ## Leveraging differentiable optimization: computing the derivative of the solution + +# Using DiffOpt, we can compute `∂w_i/∂α`, the derivative of the learned solution `̂w` +# w.r.t. the regularization parameter. + +function compute_dw_dα(model, w) + D = length(w) + dw_dα = zeros(D) + DiffOpt.set_forward_parameter(model, model[:α], 1.0) + DiffOpt.forward_differentiate!(model) + for i in 1:D + dw_dα[i] = DiffOpt.get_forward_variable(model, w[i]) + end + return dw_dα +end + +# Using `∂w_i/∂α` computed with `compute_dw_dα`, +# we can compute the derivative of the test loss w.r.t. the parameter α +# by composing derivatives. + +function d_testloss_dα(model, X_test, y_test, ŵ) + N, D = size(X_test) + dw_dα = compute_dw_dα(model, model[:w]) + err_term = X_test * ŵ - y_test + return sum(eachindex(err_term)) do i + return LinearAlgebra.dot(X_test[i, :], dw_dα) * err_term[i] + end / (N * D) +end + +# We can define a meta-optimizer function performing gradient descent +# on the test loss w.r.t. the regularization parameter. + +function descent(α0, max_iters = 100; fixed_step = 0.01, grad_tol = 1e-3) + α_s = Float64[] + ∂α_s = Float64[] + test_loss = Float64[] + α = α0 + N, D = size(X_test) + model = build_fit_ridge(X_train, y_train) + for iter in 1:max_iters + ŵ = optimize_fit_ridge!(model, α) + err_term = X_test * ŵ - y_test + ∂α = d_testloss_dα(model, X_test, y_test, ŵ) + push!(α_s, α) + push!(∂α_s, ∂α) + push!(test_loss, LinearAlgebra.norm(err_term)^2 / (2 * N * D)) + α -= fixed_step * ∂α + if abs(∂α) ≤ grad_tol + break + end + end + return α_s, ∂α_s, test_loss +end + +ᾱ, ∂ᾱ, msē = descent(0.10, 500) +iters = 1:length(ᾱ); + +# Visualize gradient descent and convergence + +Plots.plot( + αs, + mse_test; + label = "MSE test", + xaxis = ("α"), + legend = :topleft, + width = 2, +) +Plots.plot!(ᾱ, msē; label = "learned α", width = 5, style = :dot) +Plots.title!("Regularizer learning") + +# Visualize the convergence of α to its optimal value + +Plots.plot( + iters, + ᾱ; + label = nothing, + color = :blue, + xaxis = ("Iterations"), + legend = :bottom, + title = "Convergence of α", +) + +# Visualize the convergence of the objective function + +Plots.plot( + iters, + msē; + label = nothing, + color = :red, + xaxis = ("Iterations"), + legend = :bottom, + title = "Convergence of MSE", +) + +# Visualize the convergence of the derivative to zero + +Plots.plot( + iters, + ∂ᾱ; + label = nothing, + color = :green, + xaxis = ("Iterations"), + legend = :bottom, + title = "Convergence of ∂α", +) diff --git a/docs/src/examples/chainrules_unit_new.jl b/docs/src/examples/chainrules_unit_new.jl new file mode 100644 index 000000000..5b56d0338 --- /dev/null +++ b/docs/src/examples/chainrules_unit_new.jl @@ -0,0 +1,297 @@ +# # ChainRules integration demo: Relaxed Unit Commitment + +#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/chainrules_unit.jl) + +# In this example, we will demonstrate the integration of DiffOpt with +# [ChainRulesCore.jl](https://juliadiff.org/ChainRulesCore.jl/stable/), +# the library allowing the definition of derivatives for functions +# that can then be used by automatic differentiation systems. + +using JuMP +import DiffOpt +import Plots +import LinearAlgebra: ⋅ +import HiGHS +import ChainRulesCore + +# ## Unit commitment problem + +# We will consider a unit commitment problem, finding the cost-minimizing activation +# of generation units in a power network over multiple time periods. +# The considered constraints include: +# - Demand satisfaction of several loads +# - Ramping constraints +# - Generation limits. + +# The decisions are: +# - ``u_{it} \in \{0,1\}``: activation of the ``i``-th unit at time ``t`` +# - ``p_{it}``: power output of the ``i``-th unit at time ``t``. + +# DiffOpt handles convex optimization problems only, we therefore +# relax the domain of the ``u_{it}`` variables to ``\left[0,1\right]``. + +# ## Primal UC problem + +# ChainRules defines the differentiation of functions. +# The actual function that is differentiated in the context of DiffOpt is the +# solution map taking in input the problem parameters and returning the solution. + +function unit_commitment( + _load1_demand, + _load2_demand, + gen_costs, + noload_costs; + model = Model(HiGHS.Optimizer), + silent = false, +) + MOI.set(model, MOI.Silent(), silent) + + ## Problem data + units = [1, 2] # Generator identifiers + load_names = ["Load1", "Load2"] # Load identifiers + n_periods = 4 # Number of time periods + Pmin = Dict(1 => fill(0.5, n_periods), 2 => fill(0.5, n_periods)) # Minimum power output (pu) + Pmax = Dict(1 => fill(3.0, n_periods), 2 => fill(3.0, n_periods)) # Maximum power output (pu) + RR = Dict(1 => 0.25, 2 => 0.25) # Ramp rates (pu/min) + P0 = Dict(1 => 0.0, 2 => 0.0) # Initial power output (pu) + + ## Parameters + @variable(model, load1_demand[1:n_periods] in Parameter.(_load1_demand)) # Load 1 demand (pu) + @variable(model, load2_demand[1:n_periods] in Parameter.(_load2_demand)) # Load 2 demand (pu) + D = Dict("Load1" => load1_demand, "Load2" => load2_demand) + @variable(model, Cp[1:2] in Parameter.(gen_costs)) # Generation costs ($/pu) + @variable(model, Cnl[1:2] in Parameter.(noload_costs)) # No-load costs ($) + + ## Variables + ## Note: u represents the activation of generation units. + ## Would be binary in the typical UC problem, relaxed here to u ∈ [0,1] + ## for a linear relaxation. + @variable(model, 0 <= u[g in units, t in 1:n_periods] <= 1) # Commitment + @variable(model, p[g in units, t in 1:n_periods] >= 0) # Power output (pu) + + ## Constraints + + ## Energy balance + @constraint( + model, + energy_balance_cons[t in 1:n_periods], + sum(p[g, t] for g in units) == sum(D[l][t] for l in load_names), + ) + + ## Generation limits + @constraint( + model, + [g in units, t in 1:n_periods], + Pmin[g][t] * u[g, t] <= p[g, t] + ) + @constraint( + model, + [g in units, t in 1:n_periods], + p[g, t] <= Pmax[g][t] * u[g, t] + ) + + ## Ramp rates + @constraint( + model, + [g in units, t in 2:n_periods], + p[g, t] - p[g, t-1] <= 60 * RR[g] + ) + @constraint(model, [g in units], p[g, 1] - P0[g] <= 60 * RR[g]) + @constraint( + model, + [g in units, t in 2:n_periods], + p[g, t-1] - p[g, t] <= 60 * RR[g] + ) + @constraint(model, [g in units], P0[g] - p[g, 1] <= 60 * RR[g]) + + ## Objective + @objective( + model, + Min, + sum( + (Cp[g] * p[g, t]) + (Cnl[g] * u[g, t]) for g in units, + t in 1:n_periods + ), + ) + + optimize!(model) + ## asserting finite optimal value + @assert termination_status(model) == MOI.OPTIMAL + ## converting to dense matrix + return JuMP.value.(p.data) +end + +m = Model(HiGHS.Optimizer) +@show unit_commitment( + [1.0, 1.2, 1.4, 1.6], + [1.0, 1.2, 1.4, 1.6], + [1000.0, 1500.0], + [500.0, 1000.0], + model = m, + silent = true, +) + +# ## Perturbation of a single input parameter + +# Let us vary the demand at the second time frame on both loads: + +demand_values = 0.05:0.05:3.0 +pvalues = map(demand_values) do di + return unit_commitment( + [1.0, di, 1.4, 1.6], + [1.0, di, 1.4, 1.6], + [1000.0, 1500.0], + [500.0, 1000.0]; + silent = true, + ) +end +pflat = [getindex.(pvalues, i) for i in eachindex(pvalues[1])]; + +# The influence of this variation of the demand is piecewise linear on the +# generation at different time frames: + +Plots.scatter(demand_values, pflat; xaxis = ("Demand"), yaxis = ("Generation")) +Plots.title!("Different time frames and generators") +Plots.xlims!(0.0, 3.5) + +# ## Forward Differentiation + +# Forward differentiation rule for the solution map of the unit commitment problem. +# It takes as arguments: +# 1. the perturbations on the input parameters +# 2. the differentiated function +# 3. the primal values of the input parameters, + +# and returns a tuple `(primal_output, perturbations)`, the main primal result +# and the perturbation propagated to this result: + +function ChainRulesCore.frule( + (_, Δload1_demand, Δload2_demand, Δgen_costs, Δnoload_costs), + ::typeof(unit_commitment), + load1_demand, + load2_demand, + gen_costs, + noload_costs; + optimizer = HiGHS.Optimizer, +) + ## creating the UC model with a DiffOpt optimizer wrapper around HiGHS + model = DiffOpt.diff_model(optimizer) + ## building and solving the main model + pv = unit_commitment( + load1_demand, + load2_demand, + gen_costs, + noload_costs; + model = model, + ) + ## Setting perturbations in the parameters + DiffOpt.set_forward_parameter.(model, model[:load1_demand], Δload1_demand) + DiffOpt.set_forward_parameter.(model, model[:load2_demand], Δload2_demand) + DiffOpt.set_forward_parameter.(model, model[:Cp], Δgen_costs) + DiffOpt.set_forward_parameter.(model, model[:Cnl], Δnoload_costs) + ## computing the forward differentiation + DiffOpt.forward_differentiate!(model) + ## querying the corresponding perturbation of the decision + Δp = DiffOpt.get_forward_variable.(model, model[:p]) + return (pv, Δp.data) +end + +# We can now compute the perturbation of the output powers `Δpv` +# for a perturbation of the first load demand at time 2: + +load1_demand = [1.0, 1.0, 1.4, 1.6] +load2_demand = [1.0, 1.0, 1.4, 1.6] +gen_costs = [1000.0, 1500.0] +noload_costs = [500.0, 1000.0]; + +# all input perturbations are 0 +# except first load at time 2 +Δload1_demand = 0 * load1_demand +Δload1_demand[2] = 1.0 +Δload2_demand = 0 * load2_demand +Δgen_costs = 0 * gen_costs +Δnoload_costs = 0 * noload_costs +(pv, Δpv) = ChainRulesCore.frule( + (nothing, Δload1_demand, Δload2_demand, Δgen_costs, Δnoload_costs), + unit_commitment, + load1_demand, + load2_demand, + gen_costs, + noload_costs, +) + +Δpv + +# The result matches what we observe in the previous figure: +# the generation of the first generator at the second time frame (third element on the plot). + +# # Reverse-mode differentiation of the solution map + +# The `rrule` returns the primal and a pullback. +# The pullback takes a seed for the optimal solution `̄p` and returns +# derivatives with respect to each input parameter of the function. + +function ChainRulesCore.rrule( + ::typeof(unit_commitment), + load1_demand, + load2_demand, + gen_costs, + noload_costs; + optimizer = HiGHS.Optimizer, + silent = false, +) + model = DiffOpt.diff_model(optimizer) + ## solve the forward UC problem + pv = unit_commitment( + load1_demand, + load2_demand, + gen_costs, + noload_costs; + model = model, + silent = silent, + ) + function pullback_unit_commitment(pb) + ## set sensitivities + DiffOpt.set_reverse_variable.(model, model[:p], pb) + ## compute the gradients + DiffOpt.reverse_differentiate!(model) + ## retrieve the gradients with respect to the parameters + dload1_demand = + DiffOpt.get_reverse_parameter.(model, model[:load1_demand]) + dload2_demand = + DiffOpt.get_reverse_parameter.(model, model[:load2_demand]) + dgen_costs = DiffOpt.get_reverse_parameter.(model, model[:Cp]) + dnoload_costs = DiffOpt.get_reverse_parameter.(model, model[:Cnl]) + return (dload1_demand, dload2_demand, dgen_costs, dnoload_costs) + end + return (pv, pullback_unit_commitment) +end + +# We can set a seed of one on the power of the first generator at the second time frame and zero for all other +# parts of the solution: + +(pv, pullback_unit_commitment) = ChainRulesCore.rrule( + unit_commitment, + load1_demand, + load2_demand, + gen_costs, + noload_costs; + optimizer = HiGHS.Optimizer, + silent = true, +) +dpv = 0 * pv +dpv[1, 2] = 1 +dargs = pullback_unit_commitment(dpv) +(dload1_demand, dload2_demand, dgen_costs, dnoload_costs) = dargs; + +# The sensitivities with respect to the load demands are: +dload1_demand + +# and: +dload2_demand + +# The sensitivity of the generation is propagated to the sensitivity of both +# loads at the second time frame. + +# This example integrating ChainRules was designed with support +# from [Invenia Technical Computing](https://www.invenia.ca/). diff --git a/docs/src/examples/custom-relu_new.jl b/docs/src/examples/custom-relu_new.jl new file mode 100644 index 000000000..160bde4f5 --- /dev/null +++ b/docs/src/examples/custom-relu_new.jl @@ -0,0 +1,128 @@ +# # Custom ReLU layer + +#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/custom-relu.jl) + +# We demonstrate how DiffOpt can be used to generate a simple neural network +# unit - the ReLU layer. A neural network is created using Flux.jl and +# trained on the MNIST dataset. + +# This tutorial uses the following packages + +using JuMP +import DiffOpt +import Ipopt +import ChainRulesCore +import Flux +import MLDatasets +import Statistics +import Base.Iterators: repeated +using LinearAlgebra + +# ## The ReLU and its derivative + +# Define a relu through an optimization problem solved by a quadratic solver. +# Return the solution of the problem. +# TODO: use HiGHS +function matrix_relu( + y_data::Matrix; + model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer), +) + layer_size, batch_size = size(y_data) + empty!(model) + set_silent(model) + @variable(model, x[1:layer_size, 1:batch_size] >= 0) + @variable(model, y[1:layer_size, 1:batch_size] in Parameter.(y_data)) + @objective(model, Min, x[:]'x[:] - 2y[:]'x[:]) + optimize!(model) + return Float32.(value.(x)) +end + +# Define the reverse differentiation rule, for the function we defined above. +function ChainRulesCore.rrule( + ::typeof(matrix_relu), + y_data::Matrix{T}, +) where {T} + model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer) + pv = matrix_relu(y_data; model = model) + function pullback_matrix_relu(dl_dx) + ## some value from the backpropagation (e.g., loss) is denoted by `l` + ## so `dl_dy` is the derivative of `l` wrt `y` + x = model[:x]::Matrix{JuMP.VariableRef} # load decision variable `x` into scope + y = model[:y]::Matrix{JuMP.VariableRef} # load parameter variable `y` into scope + ## set sensitivities (dl/dx) + for i in eachindex(x) + DiffOpt.set_reverse_variable(model, x[i], dl_dx[i]) + end + ## compute grad (dx/dy) + DiffOpt.reverse_differentiate!(model) + ## return gradient (dl/dy = dl/dx * dx/dy) + dl_dy = DiffOpt.get_reverse_parameter.(model, y) + return (ChainRulesCore.NoTangent(), dl_dy) + end + return pv, pullback_matrix_relu +end + +# For more details about backpropagation, visit [Introduction, ChainRulesCore.jl](https://juliadiff.org/ChainRulesCore.jl/dev/). + +# ## Define the network + +layer_size = 10 +m = Flux.Chain( + Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28) + matrix_relu, + Flux.Dense(layer_size, 10), # 10 being the number of outcomes (0 to 9) + Flux.softmax, +) + +# ## Prepare data + +N = 1000 # batch size +## Preprocessing train data +imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:N] +labels = MLDatasets.MNIST(; split = :train).targets[1:N] +train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) # stack images +train_Y = Flux.onehotbatch(labels, 0:9); +## Preprocessing test data +test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:N] +test_labels = MLDatasets.MNIST(; split = :test).targets[1:N]; +test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), N)) +test_Y = Flux.onehotbatch(test_labels, 0:9); + +# Define input data +# The original data is repeated `epochs` times because `Flux.train!` only +# loops through the data set once + +epochs = 2#50 # ~1 minute (i7 8th gen with 16gb RAM) +## epochs = 100 # leads to 77.8% in about 2 minutes +dataset = repeated((train_X, train_Y), epochs); + +# ## Network training + +# training loss function, Flux optimizer +custom_loss(m, x, y) = Flux.crossentropy(m(x), y) +opt = Flux.setup(Flux.Adam(), m) + +# Train to optimize network parameters + +@time Flux.train!(custom_loss, m, dataset, opt); + +# Although our custom implementation takes time, it is able to reach similar +# accuracy as the usual ReLU function implementation. + +# ## Accuracy results + +# Average of correct guesses + +accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y)); + +# Training accuracy + +accuracy(train_X, train_Y) + +# Test accuracy + +accuracy(test_X, test_Y) + +# Note that the accuracy is low due to simplified training. +# It is possible to increase the number of samples `N`, +# the number of epochs `epoch` and the connectivity `inner`. diff --git a/docs/src/examples/matrix-inversion-manual_new.jl b/docs/src/examples/matrix-inversion-manual_new.jl new file mode 100644 index 000000000..9400d42a8 --- /dev/null +++ b/docs/src/examples/matrix-inversion-manual_new.jl @@ -0,0 +1,150 @@ +# # Differentiating a QP wrt a single variable + +#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/matrix-inversion-manual.jl) + +# Consider the quadratic program + +# ```math +# \begin{split} +# \begin{array} {ll} +# \mbox{minimize} & \frac{1}{2} x^T Q x + q^T x \\ +# \mbox{subject to} & G x \leq h, x \in \mathcal{R}^2 \\ +# \end{array} +# \end{split} +# ``` + +# where `Q`, `q`, `G` are fixed and `h` is the single parameter. + +# In this example, we'll try to differentiate the QP wrt `h`, by finding its +# jacobian by hand (using Eqn (6) of [QPTH article](https://arxiv.org/pdf/1703.00443.pdf)) +# and compare the results: +# - Manual compuation +# - Using JuMP and DiffOpt + +# Assuming +# ``` +# Q = [[4, 1], [1, 2]] +# q = [1, 1] +# G = [1, 1] +# ``` +# and begining with a starting value of `h=-1` + +# few values just for reference + +# | variable | optimal value | note | +# |----|------|-----| +# | x* | [-0.25; -0.75] | Primal optimal | +# | 𝜆∗ | -0.75 | Dual optimal | + +# ## Finding Jacobian using matrix inversion +# Lets formulate Eqn (6) of [QPTH article](https://arxiv.org/pdf/1703.00443.pdf) for our QP. If we assume `h` as the only parameter and `Q`,`q`,`G` as fixed problem data - also note that our QP doesn't involves `Ax=b` constraint - then Eqn (6) reduces to +# ```math +# \begin{gather} +# \begin{bmatrix} +# Q & G^T \\ +# \lambda^* G & G x^* - h +# \end{bmatrix} +# \begin{bmatrix} +# dx \\ +# d \lambda +# \end{bmatrix} +# = +# \begin{bmatrix} +# 0 \\ +# \lambda^* dh +# \end{bmatrix} +# \end{gather} +# ``` + +# Now to find the jacobians $$ \frac{\partial x}{\partial h}, \frac{\partial \lambda}{\partial h}$$ +# we substitute `dh = I = [1]` and plug in values of `Q`,`q`,`G` to get +# ```math +# \begin{gather} +# \begin{bmatrix} +# 4 & 1 & 1 \\ +# 1 & 2 & 1 \\ +# -0.75 & -0.75 & 0 +# \end{bmatrix} +# \begin{bmatrix} +# \frac{\partial x_1}{\partial h} \\ +# \frac{\partial x_2}{\partial h} \\ +# \frac{\partial \lambda}{\partial h} +# \end{bmatrix} +# = +# \begin{bmatrix} +# 0 \\ +# 0 \\ +# -0.75 +# \end{bmatrix} +# \end{gather} +# ``` + +# Upon solving using matrix inversion, the jacobian is +# ```math +# \frac{\partial x_1}{\partial h} = 0.25, \frac{\partial x_2}{\partial h} = 0.75, \frac{\partial \lambda}{\partial h} = -1.75 +# ``` + +# ## Finding Jacobian using JuMP and DiffOpt + +using JuMP +import DiffOpt +import Ipopt + +n = 2 # variable dimension +m = 1; # no of inequality constraints + +Q = [4.0 1.0; 1.0 2.0] +q = [1.0; 1.0] +G = [1.0 1.0;] +h = [-1.0;] # initial values set + +# Initialize empty model + +model = DiffOpt.quadratic_diff_model(Ipopt.Optimizer) +set_silent(model) + +# Add the variables + +@variable(model, x[1:2]) + +# Add the parameters + +@variable(model, y[1:length(h)] in Parameter.(h)) + +# Add the constraints. + +@constraint(model, cons[j in 1:1], sum(G[j, i] * x[i] for i in 1:2) <= y[j]); + +@objective( + model, + Min, + 1 / 2 * sum(Q[j, i] * x[i] * x[j] for i in 1:2, j in 1:2) + + sum(q[i] * x[i] for i in 1:2) +) + +# Solve problem + +optimize!(model) + +# primal solution + +value.(x) + +# dual solution + +dual.(cons) + +# set sensitivitity + +DiffOpt.set_forward_parameter.(model, y, 1.0) + +# Compute derivatives + +DiffOpt.forward_differentiate!(model) + +# Query derivative + +dx = DiffOpt.get_forward_variable.(model, x) + +using Test #src +@test dx ≈ [0.25, 0.75] atol = 1e-4 rtol = 1e-4 #src diff --git a/docs/src/examples/nearest_correlation_new.jl b/docs/src/examples/nearest_correlation_new.jl new file mode 100644 index 000000000..d22bdd0e9 --- /dev/null +++ b/docs/src/examples/nearest_correlation_new.jl @@ -0,0 +1,66 @@ +# # Nearest correlation + +#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/nearest_correlation.jl) + +# This example illustrates the sensitivity analysis of the nearest correlation problem studied in [H02]. +# +# Higham, Nicholas J. +# *Computing the nearest correlation matrix—a problem from finance.* +# IMA journal of Numerical Analysis 22.3 (2002): 329-343. + +using DiffOpt, JuMP, SCS, LinearAlgebra +solver = SCS.Optimizer + +function proj(A, dH = Diagonal(ones(size(A, 1))), H_data = ones(size(A))) + n = LinearAlgebra.checksquare(A) + model = Model(() -> DiffOpt.diff_optimizer(solver)) + @variable(model, X[1:n, 1:n] in PSDCone()) + @variable(model, H[1:n, 1:n] in Parameter.(H_data)) + @variable(model, E[1:n, 1:n]) + @constraint(model, [i in 1:n], X[i, i] == 1) + @constraint(model, E .== (H .* (X .- A))) + @objective(model, Min, sum(E .^ 2)) + for i in 1:n + DiffOpt.set_forward_parameter(model, H[i, i], dH[i, i]) + end + optimize!(model) + DiffOpt.forward_differentiate!(model) + dX = DiffOpt.get_forward_variable.(model, X) + return value.(X), dX +end + +# Example from [H02, p. 334-335]: + +A = LinearAlgebra.Tridiagonal(ones(2), ones(3), ones(2)) + +# The projection is computed as follows: + +X, dX = proj(A) +nothing # hide + +# The projection of `A` is: + +X + +# The derivative of the projection with respect to a uniform increase of the weights +# of the diagonal entries is: + +dX + +# Example from [H02, Section 4, p. 340]: + +A = LinearAlgebra.Tridiagonal(-ones(3), 2ones(4), -ones(3)) + +# The projection is computed as follows: + +X, dX = proj(A) +nothing # hide + +# The projection of `A` is: + +X + +# The derivative of the projection with respect to a uniform increase of the weights +# of the diagonal entries is: + +dX diff --git a/docs/src/examples/polyhedral_project_new.jl b/docs/src/examples/polyhedral_project_new.jl new file mode 100644 index 000000000..aa2c9b91a --- /dev/null +++ b/docs/src/examples/polyhedral_project_new.jl @@ -0,0 +1,171 @@ +# # Polyhedral QP layer + +#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/polyhedral_project.jl) + +# We use DiffOpt to define a custom network layer which, given an input matrix `y`, +# computes its projection onto a polytope defined by a fixed number of inequalities: +# `a_i^T x ≥ b_i`. +# A neural network is created using Flux.jl and trained on the MNIST dataset, +# integrating this quadratic optimization layer. +# +# The QP is solved in the forward pass, and its DiffOpt derivative is used in the backward pass expressed with `ChainRulesCore.rrule`. + +# This example is similar to the custom ReLU layer, except that the layer is parameterized +# by the hyperplanes `(w,b)` and not a simple stateless function. +# This also means that `ChainRulesCore.rrule` must return the derivatives of the output with respect to the +# layer parameters to allow for backpropagation. + +using JuMP +import DiffOpt +import Ipopt +import ChainRulesCore +import Flux +import MLDatasets +import Statistics +using Base.Iterators: repeated +using LinearAlgebra +using Random + +Random.seed!(42) + +# ## The Polytope representation and its derivative + +struct Polytope{N} + w::NTuple{N,Vector{Float64}} + b::Vector{Float64} +end + +Polytope(w::NTuple{N}) where {N} = Polytope{N}(w, randn(N)) + +# We define a "call" operation on the polytope, making it a so-called functor. +# Calling the polytope with a matrix `y` operates an Euclidean projection of this matrix onto the polytope. +function (polytope::Polytope{N})( + y_data::AbstractMatrix; + model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer), +) where {N} + layer_size, batch_size = size(y_data) + empty!(model) + set_silent(model) + @variable(model, x[1:layer_size, 1:batch_size]) + @variable(model, y[1:layer_size, 1:batch_size] in Parameter.(y_data)) + @variable(model, b[idx = 1:N] in Parameter.(polytope.b[idx])) + @variable( + model, + w[idx = 1:N, i = 1:layer_size] in Parameter(polytope.w[idx][i]) + ) + @constraint( + model, + greater_than_cons[idx in 1:N, sample in 1:batch_size], + dot(polytope.w[idx], x[:, sample]) ≥ b[idx] + ) + @objective(model, Min, dot(x - y, x - y)) + optimize!(model) + return Float32.(JuMP.value.(x)) +end + +# The `@functor` macro from Flux implements auxiliary functions for collecting the parameters of +# our custom layer and operating backpropagation. +Flux.@functor Polytope + +# Define the reverse differentiation rule, for the function we defined above. +# Flux uses ChainRules primitives to implement reverse-mode differentiation of the whole network. +# To learn the current layer (the polytope the layer contains), +# the gradient is computed with respect to the `Polytope` fields in a ChainRulesCore.Tangent type +# which is used to represent derivatives with respect to structs. +# For more details about backpropagation, visit [Introduction, ChainRulesCore.jl](https://juliadiff.org/ChainRulesCore.jl/dev/). + +function ChainRulesCore.rrule( + polytope::Polytope{N}, + y_data::AbstractMatrix, +) where {N} + model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer) + xv = polytope(y_data; model = model) + function pullback_matrix_projection(dl_dx) + dl_dx = ChainRulesCore.unthunk(dl_dx) + ## `dl_dy` is the derivative of `l` wrt `y` + x = model[:x]::Matrix{JuMP.VariableRef} + y = model[:y]::Matrix{JuMP.VariableRef} + w = model[:w]::Matrix{JuMP.VariableRef} + b = model[:b]::Vector{JuMP.VariableRef} + layer_size, batch_size = size(x) + ## set sensitivities + for i in eachindex(x) + DiffOpt.set_reverse_variable(model, x[i], dl_dx[i]) + end + ## compute grad + DiffOpt.reverse_differentiate!(model) + ## compute gradient wrt objective function parameter y + dl_dy = DiffOpt.get_reverse_parameter.(model, y) + ## compute gradient wrt objective function parameter w and b + _dl_dw = DiffOpt.get_reverse_parameter.(model, w) + dl_dw = zero.(polytope.w) + for idx in 1:N + dl_dw[idx] .= _dl_dw[idx, :] + end + dl_db = DiffOpt.get_reverse_parameter.(model, b) + dself = ChainRulesCore.Tangent{Polytope{N}}(; w = dl_dw, b = dl_db) + return (dself, dl_dy) + end + return xv, pullback_matrix_projection +end + +# ## Define the Network + +layer_size = 20 +m = Flux.Chain( + Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28) + Polytope((randn(layer_size), randn(layer_size), randn(layer_size))), + Flux.Dense(layer_size, 10), # 10 being the number of outcomes (0 to 9) + Flux.softmax, +) + +# ## Prepare data + +M = 500 # batch size +## Preprocessing train data +imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:M] +labels = MLDatasets.MNIST(; split = :train).targets[1:M] +train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) # stack images +train_Y = Flux.onehotbatch(labels, 0:9); +## Preprocessing test data +test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:M] +test_labels = MLDatasets.MNIST(; split = :test).targets[1:M] +test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M)) +test_Y = Flux.onehotbatch(test_labels, 0:9); + +# Define input data +# The original data is repeated `epochs` times because `Flux.train!` only +# loops through the data set once + +epochs = 5 +dataset = repeated((train_X, train_Y), epochs); + +# ## Network training + +# training loss function, Flux optimizer +custom_loss(m, x, y) = Flux.crossentropy(m(x), y) +opt = Flux.setup(Flux.Adam(), m) + +# Train to optimize network parameters + +@time Flux.train!(custom_loss, m, dataset, opt); + +# Although our custom implementation takes time, it is able to reach similar +# accuracy as the usual ReLU function implementation. + +# ## Accuracy results + +# Average of correct guesses +accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y)); + +# Training accuracy + +accuracy(train_X, train_Y) + +# Test accuracy + +accuracy(test_X, test_Y) + +# Note that the accuracy is low due to simplified training. +# It is possible to increase the number of samples `N`, +# the number of epochs `epoch` and the connectivity `inner`. diff --git a/docs/src/examples/sensitivity-analysis-ridge_new.jl b/docs/src/examples/sensitivity-analysis-ridge_new.jl new file mode 100644 index 000000000..af91512f2 --- /dev/null +++ b/docs/src/examples/sensitivity-analysis-ridge_new.jl @@ -0,0 +1,171 @@ +# # Sensitivity Analysis of Ridge Regression + +#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/sensitivity-analysis-ridge.jl) + +# This example illustrates the sensitivity analysis of data points in a +# [Ridge Regression](https://en.wikipedia.org/wiki/Ridge_regression) problem. +# The general form of the problem is given below: + +# ```math +# \begin{split} +# \begin{array} {ll} +# \mbox{minimize} & \sum_{i=1}^{N} (y_{i} - w x_{i} - b)^2 + \alpha (w^2 + b^2) \\ +# \end{array} +# \end{split} +# ``` +# where +# - `w`, `b` are slope and intercept of the regressing line +# - `x`, `y` are the N data points +# - `α` is the regularization constant +# +# which is equivalent to: +# ```math +# \begin{split} +# \begin{array} {ll} +# \mbox{minimize} & e^{\top}e + \alpha (w^2) \\ +# \mbox{s.t.} & e_{i} = y_{i} - w x_{i} - b \quad \quad i=1..N \\ +# \end{array} +# \end{split} +# ``` + +# This tutorial uses the following packages + +using JuMP +import DiffOpt +import Random +import Ipopt +import Plots +using LinearAlgebra: dot + +# ## Define and solve the problem + +# Construct a set of noisy (guassian) data points around a line. + +Random.seed!(42) + +N = 150 + +w_orig = 2 * abs(randn()) +b = rand() +X = randn(N) +Y = w_orig * X .+ b + 0.8 * randn(N); + +# The helper method `fit_ridge` defines and solves the corresponding model. +# The ridge regression is modeled with quadratic programming +# (quadratic objective and linear constraints) and solved in generic methods +# of Ipopt. This is not the standard way of solving the ridge regression problem +# this is done here for didactic purposes. + +function build_fit_ridge(X_data, Y_data, alpha = 0.1) + N = length(Y_data) + ## Initialize a JuMP Model with Ipopt solver + ## model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) # TODO: this is not auto detecting scalar nonlinear function (alpha * w * w) + model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer) + set_silent(model) + @variable(model, w) # angular coefficient + @variable(model, b) # linear coefficient + @variable(model, α in Parameter(alpha)) # regularization parameter + @variable(model, X[1:N] in Parameter.(X_data)) + @variable(model, Y[1:N] in Parameter.(Y_data)) + ## expression defining approximation error + @expression(model, e[i = 1:N], Y[i] - w * X[i] - b) + ## objective minimizing squared error and ridge penalty + @objective(model, Min, 1 / N * dot(e, e) + α * (w^2)) + optimize!(model) + return model +end + +function optimize_fit_ridge!(model, alpha) + set_parameter_value(model[:α], alpha) + optimize!(model) + return model[:w], model[:b] +end + +# Plot the data points and the fitted line for different alpha values + +p = Plots.scatter(X, Y; label = nothing, legend = :topleft) +mi, ma = minimum(X), maximum(X) +Plots.title!("Fitted lines and points") + +model = build_fit_ridge(X, Y) +for alpha in 0.5:0.5:1.5 + local w, b = optimize_fit_ridge!(model, alpha) + ŵ = value(w) + b̂ = value(b) + Plots.plot!( + p, + [mi, ma], + [mi * ŵ + b̂, ma * ŵ + b̂]; + label = "alpha=$alpha", + width = 2, + ) +end +p + +# ## Differentiate + +# Now that we've solved the problem, we can compute the sensitivity of optimal +# values of the slope `w` with +# respect to perturbations in the data points (`x`,`y`). + +alpha = 0.4 +w, b = optimize_fit_ridge!(model, alpha) +ŵ = value(w) +b̂ = value(b) + +# Sensitivity with respect to x and y + +∇y = zero(X) +∇x = zero(X) +for i in 1:N + ## X[i] sensitivity + DiffOpt.empty_input_sensitivities!(model) + DiffOpt.set_forward_parameter(model, model[:X][i], 1.0) + DiffOpt.forward_differentiate!(model) + ∇x[i] = DiffOpt.get_forward_variable(model, w) + ## Y[i] sensitivity + DiffOpt.empty_input_sensitivities!(model) + DiffOpt.set_forward_parameter(model, model[:Y][i], 1.0) + DiffOpt.forward_differentiate!(model) + ∇y[i] = DiffOpt.get_forward_variable(model, w) +end + +# Visualize point sensitivities with respect to regression points. + +p = Plots.scatter( + X, + Y; + color = [dw < 0 ? :blue : :red for dw in ∇x], + markersize = [5 * abs(dw) + 1.2 for dw in ∇x], + label = "", +) +mi, ma = minimum(X), maximum(X) +Plots.plot!( + p, + [mi, ma], + [mi * ŵ + b̂, ma * ŵ + b̂]; + color = :blue, + label = "", +) +Plots.title!("Regression slope sensitivity with respect to x") + +# + +p = Plots.scatter( + X, + Y; + color = [dw < 0 ? :blue : :red for dw in ∇y], + markersize = [5 * abs(dw) + 1.2 for dw in ∇y], + label = "", +) +mi, ma = minimum(X), maximum(X) +Plots.plot!( + p, + [mi, ma], + [mi * ŵ + b̂, ma * ŵ + b̂]; + color = :blue, + label = "", +) +Plots.title!("Regression slope sensitivity with respect to y") + +# Note the points with less central `x` values induce a greater y sensitivity of the slope. diff --git a/docs/src/examples/sensitivity-analysis-svm_new.jl b/docs/src/examples/sensitivity-analysis-svm_new.jl new file mode 100644 index 000000000..a61f75cf5 --- /dev/null +++ b/docs/src/examples/sensitivity-analysis-svm_new.jl @@ -0,0 +1,144 @@ +# # Sensitivity Analysis of SVM + +#md # [![](https://img.shields.io/badge/show-github-579ACA.svg)](@__REPO_ROOT_URL__/docs/src/examples/sensitivity-analysis-svm.jl) + +# This notebook illustrates sensitivity analysis of data points in a [Support Vector Machine](https://en.wikipedia.org/wiki/Support-vector_machine) (inspired from [@matbesancon](http://github.com/matbesancon)'s [SimpleSVMs](http://github.com/matbesancon/SimpleSVMs.jl).) + +# For reference, Section 10.1 of https://online.stat.psu.edu/stat508/book/export/html/792 gives an intuitive explanation of what it means to have a sensitive hyperplane or data point. The general form of the SVM training problem is given below (with $\ell_2$ regularization): + +# ```math +# \begin{split} +# \begin{array} {ll} +# \mbox{minimize} & \lambda||w||^2 + \sum_{i=1}^{N} \xi_{i} \\ +# \mbox{s.t.} & \xi_{i} \ge 0 \quad \quad i=1..N \\ +# & y_{i} (w^T X_{i} + b) \ge 1 - \xi_{i} \quad i=1..N \\ +# \end{array} +# \end{split} +# ``` +# where +# - `X`, `y` are the `N` data points +# - `w` is the support vector +# - `b` determines the offset `b/||w||` of the hyperplane with normal `w` +# - `ξ` is the soft-margin loss +# - `λ` is the $\ell_2$ regularization. +# +# This tutorial uses the following packages + +using JuMP # The mathematical programming modelling language +import DiffOpt # JuMP extension for differentiable optimization +import Ipopt # Optimization solver that handles quadratic programs +import LinearAlgebra +import Plots +import Random + +# ## Define and solve the SVM + +# Construct two clusters of data points. + +N = 100 +D = 2 + +Random.seed!(62) +X_data = vcat(randn(N ÷ 2, D), randn(N ÷ 2, D) .+ [2.0, 2.0]') +y = append!(ones(N ÷ 2), -ones(N ÷ 2)) +λ = 0.05; + +# Let's initialize a special model that can understand sensitivities + +model = DiffOpt.quadratic_diff_model(Ipopt.Optimizer) +set_silent(model) + +# Add the variables + +@variable(model, ξ[1:N] >= 0) +@variable(model, w[1:D]) +@variable(model, b); + +# Add the parameters to be differentiated + +@variable(model, X[1:N, 1:D] in Parameter.(X_data)) + +# Add the constraints + +@constraint( + model, + con[i in 1:N], + y[i] * (LinearAlgebra.dot(X[i, :], w) + b) >= 1 - ξ[i] +); + +# Define the objective and solve + +@objective(model, Min, λ * LinearAlgebra.dot(w, w) + sum(ξ)) + +optimize!(model) + +# We can visualize the separating hyperplane. + +loss = objective_value(model) + +wv = value.(w) + +bv = value(b) + +svm_x = [-2.0, 4.0] # arbitrary points +svm_y = (-bv .- wv[1] * svm_x) / wv[2] + +p = Plots.scatter( + X_data[:, 1], + X_data[:, 2]; + color = [yi > 0 ? :red : :blue for yi in y], + label = "", +) +Plots.plot!( + p, + svm_x, + svm_y; + label = "loss = $(round(loss, digits=2))", + width = 3, +) + +# ## Gradient of hyperplane wrt the data point coordinates + +# Now that we've solved the SVM, we can compute the sensitivity of optimal +# values -- the separating hyperplane in our case -- with respect to +# perturbations of the problem data -- the data points -- using DiffOpt. + +# How does a change in coordinates of the data points, `X`, +# affects the position of the hyperplane? +# This is achieved by finding gradients of `w` and `b` with respect to `X[i]`. + +# Begin differentiating the model. +# analogous to varying θ in the expression: +# ```math +# y_{i} (w^T (X_{i} + \theta) + b) \ge 1 - \xi_{i} +# ``` +∇ = zeros(N) +for i in 1:N + for j in 1:N + if i == j + ## we consider identical perturbations on all x_i coordinates + DiffOpt.set_forward_parameter.(model, X[i, :], 1.0) + else + DiffOpt.set_forward_parameter.(model, X[i, :], 0.0) + end + end + DiffOpt.forward_differentiate!(model) + dw = DiffOpt.get_forward_variable.(model, w) + db = DiffOpt.get_forward_variable.(model, b) + ∇[i] = LinearAlgebra.norm(dw) + LinearAlgebra.norm(db) +end + +# We can visualize the separating hyperplane sensitivity with respect to the data points. +# Note that all the small numbers were converted into 1/10 of the +# largest value to show all the points of the set. + +p3 = Plots.scatter( + X_data[:, 1], + X_data[:, 2]; + color = [yi > 0 ? :red : :blue for yi in y], + label = "", + markersize = 2 * (max.(1.8∇, 0.2 * maximum(∇))), +) +Plots.yaxis!(p3, (-2, 4.5)) +Plots.plot!(p3, svm_x, svm_y; label = "", width = 3) +Plots.title!("Sensitivity of the separator to data point variations") diff --git a/src/ConicProgram/ConicProgram.jl b/src/ConicProgram/ConicProgram.jl index 89a791831..ab9eb20bd 100644 --- a/src/ConicProgram/ConicProgram.jl +++ b/src/ConicProgram/ConicProgram.jl @@ -50,6 +50,8 @@ const Form{T} = MOI.Utilities.GenericModel{ DiffOpt.ProductOfSets{T}, }, } + +# should the be applied on Model? function MOI.supports( ::Form{T}, ::MOI.ObjectiveFunction{F}, @@ -91,7 +93,6 @@ mutable struct Model <: DiffOpt.AbstractModel input_cache::DiffOpt.InputCache x::Vector{Float64} # Primal - s::Vector{Float64} # Slack y::Vector{Float64} # Dual diff_time::Float64 end @@ -105,7 +106,6 @@ function Model() DiffOpt.InputCache(), Float64[], Float64[], - Float64[], NaN, ) end @@ -121,7 +121,6 @@ function MOI.empty!(model::Model) model.back_grad_cache = nothing empty!(model.input_cache) empty!(model.x) - empty!(model.s) empty!(model.y) model.diff_time = NaN return @@ -141,6 +140,14 @@ function MOI.supports_constraint( return MOI.supports_constraint(model.model, F, S) end +function MOI.supports_constraint( + ::Model, + ::Type{MOI.VectorAffineFunction{T}}, + ::Type{MOI.PositiveSemidefiniteConeSquare}, +) where {T} + return false +end + function MOI.set( model::Model, ::MOI.ConstraintPrimalStart, @@ -148,11 +155,7 @@ function MOI.set( value, ) MOI.throw_if_not_valid(model, ci) - return DiffOpt._enlarge_set( - model.s, - MOI.Utilities.rows(model.model.constraints, ci), - value, - ) + return end function MOI.set( @@ -189,12 +192,6 @@ function _gradient_cache(model::Model) ) end - if any(isnan, model.s) || length(model.s) < length(b) - error( - "Some constraints are missing a value for the `ConstraintPrimalStart` attribute.", - ) - end - if MOI.get(model, MOI.ObjectiveSense()) == MOI.FEASIBILITY_SENSE c = SparseArrays.spzeros(size(A, 2)) else @@ -216,10 +213,12 @@ function _gradient_cache(model::Model) m = A.m n = A.n N = m + n + 1 + + slack = b - A * model.x # NOTE: w = 1.0 systematically since we asserted the primal-dual pair is optimal # `inv(M)((x, y, 1), (0, s, 0)) = (x, y, 1) - (0, s, 0)`, # see Minty parametrization in https://stanford.edu/~boyd/papers/pdf/cone_prog_refine.pdf - (u, v, w) = (model.x, model.y - model.s, 1.0) + (u, v, w) = (model.x, model.y - slack, 1.0) # find gradient of projections on dual of the cones Dπv = DiffOpt.Dπ(v, model.model, model.model.constraints.sets) @@ -260,12 +259,12 @@ function DiffOpt.forward_differentiate!(model::Model) M = gradient_cache.M vp = gradient_cache.vp Dπv = gradient_cache.Dπv - x = model.x - y = model.y - s = model.s A = gradient_cache.A b = gradient_cache.b c = gradient_cache.c + x = model.x + y = model.y + slack = b - A * x objective_function = DiffOpt._convert( MOI.ScalarAffineFunction{Float64}, @@ -302,13 +301,14 @@ function DiffOpt.forward_differentiate!(model::Model) dAj, dAv, ) + dAv .*= -1.0 dA = SparseArrays.sparse(dAi, dAj, dAv, lines, cols) m = size(A, 1) n = size(A, 2) N = m + n + 1 # NOTE: w = 1 systematically since we asserted the primal-dual pair is optimal - (u, v, w) = (x, y - s, 1.0) + (u, v, w) = (x, y - slack, 1.0) # g = dQ * Π(z/|w|) = dQ * [u, vp, 1.0] RHS = [ @@ -339,12 +339,12 @@ function DiffOpt.reverse_differentiate!(model::Model) M = gradient_cache.M vp = gradient_cache.vp Dπv = gradient_cache.Dπv - x = model.x - y = model.y - s = model.s A = gradient_cache.A b = gradient_cache.b c = gradient_cache.c + x = model.x + y = model.y + slack = b - A * x dx = zeros(length(c)) for (vi, value) in model.input_cache.dx @@ -357,13 +357,13 @@ function DiffOpt.reverse_differentiate!(model::Model) n = size(A, 2) N = m + n + 1 # NOTE: w = 1 systematically since we asserted the primal-dual pair is optimal - (u, v, w) = (x, y - s, 1.0) + (u, v, w) = (x, y - slack, 1.0) # dz = D \phi (z)^T (dx,dy,dz) dz = [ dx Dπv' * (dy + ds) - ds - -x' * dx - y' * dy - s' * ds + -x' * dx - y' * dy - slack' * ds ] g = if LinearAlgebra.norm(dz) <= 1e-4 # TODO: parametrize or remove diff --git a/src/DiffOpt.jl b/src/DiffOpt.jl index 0dce78d96..27f16d5ea 100644 --- a/src/DiffOpt.jl +++ b/src/DiffOpt.jl @@ -48,6 +48,8 @@ function add_default_factorization(model) return end +include("jump_wrapper.jl") + export diff_optimizer # TODO diff --git a/src/NonLinearProgram/NonLinearProgram.jl b/src/NonLinearProgram/NonLinearProgram.jl index 6b816b4fc..843d576fc 100644 --- a/src/NonLinearProgram/NonLinearProgram.jl +++ b/src/NonLinearProgram/NonLinearProgram.jl @@ -113,14 +113,19 @@ function MOI.supports_constraint( S<:Union{ MOI.GreaterThan{Float64}, MOI.LessThan{Float64}, - # MOI.Interval{Float64}, MOI.EqualTo{Float64}, - MOI.Parameter{Float64}, }, } return true end +function MOI.supports_add_constrained_variable( + ::Form, + ::Type{MOI.Parameter{T}}, +) where {T} + return true +end + function _add_leq_geq( form::Form, idx::MOI.ConstraintIndex, @@ -189,7 +194,7 @@ function MOI.add_constraint( form.num_constraints += 1 p = MOI.Nonlinear.add_parameter(form.model, set.value) form.var2param[idx] = p - idx_ci = MOI.ConstraintIndex{F,S}(form.num_constraints) + idx_ci = MOI.ConstraintIndex{F,S}(idx.value) form.var2ci[idx] = idx_ci return idx_ci end @@ -294,6 +299,13 @@ function Model() ) end +function MOI.supports_add_constrained_variable( + ::Model, + ::Type{MOI.Parameter{T}}, +) where {T} + return true +end + _objective_sense(form::Form) = form.sense _objective_sense(model::Model) = _objective_sense(model.model) @@ -507,8 +519,8 @@ function DiffOpt.forward_differentiate!(model::Model; tol = 1e-6) Δp = zeros(length(cache.params)) for (i, var_idx) in enumerate(cache.params) ky = form.var2ci[var_idx] - if haskey(model.input_cache.dp, ky) # only for set sensitivities - Δp[i] = model.input_cache.dp[ky] + if haskey(model.input_cache.parameter_constraints, ky) # only for set sensitivities + Δp[i] = model.input_cache.parameter_constraints[ky] end end @@ -595,6 +607,7 @@ function MOI.get( ci::MOI.ConstraintIndex, ) try + # TODO check ci.value's idx = model.cache.dual_mapping[ci.value] return model.forw_grad_cache.dual_Δs[idx] catch @@ -607,7 +620,10 @@ function MOI.get( ::DiffOpt.ReverseConstraintSet, ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}, ) where {T} - return MOI.Parameter{T}(model.back_grad_cache.Δp[ci.value]) + form = model.model + var_idx = MOI.VariableIndex(ci.value) + p_idx = form.var2param[var_idx].value + return MOI.Parameter{T}(model.back_grad_cache.Δp[p_idx]) end end # module NonLinearProgram diff --git a/src/copy_dual.jl b/src/copy_dual.jl index a7c24d23b..dbeca2a19 100644 --- a/src/copy_dual.jl +++ b/src/copy_dual.jl @@ -87,6 +87,9 @@ function _copy_dual(dest::MOI.ModelLike, src::MOI.ModelLike, index_map) MOI.get(src, MOI.VariablePrimal(), vis_src), ) for (F, S) in MOI.get(dest, MOI.ListOfConstraintTypesPresent()) + if F <: MOI.VariableIndex && S <: MOI.Parameter + continue + end _copy_constraint_start( dest, src, diff --git a/src/diff_opt.jl b/src/diff_opt.jl index b850e72aa..3bb4de79d 100644 --- a/src/diff_opt.jl +++ b/src/diff_opt.jl @@ -13,7 +13,6 @@ const MOIDD = MOI.Utilities.DoubleDicts Base.@kwdef mutable struct InputCache dx::Dict{MOI.VariableIndex,Float64} = Dict{MOI.VariableIndex,Float64}()# dz for QP - dp::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}() # Specifically for NonLinearProgram dy::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}() # Dual sensitivity currently only works for NonLinearProgram # ds @@ -23,6 +22,8 @@ Base.@kwdef mutable struct InputCache # concrete value types. # `scalar_constraints` and `vector_constraints` includes `A` and `b` for CPs # or `G` and `h` for QPs + parameter_constraints::Dict{MOI.ConstraintIndex,Float64} = + Dict{MOI.ConstraintIndex,Float64}() # Specifically for NonLinearProgram scalar_constraints::MOIDD.DoubleDict{MOI.ScalarAffineFunction{Float64}} = MOIDD.DoubleDict{MOI.ScalarAffineFunction{Float64}}() # also includes G for QPs vector_constraints::MOIDD.DoubleDict{MOI.VectorAffineFunction{Float64}} = @@ -33,8 +34,8 @@ end function Base.empty!(cache::InputCache) empty!(cache.dx) - empty!(cache.dp) empty!(cache.dy) + empty!(cache.parameter_constraints) empty!(cache.scalar_constraints) empty!(cache.vector_constraints) cache.objective = nothing @@ -136,6 +137,16 @@ ConstraintFunction so we consider the expression `θ * (x + 2y - 5)`. """ struct ForwardConstraintFunction <: MOI.AbstractConstraintAttribute end +""" + ForwardConstraintSet <: MOI.AbstractConstraintAttribute + +A `MOI.AbstractConstraintAttribute` to set input data to forward differentiation, that +is, problem input data. + +Currently, this only works for the set `MOI.Parameter`. +""" +struct ForwardConstraintSet <: MOI.AbstractConstraintAttribute end + """ ForwardVariablePrimal <: MOI.AbstractVariableAttribute @@ -167,10 +178,6 @@ MOI.set(model, DiffOpt.ReverseVariablePrimal(), x) """ struct ReverseVariablePrimal <: MOI.AbstractVariableAttribute end -struct ForwardConstraintSet <: MOI.AbstractConstraintAttribute end - -struct ReverseConstraintSet <: MOI.AbstractConstraintAttribute end - """ ReverseConstraintDual <: MOI.AbstractConstraintAttribute @@ -253,6 +260,18 @@ struct ReverseConstraintFunction <: MOI.AbstractConstraintAttribute end MOI.is_set_by_optimize(::ReverseConstraintFunction) = true +""" + ReverseConstraintSet + +An `MOI.AbstractConstraintAttribute` to get output data to reverse differentiation, that +is, problem input data. + +Currently, this only works for the set `MOI.Parameter`. +""" +struct ReverseConstraintSet <: MOI.AbstractConstraintAttribute end + +MOI.is_set_by_optimize(::ReverseConstraintSet) = true + """ DifferentiateTimeSec() @@ -273,6 +292,11 @@ Model supporting [`forward_differentiate!`](@ref) and """ abstract type AbstractModel <: MOI.ModelLike end +function empty_input_sensitivities!(model::AbstractModel) + empty!(model.input_cache) + return +end + MOI.supports_incremental_interface(::AbstractModel) = true function MOI.is_valid(model::AbstractModel, idx::MOI.Index) @@ -287,6 +311,8 @@ function MOI.add_variables(model::AbstractModel, n) return MOI.add_variables(model.model, n) end +# TODO: add support for add_constrained_variable(s) and supports_ + function MOI.Utilities.pass_nonvariable_constraints( dest::AbstractModel, src::MOI.ModelLike, @@ -409,6 +435,11 @@ function MOI.set( ci::MOI.ConstraintIndex{MOI.ScalarAffineFunction{T},S}, func::MOI.ScalarAffineFunction{T}, ) where {T,S} + if MOI.supports_add_constrained_variable(model.model, MOI.Parameter{T}) + error( + "The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.", + ) + end model.input_cache.scalar_constraints[ci] = func return end @@ -419,10 +450,30 @@ function MOI.set( ci::MOI.ConstraintIndex{MOI.VectorAffineFunction{T},S}, func::MOI.VectorAffineFunction{T}, ) where {T,S} + if MOI.supports_add_constrained_variable(model.model, MOI.Parameter{T}) + error( + "The model with type $(typeof(model)) does support Parameters, so setting ForwardConstraintFunction fails.", + ) + end model.input_cache.vector_constraints[ci] = func return end +function MOI.set( + model::AbstractModel, + ::ForwardConstraintSet, + ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}, + set::MOI.Parameter{T}, +) where {T} + if !MOI.supports_add_constrained_variable(model.model, MOI.Parameter{T}) + error( + "The model with type $(typeof(model)) does not support Parameters", + ) + end + model.input_cache.parameter_constraints[ci] = set.value + return +end + function lazy_combination(op::F, α, a, β, b) where {F<:Function} return LazyArrays.ApplyArray( op, @@ -655,6 +706,26 @@ function _push_term( return _push_term(I, J, V, neg, r[term.output_index], term.scalar_term) end +function MOI.supports(::AbstractModel, ::MOI.Name) + return false +end + +function MOI.supports( + ::AbstractModel, + ::MOI.VariableName, + ::Type{MOI.VariableIndex}, +) + return false +end + +function MOI.supports( + ::AbstractModel, + ::MOI.ConstraintName, + ::Type{MOI.ConstraintIndex{F,S}}, +) where {F,S} + return false +end + function MOI.supports(model::AbstractModel, attr::MOI.AbstractModelAttribute) return MOI.supports(model.model, attr) end diff --git a/src/jump_wrapper.jl b/src/jump_wrapper.jl new file mode 100644 index 000000000..ce26c4a5c --- /dev/null +++ b/src/jump_wrapper.jl @@ -0,0 +1,145 @@ +""" + diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache_type = Float64, with_outer_cache = true) + +Create a JuMP model with a differentiable optimizer. The optimizer is created +using `optimizer_constructor`. This model will try to select the proper +differentiable optimization method based on the problem structure. + +See also: [`nonlinear_diff_model`](@ref), [`conic_diff_model`](@ref), [`quadratic_diff_model`](@ref). +""" +function diff_model( + optimizer_constructor; + with_bridge_type = Float64, + with_cache_type = Float64, + with_outer_cache = true, +) + inner = diff_optimizer( + optimizer_constructor; + with_bridge_type, + with_cache_type, + with_outer_cache, + ) + return JuMP.direct_model(inner) +end + +""" + nonlinear_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache_type = Float64, with_outer_cache = true) + +Create a JuMP model with a differentiable optimizer for nonlinear programs. +The optimizer is created using `optimizer_constructor`. + +See also: [`conic_diff_model`](@ref), [`quadratic_diff_model`](@ref), [`diff_model`](@ref). +""" +function nonlinear_diff_model( + optimizer_constructor; + with_parametric_opt_interface = false, + with_bridge_type = Float64, + with_cache_type = Float64, + with_outer_cache = true, +) + inner = diff_optimizer( + optimizer_constructor; + with_bridge_type, + with_cache_type, + with_outer_cache, + ) + MOI.set(inner, ModelConstructor(), NonLinearProgram.Model) + return JuMP.direct_model(inner) +end + +""" + conic_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache_type = Float64, with_outer_cache = true) + +Create a JuMP model with a differentiable optimizer for conic programs. +The optimizer is created using `optimizer_constructor`. + +See also: [`nonlinear_diff_model`](@ref), [`quadratic_diff_model`](@ref), [`diff_model`](@ref). +""" +function conic_diff_model( + optimizer_constructor; + with_bridge_type = Float64, + with_cache_type = Float64, + with_outer_cache = true, +) + inner = diff_optimizer( + optimizer_constructor; + with_bridge_type, + with_cache_type, + with_outer_cache, + ) + MOI.set(inner, ModelConstructor(), ConicProgram.Model) + return JuMP.direct_model(inner) +end + +""" + quadratic_diff_model(optimizer_constructor; with_bridge_type = Float64, with_cache_type = Float64, with_outer_cache = true) + +Create a JuMP model with a differentiable optimizer for quadratic programs. +The optimizer is created using `optimizer_constructor`. + +See also: [`nonlinear_diff_model`](@ref), [`conic_diff_model`](@ref), [`diff_model`](@ref). +""" +function quadratic_diff_model( + optimizer_constructor; + with_bridge_type = Float64, + with_cache_type = Float64, + with_outer_cache = true, +) + inner = diff_optimizer( + optimizer_constructor; + with_bridge_type, + with_cache_type, + with_outer_cache, + ) + MOI.set(inner, ModelConstructor(), QuadraticProgram.Model) + return JuMP.direct_model(inner) +end + +""" + set_forward_parameter(model::JuMP.Model, variable::JuMP.VariableRef, value::Number) + +Set the value of a parameter input sensitivity for forward mode. +""" +function set_forward_parameter( + model::JuMP.Model, + variable::JuMP.VariableRef, + value::Number, +) + return MOI.set( + model, + ForwardConstraintSet(), + ParameterRef(variable), + Parameter(value), + ) +end + +""" + get_reverse_parameter(model::JuMP.Model, variable::JuMP.VariableRef) + +Get the value of a parameter output sensitivity for reverse mode. +""" +function get_reverse_parameter(model::JuMP.Model, variable::JuMP.VariableRef) + return MOI.get(model, ReverseConstraintSet(), ParameterRef(variable)).value +end + +""" + set_reverse_variable(model::JuMP.Model, variable::JuMP.VariableRef, value::Number) + +Set the value of a variable input sensitivity for reverse mode. +""" +function set_reverse_variable( + model::JuMP.Model, + variable::JuMP.VariableRef, + value::Number, +) + return MOI.set(model, ReverseVariablePrimal(), variable, value) +end + +""" + get_forward_variable(model::JuMP.Model, variable::JuMP.VariableRef) + +Get the value of a variable output sensitivity for forward mode. +""" +function get_forward_variable(model::JuMP.Model, variable::JuMP.VariableRef) + return MOI.get(model, ForwardVariablePrimal(), variable) +end diff --git a/src/moi_wrapper.jl b/src/moi_wrapper.jl index 5c7c7be86..d6cdca517 100644 --- a/src/moi_wrapper.jl +++ b/src/moi_wrapper.jl @@ -24,44 +24,60 @@ julia> model.add_constraint(model, ...) """ function diff_optimizer( optimizer_constructor; - with_parametric_opt_interface::Bool = false, with_bridge_type = Float64, - with_cache::Bool = true, + with_cache_type = Float64, + with_outer_cache = true, + allow_parametric_opt_interface = true, ) - optimizer = MOI.instantiate(optimizer_constructor; with_bridge_type) + optimizer = MOI.instantiate( + optimizer_constructor; + with_bridge_type, + with_cache_type, + ) + add_poi = + allow_parametric_opt_interface && + !MOI.supports_add_constrained_variable( + optimizer.model, + MOI.Parameter{Float64}, + ) # When we do `MOI.copy_to(diff, optimizer)` we need to efficiently `MOI.get` # the model information from `optimizer`. However, 1) `optimizer` may not # implement some getters or it may be inefficient and 2) the getters may be # unimplemented or inefficient through some bridges. # For this reason we add a cache layer, the same cache JuMP adds. - caching_opt = if with_cache + caching_opt = if with_outer_cache MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback( MOI.Utilities.Model{with_bridge_type}(), ), - optimizer, + add_poi ? POI.Optimizer(optimizer) : optimizer, ) else - optimizer - end - if with_parametric_opt_interface - return POI.Optimizer(Optimizer(caching_opt)) - else - return Optimizer(caching_opt) + add_poi ? POI.Optimizer(optimizer) : optimizer end + return Optimizer(caching_opt) end mutable struct Optimizer{OT<:MOI.ModelLike} <: MOI.AbstractOptimizer + # main optimizer responsible for caching the optimization problem data + # and for solving the optimization problem optimizer::OT + # list of differentiation backends for automatic differentiation + # without mode selection model_constructors::Vector{Any} + # used to select a single differentiation backend + # if not `nothing`, it is used to select the model constructor + # and the above is ignored model_constructor::Any + # instantiated differentiation backend from the options above diff::Any + # mapping between the `optimizer` and the `diff` models index_map::Union{Nothing,MOI.Utilities.IndexMap} - # sensitivity input cache using MOI like sparse format + # sensitivity input cache using MOI-like sparse format input_cache::InputCache function Optimizer(optimizer::OT) where {OT<:MOI.ModelLike} @@ -560,6 +576,25 @@ function forward_differentiate!(model::Optimizer) NonLinearKKTJacobianFactorization(), model.input_cache.factorization, ) + T = Float64 + list = MOI.get( + model, + MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{T}}(), + ) + parametric_diff = !isempty(list) + if parametric_diff # MOI.supports_constraint(model, MOI.VariableIndex, MOI.Parameter{T}) + # @show "param mode" + for (vi, value) in model.input_cache.parameter_constraints + MOI.set( + diff, + ForwardConstraintSet(), + model.index_map[vi], + MOI.Parameter(value), + ) + end + return forward_differentiate!(diff) + end + # @show "func mode" if model.input_cache.objective !== nothing MOI.set( diff, @@ -570,7 +605,9 @@ function forward_differentiate!(model::Optimizer) ), ) end - for (F, S) in keys(model.input_cache.scalar_constraints.dict) + for (F, S) in MOI.Utilities.DoubleDicts.nonempty_outer_keys( + model.input_cache.scalar_constraints, + ) _copy_forward_in_constraint( diff, model.index_map, @@ -578,7 +615,9 @@ function forward_differentiate!(model::Optimizer) model.input_cache.scalar_constraints[F, S], ) end - for (F, S) in keys(model.input_cache.vector_constraints.dict) + for (F, S) in MOI.Utilities.DoubleDicts.nonempty_outer_keys( + model.input_cache.vector_constraints, + ) _copy_forward_in_constraint( diff, model.index_map, @@ -586,24 +625,19 @@ function forward_differentiate!(model::Optimizer) model.input_cache.vector_constraints[F, S], ) end - if model.input_cache.dp !== nothing - for (vi, value) in model.input_cache.dp - diff.model.input_cache.dp[model.index_map[vi]] = value - end - end return forward_differentiate!(diff) end function empty_input_sensitivities!(model::Optimizer) empty!(model.input_cache) if model.diff !== nothing - empty!(model.diff.model.input_cache) + empty_input_sensitivities!(model.diff) end return end -function _instantiate_with_bridges(model_constructor) - model = MOI.Bridges.LazyBridgeOptimizer(MOI.instantiate(model_constructor)) +function _add_bridges(instantiated_model) + model = MOI.Bridges.LazyBridgeOptimizer(instantiated_model) # We don't add any variable bridge here because: # 1) If `ZerosBridge` is used, `MOI.Bridges.unbridged_function` does not work. # This is in fact expected: since `ZerosBridge` drops the variable, we dont @@ -616,14 +650,37 @@ function _instantiate_with_bridges(model_constructor) return model end +function _instantiate_diff(model::Optimizer, constructor) + # parametric_diff = MOI.supports_constraint( + # model, + # MOI.VariableIndex, + # MOI.Parameter{Float64}, + # ) + list = MOI.get( + model, + MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{Float64}}(), + ) + parametric_diff = !isempty(list) + model_instance = MOI.instantiate(constructor) + needs_poi = + !MOI.supports_add_constrained_variable( + model_instance, + MOI.Parameter{Float64}, + ) + model_bridged = _add_bridges(model_instance) + if needs_poi && parametric_diff + return POI.Optimizer(model_bridged) + end + return model_bridged +end + function _diff(model::Optimizer) if model.diff === nothing _check_termination_status(model) model_constructor = MOI.get(model, ModelConstructor()) if isnothing(model_constructor) - model.diff = nothing for constructor in model.model_constructors - model.diff = _instantiate_with_bridges(constructor) + model.diff = _instantiate_diff(model, constructor) try model.index_map = MOI.copy_to(model.diff, model.optimizer) catch err @@ -648,7 +705,7 @@ function _diff(model::Optimizer) ) end else - model.diff = _instantiate_with_bridges(model_constructor) + model.diff = _instantiate_diff(model, model_constructor) model.index_map = MOI.copy_to(model.diff, model.optimizer) end _copy_dual(model.diff, model.optimizer, model.index_map) @@ -688,6 +745,18 @@ end MOI.supports(::Optimizer, ::ForwardObjectiveFunction) = true function MOI.set(model::Optimizer, ::ForwardObjectiveFunction, objective) + T = Float64 + list = MOI.get( + model, + MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{T}}(), + ) + parametric_diff = !isempty(list) + if parametric_diff + error( + "Cannot set forward objective function for a model with parameters. " * + "Use `MOI.set(model, ForwardConstraintSet(), ParameterRef(vi), Parameter(val))` instead.", + ) + end model.input_cache.objective = objective return end @@ -746,6 +815,14 @@ function MOI.set( vi::MOI.VariableIndex, val, ) + T = Float64 + is_param = MOI.is_valid( + model, + MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}(vi.value), + ) + if is_param + error("Trying to set a backward variable sensitivity for a parameter") + end model.input_cache.dx[vi] = val return end @@ -814,6 +891,17 @@ function MOI.set( ci::MOI.ConstraintIndex{MOI.ScalarAffineFunction{T},S}, func::MOI.ScalarAffineFunction{T}, ) where {T,S} + list = MOI.get( + model, + MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{T}}(), + ) + parametric_diff = !isempty(list) + if parametric_diff + error( + "Cannot set forward constraint function for a model with parameters. " * + "Use `MOI.set(model, ForwardConstraintSet(), ParameterRef(vi), Parameter(val))` instead.", + ) + end model.input_cache.scalar_constraints[ci] = func return end @@ -824,6 +912,17 @@ function MOI.set( ci::MOI.ConstraintIndex{MOI.VectorAffineFunction{T},S}, func::MOI.VectorAffineFunction{T}, ) where {T,S} + list = MOI.get( + model, + MOI.ListOfConstraintIndices{MOI.VariableIndex,MOI.Parameter{T}}(), + ) + parametric_diff = !isempty(list) + if parametric_diff + error( + "Cannot set forward constraint function for a model with parameters. " * + "Use `MOI.set(model, ForwardConstraintSet(), ParameterRef(vi), Parameter(val))` instead.", + ) + end model.input_cache.vector_constraints[ci] = func return end @@ -858,12 +957,39 @@ function MOI.get( end end +function MOI.supports( + ::Optimizer, + ::ForwardConstraintSet, + ::Type{MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}}, +) where {T} + return true +end + +function MOI.get( + model::Optimizer, + ::ForwardConstraintSet, + ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}, +) where {T} + set = get(model.input_cache.parameter_constraints, ci, zero(T)) + return MOI.Parameter{T}(set) +end + +function MOI.set( + model::Optimizer, + ::ForwardConstraintSet, + ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}, + set::MOI.Parameter, +) where {T} + model.input_cache.parameter_constraints[ci] = set.value + return +end + function MOI.get(model::Optimizer, attr::DifferentiateTimeSec) return MOI.get(model.diff, attr) end function MOI.supports( - model::Optimizer, + ::Optimizer, ::NonLinearKKTJacobianFactorization, ::Function, ) diff --git a/src/parameters.jl b/src/parameters.jl index 68ff54a49..b0a026904 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -10,9 +10,7 @@ MOI.supports(::POI.Optimizer, ::ForwardObjectiveFunction) = false function MOI.set(::POI.Optimizer, ::ForwardObjectiveFunction, _) return error( "Forward objective function is not supported when " * - "`with_parametric_opt_interface` is set to `true` in " * - "`diff_optimizer`." * - "Use parameters to set the forward sensitivity.", + "`JuMP.Parameter`s (or `MOI.Parameter`s) are present in the model.", ) end @@ -26,9 +24,7 @@ function MOI.set( ) return error( "Forward constraint function is not supported when " * - "`with_parametric_opt_interface` is set to `true` in " * - "`diff_optimizer`." * - "Use parameters to set the forward sensitivity.", + "`JuMP.Parameter`s (or `MOI.Parameter`s) are present in the model.", ) end @@ -37,9 +33,7 @@ MOI.supports(::POI.Optimizer, ::ReverseObjectiveFunction) = false function MOI.get(::POI.Optimizer, ::ReverseObjectiveFunction) return error( "Reverse objective function is not supported when " * - "`with_parametric_opt_interface` is set to `true` in " * - "`diff_optimizer`." * - "Use parameters to get the reverse sensitivity.", + "`JuMP.Parameter`s (or `MOI.Parameter`s) are present in the model.", ) end @@ -52,9 +46,7 @@ function MOI.get( ) return error( "Reverse constraint function is not supported when " * - "`with_parametric_opt_interface` is set to `true` in " * - "`diff_optimizer`." * - "Use parameters to get the reverse sensitivity.", + "`JuMP.Parameter`s (or `MOI.Parameter`s) are present in the model.", ) end @@ -315,16 +307,6 @@ function MOI.set( return end -function MOI.set( - model::Optimizer, - ::ForwardConstraintSet, - ci::MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{T}}, - set::MOI.Parameter, -) where {T} - model.input_cache.dp[ci] = set.value - return -end - function MOI.get( model::POI.Optimizer, attr::ForwardVariablePrimal, @@ -567,8 +549,6 @@ function MOI.set( return end -MOI.is_set_by_optimize(::ReverseConstraintSet) = true - function MOI.get( model::POI.Optimizer, ::ReverseConstraintSet, diff --git a/test/Project.toml b/test/Project.toml index e7c5a5b8d..b7775fc19 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,6 +2,7 @@ ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" DiffOpt = "930fe3bc-9c6b-11ea-2d94-6184641e85e7" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" @@ -10,13 +11,14 @@ JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +ParametricOptInterface = "0ce4ce61-57bf-432b-a095-efac525d185e" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SCS = "c946c3f1-0d1f-5ce8-9dea-7daa1f7e2d13" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" [compat] HiGHS = "1" diff --git a/test/conic_program.jl b/test/conic_program.jl index a3d4a9398..2b4a20aa5 100644 --- a/test/conic_program.jl +++ b/test/conic_program.jl @@ -11,6 +11,7 @@ import Ipopt import LinearAlgebra import MathOptInterface as MOI import SCS +using JuMP const ATOL = 2e-4 const RTOL = 2e-4 @@ -28,7 +29,7 @@ end function _test_simple_socp(eq_vec::Bool) # referred from _soc2test, https://github.com/jump-dev/MathOptInterface.jl/blob/master/src/Test/contconic.jl#L1355 - # find equivalent diffcp python program here: https://github.com/AKS1996/jump-gsoc-2020/blob/master/diffcp_socp_1_py.ipynb + # find reference diffcp python program here: https://github.com/AKS1996/jump-gsoc-2020/blob/master/diffcp_socp_1_py.ipynb # Problem SOC2 # min x # s.t. y ≥ 1/√2 @@ -38,80 +39,103 @@ function _test_simple_socp(eq_vec::Bool) # s.t. -1/√2 + y ∈ R₊ # 1 - t ∈ {0} # (t,x,y) ∈ SOC₃ - model = DiffOpt.diff_optimizer(SCS.Optimizer) - MOI.set(model, MOI.Silent(), true) - x, y, t = MOI.add_variables(model, 3) - MOI.set( - model, - MOI.ObjectiveFunction{MOI.ScalarAffineFunction{Float64}}(), - 1.0x, - ) - MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE) + + model = JuMP.Model(() -> DiffOpt.diff_optimizer(SCS.Optimizer)) + set_silent(model) + + x = @variable(model) + y = @variable(model) + t = @variable(model) + + ceq = if eq_vec + @constraint(model, [t] .== [1.0]) + else + @constraint(model, t == 1.0) + end + cnon = @constraint(model, 1.0y >= 1 / √2) + csoc = @constraint(model, [1.0t, 1.0x, 1.0y] in MOI.SecondOrderCone(3)) + + @objective(model, Min, 1.0x) + + optimize!(model) + + # set foward sensitivities if eq_vec - ceq = MOI.add_constraint( - model, - MOI.Utilities.vectorize([-1.0t + 1.0]), - MOI.Zeros(1), - ) + MOI.set.(model, DiffOpt.ForwardConstraintFunction(), ceq, [1.0 * x]) else - ceq = MOI.add_constraint(model, -1.0t, MOI.EqualTo(-1.0)) + MOI.set(model, DiffOpt.ForwardConstraintFunction(), ceq, 1.0 * x) end - cnon = MOI.add_constraint( - model, - MOI.Utilities.vectorize([1.0y - 1 / √2]), - MOI.Nonnegatives(1), - ) - csoc = MOI.add_constraint( - model, - MOI.Utilities.vectorize([1.0t, 1.0x, 1.0y]), - MOI.SecondOrderCone(3), - ) - MOI.optimize!(model) + + DiffOpt.forward_differentiate!(model) + + dx = -0.9999908 + @test MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) ≈ dx atol = ATOL rtol = + RTOL + + MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, 1.0) + + DiffOpt.reverse_differentiate!(model) + if eq_vec - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - ceq, - MOI.Utilities.vectorize([1.0 * x]), + @test all( + isapprox.( + JuMP.coefficient.( + MOI.get.(model, DiffOpt.ReverseConstraintFunction(), ceq), + x, + ), + dx, + atol = ATOL, + rtol = RTOL, + ), ) else - MOI.set(model, DiffOpt.ForwardConstraintFunction(), ceq, 1.0 * x) + @test JuMP.coefficient( + MOI.get(model, DiffOpt.ReverseConstraintFunction(), ceq), + x, + ) ≈ dx atol = ATOL rtol = RTOL end - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - cnon, - MOI.Utilities.vectorize([1.0 * y]), - ) + + DiffOpt.empty_input_sensitivities!(model) + + MOI.set(model, DiffOpt.ForwardConstraintFunction(), cnon, 1.0 * y) + + DiffOpt.forward_differentiate!(model) + + dy = -0.707083 + @test MOI.get(model, DiffOpt.ForwardVariablePrimal(), y) ≈ dy atol = ATOL rtol = + RTOL + + MOI.set(model, DiffOpt.ReverseVariablePrimal(), y, 1.0) + + DiffOpt.reverse_differentiate!(model) + + @test JuMP.coefficient( + MOI.get(model, DiffOpt.ReverseConstraintFunction(), cnon), + y, + ) ≈ dy atol = ATOL rtol = RTOL + + DiffOpt.empty_input_sensitivities!(model) + MOI.set( model, DiffOpt.ForwardConstraintFunction(), csoc, - MOI.Utilities.operate(vcat, Float64, 1.0 * t, 0.0, 0.0), + MOI.Utilities.operate(vcat, Float64, 1.0 * t.index, 0.0, 0.0), ) + DiffOpt.forward_differentiate!(model) - # these matrices are benchmarked with the output generated by diffcp - # refer the python file mentioned above to get equivalent python source code - @test model.diff.model.x ≈ [-1 / √2; 1 / √2; 1.0] atol = ATOL rtol = RTOL - if eq_vec - @test model.diff.model.s ≈ [0.0, 0.0, 1.0, -1 / √2, 1 / √2] atol = ATOL rtol = - RTOL - @test model.diff.model.y ≈ [√2, 1.0, √2, 1.0, -1.0] atol = ATOL rtol = - RTOL - else - @test model.diff.model.s ≈ [0.0, 1.0, -1 / √2, 1 / √2, 0.0] atol = ATOL rtol = - RTOL - @test model.diff.model.y ≈ [1.0, √2, 1.0, -1.0, √2] atol = ATOL rtol = - RTOL - end - dx = [1.12132144; 1 / √2; 1 / √2] - for (i, vi) in enumerate([x, y, t]) - @test dx[i] ≈ MOI.get(model, DiffOpt.ForwardVariablePrimal(), vi) atol = - ATOL rtol = RTOL - end - # @test dx ≈ [1.12132144; 1/√2; 1/√2] atol=ATOL rtol=RTOL - # @test ds ≈ [0.0; 0.0; -2.92893438e-01; 1.12132144e+00; 7.07106999e-01] atol=ATOL rtol=RTOL - # @test dy ≈ [2.4142175; 5.00000557; 3.8284315; √2; -4.00000495] atol=ATOL rtol=RTOL + + ds = 0.0 + @test MOI.get(model, DiffOpt.ForwardVariablePrimal(), t) ≈ ds atol = ATOL rtol = + RTOL + + MOI.set(model, DiffOpt.ReverseVariablePrimal(), t, 1.0) + + DiffOpt.reverse_differentiate!(model) + + # FIXME: this is not working - https://github.com/jump-dev/DiffOpt.jl/issues/283 + # @test JuMP.coefficient(MOI.get(model, DiffOpt.ReverseConstraintFunction(), csoc).func.func.func, t.index) ≈ ds atol=ATOL rtol=RTOL + return end @@ -375,206 +399,82 @@ function test_differentiating_conic_with_PSD_and_SOC_constraints() return end -function test_differentiating_conic_with_PSD_and_POS_constraints() +function _build_simple_sdp() # refer psdt2test, https://github.com/jump-dev/MathOptInterface.jl/blob/master/src/Test/contconic.jl#L4306 # find equivalent diffcp program here - https://github.com/AKS1996/jump-gsoc-2020/blob/master/diffcp_sdp_3_py.ipynb - model = DiffOpt.diff_optimizer(SCS.Optimizer) - MOI.set(model, MOI.Silent(), true) - x = MOI.add_variables(model, 7) - @test MOI.get(model, MOI.NumberOfVariables()) == 7 - η = 10.0 - c1 = MOI.add_constraint( - model, - MOI.VectorAffineFunction( - MOI.VectorAffineTerm.(1, MOI.ScalarAffineTerm.(-1.0, x[1:6])), - [η], - ), - MOI.Nonnegatives(1), - ) - c2 = MOI.add_constraint( - model, - MOI.VectorAffineFunction( - MOI.VectorAffineTerm.(1:6, MOI.ScalarAffineTerm.(1.0, x[1:6])), - zeros(6), - ), - MOI.Nonnegatives(6), - ) - α = 0.8 - δ = 0.9 - c3 = MOI.add_constraint( - model, - MOI.VectorAffineFunction( - MOI.VectorAffineTerm.( - [fill(1, 7); fill(2, 5); fill(3, 6)], - MOI.ScalarAffineTerm.( - [ - δ / 2, - α, - δ, - δ / 4, - δ / 8, - 0.0, - -1.0, - -δ / (2 * √2), - -δ / 4, - 0, - -δ / (8 * √2), - 0.0, - δ / 2, - δ - α, - 0, - δ / 8, - δ / 4, - -1.0, - ], - [x[1:7]; x[1:3]; x[5:6]; x[1:3]; x[5:7]], - ), - ), - zeros(3), - ), - MOI.PositiveSemidefiniteConeTriangle(2), - ) - c4 = MOI.add_constraint( - model, - MOI.VectorAffineFunction( - MOI.VectorAffineTerm.( - 1, - MOI.ScalarAffineTerm.(0.0, [x[1:3]; x[5:6]]), - ), - [0.0], - ), - MOI.Zeros(1), - ) - MOI.set( - model, - MOI.ObjectiveFunction{MOI.ScalarAffineFunction{Float64}}(), - MOI.ScalarAffineFunction([MOI.ScalarAffineTerm(1.0, x[7])], 0.0), - ) - MOI.set(model, MOI.ObjectiveSense(), MOI.MAX_SENSE) - MOI.optimize!(model) - # dc = ones(7) - MOI.set( - model, - DiffOpt.ForwardObjectiveFunction(), - MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(ones(7), x), 0.0), - ) - # db = ones(11) - # dA = ones(11, 7) - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - c1, - MOI.Utilities.vectorize(ones(1, 7) * x + ones(1)), - ) - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - c2, - MOI.Utilities.vectorize(ones(6, 7) * x + ones(6)), - ) - MOI.set( + # Make a JuMP model backed by DiffOpt.diff_optimizer(SCS.Optimizer) + model = Model(() -> DiffOpt.diff_optimizer(SCS.Optimizer)) + set_silent(model) # just to suppress solver output + + @variable(model, x[1:3]) + + @constraint(model, c1, sum(x[i] for i in 1:3) == 4) + + @constraint(model, c2[i = 1:3], x[i] ≥ 0) + + @constraint(model, x[1] == 2) + + @constraint( model, - DiffOpt.ForwardConstraintFunction(), c3, - MOI.Utilities.vectorize(ones(3, 7) * x + ones(3)), - ) - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - c4, - MOI.Utilities.vectorize(ones(1, 7) * x + ones(1)), + LinearAlgebra.Symmetric([ + x[3]+1 2 + 2 2x[3]+2 + ]) in PSDCone() ) - DiffOpt.forward_differentiate!(model) - @test model.diff.model.x ≈ - [20 / 3.0, 0.0, 10 / 3.0, 0.0, 0.0, 0.0, 1.90192379] atol = ATOL rtol = - RTOL - @test model.diff.model.s ≈ [ - 0.0, - 0.0, - 20 / 3.0, - 0.0, - 10 / 3.0, - 0.0, - 0.0, - 0.0, - 4.09807621, - -2.12132, - 1.09807621, - ] atol = ATOL rtol = RTOL - @test model.diff.model.y ≈ [ - 0.0, - 0.19019238, - 0.0, - 0.12597667, - 0.0, - 0.14264428, - 0.14264428, - 0.01274047, - 0.21132487, - 0.408248, - 0.78867513, - ] atol = ATOL rtol = RTOL - atol = 0.3 - rtol = 0.01 - # compare these with https://github.com/AKS1996/jump-gsoc-2020/blob/master/diffcp_sdp_3_py.ipynb - # results are not exactly as: 1. there is some residual error 2. diffcp results are SCS specific, hence scaled - dx = [-39.6066, 10.8953, -14.9189, 10.9054, 10.883, 10.9118, -21.7508] - for (i, vi) in enumerate(x) - @test dx[i] ≈ MOI.get(model, DiffOpt.ForwardVariablePrimal(), vi) atol = - atol rtol = rtol + + @objective(model, Min, 4x[3] + x[2]) + return model +end + +function test_differentiating_conic_with_PSD_constraints() + model = _build_simple_sdp() + optimize!(model) + x = model[:x] + c1 = model[:c1] + c2 = model[:c2] + sx = value.(x) + @test sx ≈ [2.0, 3.0 - sqrt(2), sqrt(2) - 1] atol = ATOL rtol = RTOL + + for i in 1:3 + _model = _build_simple_sdp() + JuMP.set_normalized_coefficient(_model[:c1], _model[:x][i], 1.001) + optimize!(_model) + _dx = (value(_model[:x][i]) - value(sx[i])) / 0.001 + i in (1, 3) ? (@test abs(_dx) < 0.05) : (@test -1.6 < _dx < -1.45) + MOI.set(model, DiffOpt.ForwardConstraintFunction(), c1, x[i] + 0.0) + DiffOpt.forward_differentiate!(model) + _dx = MOI.get(model, DiffOpt.ForwardVariablePrimal(), x[i]) + i in (1, 3) ? (@test abs(_dx) < 0.05) : (@test -1.6 < _dx < -1.45) + MOI.set(model, DiffOpt.ReverseVariablePrimal(), x[i], 1.0) + DiffOpt.reverse_differentiate!(model) + _dx = JuMP.coefficient( + MOI.get(model, DiffOpt.ReverseConstraintFunction(), c1), + x[i], + ) + i in (1, 3) ? (@test abs(_dx) < 0.05) : (@test -1.6 < _dx < -1.45) + DiffOpt.empty_input_sensitivities!(model) end - # @test dy ≈ [0.0, -3.56905, 0.0, -0.380035, 0.0, -0.41398, -0.385321, -0.00743119, -0.644986, -0.550542, -2.36765] atol=atol rtol=rtol - # @test ds ≈ [0.0, 0.0, -50.4973, 0.0, -25.8066, 0.0, 0.0, 0.0, -7.96528, -1.62968, -2.18925] atol=atol rtol=rtol - # TODO: future example, how to differentiate wrt a specific constraint/variable, refer QPLib article for more - dA = zeros(11, 7) - dA[3:8, 1:6] = Matrix{Float64}(LinearAlgebra.I, 6, 6) # differentiating only wrt POS constraint c2 - db = zeros(11) - dc = zeros(7) - # db = zeros(11) - # dA = zeros(11, 7) - # dA[3:8, 1:6] = Matrix{Float64}(LinearAlgebra.I, 6, 6) # differentiating only wrt POS constraint c2 - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - c1, - MOI.Utilities.zero_with_output_dimension( - MOI.VectorAffineFunction{Float64}, - 1, - ), - ) - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - c2, - MOI.Utilities.vectorize(ones(6) .* x[1:6]), - ) - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - c3, - MOI.Utilities.zero_with_output_dimension( - MOI.VectorAffineFunction{Float64}, - 3, - ), - ) - MOI.set( - model, - DiffOpt.ForwardConstraintFunction(), - c4, - MOI.Utilities.zero_with_output_dimension( - MOI.VectorAffineFunction{Float64}, - 1, - ), - ) - DiffOpt.forward_differentiate!(model) - # for (i, vi) in enumerate(X) - # @test 0.0 ≈ MOI.get(model, - # DiffOpt.ForwardVariablePrimal(), vi) atol=1e-2 rtol=RTOL - # end - # TODO add a test here, probably on duals - # # note that there's no change in the PSD slack values or dual optimas - # @test dy ≈ [0.0, 0.0, 0.0, 0.125978, 0.0, 0.142644, 0.142641, 0.0127401, 0.0, 0.0, 0.0] atol=atol rtol=RTOL - # @test ds ≈ [0.0, 0.0, -6.66672, 0.0, -3.33336, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] atol=atol rtol=RTOL + for i in 1:3 + DiffOpt.empty_input_sensitivities!(model) + _model = _build_simple_sdp() + JuMP.set_normalized_coefficient(_model[:c2][i], _model[:x][i], 1.001) + optimize!(_model) + _dx = (value(_model[:x][i]) - value(sx[i])) / 0.001 + @test abs(_dx) < 0.15 + MOI.set(model, DiffOpt.ForwardConstraintFunction(), c2[i], x[i] + 0.0) + DiffOpt.forward_differentiate!(model) + _dx = MOI.get(model, DiffOpt.ForwardVariablePrimal(), x[i]) + @test abs(_dx) < 0.15 + MOI.set(model, DiffOpt.ReverseVariablePrimal(), x[i], 1.0) + DiffOpt.reverse_differentiate!(model) + _dx = JuMP.coefficient( + MOI.get(model, DiffOpt.ReverseConstraintFunction(), c2[i]), + x[i], + ) + @test abs(_dx) < 0.15 + end + return end @@ -616,7 +516,6 @@ function test_differentiating_a_simple_psd() # dc = zeros(1) DiffOpt.forward_differentiate!(model) @test model.diff.model.x ≈ [1.0] atol = 10ATOL rtol = 10RTOL - @test model.diff.model.s ≈ ones(6) atol = ATOL rtol = RTOL @test model.diff.model.y ≈ [1 / 3, -1 / 6, 1 / 3, -1 / 6, -1 / 6, 1 / 3] atol = ATOL rtol = RTOL @test -0.5 ≈ MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) atol = 1e-2 rtol = @@ -763,7 +662,6 @@ function test_verifying_cache_on_a_psd() dc = zeros(1) DiffOpt.forward_differentiate!(model) @test model.diff.model.x ≈ [1.0] atol = 10ATOL rtol = 10RTOL - @test model.diff.model.s ≈ ones(6) atol = ATOL rtol = RTOL @test model.diff.model.y ≈ [1 / 3, -1 / 6, 1 / 3, -1 / 6, -1 / 6, 1 / 3] atol = ATOL rtol = RTOL @test -0.5 ≈ MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) atol = 1e-2 rtol = @@ -924,6 +822,25 @@ function test_quad_to_soc() return end +function test_jump_psd_cone_with_parameter_pv_v_pv() + model = DiffOpt.conic_diff_model(SCS.Optimizer) + @variable(model, x) + @variable(model, p in MOI.Parameter(1.0)) + @constraint( + model, + con, + [p * x, (2 * x - 3), p * 3 * x] in + MOI.PositiveSemidefiniteConeTriangle(2) + ) + @objective(model, Min, x) + optimize!(model) + direction_p = 2.0 + DiffOpt.set_forward_parameter(model, p, direction_p) + DiffOpt.forward_differentiate!(model) + dx = MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) + @test dx ≈ 0.0 atol = 1e-4 rtol = 1e-4 +end + end # module TestConicProgram.runtests() diff --git a/test/data/nlp_problems.jl b/test/data/nlp_problems.jl index 923b48c4c..2769ae82c 100644 --- a/test/data/nlp_problems.jl +++ b/test/data/nlp_problems.jl @@ -9,6 +9,7 @@ https://github.com/jump-dev/JuMP.jl/blob/301d46e81cb66c74c6e22cd89fb89ced740f157 ################################################ function create_nonlinear_jump_model(; ismin = true) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) @variable(model, p ∈ MOI.Parameter(1.0)) @variable(model, p2 ∈ MOI.Parameter(2.0)) @@ -33,6 +34,7 @@ From sIpopt paper: https://optimization-online.org/2011/04/3008/ function create_nonlinear_jump_model_sipopt(; ismin = true) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) @variable(model, p1 ∈ MOI.Parameter(4.5)) @variable(model, p2 ∈ MOI.Parameter(1.0)) @@ -55,6 +57,7 @@ Simple Problems function create_jump_model_1(p_val = [1.5]) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -73,6 +76,7 @@ end function create_jump_model_2(p_val = [1.5]) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -90,6 +94,7 @@ end function create_jump_model_3(p_val = [-1.5]) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -108,6 +113,7 @@ end function create_jump_model_4(p_val = [1.5]) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -126,6 +132,7 @@ end function create_jump_model_5(p_val = [1.5]) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -147,6 +154,7 @@ h(y) = -sum(y .* log.(y)) softmax(x) = exp.(x) / sum(exp.(x)) function create_jump_model_6(p_a = collect(1.0:0.1:2.0)) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -167,6 +175,7 @@ end function create_jump_model_7(p_val = [1.5], g = sin) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -190,6 +199,7 @@ Non Linear Problems function create_nonlinear_jump_model_1(p_val = [1.0; 2.0; 100]; ismin = true) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -214,6 +224,7 @@ end function create_nonlinear_jump_model_2(p_val = [3.0; 2.0; 10]; ismin = true) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -238,6 +249,7 @@ end function create_nonlinear_jump_model_3(p_val = [3.0; 2.0; 10]; ismin = true) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -261,6 +273,7 @@ end function create_nonlinear_jump_model_4(p_val = [1.0; 2.0; 100]; ismin = true) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -286,6 +299,7 @@ end function create_nonlinear_jump_model_5(p_val = [1.0; 2.0; 100]; ismin = true) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters @@ -312,6 +326,7 @@ end function create_nonlinear_jump_model_6(p_val = [100.0; 200.0]; ismin = true) model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) set_silent(model) # Parameters diff --git a/test/jump.jl b/test/jump.jl index bdb031b73..248ceb6ef 100644 --- a/test/jump.jl +++ b/test/jump.jl @@ -680,6 +680,62 @@ function test_sensitivity_index_issue() return end +function test_conic_supports() + model = DiffOpt.conic_diff_model(SCS.Optimizer) + @test MOI.supports(backend(model), DiffOpt.ForwardObjectiveFunction()) + @test MOI.supports( + backend(model), + DiffOpt.ForwardConstraintSet(), + MOI.ConstraintIndex{MOI.VariableIndex,MOI.Parameter{Float64}}, + ) + function some end + @test MOI.supports( + backend(model), + DiffOpt.NonLinearKKTJacobianFactorization(), + some, + ) + MOI.is_set_by_optimize(DiffOpt.ReverseConstraintFunction()) + MOI.is_set_by_optimize(DiffOpt.ReverseConstraintSet()) + model = DiffOpt.ConicProgram.Model() + @test !MOI.supports(model, MOI.Name()) + @test MOI.supports_incremental_interface(model) + return +end + +function test_conic_feasibility() + model = DiffOpt.conic_diff_model(SCS.Optimizer) + set_silent(model) + + @variable(model, x) + @variable(model, p in Parameter(1.0)) + + @constraint(model, con, [0.0, x, p * x] in SecondOrderCone()) + + set_objective_sense(model, MOI.FEASIBILITY_SENSE) + + optimize!(model) + + DiffOpt.set_forward_parameter(model, p, 1.0) + DiffOpt.forward_differentiate!(model) + return +end + +function test_psd_square_error() + model = DiffOpt.conic_diff_model(SCS.Optimizer) + set_silent(model) + + @variable(model, x) + @variable(model, p in Parameter(1.0)) + + @constraint(model, con, [-p*x 0; 0 x] in PSDCone()) + + @test_throws MOI.SetAttributeNotAllowed optimize!(model) + + # DiffOpt.set_forward_parameter(model, p, 1.0) + # DiffOpt.forward_differentiate!(model) + return +end + end # module TestJuMP.runtests() diff --git a/test/jump_wrapper.jl b/test/jump_wrapper.jl new file mode 100644 index 000000000..68d7be386 --- /dev/null +++ b/test/jump_wrapper.jl @@ -0,0 +1,128 @@ +# Copyright (c) 2020: Akshay Sharma and contributors +# +# Use of this source code is governed by an MIT-style license that can be found +# in the LICENSE.md file or at https://opensource.org/licenses/MIT. + +module TestJuMPWrapper + +using Test +using JuMP +import DiffOpt +import HiGHS +import Ipopt +import SCS +import ParametricOptInterface as POI +import MathOptInterface as MOI +import ParametricOptInterface as POI + +const ATOL = 1e-3 +const RTOL = 1e-3 + +function runtests() + for name in names(@__MODULE__; all = true) + if startswith("$name", "test_") + @testset "$(name)" begin + getfield(@__MODULE__, name)() + end + end + end + return +end + +function test_jump_api() + for (MODEL, SOLVER) in [ + (DiffOpt.diff_model, HiGHS.Optimizer), + (DiffOpt.diff_model, SCS.Optimizer), + (DiffOpt.diff_model, Ipopt.Optimizer), + (DiffOpt.quadratic_diff_model, HiGHS.Optimizer), + (DiffOpt.quadratic_diff_model, SCS.Optimizer), + (DiffOpt.quadratic_diff_model, Ipopt.Optimizer), + (DiffOpt.conic_diff_model, HiGHS.Optimizer), + (DiffOpt.conic_diff_model, SCS.Optimizer), + (DiffOpt.conic_diff_model, Ipopt.Optimizer), + (DiffOpt.nonlinear_diff_model, HiGHS.Optimizer), + (DiffOpt.nonlinear_diff_model, SCS.Optimizer), + (DiffOpt.nonlinear_diff_model, Ipopt.Optimizer), + ], + ineq in [true, false], + _min in [true, false], + flip in [true, false] + + @testset "$(MODEL) with: $(SOLVER), $(ineq ? "ineqs" : "eqs"), $(_min ? "Min" : "Max"), $(flip ? "geq" : "leq")" begin + model = MODEL(SOLVER) + set_silent(model) + + p_val = 4.0 + pc_val = 2.0 + @variable(model, x) + @variable(model, p in Parameter(p_val)) + @variable(model, pc in Parameter(pc_val)) + if ineq + if !flip + cons = @constraint(model, con, pc * x >= 3 * p) + else + cons = @constraint(model, con, pc * x <= 3 * p) + end + else + cons = @constraint(model, con, pc * x == 3 * p) + end + sign = flip ? -1 : 1 + if _min + @objective(model, Min, 2x * sign) + else + @objective(model, Max, -2x * sign) + end + optimize!(model) + @test value(x) ≈ 3 * p_val / pc_val atol = ATOL rtol = RTOL + + # the function is + # x(p, pc) = 3p / pc + # hence, + # dx/dp = 3 / pc + # dx/dpc = -3p / pc^2 + + # First, try forward mode AD + + # differentiate w.r.t. p + direction_p = 3.0 + DiffOpt.set_forward_parameter(model, p, direction_p) + DiffOpt.forward_differentiate!(model) + @test DiffOpt.get_forward_variable(model, x) ≈ + direction_p * 3 / pc_val atol = ATOL rtol = RTOL + + # update p and pc + p_val = 2.0 + pc_val = 6.0 + set_parameter_value(p, p_val) + set_parameter_value(pc, pc_val) + # re-optimize + optimize!(model) + # check solution + @test value(x) ≈ 3 * p_val / pc_val atol = ATOL rtol = RTOL + + # stop differentiating with respect to p + DiffOpt.empty_input_sensitivities!(model) + # differentiate w.r.t. pc + direction_pc = 10.0 + DiffOpt.set_forward_parameter(model, pc, direction_pc) + DiffOpt.forward_differentiate!(model) + @test DiffOpt.get_forward_variable(model, x) ≈ + -direction_pc * 3 * p_val / pc_val^2 atol = ATOL rtol = RTOL + + # always a good practice to clear previously set sensitivities + DiffOpt.empty_input_sensitivities!(model) + # Now, reverse model AD + direction_x = 10.0 + DiffOpt.set_reverse_variable(model, x, direction_x) + DiffOpt.reverse_differentiate!(model) + @test DiffOpt.get_reverse_parameter(model, p) ≈ + direction_x * 3 / pc_val atol = ATOL rtol = RTOL + @test DiffOpt.get_reverse_parameter(model, pc) ≈ + -direction_x * 3 * p_val / pc_val^2 atol = ATOL rtol = RTOL + end + end +end + +end # module + +TestJuMPWrapper.runtests() diff --git a/test/moi_wrapper.jl b/test/moi_wrapper.jl index 6c60c2658..c9daf8513 100644 --- a/test/moi_wrapper.jl +++ b/test/moi_wrapper.jl @@ -28,10 +28,10 @@ end function test_moi_test_runtests() model = DiffOpt.diff_optimizer(HiGHS.Optimizer) # `Variable.ZerosBridge` makes dual needed by some tests fail. - MOI.Bridges.remove_bridge( - model.optimizer.optimizer, - MOI.Bridges.Variable.ZerosBridge{Float64}, - ) + # MOI.Bridges.remove_bridge( + # model.optimizer.optimizer.optimizer, + # MOI.Bridges.Variable.ZerosBridge{Float64}, + # ) MOI.set(model, MOI.Silent(), true) config = MOI.Test.Config(; atol = 1e-7, exclude = Any[MOI.compute_conflict!]) diff --git a/test/nlp_program.jl b/test/nlp_program.jl index 862050035..bc54fd163 100644 --- a/test/nlp_program.jl +++ b/test/nlp_program.jl @@ -120,12 +120,8 @@ end function test_analytical_simple(; P = 2) # Number of parameters @testset "Bounds Bounds" begin - m = Model( - () -> DiffOpt.diff_optimizer( - Ipopt.Optimizer; - with_parametric_opt_interface = false, - ), - ) + m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) @variable(m, 0 ≤ x[1:P] ≤ 1) @variable(m, p[1:P] ∈ Parameter.(0.5)) @@ -194,12 +190,8 @@ function test_analytical_simple(; P = 2) # Number of parameters ) end @testset "Bounds as RHS constraints" begin - m = Model( - () -> DiffOpt.diff_optimizer( - Ipopt.Optimizer; - with_parametric_opt_interface = false, - ), - ) + m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) @variable(m, x[1:P]) @constraint(m, x .≥ 0) @@ -238,12 +230,8 @@ function test_analytical_simple(; P = 2) # Number of parameters ) end @testset "Bounds as Mixed constraints" begin - m = Model( - () -> DiffOpt.diff_optimizer( - Ipopt.Optimizer; - with_parametric_opt_interface = false, - ), - ) + m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) @variable(m, x[1:P]) @constraint(m, 0 .≤ x) @@ -282,12 +270,8 @@ function test_analytical_simple(; P = 2) # Number of parameters ) end @testset "Bounds as LHS constraints" begin - m = Model( - () -> DiffOpt.diff_optimizer( - Ipopt.Optimizer; - with_parametric_opt_interface = false, - ), - ) + m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) @variable(m, x[1:P]) @constraint(m, 0 .≤ x) @@ -664,6 +648,7 @@ function test_differentiating_non_trivial_convex_qp_jump() h = vec(h) b = vec(b) model = JuMP.Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(model, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) MOI.set(model, MOI.Silent(), true) @variable(model, x[1:nz]) @variable(model, p_le[1:nineq_le] ∈ MOI.Parameter.(0.0)) @@ -709,12 +694,8 @@ function test_differentiating_non_trivial_convex_qp_jump() end function test_ReverseConstraintDual() - m = Model( - () -> DiffOpt.diff_optimizer( - Ipopt.Optimizer; - with_parametric_opt_interface = false, - ), - ) + m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) @variable(m, x[1:2]) @variable(m, p[1:2] ∈ Parameter.(0.5)) @@ -796,12 +777,8 @@ end function test_changing_factorization() P = 2 - m = Model( - () -> DiffOpt.diff_optimizer( - Ipopt.Optimizer; - with_parametric_opt_interface = false, - ), - ) + m = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)) + MOI.set(m, DiffOpt.ModelConstructor(), DiffOpt.NonLinearProgram.Model) @variable(m, x[1:P]) @constraint(m, x .≥ 0) diff --git a/test/parameters.jl b/test/parameters.jl index b38f84c07..9b4f1968d 100644 --- a/test/parameters.jl +++ b/test/parameters.jl @@ -30,12 +30,7 @@ function runtests() end function test_diff_rhs() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) @variable(model, x) @variable(model, p in Parameter(3.0)) @@ -101,12 +96,7 @@ function test_diff_rhs() end function test_diff_vector_rhs() - model = direct_model( - DiffOpt.diff_optimizer( - SCS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = direct_model(DiffOpt.diff_optimizer(SCS.Optimizer)) set_silent(model) @variable(model, x) @variable(model, p in Parameter(3.0)) @@ -152,12 +142,7 @@ function test_diff_vector_rhs() end function test_affine_changes() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) p_val = 3.0 pc_val = 1.0 @@ -257,12 +242,7 @@ function test_affine_changes() end function test_affine_changes_compact() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) p_val = 3.0 pc_val = 1.0 @@ -315,12 +295,7 @@ function test_affine_changes_compact() end function test_quadratic_rhs_changes() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) p_val = 2.0 q_val = 2.0 @@ -444,12 +419,7 @@ function test_quadratic_rhs_changes() end function test_affine_changes_compact_max() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) p_val = 3.0 pc_val = 1.0 @@ -488,12 +458,7 @@ function test_affine_changes_compact_max() end function test_diff_affine_objective() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) p_val = 3.0 @variable(model, x) @@ -528,12 +493,7 @@ function test_diff_affine_objective() end function test_diff_quadratic_objective() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) p_val = 3.0 @variable(model, x) @@ -568,12 +528,7 @@ function test_diff_quadratic_objective() end function test_quadratic_objective_qp() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) p_val = 3.0 @variable(model, x) @@ -610,12 +565,7 @@ function test_quadratic_objective_qp() end function test_diff_errors_POI() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + model = direct_model(DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) @variable(model, x) @variable(model, p in Parameter(3.0)) @@ -672,12 +622,7 @@ function test_diff_errors_POI() end function test_diff_errors() - model = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = false, - ), - ) + model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) set_silent(model) @variable(model, x) @variable(model, p in Parameter(3.0)) @@ -730,12 +675,7 @@ end # Credit to @klamike function test_empty_cache() - m = Model( - () -> DiffOpt.diff_optimizer( - HiGHS.Optimizer; - with_parametric_opt_interface = true, - ), - ) + m = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer)) @variable(m, x) @variable(m, p ∈ Parameter(1.0)) @variable(m, q ∈ Parameter(2.0)) @@ -747,10 +687,10 @@ function test_empty_cache() function get_sensitivity(m, xᵢ, pᵢ) DiffOpt.empty_input_sensitivities!(m) - @test is_empty(unsafe_backend(m).optimizer.input_cache) - if !isnothing(unsafe_backend(m).optimizer.diff) && - !isnothing(unsafe_backend(m).optimizer.diff.model.input_cache) - @test is_empty(unsafe_backend(m).optimizer.diff.model.input_cache) + @test is_empty(unsafe_backend(m).input_cache) + if !isnothing(unsafe_backend(m).diff) && + !isnothing(unsafe_backend(m).diff.optimizer.model.input_cache) + @test is_empty(unsafe_backend(m).diff.optimizer.model.input_cache) end MOI.set( m,