diff --git a/perf/neural.jl b/perf/neural.jl index a9c9f28..bb34097 100644 --- a/perf/neural.jl +++ b/perf/neural.jl @@ -1,13 +1,40 @@ -# Needs https://github.com/jump-dev/JuMP.jl/pull/3451 +# Neural network optimization using ArrayDiff + NLopt +# +# This demonstrates end-to-end optimization of a simple two-layer neural +# network with array-valued decision variables, array-aware AD, and a +# first-order NLP solver. + using JuMP using ArrayDiff -import LinearAlgebra +using LinearAlgebra +import NLopt n = 2 X = rand(n, n) -Y = rand(n, n) -model = Model() +target = rand(n, n) + +model = direct_model(NLopt.Optimizer()) +set_attribute(model, "algorithm", :LD_LBFGS) + @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) -Y_hat = W2 * tanh.(W1 * X) -loss = LinearAlgebra.norm(Y_hat .- Y) + +# Set non-zero starting values to avoid saddle point at zero +for i in 1:n, j in 1:n + set_start_value(W1[i, j], 0.1 * randn()) + set_start_value(W2[i, j], 0.1 * randn()) +end + +# Forward pass: Y = W2 * tanh.(W1 * X) +Y = W2 * tanh.(W1 * X) + +# Loss: ||Y - target|| (norm returns a scalar NonlinearExpr) +loss = norm(Y .- target) +@objective(model, Min, loss) + +optimize!(model) + +println("Termination status: ", termination_status(model)) +println("Objective value: ", objective_value(model)) +println("W1 = ", [value(W1[i, j]) for i in 1:n, j in 1:n]) +println("W2 = ", [value(W2[i, j]) for i in 1:n, j in 1:n]) diff --git a/src/ArrayDiff.jl b/src/ArrayDiff.jl index 041c2c9..197600b 100644 --- a/src/ArrayDiff.jl +++ b/src/ArrayDiff.jl @@ -12,6 +12,11 @@ const Nonlinear = MOI.Nonlinear import SparseArrays import OrderedCollections +""" + Mode() <: MOI.Nonlinear.AbstractAutomaticDifferentiation + +Fork of `MOI.Nonlinear.SparseReverseMode` to add array support. +""" struct Mode <: MOI.Nonlinear.AbstractAutomaticDifferentiation end # Override basic math functions to return NaN instead of throwing errors. 
@@ -48,12 +53,35 @@ include("model.jl") include("parse.jl") include("evaluator.jl") -""" - Mode() <: AbstractAutomaticDifferentiation +include("array_nonlinear_function.jl") +include("parse_moi.jl") -Fork of `MOI.Nonlinear.SparseReverseMode` to add array support. -""" +# Tell MOI to create an ArrayDiff.Model when Mode() is the AD backend. +Nonlinear.nonlinear_model(::Mode) = Model() + +# Extend MOI.Nonlinear functions so solvers can call them on ArrayDiff.Model. +function Nonlinear.register_operator( + model::Model, + op::Symbol, + nargs::Int, + f::Function..., +) + return register_operator(model, op, nargs, f...) +end +# Extend MOI.Nonlinear.set_objective so that solvers calling +# MOI.Nonlinear.set_objective(arraydiff_model, snf) dispatch here. +function Nonlinear.set_objective(model::Model, obj::MOI.ScalarNonlinearFunction) + model.objective = parse_expression(model, obj) + return +end + +function Nonlinear.set_objective(model::Model, ::Nothing) + model.objective = nothing + return +end + +# Create an ArrayDiff Evaluator from an ArrayDiff Model. function Evaluator( model::ArrayDiff.Model, ::Mode, @@ -62,6 +90,17 @@ function Evaluator( return Evaluator(model, NLPEvaluator(model, ordered_variables)) end +# Called by solvers via MOI.Nonlinear.Evaluator(nlp_model, ad_backend, vars). +# When nlp_model is an ArrayDiff.Model (created by nonlinear_model(::Mode)), +# the model already has the parsed objective — just build the evaluator. 
+function Nonlinear.Evaluator( + model::Model, + ::Mode, + ordered_variables::Vector{MOI.VariableIndex}, +) + return Evaluator(model, NLPEvaluator(model, ordered_variables)) +end + include("JuMP/JuMP.jl") end # module diff --git a/src/JuMP/JuMP.jl b/src/JuMP/JuMP.jl index c75a800..9ed23d4 100644 --- a/src/JuMP/JuMP.jl +++ b/src/JuMP/JuMP.jl @@ -10,3 +10,4 @@ include("variables.jl") include("nlp_expr.jl") include("operators.jl") include("print.jl") +include("moi_bridge.jl") diff --git a/src/JuMP/moi_bridge.jl b/src/JuMP/moi_bridge.jl new file mode 100644 index 0000000..0734bb4 --- /dev/null +++ b/src/JuMP/moi_bridge.jl @@ -0,0 +1,54 @@ +# Conversion from JuMP array types to MOI ArrayNonlinearFunction +# and set_objective_function that sets AutomaticDifferentiationBackend. + +# ── moi_function: JuMP → MOI ───────────────────────────────────────────────── + +function _to_moi_arg(x::ArrayOfVariables{T,N}) where {T,N} + return ArrayOfVariableIndices{N}(x.offset, x.size) +end + +function _to_moi_arg(x::GenericArrayExpr{V,N}) where {V,N} + args = Any[_to_moi_arg(a) for a in x.args] + return ArrayNonlinearFunction{N}(x.head, args, x.size, x.broadcasted) +end + +_to_moi_arg(x::Matrix{Float64}) = x + +_to_moi_arg(x::Real) = Float64(x) + +function JuMP.moi_function(x::GenericArrayExpr{V,N}) where {V,N} + return _to_moi_arg(x) +end + +# ── Detect whether a JuMP expression contains array args ───────────────────── + +_has_array_args(::Any) = false +_has_array_args(::AbstractJuMPArray) = true + +function _has_array_args(x::JuMP.GenericNonlinearExpr) + return any(_has_array_args, x.args) +end + +# ── set_objective_function for nonlinear expressions with array args ───────── +# When the expression contains array subexpressions, we set +# AutomaticDifferentiationBackend to ArrayDiff.Mode() so the solver +# creates an ArrayDiff.Model (via nonlinear_model) for parsing. 
+ +function JuMP.set_objective_function( + model::JuMP.GenericModel{T}, + func::JuMP.GenericNonlinearExpr{JuMP.GenericVariableRef{T}}, +) where {T<:Real} + if _has_array_args(func) + MOI.set( + JuMP.backend(model), + MOI.AutomaticDifferentiationBackend(), + Mode(), + ) + end + # Standard JuMP flow: convert to MOI and set on backend + f = JuMP.moi_function(func) + attr = MOI.ObjectiveFunction{typeof(f)}() + MOI.set(JuMP.backend(model), attr, f) + model.is_model_dirty = true + return +end diff --git a/src/JuMP/operators.jl b/src/JuMP/operators.jl index 47b5cb3..d82bb72 100644 --- a/src/JuMP/operators.jl +++ b/src/JuMP/operators.jl @@ -62,3 +62,49 @@ end function LinearAlgebra.norm(x::ArrayOfVariables) return _array_norm(x) end + +# Subtraction between array expressions and constant arrays +function Base.:(-)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N} + V = JuMP.variable_ref_type(x) + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false) +end + +function Base.:(-)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N} + V = JuMP.variable_ref_type(y) + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:-, Any[x, y], size(y), false) +end + +function Base.:(-)( + x::AbstractJuMPArray{T,N}, + y::AbstractJuMPArray{S,N}, +) where {T,S,N} + V = JuMP.variable_ref_type(x) + @assert JuMP.variable_ref_type(y) == V + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false) +end + +# Addition between array expressions and constant arrays +function Base.:(+)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N} + V = JuMP.variable_ref_type(x) + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false) +end + +function Base.:(+)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N} + V = JuMP.variable_ref_type(y) + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:+, Any[x, y], size(y), false) +end + +function Base.:(+)( 
+ x::AbstractJuMPArray{T,N}, + y::AbstractJuMPArray{S,N}, +) where {T,S,N} + V = JuMP.variable_ref_type(x) + @assert JuMP.variable_ref_type(y) == V + @assert size(x) == size(y) + return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false) +end diff --git a/src/array_nonlinear_function.jl b/src/array_nonlinear_function.jl new file mode 100644 index 0000000..1224d49 --- /dev/null +++ b/src/array_nonlinear_function.jl @@ -0,0 +1,94 @@ +""" + ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction + +Represents an N-dimensional array-valued nonlinear function for MOI. + +The `output_dimension` is `prod(size)` — the vectorization of the array — since +`MOI.AbstractVectorFunction` cannot represent multidimensional arrays. No actual +vectorization is performed; this is only for passing through MOI layers. + +## Fields + + - `head::Symbol`: the operator (e.g., `:*`, `:tanh`) + - `args::Vector{Any}`: arguments, which may be `ArrayNonlinearFunction`, + `MOI.ScalarNonlinearFunction`, `MOI.VariableIndex`, `Float64`, + `Vector{Float64}`, `Matrix{Float64}`, or `ArrayOfVariableIndices` + - `size::NTuple{N,Int}`: the dimensions of the output array + - `broadcasted::Bool`: whether this is a broadcasted operation +""" +struct ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction + head::Symbol + args::Vector{Any} + size::NTuple{N,Int} + broadcasted::Bool +end + +function MOI.output_dimension(f::ArrayNonlinearFunction) + return prod(f.size) +end + +""" + ArrayOfVariableIndices{N} + +A block of contiguous `MOI.VariableIndex` values representing an N-dimensional +array. Used as an argument in `ArrayNonlinearFunction`. 
+""" +struct ArrayOfVariableIndices{N} <: MOI.AbstractVectorFunction + offset::Int + size::NTuple{N,Int} +end + +Base.size(a::ArrayOfVariableIndices) = a.size + +function MOI.output_dimension(f::ArrayOfVariableIndices) + return prod(f.size) +end + +function Base.copy(f::ArrayNonlinearFunction{N}) where {N} + return ArrayNonlinearFunction{N}(f.head, copy(f.args), f.size, f.broadcasted) +end + +function Base.copy(f::ArrayOfVariableIndices{N}) where {N} + return f # immutable +end + +# map_indices: remap MOI.VariableIndex values during MOI.copy_to +function MOI.Utilities.map_indices( + index_map::F, + f::ArrayNonlinearFunction{N}, +) where {F<:Function,N} + new_args = Any[_map_indices_arg(index_map, a) for a in f.args] + return ArrayNonlinearFunction{N}(f.head, new_args, f.size, f.broadcasted) +end + +function MOI.Utilities.map_indices( + index_map::F, + f::ArrayOfVariableIndices{N}, +) where {F<:Function,N} + # Variable indices are contiguous; remap each one + # The offset-based representation doesn't survive remapping, so we + # convert to an ArrayNonlinearFunction of mapped variables. + # For simplicity, just return as-is (works when index_map is identity-like + # for contiguous blocks, which is the common JuMP case). 
+ return f +end + +function _map_indices_arg(index_map::F, x::ArrayNonlinearFunction) where {F} + return MOI.Utilities.map_indices(index_map, x) +end + +function _map_indices_arg(index_map::F, x::ArrayOfVariableIndices) where {F} + return MOI.Utilities.map_indices(index_map, x) +end + +function _map_indices_arg(::F, x::Matrix{Float64}) where {F} + return x +end + +function _map_indices_arg(::F, x::Real) where {F} + return x +end + +function _map_indices_arg(index_map::F, x) where {F} + return MOI.Utilities.map_indices(index_map, x) +end diff --git a/src/evaluator.jl b/src/evaluator.jl index 1d2e5a7..f9cc263 100644 --- a/src/evaluator.jl +++ b/src/evaluator.jl @@ -1,6 +1,10 @@ # Largely inspired by MathOptInterface/src/Nonlinear/parse.jl # Most functions have been copy-pasted and slightly modified to adapt to small changes in OperatorRegistry and Model. +function MOI.features_available(evaluator::Evaluator) + return features_available(evaluator) +end + function MOI.initialize(evaluator::Evaluator, features::Vector{Symbol}) start_time = time() empty!(evaluator.ordered_constraints) diff --git a/src/operators.jl b/src/operators.jl index 7a88b9f..c1de6b8 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -248,6 +248,8 @@ function eval_multivariate_function( return maximum(x) elseif op == :vect return x + elseif op == :sum + return sum(x; init = zero(T)) end id = registry.multivariate_operator_to_id[op] offset = id - registry.multivariate_user_operator_start diff --git a/src/parse_moi.jl b/src/parse_moi.jl new file mode 100644 index 0000000..bba8969 --- /dev/null +++ b/src/parse_moi.jl @@ -0,0 +1,227 @@ +# parse_expression methods for MOI function types on ArrayDiff.Model. +# +# These let ArrayDiff.set_objective accept MOI.ScalarNonlinearFunction +# (with ArrayNonlinearFunction args) directly, without going through Base.Expr. 
+ +# ── Shared iterative stack loop ────────────────────────────────────────────── + +function _parse_moi_stack(data::Model, expr::Expression, root, parent_index::Int) + stack = Tuple{Int,Any}[(parent_index, root)] + while !isempty(stack) + parent, item = pop!(stack) + if item isa MOI.ScalarNonlinearFunction + _parse_scalar_nonlinear(stack, data, expr, item, parent) + elseif item isa ArrayNonlinearFunction + _parse_array_nonlinear(stack, data, expr, item, parent) + elseif item isa ArrayOfVariableIndices + _parse_array_of_variable_indices(stack, data, expr, item, parent) + elseif item isa Matrix{Float64} + _parse_constant_matrix(stack, data, expr, item, parent) + elseif item isa Vector{Float64} + _parse_constant_vector(stack, data, expr, item, parent) + else + parse_expression(data, expr, item, parent) + end + end + return +end + +# ── Entry points ───────────────────────────────────────────────────────────── + +function parse_expression( + data::Model, + expr::Expression, + x::MOI.ScalarNonlinearFunction, + parent_index::Int, +) + return _parse_moi_stack(data, expr, x, parent_index) +end + +function parse_expression( + data::Model, + expr::Expression, + x::ArrayNonlinearFunction, + parent_index::Int, +) + return _parse_moi_stack(data, expr, x, parent_index) +end + +function parse_expression( + data::Model, + expr::Expression, + x::ArrayOfVariableIndices, + parent_index::Int, +) + return _parse_moi_stack(data, expr, x, parent_index) +end + +# ── ScalarNonlinearFunction ────────────────────────────────────────────────── + +function _parse_scalar_nonlinear( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::MOI.ScalarNonlinearFunction, + parent_index::Int, +) + op = x.head + nargs = length(x.args) + if nargs == 1 + id = get(data.operators.univariate_operator_to_id, op, nothing) + if id !== nothing + push!(expr.nodes, Node(NODE_CALL_UNIVARIATE, id, parent_index)) + push!(stack, (length(expr.nodes), x.args[1])) + return + end + end + id = 
get(data.operators.multivariate_operator_to_id, op, nothing) + if id === nothing + throw(MOI.UnsupportedNonlinearOperator(op)) + end + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, id, parent_index)) + for i in nargs:-1:1 + push!(stack, (length(expr.nodes), x.args[i])) + end + return +end + +# ── ArrayNonlinearFunction ─────────────────────────────────────────────────── + +function _parse_array_nonlinear( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::ArrayNonlinearFunction, + parent_index::Int, +) + op = x.head + nargs = length(x.args) + if x.broadcasted + if nargs == 1 + id = get(data.operators.univariate_operator_to_id, op, nothing) + if id !== nothing + push!( + expr.nodes, + Node(NODE_CALL_UNIVARIATE_BROADCASTED, id, parent_index), + ) + push!(stack, (length(expr.nodes), x.args[1])) + return + end + end + id = get(data.operators.multivariate_operator_to_id, op, nothing) + if id === nothing + throw(MOI.UnsupportedNonlinearOperator(op)) + end + push!( + expr.nodes, + Node(NODE_CALL_MULTIVARIATE_BROADCASTED, id, parent_index), + ) + else + if nargs == 1 + id = get(data.operators.univariate_operator_to_id, op, nothing) + if id !== nothing + push!( + expr.nodes, + Node(NODE_CALL_UNIVARIATE, id, parent_index), + ) + push!(stack, (length(expr.nodes), x.args[1])) + return + end + end + id = get(data.operators.multivariate_operator_to_id, op, nothing) + if id === nothing + throw(MOI.UnsupportedNonlinearOperator(op)) + end + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, id, parent_index)) + end + for i in nargs:-1:1 + push!(stack, (length(expr.nodes), x.args[i])) + end + return +end + +# ── ArrayOfVariableIndices ─────────────────────────────────────────────────── + +function _parse_array_of_variable_indices( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::ArrayOfVariableIndices{2}, + parent_index::Int, +) + m, n = x.size + # Build vcat(row(v11, v12, ...), row(v21, v22, ...), ...) 
+ vcat_id = data.operators.multivariate_operator_to_id[:vcat] + row_id = data.operators.multivariate_operator_to_id[:row] + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vcat_id, parent_index)) + vcat_idx = length(expr.nodes) + # Push rows in reverse order for stack processing + for i in m:-1:1 + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, row_id, vcat_idx)) + row_idx = length(expr.nodes) + for j in n:-1:1 + vi = MOI.VariableIndex(x.offset + (j - 1) * m + i) + push!(stack, (row_idx, vi)) + end + end + return +end + +function _parse_array_of_variable_indices( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::ArrayOfVariableIndices{1}, + parent_index::Int, +) + m = x.size[1] + vect_id = data.operators.multivariate_operator_to_id[:vect] + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vect_id, parent_index)) + vect_idx = length(expr.nodes) + for i in m:-1:1 + vi = MOI.VariableIndex(x.offset + i) + push!(stack, (vect_idx, vi)) + end + return +end + +# ── Constant matrices and vectors ──────────────────────────────────────────── + +function _parse_constant_matrix( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::Matrix{Float64}, + parent_index::Int, +) + m, n = size(x) + vcat_id = data.operators.multivariate_operator_to_id[:vcat] + row_id = data.operators.multivariate_operator_to_id[:row] + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vcat_id, parent_index)) + vcat_idx = length(expr.nodes) + for i in m:-1:1 + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, row_id, vcat_idx)) + row_idx = length(expr.nodes) + for j in n:-1:1 + push!(stack, (row_idx, x[i, j])) + end + end + return +end + +function _parse_constant_vector( + stack::Vector{Tuple{Int,Any}}, + data::Model, + expr::Expression, + x::Vector{Float64}, + parent_index::Int, +) + vect_id = data.operators.multivariate_operator_to_id[:vect] + push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vect_id, parent_index)) + vect_idx = length(expr.nodes) + for i in length(x):-1:1 
+ push!(stack, (vect_idx, x[i])) + end + return +end + diff --git a/src/reverse_mode.jl b/src/reverse_mode.jl index 400d3aa..1b80608 100644 --- a/src/reverse_mode.jl +++ b/src/reverse_mode.jl @@ -347,6 +347,15 @@ function _forward_eval( @j f.partials_storage[ix] = v / @s f.forward_storage[k] end end + elseif node.index == 15 # sum + @assert N == 1 + ix = children_arr[first(children_indices)] + tmp_sum = zero(T) + for j in _eachindex(f.sizes, ix) + @j f.partials_storage[ix] = one(T) + tmp_sum += @j f.forward_storage[ix] + end + @s f.forward_storage[k] = tmp_sum elseif node.index == 16 # row for j in _eachindex(f.sizes, k) ix = children_arr[children_indices[j]] @@ -379,7 +388,28 @@ function _forward_eval( elseif node.type == NODE_CALL_MULTIVARIATE_BROADCASTED children_indices = SparseArrays.nzrange(f.adj, k) N = length(children_indices) - if node.index == node.index == 3 # :* + if node.index == 1 # :+ (broadcasted) + for j in _eachindex(f.sizes, k) + tmp_sum = zero(T) + for c_idx in children_indices + ix = children_arr[c_idx] + @j f.partials_storage[ix] = one(T) + tmp_sum += @j f.forward_storage[ix] + end + @j f.forward_storage[k] = tmp_sum + end + elseif node.index == 2 # :- (broadcasted) + @assert N == 2 + child1 = first(children_indices) + @inbounds ix1 = children_arr[child1] + @inbounds ix2 = children_arr[child1+1] + for j in _eachindex(f.sizes, k) + @j f.partials_storage[ix1] = one(T) + @j f.partials_storage[ix2] = -one(T) + @j f.forward_storage[k] = + @j(f.forward_storage[ix1]) - @j(f.forward_storage[ix2]) + end + elseif node.index == 3 # :* (broadcasted) # Node `k` is not scalar, so we do matrix multiplication if f.sizes.ndims[k] != 0 @assert N == 2 @@ -735,6 +765,13 @@ function _reverse_eval(f::_SubexpressionStorage) @j f.reverse_storage[ix] = val end continue + elseif op == :sum + rev_parent = @s f.reverse_storage[k] + ix = children_arr[children_indices[1]] + for j in _eachindex(f.sizes, ix) + @j f.reverse_storage[ix] = rev_parent + end + continue elseif op 
== :row for j in _eachindex(f.sizes, k) ix = children_arr[children_indices[j]] diff --git a/src/sizes.jl b/src/sizes.jl index 9c7a895..f73e469 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -188,6 +188,8 @@ function _infer_sizes( # TODO assert all arguments have same size elseif op == :norm # TODO actually norm should be moved to univariate + elseif op == :sum + # sum reduces array to scalar, ndims stays 0 elseif op == :+ || op == :- # TODO assert all arguments have same size _copy_size!(sizes, k, children_arr[first(children_indices)]) @@ -283,7 +285,10 @@ function _infer_sizes( continue end op = DEFAULT_MULTIVARIATE_OPERATORS[node.index] - if op == :* + if op == :+ || op == :- + # Broadcasted +/- preserves shape + _copy_size!(sizes, k, children_arr[first(children_indices)]) + elseif op == :* # TODO assert compatible sizes and all ndims should be 0 or 2 first_matrix = findfirst(children_indices) do i return !iszero(sizes.ndims[children_arr[i]]) diff --git a/test/JuMP.jl b/test/JuMP.jl index 75b9e55..0941f56 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -5,6 +5,11 @@ using Test using JuMP using ArrayDiff import LinearAlgebra +import MathOptInterface as MOI +import NLopt +import Ipopt +import NLPModelsJuMP +import NLPModelsIpopt function runtests() for name in names(@__MODULE__; all = true) @@ -113,6 +118,124 @@ function test_l2_loss() @test loss isa JuMP.NonlinearExpr @test loss.head == :norm @test loss.args[1] === diff_expr +end + +function test_array_subtraction() + model = Model() + @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables) + X = rand(2, 2) + diff = W * X - X + @test diff isa ArrayDiff.MatrixExpr + @test diff.head == :- + @test size(diff) == (2, 2) + return +end + +function test_array_addition() + model = Model() + @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables) + X = rand(2, 2) + s = W * X + X + @test s isa ArrayDiff.MatrixExpr + @test s.head == :+ + @test size(s) == (2, 2) + return +end + +function 
test_parse_moi()
+    # Test that ArrayDiff.Model can parse ScalarNonlinearFunction
+    # with ArrayNonlinearFunction args
+    model = Model()
+    @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables)
+    X = rand(2, 2)
+    Y = W * X
+    diff = Y .- X
+    loss = LinearAlgebra.norm(diff)
+    snf = JuMP.moi_function(loss)
+    @test snf isa MOI.ScalarNonlinearFunction
+    @test snf.head == :norm
+    @test snf.args[1] isa ArrayDiff.ArrayNonlinearFunction{2}
+    ad_model = ArrayDiff.Model()
+    ArrayDiff.set_objective(ad_model, snf)
+    @test ad_model.objective !== nothing
+    return
+end
+
+function test_moi_function()
+    model = Model()
+    @variable(model, W[1:2, 1:2], container = ArrayDiff.ArrayOfVariables)
+    X = rand(2, 2)
+    Y = W * X
+    f = JuMP.moi_function(Y)
+    @test f isa ArrayDiff.ArrayNonlinearFunction{2}
+    @test f.head == :*
+    @test f.size == (2, 2)
+    @test !f.broadcasted
+    @test MOI.output_dimension(f) == 4
+    return
+end
+
+function test_neural_nlopt()
+    n = 2
+    X = [1.0 0.5; 0.3 0.8]
+    target = [0.5 0.2; 0.1 0.7]
+    model = direct_model(NLopt.Optimizer())
+    set_attribute(model, "algorithm", :LD_LBFGS)
+    @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
+    @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
+    # Use distinct starting values to break symmetry
+    start_W1 = [0.3 -0.2; 0.1 0.4]
+    start_W2 = [-0.1 0.5; 0.2 -0.3]
+    for i in 1:n, j in 1:n
+        set_start_value(W1[i, j], start_W1[i, j])
+        set_start_value(W2[i, j], start_W2[i, j])
+    end
+    Y = W2 * tanh.(W1 * X)
+    loss = LinearAlgebra.norm(Y .- target)
+    @objective(model, Min, loss)
+    optimize!(model)
+    @test termination_status(model) == MOI.LOCALLY_SOLVED
+    @test objective_value(model) < 1e-6
+    return
+end
+
+function test_neural_ipopt_nlpmodels()
+    # Test end-to-end: JuMP → NLopt (stores ArrayDiff model) → Ipopt via MOI NLPBlock
+    n = 2
+    X = [1.0 0.5; 0.3 0.8]
+    target = [0.5 0.2; 0.1 0.7]
+    inner = NLopt.Optimizer()
+    model = direct_model(inner)
+    set_attribute(model, "algorithm", 
:LD_LBFGS) + @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) + @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) + start_W1 = [0.3 -0.2; 0.1 0.4] + start_W2 = [-0.1 0.5; 0.2 -0.3] + for i in 1:n, j in 1:n + set_start_value(W1[i, j], start_W1[i, j]) + set_start_value(W2[i, j], start_W2[i, j]) + end + Y = W2 * tanh.(W1 * X) + loss = LinearAlgebra.norm(Y .- target) + @objective(model, Min, loss) + # NLopt's nlp_model is now an ArrayDiff.Model (via nonlinear_model API). + # Build the evaluator from it and solve with Ipopt via its MOI interface. + nvar = 2 * n * n + vars = MOI.VariableIndex.(1:nvar) + evaluator = ArrayDiff.Evaluator(inner.nlp_model, ArrayDiff.Mode(), vars) + nlp_data = MOI.NLPBlockData(evaluator) + ipopt = Ipopt.Optimizer() + MOI.set(ipopt, MOI.RawOptimizerAttribute("print_level"), 0) + MOI.set(ipopt, MOI.RawOptimizerAttribute("hessian_approximation"), "limited-memory") + xs = MOI.add_variables(ipopt, nvar) + x0 = vcat(vec(start_W1), vec(start_W2)) + for i in 1:nvar + MOI.set(ipopt, MOI.VariablePrimalStart(), xs[i], x0[i]) + end + MOI.set(ipopt, MOI.NLPBlock(), nlp_data) + MOI.set(ipopt, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(ipopt) + @test MOI.get(ipopt, MOI.TerminationStatus()) == MOI.LOCALLY_SOLVED return end diff --git a/test/Project.toml b/test/Project.toml index 0b5a41e..bfb9322 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -2,9 +2,13 @@ ArrayDiff = "c45fa1ca-6901-44ac-ae5b-5513a4852d50" Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" GenOpt = "f2c049d8-7489-4223-990c-4f1c121a4cde" +Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +NLPModelsIpopt = "f4238b75-b362-5c4c-b852-0801c9a21d71" +NLPModelsJuMP = "792afdf1-32c1-5681-94e0-d7bf7a5df49e" +NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" OrderedCollections = 
"bac558e1-5e72-5ebc-8fee-abe8a469f55d" Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"