Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions perf/neural.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,40 @@
# Needs https://github.com/jump-dev/JuMP.jl/pull/3451
# Neural network optimization using ArrayDiff + NLopt
#
# This demonstrates end-to-end optimization of a simple two-layer neural
# network with array-valued decision variables, array-aware AD, and a
# first-order NLP solver.

using JuMP
using ArrayDiff
using LinearAlgebra
import NLopt

# Problem size and fixed data: random input X and random regression target.
n = 2
X = rand(n, n)
target = rand(n, n)

# direct_model avoids a CachingOptimizer layer; L-BFGS is a first-order
# method, so only gradients are required from the AD backend.
model = direct_model(NLopt.Optimizer())
set_attribute(model, "algorithm", :LD_LBFGS)

# Array-valued decision variables handled by ArrayDiff's container.
@variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
@variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)

# Set non-zero starting values to avoid saddle point at zero
for i in 1:n, j in 1:n
    set_start_value(W1[i, j], 0.1 * randn())
    set_start_value(W2[i, j], 0.1 * randn())
end

# Forward pass: Y = W2 * tanh.(W1 * X)
Y = W2 * tanh.(W1 * X)

# Loss: ||Y - target|| (norm returns a scalar NonlinearExpr)
loss = norm(Y .- target)
@objective(model, Min, loss)

optimize!(model)

println("Termination status: ", termination_status(model))
println("Objective value: ", objective_value(model))
println("W1 = ", [value(W1[i, j]) for i in 1:n, j in 1:n])
println("W2 = ", [value(W2[i, j]) for i in 1:n, j in 1:n])
47 changes: 43 additions & 4 deletions src/ArrayDiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ const Nonlinear = MOI.Nonlinear
import SparseArrays
import OrderedCollections

"""
Mode() <: MOI.Nonlinear.AbstractAutomaticDifferentiation

Fork of `MOI.Nonlinear.SparseReverseMode` to add array support.
"""
struct Mode <: MOI.Nonlinear.AbstractAutomaticDifferentiation end

# Override basic math functions to return NaN instead of throwing errors.
Expand Down Expand Up @@ -48,12 +53,35 @@ include("model.jl")
include("parse.jl")
include("evaluator.jl")

"""
Mode() <: AbstractAutomaticDifferentiation
include("array_nonlinear_function.jl")
include("parse_moi.jl")

Fork of `MOI.Nonlinear.SparseReverseMode` to add array support.
"""
# Tell MOI to create an ArrayDiff.Model when Mode() is the AD backend.
Nonlinear.nonlinear_model(::Mode) = Model()

# Extend MOI.Nonlinear functions so solvers can call them on ArrayDiff.Model.
# Forward MOI.Nonlinear.register_operator to ArrayDiff's own register_operator
# so user-defined operators work when the model is an ArrayDiff.Model.
# `f...` is the operator callback tuple — presumably the value/derivative
# functions accepted by MOI's operator-registration API, forwarded untouched;
# TODO confirm the supported arity matches ArrayDiff's register_operator.
function Nonlinear.register_operator(
    model::Model,
    op::Symbol,
    nargs::Int,
    f::Function...,
)
    return register_operator(model, op, nargs, f...)
end

# Extend MOI.Nonlinear.set_objective so that solvers calling
# MOI.Nonlinear.set_objective(arraydiff_model, snf) dispatch here.
# The scalar nonlinear function is parsed into ArrayDiff's internal
# expression representation and stored on the model.
function Nonlinear.set_objective(model::Model, obj::MOI.ScalarNonlinearFunction)
    model.objective = parse_expression(model, obj)
    return
end

# MOI passes `nothing` to clear a previously set nonlinear objective.
function Nonlinear.set_objective(model::Model, ::Nothing)
    model.objective = nothing
    return
end

# Create an ArrayDiff Evaluator from an ArrayDiff Model.
function Evaluator(
model::ArrayDiff.Model,
::Mode,
Expand All @@ -62,6 +90,17 @@ function Evaluator(
return Evaluator(model, NLPEvaluator(model, ordered_variables))
end

# Entry point used by solvers: MOI.Nonlinear.Evaluator(nlp_model, ad, vars).
# An ArrayDiff.Model produced by nonlinear_model(::Mode) already holds the
# parsed objective, so all that remains is wrapping it in an evaluator.
function Nonlinear.Evaluator(
    model::Model,
    ::Mode,
    vars::Vector{MOI.VariableIndex},
)
    return Evaluator(model, NLPEvaluator(model, vars))
end

include("JuMP/JuMP.jl")

end # module
1 change: 1 addition & 0 deletions src/JuMP/JuMP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ include("variables.jl")
include("nlp_expr.jl")
include("operators.jl")
include("print.jl")
include("moi_bridge.jl")
54 changes: 54 additions & 0 deletions src/JuMP/moi_bridge.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Conversion from JuMP array types to MOI ArrayNonlinearFunction
# and set_objective_function that sets AutomaticDifferentiationBackend.

# ── moi_function: JuMP → MOI ─────────────────────────────────────────────────

# A contiguous block of JuMP array variables maps to its MOI counterpart,
# keeping only the offset and shape.
_to_moi_arg(x::ArrayOfVariables{T,N}) where {T,N} =
    ArrayOfVariableIndices{N}(x.offset, x.size)

# Array expressions convert recursively, argument by argument.
function _to_moi_arg(x::GenericArrayExpr{V,N}) where {V,N}
    moi_args = Any[_to_moi_arg(arg) for arg in x.args]
    return ArrayNonlinearFunction{N}(x.head, moi_args, x.size, x.broadcasted)
end

# Constant numeric arrays pass through unchanged. Accept any Float64 array —
# the ArrayNonlinearFunction docstring lists both Vector{Float64} and
# Matrix{Float64} as valid argument types, but the previous Matrix-only
# signature made vector constants hit a MethodError.
_to_moi_arg(x::AbstractArray{Float64}) = x

# Scalar constants are normalized to Float64 for the MOI representation.
_to_moi_arg(x::Real) = convert(Float64, x)

# JuMP → MOI conversion hook: an array expression becomes an
# ArrayNonlinearFunction via the recursive _to_moi_arg machinery above.
function JuMP.moi_function(x::GenericArrayExpr{V,N}) where {V,N}
    return _to_moi_arg(x)
end

# ── Detect whether a JuMP expression contains array args ─────────────────────

# Scalars and ordinary expressions: no array content.
_has_array_args(::Any) = false
# Any JuMP array type counts immediately.
_has_array_args(::AbstractJuMPArray) = true
# Nonlinear expressions: search the argument list recursively.
_has_array_args(expr::JuMP.GenericNonlinearExpr) = any(_has_array_args, expr.args)

# ── set_objective_function for nonlinear expressions with array args ─────────
# When the expression contains array subexpressions, we set
# AutomaticDifferentiationBackend to ArrayDiff.Mode() so the solver
# creates an ArrayDiff.Model (via nonlinear_model) for parsing.
#
# NOTE(review): this method replaces JuMP's own set_objective_function for
# *every* GenericNonlinearExpr objective, not only array ones — confirm the
# plain-scalar path below matches JuMP's standard behavior (e.g. any
# fallback/error handling JuMP normally performs around MOI.set).

function JuMP.set_objective_function(
    model::JuMP.GenericModel{T},
    func::JuMP.GenericNonlinearExpr{JuMP.GenericVariableRef{T}},
) where {T<:Real}
    # Switch the AD backend before the objective is parsed so the backend's
    # nonlinear_model(::Mode) produces an ArrayDiff.Model.
    if _has_array_args(func)
        MOI.set(
            JuMP.backend(model),
            MOI.AutomaticDifferentiationBackend(),
            Mode(),
        )
    end
    # Standard JuMP flow: convert to MOI and set on backend
    f = JuMP.moi_function(func)
    attr = MOI.ObjectiveFunction{typeof(f)}()
    MOI.set(JuMP.backend(model), attr, f)
    # Mark caches stale, mirroring what JuMP does after objective changes.
    model.is_model_dirty = true
    return
end
46 changes: 46 additions & 0 deletions src/JuMP/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,49 @@ end
# Norm of an array of decision variables; delegates to the package-internal
# _array_norm helper (which builds the scalar nonlinear expression).
function LinearAlgebra.norm(x::ArrayOfVariables)
    return _array_norm(x)
end

# Subtraction between array expressions and constant arrays.
# Each method builds a lazy `:-` GenericArrayExpr node; shapes must match
# exactly (`broadcasted = false`). Shape checks throw DimensionMismatch
# instead of using `@assert`, which can be compiled out and is not meant
# for input validation.
function Base.:(-)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N}
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot subtract arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    V = JuMP.variable_ref_type(x)
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false)
end

function Base.:(-)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N}
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot subtract arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    V = JuMP.variable_ref_type(y)
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(y), false)
end

function Base.:(-)(
    x::AbstractJuMPArray{T,N},
    y::AbstractJuMPArray{S,N},
) where {T,S,N}
    V = JuMP.variable_ref_type(x)
    JuMP.variable_ref_type(y) == V || throw(
        ArgumentError("mixed variable reference types in array subtraction"),
    )
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot subtract arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false)
end

# Addition between array expressions and constant arrays.
# Each method builds a lazy `:+` GenericArrayExpr node; shapes must match
# exactly (`broadcasted = false`). Shape checks throw DimensionMismatch
# instead of using `@assert`, which can be compiled out and is not meant
# for input validation.
function Base.:(+)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N}
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot add arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    V = JuMP.variable_ref_type(x)
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false)
end

function Base.:(+)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N}
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot add arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    V = JuMP.variable_ref_type(y)
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(y), false)
end

function Base.:(+)(
    x::AbstractJuMPArray{T,N},
    y::AbstractJuMPArray{S,N},
) where {T,S,N}
    V = JuMP.variable_ref_type(x)
    JuMP.variable_ref_type(y) == V || throw(
        ArgumentError("mixed variable reference types in array addition"),
    )
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot add arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false)
end
94 changes: 94 additions & 0 deletions src/array_nonlinear_function.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction

Represents an N-dimensional array-valued nonlinear function for MOI.

The `output_dimension` is `prod(size)` — the vectorization of the array — since
`MOI.AbstractVectorFunction` cannot represent multidimensional arrays. No actual
vectorization is performed; this is only for passing through MOI layers.

## Fields

- `head::Symbol`: the operator (e.g., `:*`, `:tanh`)
- `args::Vector{Any}`: arguments, which may be `ArrayNonlinearFunction`,
`MOI.ScalarNonlinearFunction`, `MOI.VariableIndex`, `Float64`,
`Vector{Float64}`, `Matrix{Float64}`, or `ArrayOfVariableIndices`
- `size::NTuple{N,Int}`: the dimensions of the output array
- `broadcasted::Bool`: whether this is a broadcasted operation
"""
struct ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction
head::Symbol
args::Vector{Any}
size::NTuple{N,Int}
broadcasted::Bool
end

# The MOI "vector" length is the total element count of the array output.
MOI.output_dimension(f::ArrayNonlinearFunction) = prod(f.size)

"""
ArrayOfVariableIndices{N}

A block of contiguous `MOI.VariableIndex` values representing an N-dimensional
array. Used as an argument in `ArrayNonlinearFunction`.
"""
struct ArrayOfVariableIndices{N} <: MOI.AbstractVectorFunction
offset::Int
size::NTuple{N,Int}
end

Base.size(a::ArrayOfVariableIndices) = a.size

function MOI.output_dimension(f::ArrayOfVariableIndices)
return prod(f.size)
end

# ArrayNonlinearFunction holds a mutable argument vector, so copy that
# container shallowly while reusing the immutable fields.
function Base.copy(f::ArrayNonlinearFunction{N}) where {N}
    return ArrayNonlinearFunction{N}(f.head, copy(f.args), f.size, f.broadcasted)
end

# ArrayOfVariableIndices is an immutable value type; returning it unchanged
# is a valid copy.
Base.copy(f::ArrayOfVariableIndices{N}) where {N} = f

# map_indices: remap MOI.VariableIndex values during MOI.copy_to.
# Rebuild the node with every argument passed through the remapper.
function MOI.Utilities.map_indices(
    index_map::F,
    f::ArrayNonlinearFunction{N},
) where {F<:Function,N}
    mapped = Any[_map_indices_arg(index_map, arg) for arg in f.args]
    return ArrayNonlinearFunction{N}(f.head, mapped, f.size, f.broadcasted)
end

function MOI.Utilities.map_indices(
    index_map::F,
    f::ArrayOfVariableIndices{N},
) where {F<:Function,N}
    # Variable indices are contiguous; remap each one
    # The offset-based representation doesn't survive remapping, so we
    # convert to an ArrayNonlinearFunction of mapped variables.
    # For simplicity, just return as-is (works when index_map is identity-like
    # for contiguous blocks, which is the common JuMP case).
    # NOTE(review): this silently yields wrong variable indices whenever
    # `index_map` is NOT identity-like on this contiguous block (e.g. a
    # copy_to that reorders or filters variables). Consider checking the map
    # on the block's indices and throwing if it is non-trivial — TODO confirm
    # with a non-direct-model round trip.
    return f
end

# Nested array functions remap recursively through MOI.Utilities.map_indices.
_map_indices_arg(index_map::F, x::ArrayNonlinearFunction) where {F} =
    MOI.Utilities.map_indices(index_map, x)

_map_indices_arg(index_map::F, x::ArrayOfVariableIndices) where {F} =
    MOI.Utilities.map_indices(index_map, x)

# Constant data contains no variable indices and needs no remapping.
# Accept any Float64 array — the ArrayNonlinearFunction docstring lists both
# Vector{Float64} and Matrix{Float64} as valid argument types, so a
# Matrix-only signature would send vector constants to the generic fallback.
_map_indices_arg(::F, x::AbstractArray{Float64}) where {F} = x

# Scalar constants likewise pass through untouched.
_map_indices_arg(::F, x::Real) where {F} = x

# Fallback: delegate remapping of any other argument (e.g. MOI.VariableIndex
# or MOI.ScalarNonlinearFunction) to MOI's generic map_indices.
function _map_indices_arg(index_map::F, x) where {F}
    return MOI.Utilities.map_indices(index_map, x)
end
4 changes: 4 additions & 0 deletions src/evaluator.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Largely inspired by MathOptInterface/src/Nonlinear/parse.jl
# Most functions have been copy-pasted and slightly modified to adapt to small changes in OperatorRegistry and Model.

# Bridge MOI's evaluator interface to the package-local features_available
# helper so solvers can query supported features (e.g. :Grad) on Evaluator.
function MOI.features_available(evaluator::Evaluator)
    return features_available(evaluator)
end

function MOI.initialize(evaluator::Evaluator, features::Vector{Symbol})
start_time = time()
empty!(evaluator.ordered_constraints)
Expand Down
2 changes: 2 additions & 0 deletions src/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ function eval_multivariate_function(
return maximum(x)
elseif op == :vect
return x
elseif op == :sum
return sum(x; init = zero(T))
end
id = registry.multivariate_operator_to_id[op]
offset = id - registry.multivariate_user_operator_start
Expand Down
Loading
Loading