Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions perf/neural.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,40 @@
# Needs https://github.com/jump-dev/JuMP.jl/pull/3451
# Neural network optimization using ArrayDiff + NLopt
#
# This demonstrates end-to-end optimization of a simple two-layer neural
# network with array-valued decision variables, array-aware AD, and a
# first-order NLP solver.

using JuMP
using ArrayDiff
using LinearAlgebra
import NLopt

# Problem size and fixed data: random input X and random regression target.
n = 2
X = rand(n, n)
target = rand(n, n)

# direct_model avoids a CachingOptimizer layer; L-BFGS is a first-order
# method, so only gradients are required from the AD backend.
model = direct_model(NLopt.Optimizer())
set_attribute(model, "algorithm", :LD_LBFGS)

# Array-valued decision variables handled by ArrayDiff's container.
@variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
@variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)

# Set non-zero starting values to avoid saddle point at zero
for i in 1:n, j in 1:n
    set_start_value(W1[i, j], 0.1 * randn())
    set_start_value(W2[i, j], 0.1 * randn())
end

# Forward pass: Y = W2 * tanh.(W1 * X)
Y = W2 * tanh.(W1 * X)

# Loss: ||Y - target|| (norm returns a scalar NonlinearExpr)
loss = norm(Y .- target)
@objective(model, Min, loss)

optimize!(model)

println("Termination status: ", termination_status(model))
println("Objective value: ", objective_value(model))
println("W1 = ", [value(W1[i, j]) for i in 1:n, j in 1:n])
println("W2 = ", [value(W2[i, j]) for i in 1:n, j in 1:n])
47 changes: 43 additions & 4 deletions src/ArrayDiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ const Nonlinear = MOI.Nonlinear
import SparseArrays
import OrderedCollections

"""
Mode() <: MOI.Nonlinear.AbstractAutomaticDifferentiation

Fork of `MOI.Nonlinear.SparseReverseMode` to add array support.
"""
struct Mode <: MOI.Nonlinear.AbstractAutomaticDifferentiation end

# Override basic math functions to return NaN instead of throwing errors.
Expand Down Expand Up @@ -48,12 +53,35 @@ include("model.jl")
include("parse.jl")
include("evaluator.jl")

"""
Mode() <: AbstractAutomaticDifferentiation
include("array_nonlinear_function.jl")
include("parse_moi.jl")

Fork of `MOI.Nonlinear.SparseReverseMode` to add array support.
"""
# Tell MOI to create an ArrayDiff.Model when Mode() is the AD backend.
Nonlinear.nonlinear_model(::Mode) = Model()

# Extend MOI.Nonlinear functions so solvers can call them on ArrayDiff.Model.
# Forward MOI.Nonlinear.register_operator to ArrayDiff's own register_operator
# so user-defined operators work when the model is an ArrayDiff.Model.
# `f...` is the operator callback tuple — presumably the value/derivative
# functions accepted by MOI's operator-registration API, forwarded untouched;
# TODO confirm the supported arity matches ArrayDiff's register_operator.
function Nonlinear.register_operator(
    model::Model,
    op::Symbol,
    nargs::Int,
    f::Function...,
)
    return register_operator(model, op, nargs, f...)
end

# Extend MOI.Nonlinear.set_objective so that solvers calling
# MOI.Nonlinear.set_objective(arraydiff_model, snf) dispatch here.
# The scalar nonlinear function is parsed into ArrayDiff's internal
# expression representation and stored on the model.
function Nonlinear.set_objective(model::Model, obj::MOI.ScalarNonlinearFunction)
    model.objective = parse_expression(model, obj)
    return
end

# MOI passes `nothing` to clear a previously set nonlinear objective.
function Nonlinear.set_objective(model::Model, ::Nothing)
    model.objective = nothing
    return
end

# Create an ArrayDiff Evaluator from an ArrayDiff Model.
function Evaluator(
model::ArrayDiff.Model,
::Mode,
Expand All @@ -62,6 +90,17 @@ function Evaluator(
return Evaluator(model, NLPEvaluator(model, ordered_variables))
end

# Entry point used by solvers: MOI.Nonlinear.Evaluator(nlp_model, ad, vars).
# An ArrayDiff.Model produced by nonlinear_model(::Mode) already holds the
# parsed objective, so all that remains is wrapping it in an evaluator.
function Nonlinear.Evaluator(
    model::Model,
    ::Mode,
    vars::Vector{MOI.VariableIndex},
)
    return Evaluator(model, NLPEvaluator(model, vars))
end

include("JuMP/JuMP.jl")

end # module
1 change: 1 addition & 0 deletions src/JuMP/JuMP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ include("variables.jl")
include("nlp_expr.jl")
include("operators.jl")
include("print.jl")
include("moi_bridge.jl")
54 changes: 54 additions & 0 deletions src/JuMP/moi_bridge.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Conversion from JuMP array types to MOI ArrayNonlinearFunction
# and set_objective_function that sets AutomaticDifferentiationBackend.

# ── moi_function: JuMP → MOI ─────────────────────────────────────────────────

# A contiguous block of JuMP array variables maps to its MOI counterpart,
# keeping only the offset and shape.
_to_moi_arg(x::ArrayOfVariables{T,N}) where {T,N} =
    ArrayOfVariableIndices{N}(x.offset, x.size)

# Array expressions convert recursively, argument by argument.
function _to_moi_arg(x::GenericArrayExpr{V,N}) where {V,N}
    moi_args = Any[_to_moi_arg(arg) for arg in x.args]
    return ArrayNonlinearFunction{N}(x.head, moi_args, x.size, x.broadcasted)
end

# Constant numeric arrays pass through unchanged. Accept any Float64 array —
# the ArrayNonlinearFunction docstring lists both Vector{Float64} and
# Matrix{Float64} as valid argument types, but the previous Matrix-only
# signature made vector constants hit a MethodError.
_to_moi_arg(x::AbstractArray{Float64}) = x

# Scalar constants are normalized to Float64 for the MOI representation.
_to_moi_arg(x::Real) = convert(Float64, x)

# JuMP → MOI conversion hook: an array expression becomes an
# ArrayNonlinearFunction via the recursive _to_moi_arg machinery above.
function JuMP.moi_function(x::GenericArrayExpr{V,N}) where {V,N}
    return _to_moi_arg(x)
end

# ── Detect whether a JuMP expression contains array args ─────────────────────

# Scalars and ordinary expressions: no array content.
_has_array_args(::Any) = false
# Any JuMP array type counts immediately.
_has_array_args(::AbstractJuMPArray) = true
# Nonlinear expressions: search the argument list recursively.
_has_array_args(expr::JuMP.GenericNonlinearExpr) = any(_has_array_args, expr.args)

# ── set_objective_function for nonlinear expressions with array args ─────────
# When the expression contains array subexpressions, we set
# AutomaticDifferentiationBackend to ArrayDiff.Mode() so the solver
# creates an ArrayDiff.Model (via nonlinear_model) for parsing.
#
# NOTE(review): this method replaces JuMP's own set_objective_function for
# *every* GenericNonlinearExpr objective, not only array ones — confirm the
# plain-scalar path below matches JuMP's standard behavior (e.g. any
# fallback/error handling JuMP normally performs around MOI.set).

function JuMP.set_objective_function(
    model::JuMP.GenericModel{T},
    func::JuMP.GenericNonlinearExpr{JuMP.GenericVariableRef{T}},
) where {T<:Real}
    # Switch the AD backend before the objective is parsed so the backend's
    # nonlinear_model(::Mode) produces an ArrayDiff.Model.
    if _has_array_args(func)
        MOI.set(
            JuMP.backend(model),
            MOI.AutomaticDifferentiationBackend(),
            Mode(),
        )
    end
    # Standard JuMP flow: convert to MOI and set on backend
    f = JuMP.moi_function(func)
    attr = MOI.ObjectiveFunction{typeof(f)}()
    MOI.set(JuMP.backend(model), attr, f)
    # Mark caches stale, mirroring what JuMP does after objective changes.
    model.is_model_dirty = true
    return
end
46 changes: 46 additions & 0 deletions src/JuMP/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,49 @@ end
# Norm of an array of decision variables; delegates to the package-internal
# _array_norm helper (which builds the scalar nonlinear expression).
function LinearAlgebra.norm(x::ArrayOfVariables)
    return _array_norm(x)
end

# Subtraction between array expressions and constant arrays.
# Each method builds a lazy `:-` GenericArrayExpr node; shapes must match
# exactly (`broadcasted = false`). Shape checks throw DimensionMismatch
# instead of using `@assert`, which can be compiled out and is not meant
# for input validation.
function Base.:(-)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N}
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot subtract arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    V = JuMP.variable_ref_type(x)
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false)
end

function Base.:(-)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N}
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot subtract arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    V = JuMP.variable_ref_type(y)
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(y), false)
end

function Base.:(-)(
    x::AbstractJuMPArray{T,N},
    y::AbstractJuMPArray{S,N},
) where {T,S,N}
    V = JuMP.variable_ref_type(x)
    JuMP.variable_ref_type(y) == V || throw(
        ArgumentError("mixed variable reference types in array subtraction"),
    )
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot subtract arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    return GenericArrayExpr{V,N}(:-, Any[x, y], size(x), false)
end

# Addition between array expressions and constant arrays.
# Each method builds a lazy `:+` GenericArrayExpr node; shapes must match
# exactly (`broadcasted = false`). Shape checks throw DimensionMismatch
# instead of using `@assert`, which can be compiled out and is not meant
# for input validation.
function Base.:(+)(x::AbstractJuMPArray{T,N}, y::AbstractArray{S,N}) where {S,T,N}
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot add arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    V = JuMP.variable_ref_type(x)
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false)
end

function Base.:(+)(x::AbstractArray{S,N}, y::AbstractJuMPArray{T,N}) where {S,T,N}
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot add arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    V = JuMP.variable_ref_type(y)
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(y), false)
end

function Base.:(+)(
    x::AbstractJuMPArray{T,N},
    y::AbstractJuMPArray{S,N},
) where {T,S,N}
    V = JuMP.variable_ref_type(x)
    JuMP.variable_ref_type(y) == V || throw(
        ArgumentError("mixed variable reference types in array addition"),
    )
    size(x) == size(y) || throw(
        DimensionMismatch(
            "cannot add arrays of sizes $(size(x)) and $(size(y))",
        ),
    )
    return GenericArrayExpr{V,N}(:+, Any[x, y], size(x), false)
end
94 changes: 94 additions & 0 deletions src/array_nonlinear_function.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction

Represents an N-dimensional array-valued nonlinear function for MOI.

The `output_dimension` is `prod(size)` — the vectorization of the array — since
`MOI.AbstractVectorFunction` cannot represent multidimensional arrays. No actual
vectorization is performed; this is only for passing through MOI layers.

## Fields

- `head::Symbol`: the operator (e.g., `:*`, `:tanh`)
- `args::Vector{Any}`: arguments, which may be `ArrayNonlinearFunction`,
`MOI.ScalarNonlinearFunction`, `MOI.VariableIndex`, `Float64`,
`Vector{Float64}`, `Matrix{Float64}`, or `ArrayOfVariableIndices`
- `size::NTuple{N,Int}`: the dimensions of the output array
- `broadcasted::Bool`: whether this is a broadcasted operation
"""
struct ArrayNonlinearFunction{N} <: MOI.AbstractVectorFunction
head::Symbol
args::Vector{Any}
size::NTuple{N,Int}
broadcasted::Bool
end

# The MOI "vector" length is the total element count of the array output.
MOI.output_dimension(f::ArrayNonlinearFunction) = prod(f.size)

"""
ArrayOfVariableIndices{N}

A block of contiguous `MOI.VariableIndex` values representing an N-dimensional
array. Used as an argument in `ArrayNonlinearFunction`.
"""
struct ArrayOfVariableIndices{N} <: MOI.AbstractVectorFunction
offset::Int
size::NTuple{N,Int}
end

Base.size(a::ArrayOfVariableIndices) = a.size

function MOI.output_dimension(f::ArrayOfVariableIndices)
return prod(f.size)
end

# ArrayNonlinearFunction holds a mutable argument vector, so copy that
# container shallowly while reusing the immutable fields.
function Base.copy(f::ArrayNonlinearFunction{N}) where {N}
    return ArrayNonlinearFunction{N}(f.head, copy(f.args), f.size, f.broadcasted)
end

# ArrayOfVariableIndices is an immutable value type; returning it unchanged
# is a valid copy.
Base.copy(f::ArrayOfVariableIndices{N}) where {N} = f

# map_indices: remap MOI.VariableIndex values during MOI.copy_to.
# Rebuild the node with every argument passed through the remapper.
function MOI.Utilities.map_indices(
    index_map::F,
    f::ArrayNonlinearFunction{N},
) where {F<:Function,N}
    mapped = Any[_map_indices_arg(index_map, arg) for arg in f.args]
    return ArrayNonlinearFunction{N}(f.head, mapped, f.size, f.broadcasted)
end

function MOI.Utilities.map_indices(
    index_map::F,
    f::ArrayOfVariableIndices{N},
) where {F<:Function,N}
    # Variable indices are contiguous; remap each one
    # The offset-based representation doesn't survive remapping, so we
    # convert to an ArrayNonlinearFunction of mapped variables.
    # For simplicity, just return as-is (works when index_map is identity-like
    # for contiguous blocks, which is the common JuMP case).
    # NOTE(review): this silently yields wrong variable indices whenever
    # `index_map` is NOT identity-like on this contiguous block (e.g. a
    # copy_to that reorders or filters variables). Consider checking the map
    # on the block's indices and throwing if it is non-trivial — TODO confirm
    # with a non-direct-model round trip.
    return f
end

# Nested array functions remap recursively through MOI.Utilities.map_indices.
_map_indices_arg(index_map::F, x::ArrayNonlinearFunction) where {F} =
    MOI.Utilities.map_indices(index_map, x)

_map_indices_arg(index_map::F, x::ArrayOfVariableIndices) where {F} =
    MOI.Utilities.map_indices(index_map, x)

# Constant data contains no variable indices and needs no remapping.
# Accept any Float64 array — the ArrayNonlinearFunction docstring lists both
# Vector{Float64} and Matrix{Float64} as valid argument types, so a
# Matrix-only signature would send vector constants to the generic fallback.
_map_indices_arg(::F, x::AbstractArray{Float64}) where {F} = x

# Scalar constants likewise pass through untouched.
_map_indices_arg(::F, x::Real) where {F} = x

# Fallback: delegate remapping of any other argument (e.g. MOI.VariableIndex
# or MOI.ScalarNonlinearFunction) to MOI's generic map_indices.
function _map_indices_arg(index_map::F, x) where {F}
    return MOI.Utilities.map_indices(index_map, x)
end
4 changes: 4 additions & 0 deletions src/evaluator.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Largely inspired by MathOptInterface/src/Nonlinear/parse.jl
# Most functions have been copy-pasted and slightly modified to adapt to small changes in OperatorRegistry and Model.

# Bridge MOI's evaluator interface to the package-local features_available
# helper so solvers can query supported features (e.g. :Grad) on Evaluator.
function MOI.features_available(evaluator::Evaluator)
    return features_available(evaluator)
end

function MOI.initialize(evaluator::Evaluator, features::Vector{Symbol})
start_time = time()
empty!(evaluator.ordered_constraints)
Expand Down
2 changes: 2 additions & 0 deletions src/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ function eval_multivariate_function(
return maximum(x)
elseif op == :vect
return x
elseif op == :sum
return sum(x; init = zero(T))
end
id = registry.multivariate_operator_to_id[op]
offset = id - registry.multivariate_user_operator_start
Expand Down
Loading
Loading