From 7437140f9894538510f1ab546624fbf6c8e9a4f3 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Thu, 15 Feb 2024 15:08:22 -0500
Subject: [PATCH 01/32] change generation

---
 .../generate_full_datasets_script.jl          | 77 ++++++++++++-------
 1 file changed, 49 insertions(+), 28 deletions(-)

diff --git a/examples/powermodels/generate_full_datasets_script.jl b/examples/powermodels/generate_full_datasets_script.jl
index 8b40d2d..83dba34 100644
--- a/examples/powermodels/generate_full_datasets_script.jl
+++ b/examples/powermodels/generate_full_datasets_script.jl
@@ -1,45 +1,61 @@
-# run with: julia ./examples/powermodels/generate_full_datasets_script.jl "./examples/powermodels/data/pglib_opf_case300_ieee/case300.config.toml"
-config_path = ARGS[1]
+# run with: julia ./examples/powermodels/generate_full_datasets_script.jl "./examples/powermodels/data/pglib_opf_case300_ieee/case300.config.toml" SOCWRConicPowerModel
 
-using Pkg: Pkg;
-Pkg.activate(".");
+################################################################
+############## PowerModels Dataset Generation ##############
+################################################################
 
-using TestEnv
-TestEnv.activate()
+using Distributed
+using Random
+
+##############
+# Load Functions
+##############
+
+@everywhere import Pkg
+
+@everywhere Pkg.activate(dirname(dirname(@__DIR__)))
+
+@everywhere Pkg.instantiate()
 
 ########## SCRIPT REQUIRED PACKAGES ##########
 
-using L2O
-using Arrow
-using Test
-using UUIDs
-using PowerModels
-import JuMP.MOI as MOI
-import ParametricOptInterface as POI
-using TOML
+@everywhere using L2O
+@everywhere using Arrow
+@everywhere using Test
+@everywhere using UUIDs
+@everywhere using PowerModels
+@everywhere import JuMP.MOI as MOI
+@everywhere import ParametricOptInterface as POI
+@everywhere using TOML
+
+## SOLVER PACKAGES ##
+
+# using Clarabel
+@everywhere using Gurobi
 
 PowerModels.silence()
 
-## SOLVER PACKAGES ##
+##############
+# Parameters
+##############
 
-using Clarabel
-using Gurobi
-using NonconvexNLopt
+config_path = ARGS[3]
 
 ########## POI SOLVER ##########
 
-cached =
-    () -> MOI.Bridges.full_bridge_optimizer(
-        MOI.Utilities.CachingOptimizer(
-            MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()),
-            Clarabel.Optimizer(),
-        ),
-        Float64,
-    )
+# cached =
+#     () -> MOI.Bridges.full_bridge_optimizer(
+#         MOI.Utilities.CachingOptimizer(
+#             MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()),
+#             Gurobi.Optimizer(),
+#         ),
+#         Float64,
+#     )
 
-POI_cached_optimizer() = POI.Optimizer(cached())
+POI_cached_optimizer() = Gurobi.Optimizer() # POI.Optimizer(cached())
 
 ########## PARAMETERS ##########
+@info "Loading configuration file: $config_path"
 
 config = TOML.parsefile(config_path)
 path = config["export_dir"]
@@ -52,7 +68,11 @@ filetype = ArrowFile # ArrowFile # CSVFile
 case_name = config["case_name"]
 case_file_path = joinpath(path, case_name)
 mkpath(case_file_path)
-network_formulation = eval(Symbol(ARGS[2])) # SOCWRConicPowerModel # DCPPowerModel
+network_formulation = eval(Symbol(ARGS[4])) # SOCWRConicPowerModel # DCPPowerModel
+
+##############
+# Solve and store solutions
+##############
 
 ########## SAMPLER DATASET GENERATION ##########
 
@@ -136,6 +156,7 @@ end
 
 ########## WORST CASE NONCONVEX DATASET GENERATION ##########
 if haskey(config, "worst_case_nonconvex")
+    @everywhere using NonconvexNLopt
     num_p = config["worst_case_nonconvex"]["num_samples"]
 
     success_solves, number_variables, number_loads, batch_id = generate_worst_case_dataset_Nonconvex(

From 891b1c591fab4bc2c7382c1d90c7f81ba63f9068 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Thu, 15 Feb 2024 15:20:35 -0500
Subject: [PATCH 02/32] ignore config

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 7d1a06e..38b3609 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,4 @@ examples/unitcommitment/app/*
 *.wandb
 *latest-run
 *.html
+*.config.toml

From 02541b5aaa5d709700eb790f1ee3b36de73ef058 Mon Sep 17 00:00:00 2001
From: Andrew Rosemberg <arosemberg3@gatech.edu>
Date: Thu, 15 Feb 2024 16:29:25 -0500
Subject: [PATCH 03/32] pass idx and num_inputs to internal_load_sampler

---
 examples/powermodels/generate_full_datasets_script.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/powermodels/generate_full_datasets_script.jl b/examples/powermodels/generate_full_datasets_script.jl
index 83dba34..7727752 100644
--- a/examples/powermodels/generate_full_datasets_script.jl
+++ b/examples/powermodels/generate_full_datasets_script.jl
@@ -1,4 +1,4 @@
-# run with: julia ./examples/powermodels/generate_full_datasets_script.jl "./examples/powermodels/data/pglib_opf_case300_ieee/case300.config.toml" SOCWRConicPowerModel
+# run with: julia ./generate_full_datasets_script.jl 1 1 "../powermodels/data/6468_rte.config.toml" SOCWRConicPowerModel
 
 ################################################################
 ############## PowerModels Dataset Generation ##############
@@ -88,8 +88,8 @@ if haskey(config, "sampler")
             filetype=filetype,
             network_formulation=network_formulation,
             optimizer=POI_cached_optimizer,
-            internal_load_sampler=(_o, n) -> load_sampler(
-                _o, n; max_multiplier=1.25, min_multiplier=0.8, step_multiplier=0.01
+            internal_load_sampler=(_o, n, idx, num_inputs) -> load_sampler(
+                _o, n, idx, num_inputs; max_multiplier=1.25, min_multiplier=0.8, step_multiplier=0.01
             ),
         )
         global success_solves += _success_solves

From 6d9c11f1fda7fd830033676836412954cf109627 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Tue, 20 Feb 2024 10:57:04 -0500
Subject: [PATCH 04/32] add samplers; move worst-case code to examples

---
 .../cuttingplanes}/cutting_planes.jl          |  0
 .../generate_full_datasets_script.jl          | 74 +++++++++----------
 examples/powermodels/pglib_datagen.jl         |  4 +-
 examples/powermodels/visualize.jl             | 17 ++++-
 examples/powermodels/write_to_file.jl         | 42 +++++++++++
 .../test_worst_case.jl                        | 11 +++
 .../worst_case_active_learning}/worst_case.jl |  0
 .../worst_case_iter.jl                        |  0
 src/L2O.jl                                    |  7 +-
 src/samplers.jl                               | 51 +++++++++++++
 test/runtests.jl                              | 13 +---
 test/samplers.jl                              | 22 ++++++
 12 files changed, 188 insertions(+), 53 deletions(-)
 rename {src => examples/cuttingplanes}/cutting_planes.jl (100%)
 create mode 100644 examples/powermodels/write_to_file.jl
 rename test/worst_case.jl => examples/worst_case_active_learning/test_worst_case.jl (96%)
 rename {src => examples/worst_case_active_learning}/worst_case.jl (100%)
 rename {src => examples/worst_case_active_learning}/worst_case_iter.jl (100%)
 create mode 100644 src/samplers.jl
 create mode 100644 test/samplers.jl

diff --git a/src/cutting_planes.jl b/examples/cuttingplanes/cutting_planes.jl
similarity index 100%
rename from src/cutting_planes.jl
rename to examples/cuttingplanes/cutting_planes.jl
diff --git a/examples/powermodels/generate_full_datasets_script.jl b/examples/powermodels/generate_full_datasets_script.jl
index 7727752..7c126e8 100644
--- a/examples/powermodels/generate_full_datasets_script.jl
+++ b/examples/powermodels/generate_full_datasets_script.jl
@@ -131,42 +131,42 @@ if haskey(config, "line_search")
 end
 
 ########## WORST CASE DUAL DATASET GENERATION ##########
-if haskey(config, "worst_case_dual")
-    num_p = config["worst_case_dual"]["num_samples"]
-    function optimizer_factory()
-        IPO_OPT = Gurobi.Optimizer()
-        # IPO_OPT = MadNLP.Optimizer(print_level=MadNLP.INFO, max_iter=100)
-        # IPO = MOI.Bridges.Constraint.SOCtoNonConvexQuad{Float64}(IPO_OPT)
-        # MIP = QuadraticToBinary.Optimizer{Float64}(IPO)
-        return () -> IPO_OPT
-    end
+# if haskey(config, "worst_case_dual")
+#     num_p = config["worst_case_dual"]["num_samples"]
+#     function optimizer_factory()
+#         IPO_OPT = Gurobi.Optimizer()
+#         # IPO_OPT = MadNLP.Optimizer(print_level=MadNLP.INFO, max_iter=100)
+#         # IPO = MOI.Bridges.Constraint.SOCtoNonConvexQuad{Float64}(IPO_OPT)
+#         # MIP = QuadraticToBinary.Optimizer{Float64}(IPO)
+#         return () -> IPO_OPT
+#     end
+
+#     success_solves, number_variables, number_loads, batch_id = generate_worst_case_dataset(
+#         case_file_path,
+#         case_name;
+#         num_p=num_p,
+#         filetype=filetype,
+#         network_formulation=network_formulation,
+#         optimizer_factory=optimizer_factory,
+#         hook=(model) -> set_optimizer_attribute(model, "NonConvex", 2),
+#     )
 
-    success_solves, number_variables, number_loads, batch_id = generate_worst_case_dataset(
-        case_file_path,
-        case_name;
-        num_p=num_p,
-        filetype=filetype,
-        network_formulation=network_formulation,
-        optimizer_factory=optimizer_factory,
-        hook=(model) -> set_optimizer_attribute(model, "NonConvex", 2),
-    )
-
-    @info "Success solves Worst Case: $(success_solves) of $(num_p)"
-end
+#     @info "Success solves Worst Case: $(success_solves) of $(num_p)"
+# end
+
+# ########## WORST CASE NONCONVEX DATASET GENERATION ##########
+# if haskey(config, "worst_case_nonconvex")
+#     @everywhere using NonconvexNLopt
+#     num_p = config["worst_case_nonconvex"]["num_samples"]
+
+#     success_solves, number_variables, number_loads, batch_id = generate_worst_case_dataset_Nonconvex(
+#         case_file_path,
+#         case_name;
+#         num_p=num_p,
+#         filetype=filetype,
+#         network_formulation=network_formulation,
+#         optimizer=POI_cached_optimizer,
+#     )
 
-########## WORST CASE NONCONVEX DATASET GENERATION ##########
-if haskey(config, "worst_case_nonconvex")
-    @everywhere using NonconvexNLopt
-    num_p = config["worst_case_nonconvex"]["num_samples"]
-
-    success_solves, number_variables, number_loads, batch_id = generate_worst_case_dataset_Nonconvex(
-        case_file_path,
-        case_name;
-        num_p=num_p,
-        filetype=filetype,
-        network_formulation=network_formulation,
-        optimizer=POI_cached_optimizer,
-    )
-
-    @info "Success solves Worst Case: $(success_solves * 100) of $(num_p)"
-end
+#     @info "Success solves Worst Case: $(success_solves * 100) of $(num_p)"
+# end
diff --git a/examples/powermodels/pglib_datagen.jl b/examples/powermodels/pglib_datagen.jl
index dc43e6c..8ab1d34 100644
--- a/examples/powermodels/pglib_datagen.jl
+++ b/examples/powermodels/pglib_datagen.jl
@@ -77,7 +77,9 @@ function load_parameter_factory(model, indices; load_set=nothing)
     if isnothing(load_set)
         return @variable(model, _p[i=indices])
     end
-    return @variable(model, _p[i=indices] in load_set)
+    num_loads = floor(Int,length(indices) / 2)
+    pd_index = indices[1:num_loads]
+    return [@variable(model, pd[i=pd_index] in load_set[i]).data; @variable(model, qd[i=pd_index] in load_set[i+num_loads]).data]
 end
 
 """
diff --git a/examples/powermodels/visualize.jl b/examples/powermodels/visualize.jl
index 0b59871..ad8853a 100644
--- a/examples/powermodels/visualize.jl
+++ b/examples/powermodels/visualize.jl
@@ -9,8 +9,8 @@ cossim(x,y) = dot(x,y) / (norm(x)*norm(y))
 ##############
 # Parameters
 ##############
-network_formulation = "ACPPowerModel"
-case_name = "pglib_opf_case300_ieee"
+network_formulation = "SOCWRConicPowerModel" # "DCPPowerModel" # "SOCWRConicPowerModel"
+case_name = "6468_rte" # pglib_opf_case300_ieee # 6468_rte 
 path_dataset = joinpath(dirname(@__FILE__), "data")
 case_file_path = joinpath(path_dataset, case_name)
 case_file_path_train = joinpath(case_file_path, "input", "train")
@@ -90,6 +90,19 @@ function total_load_vector(input_data; is_test=false)
     return df
 end
 
+function total_load_vector_annon(input_data)
+    df = DataFrame()
+    df.id = input_data.id
+    num_loads = floor(Int, length(names(input_data[!, Not(:id)])) / 2)
+    for i in 1:num_loads
+        df[!, "load[$i]"] = zeros(size(input_data, 1))
+    end
+    for j in 1:size(input_data, 1), i in 1:num_loads
+        df[j, "load[$i]"] = sqrt(input_data[j, i+1]^2 + input_data[j, i+num_loads+1]^2)
+    end
+    return df
+end
+
 ######### Plot Load Vectors #########
 load_vector_train = total_load_vector(input_data_train)
 load_vector_test = total_load_vector(input_data_test; is_test=true)
diff --git a/examples/powermodels/write_to_file.jl b/examples/powermodels/write_to_file.jl
new file mode 100644
index 0000000..1143561
--- /dev/null
+++ b/examples/powermodels/write_to_file.jl
@@ -0,0 +1,42 @@
+using JuMP
+using PowerModels
+using PGLib
+using Gurobi
+
+optimizer = Gurobi.Optimizer
+network_formulation = SOCWRConicPowerModel
+
+matpower_case_name = "6468_rte"
+
+network_data = make_basic_network(pglib(matpower_case_name))
+
+# The problem to iterate over
+model = JuMP.Model(optimizer)
+
+# Save original load value and Link POI
+num_loads = length(network_data["load"])
+num_inputs = num_loads * 2
+original_load = vcat(
+    [network_data["load"]["$l"]["pd"] for l in 1:num_loads],
+    [network_data["load"]["$l"]["qd"] for l in 1:num_loads],
+)
+
+p = load_parameter_factory(model, 1:num_inputs; load_set=MOI.Parameter.(original_load))
+
+for (str_i, l) in network_data["load"]
+    i = parse(Int, str_i)
+    l["pd"] = p[i]
+    l["qd"] = p[num_loads + i]
+end
+
+pm = instantiate_model(
+    network_data,
+    network_formulation,
+    PowerModels.build_opf;
+    setting=Dict("output" => Dict("branch_flows" => true, "duals" => true)),
+    jump_model=model,
+)
+
+write_to_file(model, "$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
+
+dest_model = read_from_file("$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
\ No newline at end of file
diff --git a/test/worst_case.jl b/examples/worst_case_active_learning/test_worst_case.jl
similarity index 96%
rename from test/worst_case.jl
rename to examples/worst_case_active_learning/test_worst_case.jl
index 52f5a94..b6940dd 100644
--- a/test/worst_case.jl
+++ b/examples/worst_case_active_learning/test_worst_case.jl
@@ -180,3 +180,14 @@ function test_worst_case_problem_iterator(path::AbstractString, num_p=10)
         end
     end
 end
+
+####### Run Tests
+path = mktempdir()
+test_worst_case_problem_iterator(path)
+file_in, file_out = test_generate_worst_case_dataset(
+    path, "pglib_opf_case5_pjm", 20
+)
+file_in, file_out = test_generate_worst_case_dataset_Nonconvex(
+    path, "pglib_opf_case5_pjm", 20
+)
+
diff --git a/src/worst_case.jl b/examples/worst_case_active_learning/worst_case.jl
similarity index 100%
rename from src/worst_case.jl
rename to examples/worst_case_active_learning/worst_case.jl
diff --git a/src/worst_case_iter.jl b/examples/worst_case_active_learning/worst_case_iter.jl
similarity index 100%
rename from src/worst_case_iter.jl
rename to examples/worst_case_active_learning/worst_case_iter.jl
diff --git a/src/L2O.jl b/src/L2O.jl
index 568547a..fdfeaf2 100644
--- a/src/L2O.jl
+++ b/src/L2O.jl
@@ -26,7 +26,6 @@ export ArrowFile,
     Recorder,
     save,
     solve_batch,
-    WorstCaseProblemIterator,
     set_primal_variable!,
     set_dual_variable!,
     set_model!,
@@ -37,16 +36,16 @@ export ArrowFile,
     ConvexRule,
     relative_rmse,
     relative_mae,
-    inconvexhull
+    inconvexhull,
+    line_sampler
 
 include("datasetgen.jl")
 include("csvrecorder.jl")
 include("arrowrecorder.jl")
-include("worst_case.jl")
-include("worst_case_iter.jl")
 include("FullyConnected.jl")
 include("nn_expression.jl")
 include("metrics.jl")
 include("inconvexhull.jl")
+include("samplers.jl")
 
 end
diff --git a/src/samplers.jl b/src/samplers.jl
new file mode 100644
index 0000000..2ea434a
--- /dev/null
+++ b/src/samplers.jl
@@ -0,0 +1,51 @@
+"""
+    function line_sampler(
+        original_parameter::T,
+        num_p::F,
+        parameter_index::F,
+        num_inputs::F,
+        line_index::F;
+        step_multiplier::T=1.01,
+    ) where {T<:Real,F<:Integer}
+
+This sampler returns a set of parameters that for a line in one dimension of the parameter space. 
+The idea is to change the value of one parameter and keep the rest constant.
+"""
+function line_sampler(
+    original_parameters::Vector{T},
+    parameter_indexes::Vector{F},
+    range_p::AbstractVector{T},
+) where {T<:Real,F<:Integer}
+    parameters = hcat(fill(original_parameters, length(range_p))...)
+
+    for parameter_index in parameter_indexes
+        parameters[parameter_index, :] = [original_parameters[parameter_index] * mul for mul in range_p]
+    end
+
+    return parameters
+end
+
+"""
+    box_sampler(original_parameter::T, num_p::Int, max_multiplier::T=3.0, min_multiplier::T=0.0, step_multiplier::T=0.1)
+
+Uniformly sample values around the original parameter value over a discrete range inside a box.
+"""
+function box_sampler(
+    original_parameter::T,
+    num_p::F,
+    range_p::AbstractVector{T}=0.8:0.01:1.25,
+) where {T<:Real,F<:Integer}
+    # parameter sampling
+    parameter_samples =
+        original_parameter * rand(range_p, num_p)
+    return parameter_samples
+end
+
+function box_sampler(
+    original_parameters::Vector{T},
+    num_p::F,
+    range_p::AbstractVector{T}=0.8:0.01:1.25,
+) where {T<:Real,F<:Integer}
+    # parameter sampling
+    return hcat(box_sampler.(original_parameters, num_p, range_p)...)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 6775697..c580b36 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -22,8 +22,6 @@ const examples_dir = joinpath(test_dir, "..", "examples")
 
 include(joinpath(test_dir, "datasetgen.jl"))
 
-include(joinpath(test_dir, "worst_case.jl"))
-
 include(joinpath(examples_dir, "powermodels", "pglib_datagen.jl"))
 
 include(joinpath(test_dir, "test_flux_forecaster.jl"))
@@ -32,21 +30,18 @@ include(joinpath(test_dir, "nn_expression.jl"))
 
 include(joinpath(test_dir, "inconvexhull.jl"))
 
+include(joinpath(test_dir, "samplers.jl"))
+
 @testset "L2O.jl" begin
+    test_line_sampler()
+    test_box_sampler()
     test_fully_connected()
     test_flux_jump_basic()
     test_inconvexhull()
 
     mktempdir() do path
         test_problem_iterator(path)
-        test_worst_case_problem_iterator(path)
         file_in, file_out = test_pglib_datasetgen(path, "pglib_opf_case5_pjm", 20)
-        file_in, file_out = test_generate_worst_case_dataset(
-            path, "pglib_opf_case5_pjm", 20
-        )
-        file_in, file_out = test_generate_worst_case_dataset_Nonconvex(
-            path, "pglib_opf_case5_pjm", 20
-        )
         test_flux_forecaster(file_in, file_out)
     end
 end
diff --git a/test/samplers.jl b/test/samplers.jl
new file mode 100644
index 0000000..0aabc04
--- /dev/null
+++ b/test/samplers.jl
@@ -0,0 +1,22 @@
+function test_line_sampler(; num_p=10, range_p = 1:0.01:1.1)
+    original_parameter = rand(10)
+    for parameter_index = 1:num_p
+        parameters = line_sampler(
+            original_parameter,
+            [parameter_index],
+            range_p,
+        )
+        @test parameters[parameter_index, 1] == original_parameter[parameter_index]
+        @test parameters[parameter_index, :] == [original_parameter[parameter_index] * mul for mul in range_p]
+    end
+end
+
+function test_box_sampler(; num_p=10, max_multiplier=3.0, min_multiplier=0.0, step_multiplier=0.1)
+    original_parameter = rand(10)
+    parameters = box_sampler(original_parameter, num_p, min_multiplier:step_multiplier:max_multiplier)
+    @test size(parameters) == (10, num_p)
+    @test all(parameters .>= original_parameter * min_multiplier)
+    @test all(parameters .<= original_parameter * max_multiplier)
+
+    return nothing
+end
\ No newline at end of file

From 917057738fb91fe67a36a505da1e2ee25bb2db3a Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Tue, 20 Feb 2024 11:00:49 -0500
Subject: [PATCH 05/32] move Dualization/Nonconvex imports into examples

---
 Project.toml                                           | 1 -
 examples/worst_case_active_learning/worst_case.jl      | 2 ++
 examples/worst_case_active_learning/worst_case_iter.jl | 2 ++
 src/L2O.jl                                             | 5 ++---
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/Project.toml b/Project.toml
index a928939..52a5e2f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,7 +12,6 @@ JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-Nonconvex = "01bcebdf-4d21-426d-b5c4-6132c1619978"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 ParametricOptInterface = "0ce4ce61-57bf-432b-a095-efac525d185e"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/examples/worst_case_active_learning/worst_case.jl b/examples/worst_case_active_learning/worst_case.jl
index 78f73ad..cea1afe 100644
--- a/examples/worst_case_active_learning/worst_case.jl
+++ b/examples/worst_case_active_learning/worst_case.jl
@@ -1,3 +1,5 @@
+using Dualization
+
 """
     WorstCaseProblemIterator
 
diff --git a/examples/worst_case_active_learning/worst_case_iter.jl b/examples/worst_case_active_learning/worst_case_iter.jl
index 2024350..45cca53 100644
--- a/examples/worst_case_active_learning/worst_case_iter.jl
+++ b/examples/worst_case_active_learning/worst_case_iter.jl
@@ -1,3 +1,5 @@
+using Nonconvex
+
 # Nonconvex needs a minimization objective function that only receives the decision vector.
 function primal_objective(parameter_values, parameters, filter_fn; penalty=1e8)
     model = owner_model(first(parameters))
diff --git a/src/L2O.jl b/src/L2O.jl
index fdfeaf2..41f3b7e 100644
--- a/src/L2O.jl
+++ b/src/L2O.jl
@@ -2,7 +2,6 @@ module L2O
 
 using Arrow
 using CSV
-using Dualization
 using JuMP
 using UUIDs
 import ParametricOptInterface as POI
@@ -10,7 +9,6 @@ import JuMP.MOI as MOI
 import Base: string
 using Statistics
 
-using Nonconvex
 using Zygote
 
 using MLJFlux
@@ -37,7 +35,8 @@ export ArrowFile,
     relative_rmse,
     relative_mae,
     inconvexhull,
-    line_sampler
+    line_sampler,
+    box_sampler
 
 include("datasetgen.jl")
 include("csvrecorder.jl")

From cdb092d4ee7d6f80e2aebbd0789992803a9034ec Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Tue, 20 Feb 2024 11:14:37 -0500
Subject: [PATCH 06/32] prune Project.toml deps; fix sampler docstrings

---
 Project.toml                                    |  6 +-----
 .../test_worst_case.jl                          |  2 ++
 src/samplers.jl                                 | 17 +++++++++--------
 test/runtests.jl                                |  2 --
 4 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/Project.toml b/Project.toml
index 52a5e2f..745d306 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,7 +6,6 @@ version = "1.2.0-DEV"
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
-Dualization = "191a621a-6537-11e9-281d-650236a99e60"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -22,14 +21,12 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 [compat]
 Arrow = "2"
 CSV = "0.10"
-Dualization = "0.5"
 JuMP = "1"
 ParametricOptInterface = "0.7"
 Zygote = "^0.6.68"
 julia = "1.6"
 
 [extras]
-AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918"
 CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
 Clarabel = "61c947e1-3e6d-4ee4-985a-eec8c727bd6e"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -37,10 +34,9 @@ DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
 Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
-NonconvexNLopt = "b43a31b8-ff9b-442d-8e31-c163daa8ab75"
 PGLib = "07a8691f-3d11-4330-951b-3c50f98338be"
 PowerModels = "c36e90e8-916a-50a6-bd94-075b64ef4655"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "DelimitedFiles", "PGLib", "HiGHS", "PowerModels", "DataFrames", "Clarabel", "Ipopt", "NonconvexNLopt", "MLJ"]
+test = ["Test", "DelimitedFiles", "PGLib", "HiGHS", "PowerModels", "DataFrames", "Clarabel", "Ipopt", "MLJ"]
diff --git a/examples/worst_case_active_learning/test_worst_case.jl b/examples/worst_case_active_learning/test_worst_case.jl
index b6940dd..f1d7544 100644
--- a/examples/worst_case_active_learning/test_worst_case.jl
+++ b/examples/worst_case_active_learning/test_worst_case.jl
@@ -1,3 +1,5 @@
+using NonconvexNLopt
+
 """
     test_worst_case_problem_iterator(path::AbstractString)
 
diff --git a/src/samplers.jl b/src/samplers.jl
index 2ea434a..d5404c5 100644
--- a/src/samplers.jl
+++ b/src/samplers.jl
@@ -1,11 +1,8 @@
 """
     function line_sampler(
-        original_parameter::T,
-        num_p::F,
-        parameter_index::F,
-        num_inputs::F,
-        line_index::F;
-        step_multiplier::T=1.01,
+        original_parameters::Vector{T},
+        parameter_indexes::Vector{F},
+        range_p::AbstractVector{T},
     ) where {T<:Real,F<:Integer}
 
 This sampler returns a set of parameters that for a line in one dimension of the parameter space. 
@@ -26,8 +23,12 @@ function line_sampler(
 end
 
 """
-    box_sampler(original_parameter::T, num_p::Int, max_multiplier::T=3.0, min_multiplier::T=0.0, step_multiplier::T=0.1)
-
+    function box_sampler(
+        original_parameter::T,
+        num_p::F,
+        range_p::AbstractVector{T}=0.8:0.01:1.25,
+    ) where {T<:Real,F<:Integer}
+    
 Uniformly sample values around the original parameter value over a discrete range inside a box.
 """
 function box_sampler(
diff --git a/test/runtests.jl b/test/runtests.jl
index c580b36..b485934 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,8 +15,6 @@ using CSV
 using DataFrames
 using Optimisers
 
-using NonconvexNLopt
-
 const test_dir = dirname(@__FILE__)
 const examples_dir = joinpath(test_dir, "..", "examples")
 

From 2dd79eb07f8eee80267d24d34cfc84fd320891c6 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Tue, 20 Feb 2024 13:21:14 -0500
Subject: [PATCH 07/32] fix vector box_sampler layout and sampler tests

---
 src/samplers.jl  | 2 +-
 test/samplers.jl | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/samplers.jl b/src/samplers.jl
index d5404c5..077da9e 100644
--- a/src/samplers.jl
+++ b/src/samplers.jl
@@ -48,5 +48,5 @@ function box_sampler(
     range_p::AbstractVector{T}=0.8:0.01:1.25,
 ) where {T<:Real,F<:Integer}
     # parameter sampling
-    return hcat(box_sampler.(original_parameters, num_p, range_p)...)
+    return vcat([box_sampler(p, num_p, range_p)' for p in original_parameters]...)
 end
diff --git a/test/samplers.jl b/test/samplers.jl
index 0aabc04..dbac79e 100644
--- a/test/samplers.jl
+++ b/test/samplers.jl
@@ -1,7 +1,7 @@
 function test_line_sampler(; num_p=10, range_p = 1:0.01:1.1)
     original_parameter = rand(10)
     for parameter_index = 1:num_p
-        parameters = line_sampler(
+        parameters = L2O.line_sampler(
             original_parameter,
             [parameter_index],
             range_p,
@@ -9,6 +9,7 @@ function test_line_sampler(; num_p=10, range_p = 1:0.01:1.1)
         @test parameters[parameter_index, 1] == original_parameter[parameter_index]
         @test parameters[parameter_index, :] == [original_parameter[parameter_index] * mul for mul in range_p]
     end
+    return nothing
 end
 
 function test_box_sampler(; num_p=10, max_multiplier=3.0, min_multiplier=0.0, step_multiplier=0.1)
@@ -17,6 +18,5 @@ function test_box_sampler(; num_p=10, max_multiplier=3.0, min_multiplier=0.0, st
     @test size(parameters) == (10, num_p)
     @test all(parameters .>= original_parameter * min_multiplier)
     @test all(parameters .<= original_parameter * max_multiplier)
-
     return nothing
 end
\ No newline at end of file

From 7d123badcf9f17ffa6d8a57089ed7bbc1456775d Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Tue, 20 Feb 2024 14:54:57 -0500
Subject: [PATCH 08/32] add general_sampler and all-parameter line_sampler

---
 examples/generate_dataset.jl |  7 +++++++
 src/L2O.jl                   |  3 ++-
 src/samplers.jl              | 29 ++++++++++++++++++++++++++++-
 test/samplers.jl             | 27 ++++++++++++++++++++++-----
 4 files changed, 59 insertions(+), 7 deletions(-)
 create mode 100644 examples/generate_dataset.jl

diff --git a/examples/generate_dataset.jl b/examples/generate_dataset.jl
new file mode 100644
index 0000000..2fa7e94
--- /dev/null
+++ b/examples/generate_dataset.jl
@@ -0,0 +1,7 @@
+using L2O
+using Arrow
+using Test
+using UUIDs
+import JuMP.MOI as MOI
+import ParametricOptInterface as POI
+using TOML
\ No newline at end of file
diff --git a/src/L2O.jl b/src/L2O.jl
index 41f3b7e..73e26ce 100644
--- a/src/L2O.jl
+++ b/src/L2O.jl
@@ -36,7 +36,8 @@ export ArrowFile,
     relative_mae,
     inconvexhull,
     line_sampler,
-    box_sampler
+    box_sampler,
+    general_sampler
 
 include("datasetgen.jl")
 include("csvrecorder.jl")
diff --git a/src/samplers.jl b/src/samplers.jl
index 077da9e..9208022 100644
--- a/src/samplers.jl
+++ b/src/samplers.jl
@@ -10,7 +10,7 @@ The idea is to change the value of one parameter and keep the rest constant.
 """
 function line_sampler(
     original_parameters::Vector{T},
-    parameter_indexes::Vector{F},
+    parameter_indexes::AbstractVector{F},
     range_p::AbstractVector{T},
 ) where {T<:Real,F<:Integer}
     parameters = hcat(fill(original_parameters, length(range_p))...)
@@ -22,6 +22,20 @@ function line_sampler(
     return parameters
 end
 
+function line_sampler(
+    original_parameters::Vector{T},
+    range_p::AbstractVector{T},
+) where {T<:Real}
+    parameters = zeros(T, length(original_parameters), length(range_p) * (1 + length(original_parameters)))
+    parameters[:, 1:length(range_p)] = line_sampler(original_parameters, 1:length(original_parameters), range_p)
+
+    for parameter_index=1:length(original_parameters)
+        parameters[:, length(range_p) * parameter_index + 1:length(range_p) * (parameter_index + 1)] = line_sampler(original_parameters, [parameter_index], range_p)
+    end
+
+    return parameters
+end
+
 """
     function box_sampler(
         original_parameter::T,
@@ -50,3 +64,16 @@ function box_sampler(
     # parameter sampling
     return vcat([box_sampler(p, num_p, range_p)' for p in original_parameters]...)
 end
+
+function general_sampler(
+    original_parameters::Vector{T},
+    line_sampler_range::AbstractVector{T}=1.01:0.01:1.25,
+    box_sampler_num_p::Union{F, Nothing}=nothing,
+    box_sampler_range::AbstractVector{T}=0.7:0.01:1.25,
+) where {T<:Real,F<:Integer}
+    if box_sampler_num_p == nothing
+        return line_sampler(original_parameters, line_sampler_range)
+    else
+        return hcat(line_sampler(original_parameters, line_sampler_range), box_sampler(original_parameters, box_sampler_num_p, box_sampler_range))
+    end
+end
\ No newline at end of file
diff --git a/test/samplers.jl b/test/samplers.jl
index dbac79e..665a47f 100644
--- a/test/samplers.jl
+++ b/test/samplers.jl
@@ -1,5 +1,5 @@
 function test_line_sampler(; num_p=10, range_p = 1:0.01:1.1)
-    original_parameter = rand(10)
+    original_parameter = rand(num_p)
     for parameter_index = 1:num_p
         parameters = L2O.line_sampler(
             original_parameter,
@@ -9,14 +9,31 @@ function test_line_sampler(; num_p=10, range_p = 1:0.01:1.1)
         @test parameters[parameter_index, 1] == original_parameter[parameter_index]
         @test parameters[parameter_index, :] == [original_parameter[parameter_index] * mul for mul in range_p]
     end
+    parameters = L2O.line_sampler(
+        original_parameter,
+        range_p,
+    )
+    @test size(parameters) == (10, length(range_p) * (1 + num_p))
     return nothing
 end
 
-function test_box_sampler(; num_p=10, max_multiplier=3.0, min_multiplier=0.0, step_multiplier=0.1)
-    original_parameter = rand(10)
-    parameters = box_sampler(original_parameter, num_p, min_multiplier:step_multiplier:max_multiplier)
-    @test size(parameters) == (10, num_p)
+function test_box_sampler(; num_p=10, num_s=5, max_multiplier=3.0, min_multiplier=0.0, step_multiplier=0.1)
+    original_parameter = rand(num_p)
+    parameters = box_sampler(original_parameter, num_s, min_multiplier:step_multiplier:max_multiplier)
+    @test size(parameters) == (num_p, num_s)
     @test all(parameters .>= original_parameter * min_multiplier)
     @test all(parameters .<= original_parameter * max_multiplier)
     return nothing
+end
+
+function test_general_sampler(; num_p=10, num_s=5, range_p=1.01:0.01:1.25)
+    original_parameter = rand(num_p)
+    parameters = general_sampler(
+        original_parameter,
+        range_p,
+        num_s,
+        range_p,
+    )
+    @test size(parameters) == (num_p, num_s + length(range_p) * (1 + num_p))
+    return nothing
 end
\ No newline at end of file

From c682c8721b6fa64ff8e60ea60c75db636b2e55fb Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Tue, 20 Feb 2024 16:30:50 -0500
Subject: [PATCH 09/32] update code

---
 Project.toml     |  1 +
 src/L2O.jl       |  2 ++
 src/samplers.jl  | 60 +++++++++++++++++++++++++++++++++---------------
 test/samplers.jl | 12 ++++++----
 4 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/Project.toml b/Project.toml
index 745d306..22c7c63 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,6 +6,7 @@ version = "1.2.0-DEV"
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/L2O.jl b/src/L2O.jl
index 73e26ce..a6552cd 100644
--- a/src/L2O.jl
+++ b/src/L2O.jl
@@ -8,6 +8,7 @@ import ParametricOptInterface as POI
 import JuMP.MOI as MOI
 import Base: string
 using Statistics
+using Distributions
 
 using Zygote
 
@@ -37,6 +38,7 @@ export ArrowFile,
     inconvexhull,
     line_sampler,
     box_sampler,
+    scaled_distribution_sampler,
     general_sampler
 
 include("datasetgen.jl")
diff --git a/src/samplers.jl b/src/samplers.jl
index 9208022..8ca80c6 100644
--- a/src/samplers.jl
+++ b/src/samplers.jl
@@ -11,7 +11,7 @@ The idea is to change the value of one parameter and keep the rest constant.
 function line_sampler(
     original_parameters::Vector{T},
     parameter_indexes::AbstractVector{F},
-    range_p::AbstractVector{T},
+    range_p::AbstractVector{T}=1.01:0.01:1.25,
 ) where {T<:Real,F<:Integer}
     parameters = hcat(fill(original_parameters, length(range_p))...)
 
@@ -24,7 +24,7 @@ end
 
 function line_sampler(
     original_parameters::Vector{T},
-    range_p::AbstractVector{T},
+    range_p::AbstractVector{T}=1.01:0.01:1.25,
 ) where {T<:Real}
     parameters = zeros(T, length(original_parameters), length(range_p) * (1 + length(original_parameters)))
     parameters[:, 1:length(range_p)] = line_sampler(original_parameters, 1:length(original_parameters), range_p)
@@ -39,7 +39,7 @@ end
 """
     function box_sampler(
         original_parameter::T,
-        num_p::F,
+        num_s::F,
         range_p::AbstractVector{T}=0.8:0.01:1.25,
     ) where {T<:Real,F<:Integer}
     
@@ -47,33 +47,57 @@ Uniformly sample values around the original parameter value over a discrete rang
 """
 function box_sampler(
     original_parameter::T,
-    num_p::F,
+    num_s::F,
     range_p::AbstractVector{T}=0.8:0.01:1.25,
 ) where {T<:Real,F<:Integer}
-    # parameter sampling
     parameter_samples =
-        original_parameter * rand(range_p, num_p)
+        original_parameter * rand(range_p, num_s)
     return parameter_samples
 end
 
 function box_sampler(
     original_parameters::Vector{T},
-    num_p::F,
+    num_s::F,
     range_p::AbstractVector{T}=0.8:0.01:1.25,
 ) where {T<:Real,F<:Integer}
-    # parameter sampling
-    return vcat([box_sampler(p, num_p, range_p)' for p in original_parameters]...)
+    return vcat([box_sampler(p, num_s, range_p)' for p in original_parameters]...)
 end
 
-function general_sampler(
+function scaled_distribution_sampler(
     original_parameters::Vector{T},
-    line_sampler_range::AbstractVector{T}=1.01:0.01:1.25,
-    box_sampler_num_p::Union{F, Nothing}=nothing,
-    box_sampler_range::AbstractVector{T}=0.7:0.01:1.25,
+    num_s::F;
+    rng::AbstractRNG=Random.GLOBAL_RNG,
+    scaler_multiplier::Distribution=Uniform(0.8, 1.25),
+    distribution::Distribution=MvLogNormal(fill(-(1.05 .^ 2) ./ 2.0, length(original_parameters)), 1.05)
 ) where {T<:Real,F<:Integer}
-    if box_sampler_num_p == nothing
-        return line_sampler(original_parameters, line_sampler_range)
-    else
-        return hcat(line_sampler(original_parameters, line_sampler_range), box_sampler(original_parameters, box_sampler_num_p, box_sampler_range))
+    column_scales = rand(rng, scaler_multiplier, num_s)
+    parameter_samples = rand(rng, distribution, num_s)
+    
+    for n in 1:num_s
+        parameter_samples[:, n] = original_parameters .* parameter_samples[:, n] .* column_scales[n]
     end
-end
\ No newline at end of file
+    return parameter_samples
+end
+
+function general_sampler(
+    original_parameters::Vector{T};
+    samplers::Vector{Function}=[
+        (original_parameters) -> scaled_distribution_sampler(original_parameters, 1000),
+        line_sampler, 
+        (original_parameters) -> box_sampler(original_parameters, 10),
+    ]
+) where {T<:Real}
+    return hcat([sampler(original_parameters) for sampler in samplers]...)
+end
+
+function load_parameters(file::AbstractString)
+    model = read_from_file(file)
+    cons = constraint_object.(all_constraints(model, VariableRef, MOI.Parameter{Float64}))
+    parameters = [cons[i].func for i in 1:length(cons)]
+    vals = [cons[i].set.value for i in 1:length(cons)]
+    return parameters, vals
+end
+
+function general_sampler(file::AbstractString)
+    return general_sampler(load_parameters(file))
+end
diff --git a/test/samplers.jl b/test/samplers.jl
index 665a47f..3642d8f 100644
--- a/test/samplers.jl
+++ b/test/samplers.jl
@@ -29,11 +29,13 @@ end
 function test_general_sampler(; num_p=10, num_s=5, range_p=1.01:0.01:1.25)
     original_parameter = rand(num_p)
     parameters = general_sampler(
-        original_parameter,
-        range_p,
-        num_s,
-        range_p,
+        original_parameter;
+        samplers=[
+            (original_parameters) -> scaled_distribution_sampler(original_parameters, num_s),
+            line_sampler, 
+            (original_parameters) -> box_sampler(original_parameters, num_s),
+        ]
     )
-    @test size(parameters) == (num_p, num_s + length(range_p) * (1 + num_p))
+    @test size(parameters) == (num_p, 2 * num_s + length(range_p) * (1 + num_p))
     return nothing
 end
\ No newline at end of file

From b26367a7228a0c084c4ba2b18deea352e1db19d7 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Tue, 20 Feb 2024 22:29:17 -0500
Subject: [PATCH 10/32] update samplers

---
 src/samplers.jl | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/samplers.jl b/src/samplers.jl
index 8ca80c6..e24233b 100644
--- a/src/samplers.jl
+++ b/src/samplers.jl
@@ -90,14 +90,36 @@ function general_sampler(
     return hcat([sampler(original_parameters) for sampler in samplers]...)
 end
 
-function load_parameters(file::AbstractString)
-    model = read_from_file(file)
+function load_parameters(model::JuMP.Model)
     cons = constraint_object.(all_constraints(model, VariableRef, MOI.Parameter{Float64}))
     parameters = [cons[i].func for i in 1:length(cons)]
     vals = [cons[i].set.value for i in 1:length(cons)]
     return parameters, vals
 end
 
-function general_sampler(file::AbstractString)
-    return general_sampler(load_parameters(file))
+function load_parameters(file::AbstractString)
+    return load_parameters(read_from_file(file))
 end
+
+function general_sampler(
+    file::AbstractString;
+    samplers::Vector{Function}=[
+        (original_parameters) -> scaled_distribution_sampler(original_parameters, 1000),
+        line_sampler, 
+        (original_parameters) -> box_sampler(original_parameters, 10),
+    ],
+    filetype::FileType=ArrowFile,
+    save_file::AbstractString=split(file, ".mof.json")[1],
+    batch_id::UUID=uuid1()
+) where {T<:Real}
+    parameters, original_values = load_parameters(file)
+    problem_iterator = ProblemIterator(
+        Dict{VariableRef,Vector{T}}(zip(parameters, original_values))
+    )
+    save(
+        problem_iterator,
+        save_file * "_input_" * batch_id,
+        filetype,
+    )
+    return problem_iterator
+end
\ No newline at end of file

From 0c501b57068b76d6ca58a7d24e6c4b94c03de51d Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Wed, 21 Feb 2024 13:11:48 -0500
Subject: [PATCH 11/32] update tests

---
 examples/powermodels/pglib_datagen.jl | 54 ++++++++++-----------
 examples/powermodels/write_to_file.jl | 17 +++----
 src/samplers.jl                       | 67 +++++++++++++++++++++++----
 test/runtests.jl                      |  4 ++
 test/samplers.jl                      | 51 ++++++++++++++++++++
 5 files changed, 147 insertions(+), 46 deletions(-)

diff --git a/examples/powermodels/pglib_datagen.jl b/examples/powermodels/pglib_datagen.jl
index 8ab1d34..69b365d 100644
--- a/examples/powermodels/pglib_datagen.jl
+++ b/examples/powermodels/pglib_datagen.jl
@@ -40,33 +40,33 @@ function load_sampler(
     return load_samples
 end
 
-"""
-    line_sampler(original_parameter::T, num_p::Int, parameter_index::F, num_inputs::F, line_index::F; step_multiplier::T=0.1)
-
-line_sampler is a function to help generate a dataset for varying parameter values. It has two modes:
- - If line_index is not outside the parameter index range: 
-    Return an incremental vector for the parameter at parameter_index and an unchanged parameter for the rest;
- - If line_index is outside the parameter index range:
-    Return an incremental vector for all parameters. 
-"""
-function line_sampler(
-    original_parameter::T,
-    num_p::F,
-    parameter_index::F,
-    num_inputs::F,
-    line_index::F;
-    step_multiplier::T=1.01,
-) where {T<:Real,F<:Integer}
-    # parameter sampling
-    num_parameters = floor(Int, num_inputs / 2)
-    if (parameter_index == line_index) ||
-        (parameter_index - num_parameters == line_index) ||
-        (line_index == num_inputs + 1)
-        return [original_parameter * step_multiplier^(j) for j in 1:num_p]
-    else
-        return ones(num_p) * original_parameter
-    end
-end
+# """
+#     line_sampler(original_parameter::T, num_p::Int, parameter_index::F, num_inputs::F, line_index::F; step_multiplier::T=0.1)
+
+# line_sampler is a function to help generate a dataset for varying parameter values. It has two modes:
+#  - If line_index is not outside the parameter index range: 
+#     Return an incremental vector for the parameter at parameter_index and an unchanged parameter for the rest;
+#  - If line_index is outside the parameter index range:
+#     Return an incremental vector for all parameters. 
+# """
+# function line_sampler(
+#     original_parameter::T,
+#     num_p::F,
+#     parameter_index::F,
+#     num_inputs::F,
+#     line_index::F;
+#     step_multiplier::T=1.01,
+# ) where {T<:Real,F<:Integer}
+#     # parameter sampling
+#     num_parameters = floor(Int, num_inputs / 2)
+#     if (parameter_index == line_index) ||
+#         (parameter_index - num_parameters == line_index) ||
+#         (line_index == num_inputs + 1)
+#         return [original_parameter * step_multiplier^(j) for j in 1:num_p]
+#     else
+#         return ones(num_p) * original_parameter
+#     end
+# end
 
 """
     load_parameter_factory(model, indices; load_set=nothing)
diff --git a/examples/powermodels/write_to_file.jl b/examples/powermodels/write_to_file.jl
index 1143561..2d9a2a0 100644
--- a/examples/powermodels/write_to_file.jl
+++ b/examples/powermodels/write_to_file.jl
@@ -4,9 +4,9 @@ using PGLib
 using Gurobi
 
 optimizer = Gurobi.Optimizer
-network_formulation = SOCWRConicPowerModel
+network_formulation = DCPPowerModel
 
-matpower_case_name = "6468_rte"
+matpower_case_name = "pglib_opf_case5_pjm"
 
 network_data = make_basic_network(pglib(matpower_case_name))
 
@@ -15,18 +15,13 @@ model = JuMP.Model(optimizer)
 
 # Save original load value and Link POI
 num_loads = length(network_data["load"])
-num_inputs = num_loads * 2
-original_load = vcat(
-    [network_data["load"]["$l"]["pd"] for l in 1:num_loads],
-    [network_data["load"]["$l"]["qd"] for l in 1:num_loads],
-)
 
-p = load_parameter_factory(model, 1:num_inputs; load_set=MOI.Parameter.(original_load))
+@variable(model, load_scaler[i=1:num_loads] in MOI.Parameter.(1.0))
 
 for (str_i, l) in network_data["load"]
     i = parse(Int, str_i)
-    l["pd"] = p[i]
-    l["qd"] = p[num_loads + i]
+    l["pd"] = load_scaler[i] * l["pd"]
+    l["qd"] = load_scaler[i] * l["qd"]
 end
 
 pm = instantiate_model(
@@ -39,4 +34,4 @@ pm = instantiate_model(
 
 write_to_file(model, "$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
 
-dest_model = read_from_file("$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
\ No newline at end of file
+# dest_model = read_from_file("$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
\ No newline at end of file
diff --git a/src/samplers.jl b/src/samplers.jl
index e24233b..e27db6d 100644
--- a/src/samplers.jl
+++ b/src/samplers.jl
@@ -63,6 +63,17 @@ function box_sampler(
     return vcat([box_sampler(p, num_s, range_p)' for p in original_parameters]...)
 end
 
+"""
+    function scaled_distribution_sampler(
+        original_parameters::Vector{T},
+        num_s::F;
+        rng::AbstractRNG=Random.GLOBAL_RNG,
+        scaler_multiplier::Distribution=Uniform(0.8, 1.25),
+        distribution::Distribution=MvLogNormal(fill(-(1.05 .^ 2) ./ 2.0, length(original_parameters)), 1.05)
+    ) where {T<:Real,F<:Integer}
+
+Sample from a distribution and scale the parameters by a random value over a uniform distribution.
+"""
 function scaled_distribution_sampler(
     original_parameters::Vector{T},
     num_s::F;
@@ -79,17 +90,34 @@ function scaled_distribution_sampler(
     return parameter_samples
 end
 
+"""
+    function general_sampler(
+        original_parameters::Vector{T};
+        samplers::Vector{Function}=[
+            (original_parameters) -> scaled_distribution_sampler(original_parameters, 1000),
+            L2O.line_sampler, 
+            (original_parameters) -> box_sampler(original_parameters, 10),
+        ]
+    ) where {T<:Real}
+
+This function is a general sampler that uses a set of samplers to sample the parameter space.
+"""
 function general_sampler(
     original_parameters::Vector{T};
     samplers::Vector{Function}=[
         (original_parameters) -> scaled_distribution_sampler(original_parameters, 1000),
-        line_sampler, 
+        L2O.line_sampler, 
         (original_parameters) -> box_sampler(original_parameters, 10),
     ]
 ) where {T<:Real}
     return hcat([sampler(original_parameters) for sampler in samplers]...)
 end
 
+"""
+    load_parameters(model::JuMP.Model)
+
+Load the parameters from a JuMP model.
+"""
 function load_parameters(model::JuMP.Model)
     cons = constraint_object.(all_constraints(model, VariableRef, MOI.Parameter{Float64}))
     parameters = [cons[i].func for i in 1:length(cons)]
@@ -97,28 +125,51 @@ function load_parameters(model::JuMP.Model)
     return parameters, vals
 end
 
+"""
+    load_parameters(file::AbstractString)
+
+Load the parameters from a saved jump model.
+"""
 function load_parameters(file::AbstractString)
     return load_parameters(read_from_file(file))
 end
 
+"""
+    general_sampler(
+        file::AbstractString;
+        samplers::Vector{Function}=[
+            (original_parameters) -> scaled_distribution_sampler(original_parameters, 1000),
+            L2O.line_sampler, 
+            (original_parameters) -> box_sampler(original_parameters, 10),
+        ],
+        batch_id::UUID=uuid1(),
+        save_file::AbstractString=split(file, ".mof.json")[1] * "_input_" * string(batch_id),
+        filetype::Type{T}=ArrowFile
+    ) where {T<:FileType}
+
+This function is a general sampler that uses a set of samplers to sample the parameter space. 
+It loads the underlying model from `file` and samples the parameters.
+It saves the sampled parameters to `save_file`.
+"""
 function general_sampler(
     file::AbstractString;
     samplers::Vector{Function}=[
         (original_parameters) -> scaled_distribution_sampler(original_parameters, 1000),
-        line_sampler, 
+        L2O.line_sampler, 
         (original_parameters) -> box_sampler(original_parameters, 10),
     ],
-    filetype::FileType=ArrowFile,
-    save_file::AbstractString=split(file, ".mof.json")[1],
-    batch_id::UUID=uuid1()
-) where {T<:Real}
+    batch_id::UUID=uuid1(),
+    save_file::AbstractString=split(file, ".mof.json")[1] * "_input_" * string(batch_id),
+    filetype::Type{T}=ArrowFile
+) where {T<:FileType}
     parameters, original_values = load_parameters(file)
+    vals = general_sampler(original_values, samplers=samplers)
     problem_iterator = ProblemIterator(
-        Dict{VariableRef,Vector{T}}(zip(parameters, original_values))
+        Dict(parameters .=> [Vector(r) for r in eachrow(vals)]),
     )
     save(
         problem_iterator,
-        save_file * "_input_" * batch_id,
+        save_file,
         filetype,
     )
     return problem_iterator
diff --git a/test/runtests.jl b/test/runtests.jl
index b485934..faf44cb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -31,13 +31,17 @@ include(joinpath(test_dir, "inconvexhull.jl"))
 include(joinpath(test_dir, "samplers.jl"))
 
 @testset "L2O.jl" begin
+    test_load_parameters_model()
+    test_load_parameters()
     test_line_sampler()
     test_box_sampler()
+    test_general_sampler()
     test_fully_connected()
     test_flux_jump_basic()
     test_inconvexhull()
 
     mktempdir() do path
+        test_general_sampler_file(; cache_dir=path)
         test_problem_iterator(path)
         file_in, file_out = test_pglib_datasetgen(path, "pglib_opf_case5_pjm", 20)
         test_flux_forecaster(file_in, file_out)
diff --git a/test/samplers.jl b/test/samplers.jl
index 3642d8f..6533c37 100644
--- a/test/samplers.jl
+++ b/test/samplers.jl
@@ -37,5 +37,56 @@ function test_general_sampler(; num_p=10, num_s=5, range_p=1.01:0.01:1.25)
         ]
     )
     @test size(parameters) == (num_p, 2 * num_s + length(range_p) * (1 + num_p))
+    return nothing
+end
+
+function test_load_parameters_model(;num_p=10, num_v=5)
+    model = JuMP.Model()
+    @variable(model, 0 <= x[1:num_v] <= 1)
+    @variable(model, p[1:num_p] in MOI.Parameter.(1.0))
+    @constraint(model, cons, sum(x) + sum(p) >= 3)
+    @objective(model, Min, 2x)
+
+    parameters, vals = L2O.load_parameters(model)
+    @test length(parameters) == num_p
+    @test length(vals) == num_p
+    @test all(vals .== 1.0)
+    @test all(parameters .== p)
+    return nothing
+end
+
+function test_load_parameters()
+    file="pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json"
+    parameters, vals = L2O.load_parameters(file)
+    @test length(parameters) == 3
+    @test length(vals) == 3
+    @test all(vals .== 1.0)
+    return nothing
+end
+
+function test_general_sampler_file(file::AbstractString="pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json"; 
+    num_s=5, range_p=1.01:0.01:1.25, 
+    cache_dir=mktempdir(),
+    batch_id=uuid1(),
+    save_file = joinpath(cache_dir, split(split(file, ".mof.json")[1], "/")[end] * "_input_" * string(batch_id)),
+)
+    _, vals = L2O.load_parameters(file)
+    num_p=length(vals)
+    problem_iterator = general_sampler(
+        file;
+        samplers=[
+            (original_parameters) -> scaled_distribution_sampler(original_parameters, num_s),
+            line_sampler, 
+            (original_parameters) -> box_sampler(original_parameters, num_s),
+        ],
+        save_file=save_file,
+        batch_id=batch_id
+    )
+    @test length(problem_iterator.ids) == 2 * num_s + length(range_p) * (1 + num_p)
+    @test length(problem_iterator.pairs) == num_p
+
+    input_table = DataFrame(Arrow.Table(save_file * ".arrow"))
+    @test size(input_table) == (length(problem_iterator.ids), num_p + 1)
+
     return nothing
 end
\ No newline at end of file

From 831b4ecf458c49e1cad6970d867929302646a92e Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Wed, 21 Feb 2024 13:23:36 -0500
Subject: [PATCH 12/32] update code

---
 examples/powermodels/write_to_file.jl         |   4 +-
 ..._case5_pjm_DCPPowerModel_POI_load.mof.json | 928 ++++++++++++++++++
 test/samplers.jl                              |   2 +-
 3 files changed, 931 insertions(+), 3 deletions(-)
 create mode 100644 test/pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json

diff --git a/examples/powermodels/write_to_file.jl b/examples/powermodels/write_to_file.jl
index 2d9a2a0..f9e2942 100644
--- a/examples/powermodels/write_to_file.jl
+++ b/examples/powermodels/write_to_file.jl
@@ -4,9 +4,9 @@ using PGLib
 using Gurobi
 
 optimizer = Gurobi.Optimizer
-network_formulation = DCPPowerModel
+network_formulation = ACPPowerModel # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
 
-matpower_case_name = "pglib_opf_case5_pjm"
+matpower_case_name = "6468_rte"
 
 network_data = make_basic_network(pglib(matpower_case_name))
 
diff --git a/test/pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json b/test/pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json
new file mode 100644
index 0000000..78fccd9
--- /dev/null
+++ b/test/pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json
@@ -0,0 +1,928 @@
+{
+  "name": "MathOptFormat Model",
+  "version": {
+    "major": 1,
+    "minor": 7
+  },
+  "variables": [
+    {
+      "name": "0_pg[5]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_pg[4]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_pg[2]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_pg[3]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_pg[1]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_p[(5, 3, 4)]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_p[(4, 2, 3)]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_p[(6, 4, 5)]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_p[(2, 1, 4)]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_p[(3, 1, 5)]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_p[(1, 1, 2)]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "load_scaler[1]"
+    },
+    {
+      "name": "load_scaler[2]"
+    },
+    {
+      "name": "load_scaler[3]"
+    },
+    {
+      "name": "0_va[5]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_va[4]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_va[2]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_va[3]",
+      "primal_start": 0.0
+    },
+    {
+      "name": "0_va[1]",
+      "primal_start": 0.0
+    }
+  ],
+  "objective": {
+    "sense": "min",
+    "function": {
+      "type": "ScalarQuadraticFunction",
+      "affine_terms": [
+        {
+          "coefficient": 1000.0,
+          "variable": "0_pg[5]"
+        },
+        {
+          "coefficient": 4000.0,
+          "variable": "0_pg[4]"
+        },
+        {
+          "coefficient": 1500.0,
+          "variable": "0_pg[2]"
+        },
+        {
+          "coefficient": 3000.0,
+          "variable": "0_pg[3]"
+        },
+        {
+          "coefficient": 1400.0,
+          "variable": "0_pg[1]"
+        }
+      ],
+      "quadratic_terms": [],
+      "constant": 0.0
+    }
+  },
+  "constraints": [
+    {
+      "name": "c1",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[4]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c2",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_pg[5]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_p[(6, 4, 5)]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_p[(3, 1, 5)]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c3",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_pg[4]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_p[(5, 3, 4)]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(6, 4, 5)]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_p[(2, 1, 4)]"
+          },
+          {
+            "coefficient": 4.0,
+            "variable": "load_scaler[3]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c4",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(4, 2, 3)]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_p[(1, 1, 2)]"
+          },
+          {
+            "coefficient": 3.0,
+            "variable": "load_scaler[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c5",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_pg[3]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(5, 3, 4)]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_p[(4, 2, 3)]"
+          },
+          {
+            "coefficient": 3.0,
+            "variable": "load_scaler[2]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c6",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_pg[2]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_pg[1]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(2, 1, 4)]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(3, 1, 5)]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(1, 1, 2)]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c7",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(5, 3, 4)]"
+          },
+          {
+            "coefficient": 33.33666700003334,
+            "variable": "0_va[4]"
+          },
+          {
+            "coefficient": -33.33666700003334,
+            "variable": "0_va[3]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c8",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(4, 2, 3)]"
+          },
+          {
+            "coefficient": -91.67583425009167,
+            "variable": "0_va[2]"
+          },
+          {
+            "coefficient": 91.67583425009167,
+            "variable": "0_va[3]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c9",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(6, 4, 5)]"
+          },
+          {
+            "coefficient": 33.33666700003334,
+            "variable": "0_va[5]"
+          },
+          {
+            "coefficient": -33.33666700003334,
+            "variable": "0_va[4]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c10",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(2, 1, 4)]"
+          },
+          {
+            "coefficient": 32.56904637832204,
+            "variable": "0_va[4]"
+          },
+          {
+            "coefficient": -32.56904637832204,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c11",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(3, 1, 5)]"
+          },
+          {
+            "coefficient": 154.70297029702968,
+            "variable": "0_va[5]"
+          },
+          {
+            "coefficient": -154.70297029702968,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c12",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_p[(1, 1, 2)]"
+          },
+          {
+            "coefficient": 35.234840209999646,
+            "variable": "0_va[2]"
+          },
+          {
+            "coefficient": -35.234840209999646,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "EqualTo",
+        "value": 0.0
+      }
+    },
+    {
+      "name": "c1_1",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[4]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[3]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -0.5235987755982988
+      }
+    },
+    {
+      "name": "c2_1",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[2]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[3]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -0.5235987755982988
+      }
+    },
+    {
+      "name": "c3_1",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[5]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[4]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -0.5235987755982988
+      }
+    },
+    {
+      "name": "c4_1",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[4]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -0.5235987755982988
+      }
+    },
+    {
+      "name": "c5_1",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[5]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -0.5235987755982988
+      }
+    },
+    {
+      "name": "c6_1",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[2]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -0.5235987755982988
+      }
+    },
+    {
+      "name": "c1_2",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[4]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[3]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 0.5235987755982988
+      }
+    },
+    {
+      "name": "c2_2",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[2]"
+          },
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[3]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 0.5235987755982988
+      }
+    },
+    {
+      "name": "c3_2",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[5]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[4]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 0.5235987755982988
+      }
+    },
+    {
+      "name": "c4_2",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[4]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 0.5235987755982988
+      }
+    },
+    {
+      "name": "c5_2",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[5]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 0.5235987755982988
+      }
+    },
+    {
+      "name": "c6_2",
+      "function": {
+        "type": "ScalarAffineFunction",
+        "terms": [
+          {
+            "coefficient": -1.0,
+            "variable": "0_va[2]"
+          },
+          {
+            "coefficient": 1.0,
+            "variable": "0_va[1]"
+          }
+        ],
+        "constant": 0.0
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 0.5235987755982988
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[5]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": 0.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[4]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": 0.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[2]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": 0.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[3]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": 0.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[1]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": 0.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(5, 3, 4)]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -4.26
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(4, 2, 3)]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -4.26
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(6, 4, 5)]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -2.4
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(2, 1, 4)]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -4.26
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(3, 1, 5)]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -4.26
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(1, 1, 2)]"
+      },
+      "set": {
+        "type": "GreaterThan",
+        "lower": -4.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[5]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 6.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[4]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 2.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[2]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 1.7
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[3]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 5.2
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_pg[1]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 0.4
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(5, 3, 4)]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 4.26
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(4, 2, 3)]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 4.26
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(6, 4, 5)]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 2.4
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(2, 1, 4)]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 4.26
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(3, 1, 5)]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 4.26
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "0_p[(1, 1, 2)]"
+      },
+      "set": {
+        "type": "LessThan",
+        "upper": 4.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "load_scaler[1]"
+      },
+      "set": {
+        "type": "Parameter",
+        "value": 1.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "load_scaler[2]"
+      },
+      "set": {
+        "type": "Parameter",
+        "value": 1.0
+      }
+    },
+    {
+      "function": {
+        "type": "Variable",
+        "name": "load_scaler[3]"
+      },
+      "set": {
+        "type": "Parameter",
+        "value": 1.0
+      }
+    }
+  ]
+}
diff --git a/test/samplers.jl b/test/samplers.jl
index 6533c37..9f29d36 100644
--- a/test/samplers.jl
+++ b/test/samplers.jl
@@ -89,4 +89,4 @@ function test_general_sampler_file(file::AbstractString="pglib_opf_case5_pjm_DCP
     @test size(input_table) == (length(problem_iterator.ids), num_p + 1)
 
     return nothing
-end
\ No newline at end of file
+end

From 0cc7088988e4edd50e40b4a5685474e17e3541e6 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Wed, 21 Feb 2024 15:42:39 -0500
Subject: [PATCH 13/32] adjust record for infeasible

---
 examples/powermodels/write_to_file.jl |  7 ++++--
 src/arrowrecorder.jl                  | 28 +++++++++++++++++------
 src/csvrecorder.jl                    | 33 +++++++++++++++++++--------
 src/datasetgen.jl                     | 13 +++++++----
 4 files changed, 58 insertions(+), 23 deletions(-)

diff --git a/examples/powermodels/write_to_file.jl b/examples/powermodels/write_to_file.jl
index f9e2942..2840868 100644
--- a/examples/powermodels/write_to_file.jl
+++ b/examples/powermodels/write_to_file.jl
@@ -4,7 +4,7 @@ using PGLib
 using Gurobi
 
 optimizer = Gurobi.Optimizer
-network_formulation = ACPPowerModel # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
+network_formulation = SOCWRConicPowerModel # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
 
 matpower_case_name = "6468_rte"
 
@@ -16,7 +16,7 @@ model = JuMP.Model(optimizer)
 # Save original load value and Link POI
 num_loads = length(network_data["load"])
 
-@variable(model, load_scaler[i=1:num_loads] in MOI.Parameter.(1.0))
+@variable(model, load_scaler[i=1:num_loads] in MOI.Parameter.(1.03))
 
 for (str_i, l) in network_data["load"]
     i = parse(Int, str_i)
@@ -32,6 +32,9 @@ pm = instantiate_model(
     jump_model=model,
 )
 
+# JuMP.optimize!(model)
+# JuMP.termination_status(model)
+
 write_to_file(model, "$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
 
 # dest_model = read_from_file("$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
\ No newline at end of file
diff --git a/src/arrowrecorder.jl b/src/arrowrecorder.jl
index e2d1618..5991f5b 100644
--- a/src/arrowrecorder.jl
+++ b/src/arrowrecorder.jl
@@ -9,20 +9,34 @@ Record optimization problem solution to an Arrow file.
 """
 function record(recorder::Recorder{ArrowFile}, id::UUID; input=false)
     _filename = input ? filename_input(recorder) : filename(recorder)
-
     _filename = _filename * "_$(string(id))." * string(ArrowFile)
-
     model = recorder.model
 
+    status=JuMP.termination_status(model)
+    primal_status=JuMP.primal_status(model)
+    dual_status=JuMP.dual_status(model)
+
+    primal_values = if in(primal_status, DECISION_STATUS)
+        [[value.(p)] for p in recorder.primal_variables]
+    else
+        [[zeros(length(p))] for p in recorder.primal_variables]
+    end
+
+    dual_values = if in(dual_status, DECISION_STATUS)
+        [[dual.(p)] for p in recorder.dual_variables]
+    else
+        [[zeros(length(p))] for p in recorder.dual_variables]
+    end
+
     df = (;
         id=[id],
         zip(
             Symbol.(name.(recorder.primal_variables)),
-            [[value.(p)] for p in recorder.primal_variables],
+            primal_values,
         )...,
         zip(
             Symbol.("dual_" .* name.(recorder.dual_variables)),
-            [[dual.(p)] for p in recorder.dual_variables],
+            dual_values,
         )...,
     )
     if !input
@@ -31,9 +45,9 @@ function record(recorder::Recorder{ArrowFile}, id::UUID; input=false)
             (;
                 objective=[JuMP.objective_value(model)],
                 time=[JuMP.solve_time(model)],
-                status=[string(JuMP.termination_status(model))],
-                primal_status=[string(JuMP.primal_status(model))],
-                dual_status=[string(JuMP.dual_status(model))],
+                status=[string(status)],
+                primal_status=[string(primal_status)],
+                dual_status=[string(dual_status)],
             ),
         )
     end
diff --git a/src/csvrecorder.jl b/src/csvrecorder.jl
index f5fcf7b..02e36d9 100644
--- a/src/csvrecorder.jl
+++ b/src/csvrecorder.jl
@@ -11,6 +11,10 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
     _filename = input ? filename_input(recorder) : filename(recorder)
     _filename = _filename * "." * string(CSVFile)
 
+    model = recorder.model
+    primal_status=JuMP.primal_status(model)
+    dual_status=JuMP.dual_status(model)
+
     if !isfile(_filename)
         open(_filename, "w") do f
             write(f, "id")
@@ -32,16 +36,27 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
     end
     open(_filename, "a") do f
         write(f, "$id")
-        for p in recorder.primal_variables
-            val = value.(p)
-            write(f, ",$val")
+        if in(primal_status, DECISION_STATUS)
+            for p in recorder.primal_variables
+                val = value.(p)
+                write(f, ",$val")
+            end
+        else
+            for p in recorder.primal_variables
+                write(f, ",0")
+            end
         end
-        for p in recorder.dual_variables
-            val = dual.(p)
-            write(f, ",$val")
+        if in(dual_status, DECISION_STATUS)
+            for p in recorder.dual_variables
+                val = dual.(p)
+                write(f, ",$val")
+            end
+        else
+            for p in recorder.dual_variables
+                write(f, ",0")
+            end
         end
-        # save objective value
-        model = recorder.model
+
         if !input
             # save objective value
             obj = JuMP.objective_value(model)
@@ -53,10 +68,8 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
             status = JuMP.termination_status(model)
             write(f, ",$status")
             # save primal status
-            primal_status = JuMP.primal_status(model)
             write(f, ",$primal_status")
             # save dual status
-            dual_status = JuMP.dual_status(model)
             write(f, ",$dual_status")
         end
         # end line
diff --git a/src/datasetgen.jl b/src/datasetgen.jl
index 1561ef0..fc3b464 100644
--- a/src/datasetgen.jl
+++ b/src/datasetgen.jl
@@ -20,10 +20,15 @@ termination_status_filter(status) = in(status, ACCEPTED_TERMINATION_STATUSES)
 primal_status_filter(status) = in(status, DECISION_STATUS)
 dual_status_filter(status) = in(status, DECISION_STATUS)
 
-function filter_fn(model)
-    return termination_status_filter(termination_status(model)) &&
-           primal_status_filter(primal_status(model)) &&
-           dual_status_filter(dual_status(model))
+function filter_fn(model; check_primal=true, check_dual=true)
+    if !termination_status_filter(termination_status(model))
+        return false
+    elseif check_primal && !primal_status_filter(primal_status(model))
+        return false
+    elseif check_dual && !dual_status_filter(dual_status(model))
+        return false
+    end
+    return true
 end
 
 """

From 88fc73d921a187c8f90ec37f66450c32b6bfe077 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Wed, 21 Feb 2024 16:38:21 -0500
Subject: [PATCH 14/32] create loader

---
 src/arrowrecorder.jl |  4 ++++
 src/csvrecorder.jl   |  5 +++++
 src/datasetgen.jl    | 21 +++++++++++++++++++--
 test/datasetgen.jl   |  5 +++++
 test/runtests.jl     |  6 +++++-
 test/samplers.jl     |  8 +++++---
 6 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/src/arrowrecorder.jl b/src/arrowrecorder.jl
index 5991f5b..a7e6018 100644
--- a/src/arrowrecorder.jl
+++ b/src/arrowrecorder.jl
@@ -59,3 +59,7 @@ function save(table::NamedTuple, filename::String, ::Type{ArrowFile})
     filename = filename * "." * string(ArrowFile)
     return Arrow.write(filename, table)
 end
+
+function load(filename::String, ::Type{ArrowFile})
+    return DataFrame(Arrow.Table(filename * "." * string(ArrowFile)))
+end
diff --git a/src/csvrecorder.jl b/src/csvrecorder.jl
index 02e36d9..6794c5a 100644
--- a/src/csvrecorder.jl
+++ b/src/csvrecorder.jl
@@ -85,3 +85,8 @@ function save(table::NamedTuple, filename::String, ::Type{CSVFile}; kwargs...)
     CSV.write(filename, table; append=isappend)
     return nothing
 end
+
+function load(filename::String, ::Type{CSVFile})
+    filename = filename * "." * string(CSVFile)
+    return CSV.read(filename, DataFrame)
+end
diff --git a/src/datasetgen.jl b/src/datasetgen.jl
index fc3b464..60d504f 100644
--- a/src/datasetgen.jl
+++ b/src/datasetgen.jl
@@ -136,12 +136,12 @@ function ProblemIterator(
 end
 
 """
-    save(problem_iterator::ProblemIterator, filename::String, file_type::Type{T})
+    save(problem_iterator::ProblemIterator, filename::AbstractString, file_type::Type{T})
 
 Save optimization problem instances to a file.
 """
 function save(
-    problem_iterator::AbstractProblemIterator, filename::String, file_type::Type{T}
+    problem_iterator::AbstractProblemIterator, filename::AbstractString, file_type::Type{T}
 ) where {T<:FileType}
     kys = sort(collect(keys(problem_iterator.pairs)); by=(v) -> index(v).value)
     df = (; id=problem_iterator.ids,)
@@ -150,6 +150,23 @@ function save(
     return nothing
 end
 
+function load(model_file::AbstractString, input_file::AbstractString, ::Type{T}) where {T<:FileType}
+    df = load(input_file, T)
+    model = read_from_file(model_file)
+    parameters, _ = L2O.load_parameters(model)
+    ids = df.id
+    pairs = Dict{VariableRef,Vector{Float64}}()
+    for ky in keys(df)
+        if ky != :id
+            parameter = findfirst(parameters) do p
+                name(p) == ky
+            end
+            push!(pairs, parameter => df[ky])
+        end
+    end
+    return ProblemIterator(ids, pairs)
+end
+
 """
     update_model!(model::JuMP.Model, p::VariableRef, val::Real)
 
diff --git a/test/datasetgen.jl b/test/datasetgen.jl
index 72edfbe..2d2ac05 100644
--- a/test/datasetgen.jl
+++ b/test/datasetgen.jl
@@ -97,3 +97,8 @@ function test_problem_iterator(path::AbstractString)
         end
     end
 end
+
+function test_load(model_file::AbstractString, input_file::AbstractString, ::Type{T}) where {T<:FileType}
+    problem_iterator = load(model_file, input_file, T)
+    @test problem_iterator isa AbstractProblemIterator
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index faf44cb..e9917e1 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -41,7 +41,11 @@ include(joinpath(test_dir, "samplers.jl"))
     test_inconvexhull()
 
     mktempdir() do path
-        test_general_sampler_file(; cache_dir=path)
+        model_file = "pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json"
+        @testset "Samplers saving on $filetype" for filetype in [ArrowFile, CSVFile]
+            file_in = test_general_sampler_file(model_file; cache_dir=path, filetype=filetype)
+            test_load(model_file, input_file, filetype)
+        end
         test_problem_iterator(path)
         file_in, file_out = test_pglib_datasetgen(path, "pglib_opf_case5_pjm", 20)
         test_flux_forecaster(file_in, file_out)
diff --git a/test/samplers.jl b/test/samplers.jl
index 9f29d36..b92e446 100644
--- a/test/samplers.jl
+++ b/test/samplers.jl
@@ -69,6 +69,7 @@ function test_general_sampler_file(file::AbstractString="pglib_opf_case5_pjm_DCP
     cache_dir=mktempdir(),
     batch_id=uuid1(),
     save_file = joinpath(cache_dir, split(split(file, ".mof.json")[1], "/")[end] * "_input_" * string(batch_id)),
+    filetype=CSVFile
 )
     _, vals = L2O.load_parameters(file)
     num_p=length(vals)
@@ -80,13 +81,14 @@ function test_general_sampler_file(file::AbstractString="pglib_opf_case5_pjm_DCP
             (original_parameters) -> box_sampler(original_parameters, num_s),
         ],
         save_file=save_file,
-        batch_id=batch_id
+        batch_id=batch_id,
+        filetype=filetype
     )
     @test length(problem_iterator.ids) == 2 * num_s + length(range_p) * (1 + num_p)
     @test length(problem_iterator.pairs) == num_p
 
-    input_table = DataFrame(Arrow.Table(save_file * ".arrow"))
+    input_table = load(save_file, filetype)
     @test size(input_table) == (length(problem_iterator.ids), num_p + 1)
 
-    return nothing
+    return save_file
 end

From ee0722692d33406f99eb21888ecfed9945204e8f Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-2.pace.gatech.edu>
Date: Wed, 21 Feb 2024 17:35:56 -0500
Subject: [PATCH 15/32] fix test

---
 examples/generate_dataset.jl | 15 +++++++++------
 test/samplers.jl             |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/examples/generate_dataset.jl b/examples/generate_dataset.jl
index 2fa7e94..a79fc8d 100644
--- a/examples/generate_dataset.jl
+++ b/examples/generate_dataset.jl
@@ -1,7 +1,10 @@
 using L2O
-using Arrow
-using Test
-using UUIDs
-import JuMP.MOI as MOI
-import ParametricOptInterface as POI
-using TOML
\ No newline at end of file
+
+general_sampler(
+    "examples/powermodels/data/6468_rte/SOCWRConicPowerModel/6468_rte_SOCWRConicPowerModel_POI_load.mof.json";
+    samplers=[
+        (original_parameters) -> scaled_distribution_sampler(original_parameters, 10000),
+        (original_parameters) -> line_sampler(original_parameters, 1.01:0.01:1.25), 
+        (original_parameters) -> box_sampler(original_parameters, 300),
+    ],
+)
\ No newline at end of file
diff --git a/test/samplers.jl b/test/samplers.jl
index b92e446..a89cc50 100644
--- a/test/samplers.jl
+++ b/test/samplers.jl
@@ -77,7 +77,7 @@ function test_general_sampler_file(file::AbstractString="pglib_opf_case5_pjm_DCP
         file;
         samplers=[
             (original_parameters) -> scaled_distribution_sampler(original_parameters, num_s),
-            line_sampler, 
+            (original_parameters) -> line_sampler(original_parameters, range_p),
             (original_parameters) -> box_sampler(original_parameters, num_s),
         ],
         save_file=save_file,

From b0a6d892430958a9f87308d3691b800c3e7d4dfd Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Thu, 22 Feb 2024 11:09:59 -0500
Subject: [PATCH 16/32] update tests

---
 Project.toml                                  |  4 +-
 ..._dataset.jl => generate_dataset_inputs.jl} |  2 +-
 examples/solve_dataset.jl                     | 60 +++++++++++++++++++
 src/L2O.jl                                    |  2 +
 src/datasetgen.jl                             | 50 ++++++++++++----
 test/datasetgen.jl                            | 22 ++++++-
 test/runtests.jl                              |  4 +-
 test/samplers.jl                              |  4 +-
 8 files changed, 127 insertions(+), 21 deletions(-)
 rename examples/{generate_dataset.jl => generate_dataset_inputs.jl} (72%)
 create mode 100644 examples/solve_dataset.jl

diff --git a/Project.toml b/Project.toml
index 22c7c63..5ce1825 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,6 +6,7 @@ version = "1.2.0-DEV"
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
@@ -30,7 +31,6 @@ julia = "1.6"
 [extras]
 CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
 Clarabel = "61c947e1-3e6d-4ee4-985a-eec8c727bd6e"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
 Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
@@ -40,4 +40,4 @@ PowerModels = "c36e90e8-916a-50a6-bd94-075b64ef4655"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "DelimitedFiles", "PGLib", "HiGHS", "PowerModels", "DataFrames", "Clarabel", "Ipopt", "MLJ"]
+test = ["Test", "DelimitedFiles", "PGLib", "HiGHS", "PowerModels", "Clarabel", "Ipopt", "MLJ"]
diff --git a/examples/generate_dataset.jl b/examples/generate_dataset_inputs.jl
similarity index 72%
rename from examples/generate_dataset.jl
rename to examples/generate_dataset_inputs.jl
index a79fc8d..16d42d1 100644
--- a/examples/generate_dataset.jl
+++ b/examples/generate_dataset_inputs.jl
@@ -1,7 +1,7 @@
 using L2O
 
 general_sampler(
-    "examples/powermodels/data/6468_rte/SOCWRConicPowerModel/6468_rte_SOCWRConicPowerModel_POI_load.mof.json";
+    "examples/powermodels/data/6468_rte/6468_rte_SOCWRConicPowerModel_POI_load.mof.json";
     samplers=[
         (original_parameters) -> scaled_distribution_sampler(original_parameters, 10000),
         (original_parameters) -> line_sampler(original_parameters, 1.01:0.01:1.25), 
diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
new file mode 100644
index 0000000..36c8ca9
--- /dev/null
+++ b/examples/solve_dataset.jl
@@ -0,0 +1,60 @@
+################################################################
+###################### Dataset Generation ######################
+################################################################
+
+using Distributed
+using Random
+
+##############
+# Load Functions
+##############
+
+@everywhere import Pkg
+
+@everywhere Pkg.activate(dirname(dirname(@__DIR__)))
+
+@everywhere Pkg.instantiate()
+
+########## SCRIPT REQUIRED PACKAGES ##########
+
+@everywhere using L2O
+@everywhere using UUIDs
+@everywhere import ParametricOptInterface as POI
+
+## SOLVER PACKAGES ##
+
+@everywhere using Gurobi
+@everywhere using Ipopt
+
+POI_cached_optimizer() = Gurobi.Optimizer()
+
+########## PARAMETERS ##########
+filetype = ArrowFile
+model_file = "examples/powermodels/data/6468_rte/input/6468_rte_SOCWRConicPowerModel_POI_load.mof.json"
+input_file = "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7f284054-d107-11ee-3fe9-09f5e129b1ad.arrow"
+
+save_path = "examples/powermodels/data/6468_rte/output/"
+case_name = split(split(model_file, ".mof.")[1], "/")[end]
+
+problem_iterator = load(model_file, input_file, filetype)
+
+batch_size = 10
+num_problems = length(problem_iterator.ids)
+num_batches = ceil(Int, num_problems / batch_size)
+
+recorder = Recorder{filetype}(file; filterfn= (model) -> true, model=problem_iterator.model)
+
+variable_refs = return_variablerefs(pm)
+for variableref in variable_refs
+    set_name(variableref, replace(name(variableref), "," => "_"))
+end
+set_primal_variable!(recorder, variable_refs)
+
+ProblemIterator(ids, pairs)
+
+########## SOLVE ##########
+@sync @distributed for i in 1:num_batches
+    idx_range = (i-1)*batch_size+1:min(i*batch_size, length(num_problems))
+    
+    @info "Batch $i of $num_batches done"
+end
\ No newline at end of file
diff --git a/src/L2O.jl b/src/L2O.jl
index a6552cd..b6ac70a 100644
--- a/src/L2O.jl
+++ b/src/L2O.jl
@@ -2,6 +2,7 @@ module L2O
 
 using Arrow
 using CSV
+using DataFrames
 using JuMP
 using UUIDs
 import ParametricOptInterface as POI
@@ -24,6 +25,7 @@ export ArrowFile,
     ProblemIterator,
     Recorder,
     save,
+    load,
     solve_batch,
     set_primal_variable!,
     set_dual_variable!,
diff --git a/src/datasetgen.jl b/src/datasetgen.jl
index 60d504f..5d6460d 100644
--- a/src/datasetgen.jl
+++ b/src/datasetgen.jl
@@ -150,21 +150,49 @@ function save(
     return nothing
 end
 
-function load(model_file::AbstractString, input_file::AbstractString, ::Type{T}) where {T<:FileType}
-    df = load(input_file, T)
-    model = read_from_file(model_file)
-    parameters, _ = L2O.load_parameters(model)
-    ids = df.id
+function _dataframe_to_dict(df::DataFrame, parameters::Vector{VariableRef})
     pairs = Dict{VariableRef,Vector{Float64}}()
-    for ky in keys(df)
-        if ky != :id
-            parameter = findfirst(parameters) do p
-                name(p) == ky
+    for ky in names(df)
+        if ky != "id"
+            idx = findfirst(parameters) do p
+                name(p) == string(ky)
             end
-            push!(pairs, parameter => df[ky])
+            parameter = parameters[idx]
+            push!(pairs, parameter => df[!,ky])
+        end
+    end
+    return pairs
+end
+
+function load(model_file::AbstractString, input_file::AbstractString, ::Type{T}; 
+    batch_size::Union{Nothing, Integer}=nothing,
+    ignore_ids::Vector{UUID}=UUID[]
+) where {T<:FileType}
+    # Load full set
+    df = load(input_file, T)
+    # Remove ignored ids
+    df.id = UUID.(df.id)
+    if !isempty(ignore_ids)
+        df = filter(:id => (id) -> !(id in ignore_ids), df)
+        if isempty(df)
+            @warn("All ids are ignored")
+            return nothing
         end
     end
-    return ProblemIterator(ids, pairs)
+    ids = df.id
+    # Load model
+    model = read_from_file(model_file)
+    # Retrieve parameters
+    parameters, _ = L2O.load_parameters(model)
+    # No batch
+    if isnothing(batch_size)
+        pairs = _dataframe_to_dict(df, parameters)
+        return ProblemIterator(ids, pairs)
+    end
+    # Batch
+    num_batches = ceil(Int, length(ids) / batch_size)
+    idx_range = (i) -> (i-1)*batch_size+1:min(i*batch_size, length(ids))
+    return [ProblemIterator(ids[idx_range(i)], _dataframe_to_dict(df[idx_range(i), :], parameters)) for i in 1:num_batches]
 end
 
 """
diff --git a/test/datasetgen.jl b/test/datasetgen.jl
index 2d2ac05..9416d38 100644
--- a/test/datasetgen.jl
+++ b/test/datasetgen.jl
@@ -98,7 +98,23 @@ function test_problem_iterator(path::AbstractString)
     end
 end
 
-function test_load(model_file::AbstractString, input_file::AbstractString, ::Type{T}) where {T<:FileType}
-    problem_iterator = load(model_file, input_file, T)
-    @test problem_iterator isa AbstractProblemIterator
+function test_load(model_file::AbstractString, input_file::AbstractString, ::Type{T}, ids::Vector{UUID};
+    batch_size::Integer=32
+) where {T<:L2O.FileType}
+    # Test Load full set 
+    problem_iterator = L2O.load(model_file, input_file, T)
+    @test problem_iterator isa L2O.AbstractProblemIterator
+    @test length(problem_iterator.ids) == length(ids)
+    # Test load only half of the ids
+    num_ids_ignored = floor(Int, length(ids) / 2)
+    problem_iterator =  L2O.load(model_file, input_file, T; ignore_ids=ids[1:num_ids_ignored])
+    @test length(problem_iterator.ids) == length(ids) - num_ids_ignored
+    # Test warning all ids to be ignored
+    problem_iterator =  L2O.load(model_file, input_file, T; ignore_ids=ids)
+    @test isnothing(problem_iterator)
+    # Test Load batch of problem iterators
+    problem_iterators =  L2O.load(model_file, input_file, T; batch_size=batch_size)
+    @test length(problem_iterators) == ceil(Int, length(ids) / batch_size)
+    @test all(isa.(problem_iterators, L2O.AbstractProblemIterator))
+    return nothing
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index e9917e1..bb4e777 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -43,8 +43,8 @@ include(joinpath(test_dir, "samplers.jl"))
     mktempdir() do path
         model_file = "pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json"
         @testset "Samplers saving on $filetype" for filetype in [ArrowFile, CSVFile]
-            file_in = test_general_sampler_file(model_file; cache_dir=path, filetype=filetype)
-            test_load(model_file, input_file, filetype)
+            file_in, ids = test_general_sampler_file(model_file; cache_dir=path, filetype=filetype)
+            test_load(model_file, file_in, filetype, ids)
         end
         test_problem_iterator(path)
         file_in, file_out = test_pglib_datasetgen(path, "pglib_opf_case5_pjm", 20)
diff --git a/test/samplers.jl b/test/samplers.jl
index a89cc50..02890a7 100644
--- a/test/samplers.jl
+++ b/test/samplers.jl
@@ -87,8 +87,8 @@ function test_general_sampler_file(file::AbstractString="pglib_opf_case5_pjm_DCP
     @test length(problem_iterator.ids) == 2 * num_s + length(range_p) * (1 + num_p)
     @test length(problem_iterator.pairs) == num_p
 
-    input_table = load(save_file, filetype)
+    input_table = L2O.load(save_file, filetype)
     @test size(input_table) == (length(problem_iterator.ids), num_p + 1)
 
-    return save_file
+    return save_file, problem_iterator.ids
 end

From 57c44be36f0ec1dfdb2b06464ab4f9181af7a6c5 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Thu, 22 Feb 2024 11:22:21 -0500
Subject: [PATCH 17/32] update script

---
 examples/solve_dataset.jl | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index 36c8ca9..1bb1af6 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -35,26 +35,13 @@ input_file = "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7
 
 save_path = "examples/powermodels/data/6468_rte/output/"
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
+batch_size = 200
 
-problem_iterator = load(model_file, input_file, filetype)
+problem_iterators = load(model_file, input_file, filetype; batch_size=batch_size)
 
-batch_size = 10
-num_problems = length(problem_iterator.ids)
-num_batches = ceil(Int, num_problems / batch_size)
-
-recorder = Recorder{filetype}(file; filterfn= (model) -> true, model=problem_iterator.model)
-
-variable_refs = return_variablerefs(pm)
-for variableref in variable_refs
-    set_name(variableref, replace(name(variableref), "," => "_"))
+@sync @distributed for problem_iterators in problem_iterators
+    output_file = joinpath(save_path, "$(case_name)_output_$(UUID())")
+    recorder = Recorder{filetype}(output_file; filterfn= (model) -> true, model=problem_iterator.model)
+    successfull_solves = solve_batch(problem_iterator, recorder)
+    @info "Solved $(length(successfull_solves)) problems"
 end
-set_primal_variable!(recorder, variable_refs)
-
-ProblemIterator(ids, pairs)
-
-########## SOLVE ##########
-@sync @distributed for i in 1:num_batches
-    idx_range = (i-1)*batch_size+1:min(i*batch_size, length(num_problems))
-    
-    @info "Batch $i of $num_batches done"
-end
\ No newline at end of file

From 92d290b070d9726b490debd0306fe380ca1ecd21 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Thu, 22 Feb 2024 11:25:05 -0500
Subject: [PATCH 18/32] update code

---
 examples/solve_dataset.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index 1bb1af6..eaf7d61 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -37,6 +37,8 @@ save_path = "examples/powermodels/data/6468_rte/output/"
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
 batch_size = 200
 
+########## SOLVE ##########
+
 problem_iterators = load(model_file, input_file, filetype; batch_size=batch_size)
 
 @sync @distributed for problem_iterators in problem_iterators

From f363d481658c6452d7dea5f056f38ec22d69cd3e Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-3.pace.gatech.edu>
Date: Thu, 22 Feb 2024 11:35:25 -0500
Subject: [PATCH 19/32] update script

---
 examples/solve_dataset.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index eaf7d61..f9adaa2 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -42,6 +42,7 @@ batch_size = 200
 problem_iterators = load(model_file, input_file, filetype; batch_size=batch_size)
 
 @sync @distributed for problem_iterators in problem_iterators
+    set_optimizer(problem_iterator.model, POI_cached_optimizer())
     output_file = joinpath(save_path, "$(case_name)_output_$(UUID())")
     recorder = Recorder{filetype}(output_file; filterfn= (model) -> true, model=problem_iterator.model)
     successfull_solves = solve_batch(problem_iterator, recorder)

From 942e058655a268f38420f0505e9d72ec3b41d63e Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Thu, 22 Feb 2024 12:42:24 -0500
Subject: [PATCH 20/32] update script

---
 examples/solve_dataset.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index f9adaa2..c1aaf23 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -11,7 +11,7 @@ using Random
 
 @everywhere import Pkg
 
-@everywhere Pkg.activate(dirname(dirname(@__DIR__)))
+@everywhere Pkg.activate(dirname(@__DIR__))
 
 @everywhere Pkg.instantiate()
 

From cc022cd498f8e332a63d798ccb2b4f44e9d936a7 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Thu, 22 Feb 2024 13:44:03 -0500
Subject: [PATCH 21/32] update code

---
 examples/solve_dataset.jl |  6 +++---
 src/datasetgen.jl         | 16 ++++++++++------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index c1aaf23..a5a0570 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -24,14 +24,14 @@ using Random
 ## SOLVER PACKAGES ##
 
 @everywhere using Gurobi
-@everywhere using Ipopt
+# @everywhere using Ipopt
 
 POI_cached_optimizer() = Gurobi.Optimizer()
 
 ########## PARAMETERS ##########
 filetype = ArrowFile
 model_file = "examples/powermodels/data/6468_rte/input/6468_rte_SOCWRConicPowerModel_POI_load.mof.json"
-input_file = "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7f284054-d107-11ee-3fe9-09f5e129b1ad.arrow"
+input_file = "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7f284054-d107-11ee-3fe9-09f5e129b1ad"
 
 save_path = "examples/powermodels/data/6468_rte/output/"
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
@@ -42,7 +42,7 @@ batch_size = 200
 problem_iterators = load(model_file, input_file, filetype; batch_size=batch_size)
 
 @sync @distributed for problem_iterators in problem_iterators
-    set_optimizer(problem_iterator.model, POI_cached_optimizer())
+    set_optimizer(problem_iterator.model, () -> POI_cached_optimizer())
     output_file = joinpath(save_path, "$(case_name)_output_$(UUID())")
     recorder = Recorder{filetype}(output_file; filterfn= (model) -> true, model=problem_iterator.model)
     successfull_solves = solve_batch(problem_iterator, recorder)
diff --git a/src/datasetgen.jl b/src/datasetgen.jl
index 5d6460d..4384d21 100644
--- a/src/datasetgen.jl
+++ b/src/datasetgen.jl
@@ -164,6 +164,14 @@ function _dataframe_to_dict(df::DataFrame, parameters::Vector{VariableRef})
     return pairs
 end
 
+function _dataframe_to_dict(df::DataFrame, model_file::AbstractString)
+    # Load model
+    model = read_from_file(model_file)
+    # Retrieve parameters
+    parameters, _ = L2O.load_parameters(model)
+    return _dataframe_to_dict(df, parameters)
+end
+
 function load(model_file::AbstractString, input_file::AbstractString, ::Type{T}; 
     batch_size::Union{Nothing, Integer}=nothing,
     ignore_ids::Vector{UUID}=UUID[]
@@ -180,19 +188,15 @@ function load(model_file::AbstractString, input_file::AbstractString, ::Type{T};
         end
     end
     ids = df.id
-    # Load model
-    model = read_from_file(model_file)
-    # Retrieve parameters
-    parameters, _ = L2O.load_parameters(model)
     # No batch
     if isnothing(batch_size)
-        pairs = _dataframe_to_dict(df, parameters)
+        pairs = _dataframe_to_dict(df, model_file)
         return ProblemIterator(ids, pairs)
     end
     # Batch
     num_batches = ceil(Int, length(ids) / batch_size)
     idx_range = (i) -> (i-1)*batch_size+1:min(i*batch_size, length(ids))
-    return [ProblemIterator(ids[idx_range(i)], _dataframe_to_dict(df[idx_range(i), :], parameters)) for i in 1:num_batches]
+    return [ProblemIterator(ids[idx_range(i)], _dataframe_to_dict(df[idx_range(i), :], model_file)) for i in 1:num_batches]
 end
 
 """

From f5bd49e5d29411cf036c2dc46a838ea95dfc4d66 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Thu, 22 Feb 2024 14:03:02 -0500
Subject: [PATCH 22/32] fix script

---
 examples/solve_dataset.jl | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index a5a0570..a2b6629 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -11,7 +11,9 @@ using Random
 
 @everywhere import Pkg
 
-@everywhere Pkg.activate(dirname(@__DIR__))
+l2o_path = dirname(@__DIR__)
+
+@everywhere Pkg.activate(l2o_path)
 
 @everywhere Pkg.instantiate()
 
@@ -30,10 +32,10 @@ POI_cached_optimizer() = Gurobi.Optimizer()
 
 ########## PARAMETERS ##########
 filetype = ArrowFile
-model_file = "examples/powermodels/data/6468_rte/input/6468_rte_SOCWRConicPowerModel_POI_load.mof.json"
-input_file = "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7f284054-d107-11ee-3fe9-09f5e129b1ad"
+model_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_SOCWRConicPowerModel_POI_load.mof.json")
+input_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7f284054-d107-11ee-3fe9-09f5e129b1ad")
 
-save_path = "examples/powermodels/data/6468_rte/output/"
+save_path = joinpath(l2o_path, "examples/powermodels/data/6468_rte/output/")
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
 batch_size = 200
 

From 55f84fea34dbc48359f61b619ffe1ed5ff6a4465 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Thu, 22 Feb 2024 16:33:54 -0500
Subject: [PATCH 23/32] update script

---
 examples/solve_dataset.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index a2b6629..5c325be 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -9,9 +9,9 @@ using Random
 # Load Functions
 ##############
 
-@everywhere import Pkg
+@everywhere l2o_path = dirname(@__DIR__)
 
-l2o_path = dirname(@__DIR__)
+@everywhere import Pkg
 
 @everywhere Pkg.activate(l2o_path)
 

From 3581d9f86873d200764b0b0fe88bd8ec834dc98a Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Thu, 22 Feb 2024 17:09:21 -0500
Subject: [PATCH 24/32] update parallel code

---
 examples/solve_dataset.jl | 5 +++--
 src/datasetgen.jl         | 2 +-
 test/datasetgen.jl        | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index 5c325be..f880561 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -41,9 +41,10 @@ batch_size = 200
 
 ########## SOLVE ##########
 
-problem_iterators = load(model_file, input_file, filetype; batch_size=batch_size)
+problem_iterator_factory, num_batches = load(model_file, input_file, filetype; batch_size=batch_size)
 
-@sync @distributed for problem_iterators in problem_iterators
+@sync @distributed for i in 1:num_batches
+    problem_iterator = problem_iterator_factory(i)
     set_optimizer(problem_iterator.model, () -> POI_cached_optimizer())
     output_file = joinpath(save_path, "$(case_name)_output_$(UUID())")
     recorder = Recorder{filetype}(output_file; filterfn= (model) -> true, model=problem_iterator.model)
diff --git a/src/datasetgen.jl b/src/datasetgen.jl
index 4384d21..022b20c 100644
--- a/src/datasetgen.jl
+++ b/src/datasetgen.jl
@@ -196,7 +196,7 @@ function load(model_file::AbstractString, input_file::AbstractString, ::Type{T};
     # Batch
     num_batches = ceil(Int, length(ids) / batch_size)
     idx_range = (i) -> (i-1)*batch_size+1:min(i*batch_size, length(ids))
-    return [ProblemIterator(ids[idx_range(i)], _dataframe_to_dict(df[idx_range(i), :], model_file)) for i in 1:num_batches]
+    return (i) -> ProblemIterator(ids[idx_range(i)], _dataframe_to_dict(df[idx_range(i), :], model_file)), num_batches
 end
 
 """
diff --git a/test/datasetgen.jl b/test/datasetgen.jl
index 9416d38..0fbf280 100644
--- a/test/datasetgen.jl
+++ b/test/datasetgen.jl
@@ -113,8 +113,8 @@ function test_load(model_file::AbstractString, input_file::AbstractString, ::Typ
     problem_iterator =  L2O.load(model_file, input_file, T; ignore_ids=ids)
     @test isnothing(problem_iterator)
     # Test Load batch of problem iterators
-    problem_iterators =  L2O.load(model_file, input_file, T; batch_size=batch_size)
-    @test length(problem_iterators) == ceil(Int, length(ids) / batch_size)
-    @test all(isa.(problem_iterators, L2O.AbstractProblemIterator))
+    problem_iterator_factory, num_batches =  L2O.load(model_file, input_file, T; batch_size=batch_size)
+    @test num_batches == ceil(Int, length(ids) / batch_size)
+    @test problem_iterator_factory(1) isa L2O.AbstractProblemIterator
     return nothing
 end

From 0a5d067d326318116d5cf0fb0f2434bb82e860cc Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Fri, 23 Feb 2024 11:56:47 -0500
Subject: [PATCH 25/32] fix bug infeasible

---
 examples/solve_dataset.jl | 11 ++++++++---
 src/arrowrecorder.jl      | 20 +++++++++++++-------
 src/csvrecorder.jl        | 18 +++++++++++-------
 3 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index f880561..e2b008a 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -22,6 +22,9 @@ using Random
 @everywhere using L2O
 @everywhere using UUIDs
 @everywhere import ParametricOptInterface as POI
+@everywhere using JuMP
+@everywhere using UUIDs
+@everywhere using Arrow
 
 ## SOLVER PACKAGES ##
 
@@ -37,16 +40,18 @@ input_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_r
 
 save_path = joinpath(l2o_path, "examples/powermodels/data/6468_rte/output/")
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
-batch_size = 200
+processed_output_files = [file for file in readdir(save_path; join=true) if occursin(case_name, file)]
+ids = Vector(Arrow.Table(processed_output_files).id)
+batch_size = 20
 
 ########## SOLVE ##########
 
-problem_iterator_factory, num_batches = load(model_file, input_file, filetype; batch_size=batch_size)
+problem_iterator_factory, num_batches = load(model_file, input_file, filetype; batch_size=batch_size, ignore_ids=ids)
 
 @sync @distributed for i in 1:num_batches
     problem_iterator = problem_iterator_factory(i)
     set_optimizer(problem_iterator.model, () -> POI_cached_optimizer())
-    output_file = joinpath(save_path, "$(case_name)_output_$(UUID())")
+    output_file = joinpath(save_path, "$(case_name)_output_$(uuid1())")
     recorder = Recorder{filetype}(output_file; filterfn= (model) -> true, model=problem_iterator.model)
     successfull_solves = solve_batch(problem_iterator, recorder)
     @info "Solved $(length(successfull_solves)) problems"
diff --git a/src/arrowrecorder.jl b/src/arrowrecorder.jl
index a7e6018..9aa45eb 100644
--- a/src/arrowrecorder.jl
+++ b/src/arrowrecorder.jl
@@ -13,21 +13,27 @@ function record(recorder::Recorder{ArrowFile}, id::UUID; input=false)
     model = recorder.model
 
     status=JuMP.termination_status(model)
-    primal_status=JuMP.primal_status(model)
-    dual_status=JuMP.dual_status(model)
+    primal_stat=JuMP.primal_status(model)
+    dual_stat=JuMP.dual_status(model)
 
-    primal_values = if in(primal_status, DECISION_STATUS)
+    primal_values = if in(primal_stat, DECISION_STATUS)
         [[value.(p)] for p in recorder.primal_variables]
     else
         [[zeros(length(p))] for p in recorder.primal_variables]
     end
 
-    dual_values = if in(dual_status, DECISION_STATUS)
+    dual_values = if in(dual_stat, DECISION_STATUS)
         [[dual.(p)] for p in recorder.dual_variables]
     else
         [[zeros(length(p))] for p in recorder.dual_variables]
     end
 
+    objective = if in(status, ACCEPTED_TERMINATION_STATUSES)
+        JuMP.objective_value(model)
+    else
+        0.0
+    end
+
     df = (;
         id=[id],
         zip(
@@ -43,11 +49,11 @@ function record(recorder::Recorder{ArrowFile}, id::UUID; input=false)
         df = merge(
             df,
             (;
-                objective=[JuMP.objective_value(model)],
+                objective=[objective],
                 time=[JuMP.solve_time(model)],
                 status=[string(status)],
-                primal_status=[string(primal_status)],
-                dual_status=[string(dual_status)],
+                primal_status=[string(primal_stat)],
+                dual_status=[string(dual_stat)],
             ),
         )
     end
diff --git a/src/csvrecorder.jl b/src/csvrecorder.jl
index 6794c5a..82d4f22 100644
--- a/src/csvrecorder.jl
+++ b/src/csvrecorder.jl
@@ -12,8 +12,8 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
     _filename = _filename * "." * string(CSVFile)
 
     model = recorder.model
-    primal_status=JuMP.primal_status(model)
-    dual_status=JuMP.dual_status(model)
+    primal_stat=JuMP.primal_status(model)
+    dual_stat=JuMP.dual_status(model)
 
     if !isfile(_filename)
         open(_filename, "w") do f
@@ -36,7 +36,7 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
     end
     open(_filename, "a") do f
         write(f, "$id")
-        if in(primal_status, DECISION_STATUS)
+        if in(primal_stat, DECISION_STATUS)
             for p in recorder.primal_variables
                 val = value.(p)
                 write(f, ",$val")
@@ -46,7 +46,7 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
                 write(f, ",0")
             end
         end
-        if in(dual_status, DECISION_STATUS)
+        if in(dual_stat, DECISION_STATUS)
             for p in recorder.dual_variables
                 val = dual.(p)
                 write(f, ",$val")
@@ -59,7 +59,11 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
 
         if !input
             # save objective value
-            obj = JuMP.objective_value(model)
+            obj = if in(status, ACCEPTED_TERMINATION_STATUSES)
+                JuMP.objective_value(model)
+            else
+                0.0
+            end
             write(f, ",$obj")
             # save solve time
             time = JuMP.solve_time(model)
@@ -68,9 +72,9 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
             status = JuMP.termination_status(model)
             write(f, ",$status")
             # save primal status
-            write(f, ",$primal_status")
+            write(f, ",$primal_stat")
             # save dual status
-            write(f, ",$dual_status")
+            write(f, ",$dual_stat")
         end
         # end line
         write(f, "\n")

From f1f40e67cc9f3191e0f2373693c1b853327ac47c Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Fri, 23 Feb 2024 13:54:44 -0500
Subject: [PATCH 26/32] update script

---
 examples/solve_dataset.jl | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index e2b008a..66305b0 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -11,30 +11,33 @@ using Random
 
 @everywhere l2o_path = dirname(@__DIR__)
 
-@everywhere import Pkg
+@everywhere begin
+    import Pkg
 
-@everywhere Pkg.activate(l2o_path)
+    Pkg.activate(l2o_path)
 
-@everywhere Pkg.instantiate()
+    Pkg.instantiate()
 
-########## SCRIPT REQUIRED PACKAGES ##########
+    ########## SCRIPT REQUIRED PACKAGES ##########
 
-@everywhere using L2O
-@everywhere using UUIDs
-@everywhere import ParametricOptInterface as POI
-@everywhere using JuMP
-@everywhere using UUIDs
-@everywhere using Arrow
+    using L2O
+    using UUIDs
+    import ParametricOptInterface as POI
+    using JuMP
+    using UUIDs
+    using Arrow
 
-## SOLVER PACKAGES ##
+    ## SOLVER PACKAGES ##
 
-@everywhere using Gurobi
-# @everywhere using Ipopt
+    using Gurobi
+    # using Ipopt
 
-POI_cached_optimizer() = Gurobi.Optimizer()
+    POI_cached_optimizer() = Gurobi.Optimizer()
+
+    filetype = ArrowFile
+end
 
 ########## PARAMETERS ##########
-filetype = ArrowFile
 model_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_SOCWRConicPowerModel_POI_load.mof.json")
 input_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7f284054-d107-11ee-3fe9-09f5e129b1ad")
 

From 8057aae3affb3b14743578f0b6fa71d3c84a1e7f Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Fri, 23 Feb 2024 15:12:50 -0500
Subject: [PATCH 27/32] update code

---
 examples/solve_dataset.jl | 41 ++++++++++++++++++++-------------------
 src/L2O.jl                |  3 ++-
 src/arrowrecorder.jl      | 16 +++++++++++++++
 src/csvrecorder.jl        |  2 +-
 test/datasetgen.jl        | 12 ++++++++++++
 test/runtests.jl          |  1 +
 6 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index 66305b0..73a0a37 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -11,37 +11,36 @@ using Random
 
 @everywhere l2o_path = dirname(@__DIR__)
 
-@everywhere begin
-    import Pkg
+@everywhere import Pkg
 
-    Pkg.activate(l2o_path)
+@everywhere Pkg.activate(l2o_path)
 
-    Pkg.instantiate()
+@everywhere Pkg.instantiate()
 
-    ########## SCRIPT REQUIRED PACKAGES ##########
+########## SCRIPT REQUIRED PACKAGES ##########
 
-    using L2O
-    using UUIDs
-    import ParametricOptInterface as POI
-    using JuMP
-    using UUIDs
-    using Arrow
+@everywhere using L2O
+@everywhere using UUIDs
+@everywhere import ParametricOptInterface as POI
+@everywhere using JuMP
+@everywhere using UUIDs
+@everywhere using Arrow
 
-    ## SOLVER PACKAGES ##
+## SOLVER PACKAGES ##
 
-    using Gurobi
-    # using Ipopt
+@everywhere using Gurobi
+# @everywhere using Ipopt
 
-    POI_cached_optimizer() = Gurobi.Optimizer()
+@everywhere POI_cached_optimizer() = Gurobi.Optimizer()
+
+@everywhere filetype = ArrowFile
 
-    filetype = ArrowFile
-end
 
 ########## PARAMETERS ##########
-model_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_SOCWRConicPowerModel_POI_load.mof.json")
+model_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_SOCWRConicPowerModel_POI_load.mof.json") # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
 input_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7f284054-d107-11ee-3fe9-09f5e129b1ad")
 
-save_path = joinpath(l2o_path, "examples/powermodels/data/6468_rte/output/")
+save_path = joinpath(l2o_path, "examples/powermodels/data/6468_rte/output/SOCWRConicPowerModel")
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
 processed_output_files = [file for file in readdir(save_path; join=true) if occursin(case_name, file)]
 ids = Vector(Arrow.Table(processed_output_files).id)
@@ -52,10 +51,12 @@ batch_size = 20
 problem_iterator_factory, num_batches = load(model_file, input_file, filetype; batch_size=batch_size, ignore_ids=ids)
 
 @sync @distributed for i in 1:num_batches
+    batch_id = uuid1()
     problem_iterator = problem_iterator_factory(i)
     set_optimizer(problem_iterator.model, () -> POI_cached_optimizer())
-    output_file = joinpath(save_path, "$(case_name)_output_$(uuid1())")
+    output_file = joinpath(save_path, "$(case_name)_output_$(batch_id)")
     recorder = Recorder{filetype}(output_file; filterfn= (model) -> true, model=problem_iterator.model)
     successfull_solves = solve_batch(problem_iterator, recorder)
     @info "Solved $(length(successfull_solves)) problems"
+    compress_batch_arrow(save_path, batch_id, case_name)
 end
diff --git a/src/L2O.jl b/src/L2O.jl
index b6ac70a..1aa8233 100644
--- a/src/L2O.jl
+++ b/src/L2O.jl
@@ -41,7 +41,8 @@ export ArrowFile,
     line_sampler,
     box_sampler,
     scaled_distribution_sampler,
-    general_sampler
+    general_sampler,
+    compress_batch_arrow
 
 include("datasetgen.jl")
 include("csvrecorder.jl")
diff --git a/src/arrowrecorder.jl b/src/arrowrecorder.jl
index 9aa45eb..039f8e1 100644
--- a/src/arrowrecorder.jl
+++ b/src/arrowrecorder.jl
@@ -69,3 +69,19 @@ end
 function load(filename::String, ::Type{ArrowFile})
     return DataFrame(Arrow.Table(filename * "." * string(ArrowFile)))
 end
+
+function compress_batch_arrow(case_file_path::String, batch_id::UUID, case_name::String; keyword="output")
+    batch_id = string(batch_id)
+    iter_files = readdir(case_file_path; join=true)
+    file_outs = [
+        file for file in iter_files if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && occursin(batch_id, file)
+    ]
+    output_table = Arrow.Table(file_outs)
+    Arrow.write(
+        joinpath(case_file_path, "$(case_name)_$(keyword)_" * batch_id * ".arrow"),
+        output_table,
+    )
+    for file in file_outs
+        rm(file)
+    end
+end
diff --git a/src/csvrecorder.jl b/src/csvrecorder.jl
index 82d4f22..080cc29 100644
--- a/src/csvrecorder.jl
+++ b/src/csvrecorder.jl
@@ -59,6 +59,7 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
 
         if !input
             # save objective value
+            status = JuMP.termination_status(model)
             obj = if in(status, ACCEPTED_TERMINATION_STATUSES)
                 JuMP.objective_value(model)
             else
@@ -69,7 +70,6 @@ function record(recorder::Recorder{CSVFile}, id::UUID; input=false)
             time = JuMP.solve_time(model)
             write(f, ",$time")
             # save status
-            status = JuMP.termination_status(model)
             write(f, ",$status")
             # save primal status
             write(f, ",$primal_stat")
diff --git a/test/datasetgen.jl b/test/datasetgen.jl
index 0fbf280..1dc25b5 100644
--- a/test/datasetgen.jl
+++ b/test/datasetgen.jl
@@ -118,3 +118,15 @@ function test_load(model_file::AbstractString, input_file::AbstractString, ::Typ
     @test problem_iterator_factory(1) isa L2O.AbstractProblemIterator
     return nothing
 end
+
+function test_compress_batch_arrow(case_file_path::AbstractString=mktempdir(), batch_id::UUID=uuid1(), case_name::AbstractString="test"; keyword::AbstractString="output")
+    random_data = rand(10, 10)
+    col_names = ["col_$(i)" for i in 1:10]
+    dfs = [DataFrame(random_data[i:i, :], col_names) for i in 1:10]
+    for i in 1:10
+        Arrow.write(joinpath(case_file_path, "$(case_name)_$(keyword)_$(batch_id)_$(uuid1()).arrow"), dfs[i])
+    end
+    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && occursin(string(batch_id), file)]) == 10
+    L2O.compress_batch_arrow(case_file_path, batch_id, case_name; keyword=keyword)
+    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && occursin(string(batch_id), file)]) == 1
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index bb4e777..1f339c3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -41,6 +41,7 @@ include(joinpath(test_dir, "samplers.jl"))
     test_inconvexhull()
 
     mktempdir() do path
+        test_compress_batch_arrow(path)
         model_file = "pglib_opf_case5_pjm_DCPPowerModel_POI_load.mof.json"
         @testset "Samplers saving on $filetype" for filetype in [ArrowFile, CSVFile]
             file_in, ids = test_general_sampler_file(model_file; cache_dir=path, filetype=filetype)

From 4770de30fd54fd558f5a249e15e0431e0d87d60a Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Fri, 23 Feb 2024 15:34:20 -0500
Subject: [PATCH 28/32] fix code

---
 examples/solve_dataset.jl |  4 ++--
 src/arrowrecorder.jl      |  7 +++----
 test/datasetgen.jl        | 14 +++++++++-----
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index 73a0a37..1b06509 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -44,7 +44,7 @@ save_path = joinpath(l2o_path, "examples/powermodels/data/6468_rte/output/SOCWRC
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
 processed_output_files = [file for file in readdir(save_path; join=true) if occursin(case_name, file)]
 ids = Vector(Arrow.Table(processed_output_files).id)
-batch_size = 20
+batch_size = 200
 
 ########## SOLVE ##########
 
@@ -58,5 +58,5 @@ problem_iterator_factory, num_batches = load(model_file, input_file, filetype; b
     recorder = Recorder{filetype}(output_file; filterfn= (model) -> true, model=problem_iterator.model)
     successfull_solves = solve_batch(problem_iterator, recorder)
     @info "Solved $(length(successfull_solves)) problems"
-    compress_batch_arrow(save_path, batch_id, case_name)
+    compress_batch_arrow(save_path, case_name; keyword_all="output", batch_id=string(batch_id), keyword_any=[string(batch_id)])
 end
diff --git a/src/arrowrecorder.jl b/src/arrowrecorder.jl
index 039f8e1..fed60fa 100644
--- a/src/arrowrecorder.jl
+++ b/src/arrowrecorder.jl
@@ -70,15 +70,14 @@ function load(filename::String, ::Type{ArrowFile})
     return DataFrame(Arrow.Table(filename * "." * string(ArrowFile)))
 end
 
-function compress_batch_arrow(case_file_path::String, batch_id::UUID, case_name::String; keyword="output")
-    batch_id = string(batch_id)
+function compress_batch_arrow(case_file_path::String, case_name::String; keyword_all="output", batch_id::String=string(uuid1()), keyword_any=["_"])
     iter_files = readdir(case_file_path; join=true)
     file_outs = [
-        file for file in iter_files if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && occursin(batch_id, file)
+        file for file in iter_files if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword_all, file) && any(x -> occursin(x, file), keyword_any)
     ]
     output_table = Arrow.Table(file_outs)
     Arrow.write(
-        joinpath(case_file_path, "$(case_name)_$(keyword)_" * batch_id * ".arrow"),
+        joinpath(case_file_path, "$(case_name)_$(keyword_all)_$(batch_id).arrow"),
         output_table,
     )
     for file in file_outs
diff --git a/test/datasetgen.jl b/test/datasetgen.jl
index 1dc25b5..3fa1c7f 100644
--- a/test/datasetgen.jl
+++ b/test/datasetgen.jl
@@ -119,14 +119,18 @@ function test_load(model_file::AbstractString, input_file::AbstractString, ::Typ
     return nothing
 end
 
-function test_compress_batch_arrow(case_file_path::AbstractString=mktempdir(), batch_id::UUID=uuid1(), case_name::AbstractString="test"; keyword::AbstractString="output")
+function test_compress_batch_arrow(case_file_path::AbstractString=mktempdir(), case_name::AbstractString="test"; keyword::AbstractString="output")
     random_data = rand(10, 10)
     col_names = ["col_$(i)" for i in 1:10]
+    batch_ids = [string(uuid1()) for _ in 1:10]
     dfs = [DataFrame(random_data[i:i, :], col_names) for i in 1:10]
     for i in 1:10
-        Arrow.write(joinpath(case_file_path, "$(case_name)_$(keyword)_$(batch_id)_$(uuid1()).arrow"), dfs[i])
+        Arrow.write(joinpath(case_file_path, "$(case_name)_$(keyword)_$(batch_ids[i]).arrow"), dfs[i])
     end
-    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && occursin(string(batch_id), file)]) == 10
-    L2O.compress_batch_arrow(case_file_path, batch_id, case_name; keyword=keyword)
-    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && occursin(string(batch_id), file)]) == 1
+    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && any(x -> occursin(x, file), batch_ids)]) == 10
+    batch_id = string(uuid1())
+    L2O.compress_batch_arrow(case_file_path, case_name; keyword_all=keyword, batch_id=batch_id, keyword_any=batch_ids)
+    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && any(x -> occursin(x, file), batch_id)]) == 0
+    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && occursin(batch_id, file)]) == 1
+    return nothing
 end

From f23c8e4f9d5580e94d8006caf315a9cd14384971 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Fri, 23 Feb 2024 15:42:45 -0500
Subject: [PATCH 29/32] fix tests

---
 test/datasetgen.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/datasetgen.jl b/test/datasetgen.jl
index 3fa1c7f..dee94de 100644
--- a/test/datasetgen.jl
+++ b/test/datasetgen.jl
@@ -130,7 +130,7 @@ function test_compress_batch_arrow(case_file_path::AbstractString=mktempdir(), c
     @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && any(x -> occursin(x, file), batch_ids)]) == 10
     batch_id = string(uuid1())
     L2O.compress_batch_arrow(case_file_path, case_name; keyword_all=keyword, batch_id=batch_id, keyword_any=batch_ids)
-    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && any(x -> occursin(x, file), batch_id)]) == 0
+    @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && any(x -> occursin(x, file), batch_ids)]) == 0
     @test length([file for file in readdir(case_file_path; join=true) if occursin(case_name, file) && occursin("arrow", file) && occursin(keyword, file) && occursin(batch_id, file)]) == 1
     return nothing
 end

From b8ffb31b95332d3f7bf12d2c7031fde0c084d631 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Fri, 23 Feb 2024 16:15:57 -0500
Subject: [PATCH 30/32] update generation

---
 examples/solve_dataset.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index 1b06509..062880e 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -28,10 +28,10 @@ using Random
 
 ## SOLVER PACKAGES ##
 
-@everywhere using Gurobi
-# @everywhere using Ipopt
+# @everywhere using Gurobi
+@everywhere using Ipopt
 
-@everywhere POI_cached_optimizer() = Gurobi.Optimizer()
+@everywhere POI_cached_optimizer() = Ipopt.Optimizer()
 
 @everywhere filetype = ArrowFile
 
@@ -43,7 +43,7 @@ input_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_r
 save_path = joinpath(l2o_path, "examples/powermodels/data/6468_rte/output/SOCWRConicPowerModel")
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
 processed_output_files = [file for file in readdir(save_path; join=true) if occursin(case_name, file)]
-ids = Vector(Arrow.Table(processed_output_files).id)
+ids = vcat([Vector(Arrow.Table(file).id) for file in processed_output_files]...)
 batch_size = 200
 
 ########## SOLVE ##########

From 64608fbddc53026d9845cfb9cda55ecd7532be1f Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-1.pace.gatech.edu>
Date: Fri, 23 Feb 2024 16:20:01 -0500
Subject: [PATCH 31/32] update script

---
 examples/solve_dataset.jl | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index 062880e..0e52b76 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -37,13 +37,17 @@ using Random
 
 
 ########## PARAMETERS ##########
-model_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_SOCWRConicPowerModel_POI_load.mof.json") # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
+model_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_ACPPowerModel_POI_load.mof.json") # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
 input_file = joinpath(l2o_path, "examples/powermodels/data/6468_rte/input/6468_rte_POI_load_input_7f284054-d107-11ee-3fe9-09f5e129b1ad")
 
-save_path = joinpath(l2o_path, "examples/powermodels/data/6468_rte/output/SOCWRConicPowerModel")
+save_path = joinpath(l2o_path, "examples/powermodels/data/6468_rte/output/ACPPowerModel")
 case_name = split(split(model_file, ".mof.")[1], "/")[end]
 processed_output_files = [file for file in readdir(save_path; join=true) if occursin(case_name, file)]
-ids = vcat([Vector(Arrow.Table(file).id) for file in processed_output_files]...)
+ids = if !isempty(processed_output_files)
+    reduce(vcat, [Vector(Arrow.Table(file).id) for file in processed_output_files])
+else
+    UUID[]
+end
 batch_size = 200
 
 ########## SOLVE ##########

From e0bd10eea7211a6f8ffdf79d8a208806752f9e56 Mon Sep 17 00:00:00 2001
From: Andrew David Werner Rosemberg
 <arosemberg3@login-phoenix-slurm-4.pace.gatech.edu>
Date: Mon, 26 Feb 2024 15:47:31 -0500
Subject: [PATCH 32/32] update code

---
 examples/powermodels/visualize.jl     | 16 +++++-----
 examples/powermodels/write_to_file.jl | 45 ++++++++++++++++++++++-----
 examples/solve_dataset.jl             | 16 ++++++++--
 3 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/examples/powermodels/visualize.jl b/examples/powermodels/visualize.jl
index ad8853a..fdc4305 100644
--- a/examples/powermodels/visualize.jl
+++ b/examples/powermodels/visualize.jl
@@ -9,7 +9,7 @@ cossim(x,y) = dot(x,y) / (norm(x)*norm(y))
 ##############
 # Parameters
 ##############
-network_formulation = "SOCWRConicPowerModel" # "DCPPowerModel" # "SOCWRConicPowerModel"
+network_formulation = "ACPPowerModel" # ACPPowerModel "DCPPowerModel" # "SOCWRConicPowerModel"
 case_name = "6468_rte" # pglib_opf_case300_ieee # 6468_rte 
 path_dataset = joinpath(dirname(@__FILE__), "data")
 case_file_path = joinpath(path_dataset, case_name)
@@ -45,6 +45,8 @@ output_table = Arrow.Table(file_outs)
 input_data_train = DataFrame(input_table_train)
 input_data_test = DataFrame(input_table_test)
 output_data = DataFrame(output_table)
+output_data.operational_cost = output_data.objective
+output_data = output_data[output_data.objective .> 10, :]
 input_data = vcat(input_data_train, input_data_test[!, Not(:in_train_convex_hull)])
 
 ##############
@@ -59,13 +61,13 @@ file_outs_soc = [
 ]
 output_table_soc = Arrow.Table(file_outs_soc)
 output_data_soc = DataFrame(output_table_soc)
-output_data_soc.operational_cost_soc = output_data_soc.operational_cost
-output_data_soc = output_data_soc[output_data_soc.operational_cost .> 10, :]
+output_data_soc.operational_cost_soc = output_data_soc.objective
+output_data_soc = output_data_soc[output_data_soc.objective .> 10, :]
 
 # compare SOC and AC operational_cost by id
 ac_soc = innerjoin(output_data[!, [:id, :operational_cost]], output_data_soc[!, [:id, :operational_cost_soc]], on=:id, makeunique=true)
 
-ac_soc.error = abs.(ac_soc.operational_cost .- ac_soc.operational_cost_soc) ./ ac_soc.operational_cost * 100
+ac_soc.error = (ac_soc.operational_cost .- ac_soc.operational_cost_soc) ./ ac_soc.operational_cost * 100
 mean(ac_soc.error)
 maximum(ac_soc.error)
 ac_soc[findmax(ac_soc.error)[2], :]
@@ -108,12 +110,12 @@ load_vector_train = total_load_vector(input_data_train)
 load_vector_test = total_load_vector(input_data_test; is_test=true)
 
 # Nominal Loads
-nominal_loads = Vector(load_vector_train[1, Not(:id)])
+nominal_loads = Vector(input_data_train[1, Not(:id)])
 norm_nominal_loads = norm(nominal_loads)
 
 # Load divergence
-theta_train = [acos(cossim(nominal_loads, Vector(load_vector_train[i, Not(:id)]))) for i in 2:size(load_vector_train, 1)] * 180 / pi
-norm_sim_train = [norm(Vector(load_vector_train[i, Not(:id)])) / norm_nominal_loads for i in 2:size(load_vector_train, 1)]
+theta_train = [acos(cossim(nominal_loads, Vector(input_data_train[i, Not(:id)]))) for i in 2:min(10000, size(input_data_train, 1))] * 180 / pi # row 1 is the nominal load; cap at 10000 rows without indexing past the data
+norm_sim_train = [norm(Vector(input_data_train[i, Not(:id)])) / norm_nominal_loads for i in 2:min(10000, size(input_data_train, 1))]
 
 theta_test = [acos(cossim(nominal_loads, Vector(load_vector_test[i, Not([:id, :in_hull])]))) for i in 1:size(load_vector_test, 1)] * 180 / pi
 norm_sim_test = [norm(Vector(load_vector_test[i, Not([:id, :in_hull])])) / norm_nominal_loads for i in 1:size(load_vector_test, 1)]
diff --git a/examples/powermodels/write_to_file.jl b/examples/powermodels/write_to_file.jl
index 2840868..3627a46 100644
--- a/examples/powermodels/write_to_file.jl
+++ b/examples/powermodels/write_to_file.jl
@@ -1,22 +1,46 @@
 using JuMP
 using PowerModels
 using PGLib
-using Gurobi
+using Clarabel
+using Ipopt
+import ParametricOptInterface as POI
 
-optimizer = Gurobi.Optimizer
-network_formulation = SOCWRConicPowerModel # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
+cached_clara =
+    () -> MOI.Bridges.full_bridge_optimizer(
+        MOI.Utilities.CachingOptimizer(
+            MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()),
+            Clarabel.Optimizer(),
+        ),
+        Float64,
+)
+
+POI_cached_optimizer_clara() = POI.Optimizer(cached_clara())
+
+ipopt = Ipopt.Optimizer()
+MOI.set(ipopt, MOI.RawOptimizerAttribute("print_level"), 0)
+cached =
+    () -> MOI.Bridges.full_bridge_optimizer(
+        MOI.Utilities.CachingOptimizer(
+            MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()),
+            ipopt,
+        ),
+        Float64,
+    )
+POI_cached_optimizer() = POI.Optimizer(cached())
+
+network_formulation = ACPPowerModel # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
 
 matpower_case_name = "6468_rte"
 
 network_data = make_basic_network(pglib(matpower_case_name))
 
 # The problem to iterate over
-model = JuMP.Model(optimizer)
+model = JuMP.Model(() -> POI_cached_optimizer())
 
 # Save original load value and Link POI
 num_loads = length(network_data["load"])
 
-@variable(model, load_scaler[i=1:num_loads] in MOI.Parameter.(1.03))
+@variable(model, load_scaler[i=1:num_loads] in MOI.Parameter.(1.0)) # needs fixing -> need to be instantiated after all other variables
 
 for (str_i, l) in network_data["load"]
     i = parse(Int, str_i)
@@ -32,9 +56,14 @@ pm = instantiate_model(
     jump_model=model,
 )
 
-# JuMP.optimize!(model)
-# JuMP.termination_status(model)
+JuMP.optimize!(model)
+JuMP.termination_status(model)
+JuMP.objective_value(model)
 
 write_to_file(model, "$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
 
-# dest_model = read_from_file("$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
\ No newline at end of file
+# dest_model = read_from_file("examples/powermodels/data/$(matpower_case_name)/input/$(matpower_case_name)_$(network_formulation)_POI_load.mof.json")
+# set_optimizer(dest_model, () -> POI_cached_optimizer())
+# JuMP.optimize!(dest_model)
+# JuMP.termination_status(dest_model)
+# JuMP.objective_value(dest_model)
\ No newline at end of file
diff --git a/examples/solve_dataset.jl b/examples/solve_dataset.jl
index 0e52b76..8c0301e 100644
--- a/examples/solve_dataset.jl
+++ b/examples/solve_dataset.jl
@@ -30,8 +30,9 @@ using Random
 
 # @everywhere using Gurobi
 @everywhere using Ipopt
+# @everywhere using Clarabel
 
-@everywhere POI_cached_optimizer() = Ipopt.Optimizer()
+# @everywhere POI_cached_optimizer() = Clarabel.Optimizer()
 
 @everywhere filetype = ArrowFile
 
@@ -55,6 +56,17 @@ batch_size = 200
 problem_iterator_factory, num_batches = load(model_file, input_file, filetype; batch_size=batch_size, ignore_ids=ids)
 
 @sync @distributed for i in 1:num_batches
+    ipopt = Ipopt.Optimizer()
+    MOI.set(ipopt, MOI.RawOptimizerAttribute("print_level"), 0)
+    cached =
+        () -> MOI.Bridges.full_bridge_optimizer(
+            MOI.Utilities.CachingOptimizer(
+                MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()),
+                ipopt,
+            ),
+            Float64,
+    )
+    POI_cached_optimizer() = POI.Optimizer(cached())
     batch_id = uuid1()
     problem_iterator = problem_iterator_factory(i)
     set_optimizer(problem_iterator.model, () -> POI_cached_optimizer())
@@ -62,5 +74,5 @@ problem_iterator_factory, num_batches = load(model_file, input_file, filetype; b
     recorder = Recorder{filetype}(output_file; filterfn= (model) -> true, model=problem_iterator.model)
     successfull_solves = solve_batch(problem_iterator, recorder)
     @info "Solved $(length(successfull_solves)) problems"
-    compress_batch_arrow(save_path, case_name; keyword_all="output", batch_id=string(batch_id), keyword_any=[string(batch_id)])
+    L2O.compress_batch_arrow(save_path, case_name; keyword_all="output", batch_id=string(batch_id), keyword_any=[string(batch_id)])
 end