From dd7afd7c8a806d6bf1592470880268d0e72ae6c7 Mon Sep 17 00:00:00 2001
From: andrewrosemberg <andrewrosemberg@gmail.com>
Date: Wed, 19 Jul 2023 14:42:22 -0300
Subject: [PATCH 1/4] add script to generate datasets

---
 .../{powermodels => flux}/flux_forecaster.jl  |  0
 .../powermodels/generate_full_datasets.jl     | 47 +++++++++++++++++++
 examples/powermodels/pglib_datagen.jl         | 22 +++++----
 test/runtests.jl                              |  4 +-
 4 files changed, 62 insertions(+), 11 deletions(-)
 rename examples/{powermodels => flux}/flux_forecaster.jl (100%)
 create mode 100644 examples/powermodels/generate_full_datasets.jl

diff --git a/examples/powermodels/flux_forecaster.jl b/examples/flux/flux_forecaster.jl
similarity index 100%
rename from examples/powermodels/flux_forecaster.jl
rename to examples/flux/flux_forecaster.jl
diff --git a/examples/powermodels/generate_full_datasets.jl b/examples/powermodels/generate_full_datasets.jl
new file mode 100644
index 0000000..4223cfd
--- /dev/null
+++ b/examples/powermodels/generate_full_datasets.jl
@@ -0,0 +1,47 @@
+using TestEnv
+TestEnv.activate()
+
+using Arrow
+using Flux
+using HiGHS
+using JuMP
+using L2O
+import ParametricOptInterface as POI
+using Test
+using UUIDs
+
+# Paths
+path_powermodels = joinpath(pwd(), "examples", "powermodels")
+path = joinpath(path_powermodels, "data")
+include(joinpath(path_powermodels, "pglib_datagen.jl"))
+
+# Parameters
+num_batches = 2
+num_p = 10
+filetype = ArrowFile
+
+# Case name
+case_name = "pglib_opf_case5_pjm"
+case_file_path = joinpath(path, case_name)
+
+# Create directory if it does not exist
+if !isdir(case_file_path)
+    mkdir(case_file_path)
+end
+
+# Generate dataset
+batch_ids = Array{String}(undef, num_batches)
+success_solves = 0.0
+for i in 1:num_batches
+    _success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib(case_file_path, case_name; num_p=num_p, filetype=filetype)
+    success_solves += _success_solves
+    batch_ids[i] = batch_id
+end
+success_solves /= num_batches
+
+# Load input and output data tables
+file_ins = [joinpath(case_file_path, case_name * "_input_" * batch_id * "." * string(filetype)) for batch_id in batch_ids]
+file_outs = [joinpath(case_file_path, case_name * "_output_" * batch_id * "." * string(filetype)) for batch_id in batch_ids]
+
+input_table = Arrow.Table(file_ins)
+output_table = Arrow.Table(file_outs)
diff --git a/examples/powermodels/pglib_datagen.jl b/examples/powermodels/pglib_datagen.jl
index 294b711..cd9f983 100644
--- a/examples/powermodels/pglib_datagen.jl
+++ b/examples/powermodels/pglib_datagen.jl
@@ -52,11 +52,12 @@ function generate_dataset_pglib(
     num_p=10,
     load_sampler=load_sampler,
 )
-    case_file_path = joinpath(data_dir, case_name)
+    matpower_case_name = case_name * ".m"
+    case_file_path = joinpath(data_dir, matpower_case_name)
     if download_files && !isfile(case_file_path)
         Downloads.download(
             "https://raw.githubusercontent.com/power-grid-lib/pglib-opf/01681386d084d8bd03b429abcd1ee6966f68b9a3/" *
-            case_name,
+            matpower_case_name,
             case_file_path,
         )
     end
@@ -94,14 +95,15 @@ function generate_dataset_pglib(
             ],
         ),
     )
+    batch_id = string(uuid1())
     save(
         problem_iterator,
-        joinpath(data_dir, case_name * "_input." * string(filetype)),
+        joinpath(data_dir, case_name * "_input_" * batch_id * "." * string(filetype)),
         filetype,
     )
 
     # Solve the problem and return the number of successfull solves
-    file = joinpath(data_dir, case_name * "_output." * string(filetype))
+    file = joinpath(data_dir, case_name * "_output_" * batch_id * "." * string(filetype))
     variable_refs = return_variablerefs(pm)
     for variableref in variable_refs
         set_name(variableref, replace(name(variableref), "," => "_"))
@@ -110,16 +112,17 @@ function generate_dataset_pglib(
     recorder = Recorder{filetype}(file; primal_variables=variable_refs)
     return solve_batch(model, problem_iterator, recorder),
     number_vars,
-    length(original_load)
+    length(original_load),
+    batch_id
 end
 
 function test_pglib_datasetgen(path::AbstractString, case_name::AbstractString, num_p::Int)
-    file_in = joinpath(path, case_name * "_input.csv")
-    file_out = joinpath(path, case_name * "_output.csv")
     @testset "Dataset Generation pglib case" begin
-        success_solves, number_variables, number_loads = generate_dataset_pglib(
+        success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib(
             path, case_name; num_p=num_p
         )
+        file_in = joinpath(path, case_name * "_input_" * batch_id * ".csv")
+        file_out = joinpath(path, case_name * "_output_" * batch_id * ".csv")
         # Check if problem iterator was saved
         @test isfile(file_in)
         @test length(readdlm(file_in, ',')[:, 1]) == num_p + 1
@@ -129,6 +132,7 @@ function test_pglib_datasetgen(path::AbstractString, case_name::AbstractString,
         @test isfile(file_out)
         @test length(readdlm(file_out, ',')[:, 1]) == num_p * success_solves + 1
         @test length(readdlm(file_out, ',')[1, :]) == number_variables + 1
+
+        return file_in, file_out
     end
-    return file_in, file_out
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 490cf54..b74cbae 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,12 +15,12 @@ include(joinpath(test_dir, "datasetgen.jl"))
 
 include(joinpath(examples_dir, "powermodels", "pglib_datagen.jl"))
 
-include(joinpath(examples_dir, "powermodels", "flux_forecaster.jl"))
+include(joinpath(examples_dir, "flux", "flux_forecaster.jl"))
 
 @testset "L2O.jl" begin
     mktempdir() do path
         testdataset_gen(path)
-        file_in, file_out = test_pglib_datasetgen(path, "pglib_opf_case5_pjm.m", 20)
+        file_in, file_out = test_pglib_datasetgen(path, "pglib_opf_case5_pjm", 20)
         test_flux_forecaster(file_in, file_out)
     end
 end

From a4197f0acf3191c80326ae96eafae21ba3b7f242 Mon Sep 17 00:00:00 2001
From: andrewrosemberg <andrewrosemberg@gmail.com>
Date: Wed, 19 Jul 2023 15:14:58 -0300
Subject: [PATCH 2/4] add test script

---
 examples/flux/flux_forecaster_script.jl       | 73 +++++++++++++++++++
 ..._forecaster.jl => test_flux_forecaster.jl} |  1 -
 ...ts.jl => generate_full_datasets_script.jl} | 12 +--
 examples/powermodels/pglib_datagen.jl         |  1 +
 test/runtests.jl                              |  2 +-
 5 files changed, 76 insertions(+), 13 deletions(-)
 create mode 100644 examples/flux/flux_forecaster_script.jl
 rename examples/flux/{flux_forecaster.jl => test_flux_forecaster.jl} (99%)
 rename examples/powermodels/{generate_full_datasets.jl => generate_full_datasets_script.jl} (65%)

diff --git a/examples/flux/flux_forecaster_script.jl b/examples/flux/flux_forecaster_script.jl
new file mode 100644
index 0000000..80a2dda
--- /dev/null
+++ b/examples/flux/flux_forecaster_script.jl
@@ -0,0 +1,73 @@
+using TestEnv
+TestEnv.activate()
+
+using Arrow
+using Flux
+using DataFrames
+
+# Paths
+path_dataset = joinpath(pwd(), "examples", "powermodels", "data")
+case_name = "pglib_opf_case5_pjm"
+filetype = ArrowFile
+
+# Load input and output data tables
+iter_files = readdir(joinpath(path_dataset, case_name))
+file_ins = [joinpath(path_dataset, case_name, file) for file in iter_files if occursin("input", file)]
+file_outs = [joinpath(path_dataset, case_name, file) for file in iter_files if occursin("output", file)]
+batch_ids = [split(split(file, "_")[end], ".")[1] for file in file_ins]
+
+# Load input and output data tables
+train_idx = [1]
+test_idx = [2]
+
+input_table_train = Arrow.Table(file_ins[train_idx])
+output_table_train = Arrow.Table(file_outs[train_idx])
+
+input_table_test = Arrow.Table(file_ins[test_idx])
+output_table_test = Arrow.Table(file_outs[test_idx])
+
+# Convert to dataframes
+input_data_train = DataFrame(input_table_train)
+output_data_train = DataFrame(output_table_train)
+
+input_data_test = DataFrame(input_table_test)
+output_data_test = DataFrame(output_table_test)
+
+# Separate input and output variables
+output_variables_train = output_data_train[!, Not(:id)]
+input_features_train = innerjoin(input_data_train, output_data_train[!, [:id]], on = :id)[!, Not(:id)] # just use success solves
+
+output_variables_test = output_data_test[!, Not(:id)]
+input_features_test = innerjoin(input_data_test, output_data_test[!, [:id]], on = :id)[!, Not(:id)] # just use success solves
+
+# Define model
+model = Chain(
+    Dense(size(input_features_train, 2), 64, relu),
+    Dense(64, 32, relu),
+    Dense(32, size(output_variables_train, 2)),
+)
+
+# Define loss function
+loss(x, y) = Flux.mse(model(x), y)
+
+# Convert the data to matrices
+input_features_train = Matrix(input_features_train)'
+output_variables_train = Matrix(output_variables_train)'
+
+input_features_test = Matrix(input_features_test)'
+output_variables_test = Matrix(output_variables_test)'
+
+# Define the optimizer
+optimizer = Flux.ADAM()
+
+# Train the model
+Flux.train!(
+    loss, Flux.params(model), [(input_features_train, output_variables_train)], optimizer
+)
+
+# Make predictions
+predictions = model(input_features_test)
+
+# Calculate the error
+error = Flux.mse(predictions,output_variables_test)
+
diff --git a/examples/flux/flux_forecaster.jl b/examples/flux/test_flux_forecaster.jl
similarity index 99%
rename from examples/flux/flux_forecaster.jl
rename to examples/flux/test_flux_forecaster.jl
index 5a0d7a7..1d89844 100644
--- a/examples/flux/flux_forecaster.jl
+++ b/examples/flux/test_flux_forecaster.jl
@@ -1,7 +1,6 @@
 using Flux
 using CSV
 using DataFrames
-using L2O
 
 function test_flux_forecaster(file_in::AbstractString, file_out::AbstractString)
     @testset "Flux.jl" begin
diff --git a/examples/powermodels/generate_full_datasets.jl b/examples/powermodels/generate_full_datasets_script.jl
similarity index 65%
rename from examples/powermodels/generate_full_datasets.jl
rename to examples/powermodels/generate_full_datasets_script.jl
index 4223cfd..dc45716 100644
--- a/examples/powermodels/generate_full_datasets.jl
+++ b/examples/powermodels/generate_full_datasets_script.jl
@@ -2,11 +2,7 @@ using TestEnv
 TestEnv.activate()
 
 using Arrow
-using Flux
-using HiGHS
-using JuMP
 using L2O
-import ParametricOptInterface as POI
 using Test
 using UUIDs
 
@@ -17,7 +13,7 @@ include(joinpath(path_powermodels, "pglib_datagen.jl"))
 
 # Parameters
 num_batches = 2
-num_p = 10
+num_p = 1000
 filetype = ArrowFile
 
 # Case name
@@ -39,9 +35,3 @@ for i in 1:num_batches
 end
 success_solves /= num_batches
 
-# Load input and output data tables
-file_ins = [joinpath(case_file_path, case_name * "_input_" * batch_id * "." * string(filetype)) for batch_id in batch_ids]
-file_outs = [joinpath(case_file_path, case_name * "_output_" * batch_id * "." * string(filetype)) for batch_id in batch_ids]
-
-input_table = Arrow.Table(file_ins)
-output_table = Arrow.Table(file_outs)
diff --git a/examples/powermodels/pglib_datagen.jl b/examples/powermodels/pglib_datagen.jl
index cd9f983..dba3635 100644
--- a/examples/powermodels/pglib_datagen.jl
+++ b/examples/powermodels/pglib_datagen.jl
@@ -96,6 +96,7 @@ function generate_dataset_pglib(
         ),
     )
     batch_id = string(uuid1())
+    @info "Batch ID: $batch_id"
     save(
         problem_iterator,
         joinpath(data_dir, case_name * "_input_" * batch_id * "." * string(filetype)),
diff --git a/test/runtests.jl b/test/runtests.jl
index b74cbae..d742466 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,7 +15,7 @@ include(joinpath(test_dir, "datasetgen.jl"))
 
 include(joinpath(examples_dir, "powermodels", "pglib_datagen.jl"))
 
-include(joinpath(examples_dir, "flux", "flux_forecaster.jl"))
+include(joinpath(examples_dir, "flux", "test_flux_forecaster.jl"))
 
 @testset "L2O.jl" begin
     mktempdir() do path

From fd5c5001b8a9f050a1b76530ee7a0201e3b957ff Mon Sep 17 00:00:00 2001
From: andrewrosemberg <andrewrosemberg@gmail.com>
Date: Wed, 19 Jul 2023 16:35:15 -0300
Subject: [PATCH 3/4] update dataset generation script

---
 examples/flux/flux_forecaster_script.jl       | 10 +++++----
 .../generate_full_datasets_script.jl          | 14 +++++-------
 examples/powermodels/pglib_datagen.jl         | 22 ++++++++++++++-----
 3 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/examples/flux/flux_forecaster_script.jl b/examples/flux/flux_forecaster_script.jl
index 80a2dda..375996f 100644
--- a/examples/flux/flux_forecaster_script.jl
+++ b/examples/flux/flux_forecaster_script.jl
@@ -4,16 +4,19 @@ TestEnv.activate()
 using Arrow
 using Flux
 using DataFrames
+using L2O, PowerModels
 
 # Paths
 path_dataset = joinpath(pwd(), "examples", "powermodels", "data")
 case_name = "pglib_opf_case5_pjm"
 filetype = ArrowFile
+network_formulation = DCPPowerModel  # must match the formulation used for data generation
+case_file_path = joinpath(path_dataset, case_name, string(network_formulation))
 
 # Load input and output data tables
-iter_files = readdir(joinpath(path_dataset, case_name))
-file_ins = [joinpath(path_dataset, case_name, file) for file in iter_files if occursin("input", file)]
-file_outs = [joinpath(path_dataset, case_name, file) for file in iter_files if occursin("output", file)]
+iter_files = readdir(joinpath(case_file_path))
+file_ins = [joinpath(case_file_path, file) for file in iter_files if occursin("input", file)]
+file_outs = [joinpath(case_file_path, file) for file in iter_files if occursin("output", file)]
 batch_ids = [split(split(file, "_")[end], ".")[1] for file in file_ins]
 
 # Load input and output data tables
@@ -70,4 +73,3 @@ predictions = model(input_features_test)
 
 # Calculate the error
 error = Flux.mse(predictions,output_variables_test)
-
diff --git a/examples/powermodels/generate_full_datasets_script.jl b/examples/powermodels/generate_full_datasets_script.jl
index dc45716..c60a1cd 100644
--- a/examples/powermodels/generate_full_datasets_script.jl
+++ b/examples/powermodels/generate_full_datasets_script.jl
@@ -5,6 +5,7 @@ using Arrow
 using L2O
 using Test
 using UUIDs
+using PowerModels
 
 # Paths
 path_powermodels = joinpath(pwd(), "examples", "powermodels")
@@ -18,20 +19,15 @@ filetype = ArrowFile
 
 # Case name
 case_name = "pglib_opf_case5_pjm"
+network_formulation = DCPPowerModel
 case_file_path = joinpath(path, case_name)
 
-# Create directory if it does not exist
-if !isdir(case_file_path)
-    mkdir(case_file_path)
-end
-
 # Generate dataset
-batch_ids = Array{String}(undef, num_batches)
 success_solves = 0.0
 for i in 1:num_batches
-    _success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib(case_file_path, case_name; num_p=num_p, filetype=filetype)
+    _success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib(case_file_path, case_name; 
+        num_p=num_p, filetype=filetype, network_formulation=network_formulation
+    )
     success_solves += _success_solves
-    batch_ids[i] = batch_id
 end
 success_solves /= num_batches
-
diff --git a/examples/powermodels/pglib_datagen.jl b/examples/powermodels/pglib_datagen.jl
index dba3635..179ab41 100644
--- a/examples/powermodels/pglib_datagen.jl
+++ b/examples/powermodels/pglib_datagen.jl
@@ -51,7 +51,9 @@ function generate_dataset_pglib(
     download_files=true,
     num_p=10,
     load_sampler=load_sampler,
+    network_formulation=DCPPowerModel,
 )
+    # Download file
     matpower_case_name = case_name * ".m"
     case_file_path = joinpath(data_dir, matpower_case_name)
     if download_files && !isfile(case_file_path)
@@ -62,11 +64,18 @@ function generate_dataset_pglib(
         )
     end
 
+    # Save folder: one sub-directory per network formulation. `mkpath` is a
+    # no-op when the directory already exists, so repeated calls need no
+    # separate `isdir` check (which could race with another writer).
+    data_sim_dir = joinpath(data_dir, string(network_formulation))
+    mkpath(data_sim_dir)
+
     # Read data
     network_data = PowerModels.parse_file(case_file_path)
 
     # The problem to iterate over
     model = Model(() -> POI.Optimizer(HiGHS.Optimizer()))
+    MOI.set(model, MOI.Silent(), true)
 
     # Save original load value and Link POI
     original_load = [l["pd"] for l in values(network_data["load"])]
@@ -80,7 +89,7 @@ function generate_dataset_pglib(
     # Instantiate the model
     pm = instantiate_model(
         network_data,
-        DCPPowerModel,
+        network_formulation,
         PowerModels.build_opf;
         setting=Dict("output" => Dict("duals" => true)),
         jump_model=model,
@@ -99,12 +108,12 @@ function generate_dataset_pglib(
     @info "Batch ID: $batch_id"
     save(
         problem_iterator,
-        joinpath(data_dir, case_name * "_input_" * batch_id * "." * string(filetype)),
+        joinpath(data_sim_dir, case_name * "_" * string(network_formulation) * "_input_" * batch_id * "." * string(filetype)),
         filetype,
     )
 
     # Solve the problem and return the number of successfull solves
-    file = joinpath(data_dir, case_name * "_output_" * batch_id * "." * string(filetype))
+    file = joinpath(data_sim_dir, case_name * "_" * string(network_formulation) * "_output_" * batch_id * "." * string(filetype))
     variable_refs = return_variablerefs(pm)
     for variableref in variable_refs
         set_name(variableref, replace(name(variableref), "," => "_"))
@@ -119,11 +128,12 @@ end
 
 function test_pglib_datasetgen(path::AbstractString, case_name::AbstractString, num_p::Int)
     @testset "Dataset Generation pglib case" begin
+        network_formulation = DCPPowerModel
         success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib(
-            path, case_name; num_p=num_p
+            path, case_name; num_p=num_p, network_formulation=network_formulation
         )
-        file_in = joinpath(path, case_name * "_input_" * batch_id * ".csv")
-        file_out = joinpath(path, case_name * "_output_" * batch_id * ".csv")
+        file_in = joinpath(path, string(network_formulation), case_name * "_" * string(network_formulation) * "_input_" * batch_id * ".csv")
+        file_out = joinpath(path, string(network_formulation), case_name * "_" * string(network_formulation) * "_output_" * batch_id * ".csv")
         # Check if problem iterator was saved
         @test isfile(file_in)
         @test length(readdlm(file_in, ',')[:, 1]) == num_p + 1

From af3a0772d95eae4710e880c70cafc0ed8d75c032 Mon Sep 17 00:00:00 2001
From: andrewrosemberg <andrewrosemberg@gmail.com>
Date: Wed, 19 Jul 2023 16:35:33 -0300
Subject: [PATCH 4/4] fix format

---
 examples/flux/flux_forecaster_script.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/flux/flux_forecaster_script.jl b/examples/flux/flux_forecaster_script.jl
index 375996f..2d6d670 100644
--- a/examples/flux/flux_forecaster_script.jl
+++ b/examples/flux/flux_forecaster_script.jl
@@ -72,4 +72,4 @@ Flux.train!(
 predictions = model(input_features_test)
 
 # Calculate the error
-error = Flux.mse(predictions,output_variables_test)
+error = Flux.mse(predictions, output_variables_test)