From dd7afd7c8a806d6bf1592470880268d0e72ae6c7 Mon Sep 17 00:00:00 2001 From: andrewrosemberg Date: Wed, 19 Jul 2023 14:42:22 -0300 Subject: [PATCH 1/4] add script to generate datasets --- .../{powermodels => flux}/flux_forecaster.jl | 0 .../powermodels/generate_full_datasets.jl | 47 +++++++++++++++++++ examples/powermodels/pglib_datagen.jl | 22 +++++---- test/runtests.jl | 4 +- 4 files changed, 62 insertions(+), 11 deletions(-) rename examples/{powermodels => flux}/flux_forecaster.jl (100%) create mode 100644 examples/powermodels/generate_full_datasets.jl diff --git a/examples/powermodels/flux_forecaster.jl b/examples/flux/flux_forecaster.jl similarity index 100% rename from examples/powermodels/flux_forecaster.jl rename to examples/flux/flux_forecaster.jl diff --git a/examples/powermodels/generate_full_datasets.jl b/examples/powermodels/generate_full_datasets.jl new file mode 100644 index 0000000..4223cfd --- /dev/null +++ b/examples/powermodels/generate_full_datasets.jl @@ -0,0 +1,47 @@ +using TestEnv +TestEnv.activate() + +using Arrow +using Flux +using HiGHS +using JuMP +using L2O +import ParametricOptInterface as POI +using Test +using UUIDs + +# Paths +path_powermodels = joinpath(pwd(), "examples", "powermodels") +path = joinpath(path_powermodels, "data") +include(joinpath(path_powermodels, "pglib_datagen.jl")) + +# Parameters +num_batches = 2 +num_p = 10 +filetype = ArrowFile + +# Case name +case_name = "pglib_opf_case5_pjm" +case_file_path = joinpath(path, case_name) + +# Create directory if it does not exist +if !isdir(case_file_path) + mkdir(case_file_path) +end + +# Generate dataset +batch_ids = Array{String}(undef, num_batches) +success_solves = 0.0 +for i in 1:num_batches + _success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib(case_file_path, case_name; num_p=num_p, filetype=filetype) + success_solves += _success_solves + batch_ids[i] = batch_id +end +success_solves /= num_batches + +# Load input and output data tables 
+file_ins = [joinpath(case_file_path, case_name * "_input_" * batch_id * "." * string(filetype)) for batch_id in batch_ids] +file_outs = [joinpath(case_file_path, case_name * "_output_" * batch_id * "." * string(filetype)) for batch_id in batch_ids] + +input_table = Arrow.Table(file_ins) +output_table = Arrow.Table(file_outs) diff --git a/examples/powermodels/pglib_datagen.jl b/examples/powermodels/pglib_datagen.jl index 294b711..cd9f983 100644 --- a/examples/powermodels/pglib_datagen.jl +++ b/examples/powermodels/pglib_datagen.jl @@ -52,11 +52,12 @@ function generate_dataset_pglib( num_p=10, load_sampler=load_sampler, ) - case_file_path = joinpath(data_dir, case_name) + matpower_case_name = case_name * ".m" + case_file_path = joinpath(data_dir, matpower_case_name) if download_files && !isfile(case_file_path) Downloads.download( "https://raw.githubusercontent.com/power-grid-lib/pglib-opf/01681386d084d8bd03b429abcd1ee6966f68b9a3/" * - case_name, + matpower_case_name, case_file_path, ) end @@ -94,14 +95,15 @@ function generate_dataset_pglib( ], ), ) + batch_id = string(uuid1()) save( problem_iterator, - joinpath(data_dir, case_name * "_input." * string(filetype)), + joinpath(data_dir, case_name * "_input_" * batch_id * "." * string(filetype)), filetype, ) # Solve the problem and return the number of successfull solves - file = joinpath(data_dir, case_name * "_output." * string(filetype)) + file = joinpath(data_dir, case_name * "_output_" * batch_id * "." 
* string(filetype)) variable_refs = return_variablerefs(pm) for variableref in variable_refs set_name(variableref, replace(name(variableref), "," => "_")) @@ -110,16 +112,17 @@ function generate_dataset_pglib( recorder = Recorder{filetype}(file; primal_variables=variable_refs) return solve_batch(model, problem_iterator, recorder), number_vars, - length(original_load) + length(original_load), + batch_id end function test_pglib_datasetgen(path::AbstractString, case_name::AbstractString, num_p::Int) - file_in = joinpath(path, case_name * "_input.csv") - file_out = joinpath(path, case_name * "_output.csv") @testset "Dataset Generation pglib case" begin - success_solves, number_variables, number_loads = generate_dataset_pglib( + success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib( path, case_name; num_p=num_p ) + file_in = joinpath(path, case_name * "_input_" * batch_id * ".csv") + file_out = joinpath(path, case_name * "_output_" * batch_id * ".csv") # Check if problem iterator was saved @test isfile(file_in) @test length(readdlm(file_in, ',')[:, 1]) == num_p + 1 @@ -129,6 +132,7 @@ function test_pglib_datasetgen(path::AbstractString, case_name::AbstractString, @test isfile(file_out) @test length(readdlm(file_out, ',')[:, 1]) == num_p * success_solves + 1 @test length(readdlm(file_out, ',')[1, :]) == number_variables + 1 + + return file_in, file_out end - return file_in, file_out end diff --git a/test/runtests.jl b/test/runtests.jl index 490cf54..b74cbae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,12 +15,12 @@ include(joinpath(test_dir, "datasetgen.jl")) include(joinpath(examples_dir, "powermodels", "pglib_datagen.jl")) -include(joinpath(examples_dir, "powermodels", "flux_forecaster.jl")) +include(joinpath(examples_dir, "flux", "flux_forecaster.jl")) @testset "L2O.jl" begin mktempdir() do path testdataset_gen(path) - file_in, file_out = test_pglib_datasetgen(path, "pglib_opf_case5_pjm.m", 20) + file_in, file_out = 
test_pglib_datasetgen(path, "pglib_opf_case5_pjm", 20) test_flux_forecaster(file_in, file_out) end end From a4197f0acf3191c80326ae96eafae21ba3b7f242 Mon Sep 17 00:00:00 2001 From: andrewrosemberg Date: Wed, 19 Jul 2023 15:14:58 -0300 Subject: [PATCH 2/4] add test script --- examples/flux/flux_forecaster_script.jl | 73 +++++++++++++++++++ ..._forecaster.jl => test_flux_forecaster.jl} | 1 - ...ts.jl => generate_full_datasets_script.jl} | 12 +-- examples/powermodels/pglib_datagen.jl | 1 + test/runtests.jl | 2 +- 5 files changed, 76 insertions(+), 13 deletions(-) create mode 100644 examples/flux/flux_forecaster_script.jl rename examples/flux/{flux_forecaster.jl => test_flux_forecaster.jl} (99%) rename examples/powermodels/{generate_full_datasets.jl => generate_full_datasets_script.jl} (65%) diff --git a/examples/flux/flux_forecaster_script.jl b/examples/flux/flux_forecaster_script.jl new file mode 100644 index 0000000..80a2dda --- /dev/null +++ b/examples/flux/flux_forecaster_script.jl @@ -0,0 +1,73 @@ +using TestEnv +TestEnv.activate() + +using Arrow +using Flux +using DataFrames + +# Paths +path_dataset = joinpath(pwd(), "examples", "powermodels", "data") +case_name = "pglib_opf_case5_pjm" +filetype = ArrowFile + +# Load input and output data tables +iter_files = readdir(joinpath(path_dataset, case_name)) +file_ins = [joinpath(path_dataset, case_name, file) for file in iter_files if occursin("input", file)] +file_outs = [joinpath(path_dataset, case_name, file) for file in iter_files if occursin("output", file)] +batch_ids = [split(split(file, "_")[end], ".")[1] for file in file_ins] + +# Load input and output data tables +train_idx = [1] +test_idx = [2] + +input_table_train = Arrow.Table(file_ins[train_idx]) +output_table_train = Arrow.Table(file_outs[train_idx]) + +input_table_test = Arrow.Table(file_ins[test_idx]) +output_table_test = Arrow.Table(file_outs[test_idx]) + +# Convert to dataframes +input_data_train = DataFrame(input_table_train) +output_data_train = 
DataFrame(output_table_train) + +input_data_test = DataFrame(input_table_test) +output_data_test = DataFrame(output_table_test) + +# Separate input and output variables +output_variables_train = output_data_train[!, Not(:id)] +input_features_train = innerjoin(input_data_train, output_data_train[!, [:id]], on = :id)[!, Not(:id)] # just use success solves + +output_variables_test = output_data_test[!, Not(:id)] +input_features_test = innerjoin(input_data_test, output_data_test[!, [:id]], on = :id)[!, Not(:id)] # just use success solves + +# Define model +model = Chain( + Dense(size(input_features_train, 2), 64, relu), + Dense(64, 32, relu), + Dense(32, size(output_variables_train, 2)), +) + +# Define loss function +loss(x, y) = Flux.mse(model(x), y) + +# Convert the data to matrices +input_features_train = Matrix(input_features_train)' +output_variables_train = Matrix(output_variables_train)' + +input_features_test = Matrix(input_features_test)' +output_variables_test = Matrix(output_variables_test)' + +# Define the optimizer +optimizer = Flux.ADAM() + +# Train the model +Flux.train!( + loss, Flux.params(model), [(input_features_train, output_variables_train)], optimizer +) + +# Make predictions +predictions = model(input_features_test) + +# Calculate the error +error = Flux.mse(predictions,output_variables_test) + diff --git a/examples/flux/flux_forecaster.jl b/examples/flux/test_flux_forecaster.jl similarity index 99% rename from examples/flux/flux_forecaster.jl rename to examples/flux/test_flux_forecaster.jl index 5a0d7a7..1d89844 100644 --- a/examples/flux/flux_forecaster.jl +++ b/examples/flux/test_flux_forecaster.jl @@ -1,7 +1,6 @@ using Flux using CSV using DataFrames -using L2O function test_flux_forecaster(file_in::AbstractString, file_out::AbstractString) @testset "Flux.jl" begin diff --git a/examples/powermodels/generate_full_datasets.jl b/examples/powermodels/generate_full_datasets_script.jl similarity index 65% rename from 
examples/powermodels/generate_full_datasets.jl rename to examples/powermodels/generate_full_datasets_script.jl index 4223cfd..dc45716 100644 --- a/examples/powermodels/generate_full_datasets.jl +++ b/examples/powermodels/generate_full_datasets_script.jl @@ -2,11 +2,7 @@ using TestEnv TestEnv.activate() using Arrow -using Flux -using HiGHS -using JuMP using L2O -import ParametricOptInterface as POI using Test using UUIDs @@ -17,7 +13,7 @@ include(joinpath(path_powermodels, "pglib_datagen.jl")) # Parameters num_batches = 2 -num_p = 10 +num_p = 1000 filetype = ArrowFile # Case name @@ -39,9 +35,3 @@ for i in 1:num_batches end success_solves /= num_batches -# Load input and output data tables -file_ins = [joinpath(case_file_path, case_name * "_input_" * batch_id * "." * string(filetype)) for batch_id in batch_ids] -file_outs = [joinpath(case_file_path, case_name * "_output_" * batch_id * "." * string(filetype)) for batch_id in batch_ids] - -input_table = Arrow.Table(file_ins) -output_table = Arrow.Table(file_outs) diff --git a/examples/powermodels/pglib_datagen.jl b/examples/powermodels/pglib_datagen.jl index cd9f983..dba3635 100644 --- a/examples/powermodels/pglib_datagen.jl +++ b/examples/powermodels/pglib_datagen.jl @@ -96,6 +96,7 @@ function generate_dataset_pglib( ), ) batch_id = string(uuid1()) + @info "Batch ID: $batch_id" save( problem_iterator, joinpath(data_dir, case_name * "_input_" * batch_id * "." 
* string(filetype)), diff --git a/test/runtests.jl b/test/runtests.jl index b74cbae..d742466 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,7 +15,7 @@ include(joinpath(test_dir, "datasetgen.jl")) include(joinpath(examples_dir, "powermodels", "pglib_datagen.jl")) -include(joinpath(examples_dir, "flux", "flux_forecaster.jl")) +include(joinpath(examples_dir, "flux", "test_flux_forecaster.jl")) @testset "L2O.jl" begin mktempdir() do path From fd5c5001b8a9f050a1b76530ee7a0201e3b957ff Mon Sep 17 00:00:00 2001 From: andrewrosemberg Date: Wed, 19 Jul 2023 16:35:15 -0300 Subject: [PATCH 3/4] update dataset generation script --- examples/flux/flux_forecaster_script.jl | 10 +++++---- .../generate_full_datasets_script.jl | 14 +++++------- examples/powermodels/pglib_datagen.jl | 22 ++++++++++++++----- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/examples/flux/flux_forecaster_script.jl b/examples/flux/flux_forecaster_script.jl index 80a2dda..375996f 100644 --- a/examples/flux/flux_forecaster_script.jl +++ b/examples/flux/flux_forecaster_script.jl @@ -4,16 +4,19 @@ TestEnv.activate() using Arrow using Flux using DataFrames +using PowerModels # Paths path_dataset = joinpath(pwd(), "examples", "powermodels", "data") case_name = "pglib_opf_case5_pjm" filetype = ArrowFile +network_formulation = DCPPowerModel +case_file_path = joinpath(path_dataset, case_name, string(network_formulation)) # Load input and output data tables -iter_files = readdir(joinpath(path_dataset, case_name)) -file_ins = [joinpath(path_dataset, case_name, file) for file in iter_files if occursin("input", file)] -file_outs = [joinpath(path_dataset, case_name, file) for file in iter_files if occursin("output", file)] +iter_files = readdir(joinpath(case_file_path)) +file_ins = [joinpath(case_file_path, file) for file in iter_files if occursin("input", file)] +file_outs = [joinpath(case_file_path, file) for file in iter_files if occursin("output", file)] batch_ids = [split(split(file, 
"_")[end], ".")[1] for file in file_ins] # Load input and output data tables @@ -70,4 +73,3 @@ predictions = model(input_features_test) # Calculate the error error = Flux.mse(predictions,output_variables_test) - diff --git a/examples/powermodels/generate_full_datasets_script.jl b/examples/powermodels/generate_full_datasets_script.jl index dc45716..c60a1cd 100644 --- a/examples/powermodels/generate_full_datasets_script.jl +++ b/examples/powermodels/generate_full_datasets_script.jl @@ -5,6 +5,7 @@ using Arrow using L2O using Test using UUIDs +using PowerModels # Paths path_powermodels = joinpath(pwd(), "examples", "powermodels") @@ -18,20 +19,15 @@ filetype = ArrowFile # Case name case_name = "pglib_opf_case5_pjm" +network_formulation = DCPPowerModel case_file_path = joinpath(path, case_name) -# Create directory if it does not exist -if !isdir(case_file_path) - mkdir(case_file_path) -end - # Generate dataset -batch_ids = Array{String}(undef, num_batches) success_solves = 0.0 for i in 1:num_batches - _success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib(case_file_path, case_name; num_p=num_p, filetype=filetype) + _success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib(case_file_path, case_name; + num_p=num_p, filetype=filetype, network_formulation=network_formulation + ) success_solves += _success_solves - batch_ids[i] = batch_id end success_solves /= num_batches - diff --git a/examples/powermodels/pglib_datagen.jl b/examples/powermodels/pglib_datagen.jl index dba3635..179ab41 100644 --- a/examples/powermodels/pglib_datagen.jl +++ b/examples/powermodels/pglib_datagen.jl @@ -51,7 +51,9 @@ function generate_dataset_pglib( download_files=true, num_p=10, load_sampler=load_sampler, + network_formulation=DCPPowerModel, ) + # Download file matpower_case_name = case_name * ".m" case_file_path = joinpath(data_dir, matpower_case_name) if download_files && !isfile(case_file_path) @@ -62,11 +64,18 @@ function 
generate_dataset_pglib( ) end + # save folder + data_sim_dir = joinpath(data_dir, string(network_formulation)) + if !isdir(data_sim_dir) + mkpath(data_sim_dir) + end + # Read data network_data = PowerModels.parse_file(case_file_path) # The problem to iterate over model = Model(() -> POI.Optimizer(HiGHS.Optimizer())) + MOI.set(model, MOI.Silent(), true) # Save original load value and Link POI original_load = [l["pd"] for l in values(network_data["load"])] @@ -80,7 +89,7 @@ function generate_dataset_pglib( # Instantiate the model pm = instantiate_model( network_data, - DCPPowerModel, + network_formulation, PowerModels.build_opf; setting=Dict("output" => Dict("duals" => true)), jump_model=model, @@ -99,12 +108,12 @@ function generate_dataset_pglib( @info "Batch ID: $batch_id" save( problem_iterator, - joinpath(data_dir, case_name * "_input_" * batch_id * "." * string(filetype)), + joinpath(data_sim_dir, case_name * "_" * string(network_formulation) * "_input_" * batch_id * "." * string(filetype)), filetype, ) # Solve the problem and return the number of successfull solves - file = joinpath(data_dir, case_name * "_output_" * batch_id * "." * string(filetype)) + file = joinpath(data_sim_dir, case_name * "_" * string(network_formulation) * "_output_" * batch_id * "." 
* string(filetype)) variable_refs = return_variablerefs(pm) for variableref in variable_refs set_name(variableref, replace(name(variableref), "," => "_")) @@ -119,11 +128,12 @@ end function test_pglib_datasetgen(path::AbstractString, case_name::AbstractString, num_p::Int) @testset "Dataset Generation pglib case" begin + network_formulation = DCPPowerModel success_solves, number_variables, number_loads, batch_id = generate_dataset_pglib( - path, case_name; num_p=num_p + path, case_name; num_p=num_p, network_formulation=network_formulation ) - file_in = joinpath(path, case_name * "_input_" * batch_id * ".csv") - file_out = joinpath(path, case_name * "_output_" * batch_id * ".csv") + file_in = joinpath(path, string(network_formulation), case_name * "_" * string(network_formulation) * "_input_" * batch_id * ".csv") + file_out = joinpath(path, string(network_formulation), case_name * "_" * string(network_formulation) * "_output_" * batch_id * ".csv") # Check if problem iterator was saved @test isfile(file_in) @test length(readdlm(file_in, ',')[:, 1]) == num_p + 1 From af3a0772d95eae4710e880c70cafc0ed8d75c032 Mon Sep 17 00:00:00 2001 From: andrewrosemberg Date: Wed, 19 Jul 2023 16:35:33 -0300 Subject: [PATCH 4/4] fix format --- examples/flux/flux_forecaster_script.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/flux/flux_forecaster_script.jl b/examples/flux/flux_forecaster_script.jl index 375996f..2d6d670 100644 --- a/examples/flux/flux_forecaster_script.jl +++ b/examples/flux/flux_forecaster_script.jl @@ -72,4 +72,4 @@ Flux.train!( predictions = model(input_features_test) # Calculate the error -error = Flux.mse(predictions,output_variables_test) +error = Flux.mse(predictions, output_variables_test)