9 changes: 8 additions & 1 deletion .gitignore
@@ -12,9 +12,16 @@ Manifest.toml
*.out
*.sbatch
examples/unitcommitment/app/*
*/app/*
*.edu
examples/unitcommitment/wandb/*
*.yaml
*.png
*.jls
*.jlso
*.jld2
*.txt
*.json
*.log
*.wandb
*latest-run
*.html
2 changes: 1 addition & 1 deletion README.md
@@ -134,7 +134,7 @@ Flux.train!(loss, Flux.params(model), [(input_features, output_variables)], opti
predictions = model(input_features)
```

## Comming Soon
## Coming Soon

Future features:
- ML objectives that penalize infeasible predictions;
2 changes: 1 addition & 1 deletion examples/powermodels/HuggingFaceDatasets.jl
@@ -31,7 +31,7 @@ cache_dir="./examples/powermodels/data/"
organization = "L2O"
dataset = "pglib_opf_solves"
case_name = "pglib_opf_case300_ieee"
formulation = "DCPPowerModel"
formulation = "SOCWRConicPowerModel" # ACPPowerModel SOCWRConicPowerModel DCPPowerModel
io_type = "input"
download_dataset(organization, dataset, case_name, io_type; cache_dir=cache_dir)

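If the matching solver outputs are wanted as well, the same helper can presumably be pointed at the output side; a minimal sketch, assuming `download_dataset` accepts `"output"` for `io_type` just like `"input"` (only the input call appears in this hunk):

```julia
# Hypothetical companion call (not in this diff): fetch solver outputs for the same case,
# assuming io_type = "output" is supported analogously to "input".
io_type = "output"
download_dataset(organization, dataset, case_name, io_type; cache_dir=cache_dir)
```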
95 changes: 95 additions & 0 deletions examples/powermodels/data_split.jl
@@ -0,0 +1,95 @@
####################################################
############## PowerModels Data Split ##############
####################################################
import Pkg
Pkg.activate(dirname(dirname(@__DIR__)))

using Distributed
using Random

##############
# Load Packages everywhere
##############

@everywhere import Pkg
@everywhere Pkg.activate(dirname(dirname(@__DIR__)))
@everywhere Pkg.instantiate()
@everywhere using DataFrames
@everywhere using CSV            # used by the CSVFile branch below
@everywhere using L2O
@everywhere using Gurobi
@everywhere using Arrow
@everywhere using MLUtils
@everywhere using SharedArrays   # shared result array for the distributed hull check

##############
# Parameters
##############
case_name = "pglib_opf_case300_ieee" # pglib_opf_case300_ieee # pglib_opf_case5_pjm
filetype = ArrowFile # ArrowFile # CSVFile
path_dataset = joinpath(dirname(@__FILE__), "data")
case_file_path = joinpath(path_dataset, case_name)
case_file_path_input = joinpath(case_file_path, "input")

mkpath(joinpath(case_file_path_input, "train"))
mkpath(joinpath(case_file_path_input, "test"))

##############
# Load Data
##############
iter_files_in = readdir(joinpath(case_file_path_input))
iter_files_in = filter(x -> occursin(string(filetype), x), iter_files_in)
file_ins = [
    joinpath(case_file_path_input, file) for file in iter_files_in if occursin("input", file)
]
batch_ids = [split(split(file, "_")[end], ".")[1] for file in file_ins]

# Load the input data table (all batch files at once)
if filetype === ArrowFile
    input_table_train = Arrow.Table(file_ins)
else
    input_table_train = CSV.read(file_ins, DataFrame)
end

# Convert to dataframes
input_data = DataFrame(input_table_train)

##############
# Split Data
##############
Random.seed!(123)
train_idx, test_idx = splitobs(1:size(input_data, 1), at=(0.7), shuffle=true)

train_table = input_data[train_idx, :]
test_table = input_data[test_idx, :]

batch_size = 10

num_batches = ceil(Int, length(test_idx) / batch_size)

##############
# Check Convex-Hull
##############

@info "Checking whether test points lie in the convex hull of the training set" batch_size num_batches
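# A test point x lies in the convex hull of the training rows v_i exactly when the linear
# feasibility problem  sum_i λ_i * v_i = x,  sum_i λ_i = 1,  λ ≥ 0  has a solution;
# `inconvexhull` (from L2O) is assumed to decide this membership for each row of the test
# batch via the supplied optimizer (Gurobi here).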

inhull = SharedArray{Bool}(length(test_idx))  # shared so writes from worker processes are visible on the master
@sync @distributed for i in 1:num_batches
    idx_range = (i-1)*batch_size+1:min(i*batch_size, length(test_idx))
    batch = test_table[idx_range, :]
    inhull[idx_range] = inconvexhull(Matrix(train_table[!, Not(:id)]), Matrix(batch[!, Not(:id)]), Gurobi.Optimizer)
    @info "Batch $i of $num_batches done"
end

test_table.in_train_convex_hull = Array(inhull)  # materialize the shared array as a plain column

##############
# Save Files
##############

# Save the training and test sets
if filetype === ArrowFile
    Arrow.write(joinpath(case_file_path_input, "train", case_name * "_train_input" * ".arrow"), train_table)
    Arrow.write(joinpath(case_file_path_input, "test", case_name * "_test_input" * ".arrow"), test_table)
else
    CSV.write(joinpath(case_file_path_input, "train", case_name * "_train_input" * ".csv"), train_table)
    CSV.write(joinpath(case_file_path_input, "test", case_name * "_test_input" * ".csv"), test_table)
end
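As a quick sanity check once this script has run, the saved splits can be read back from the paths written above; a minimal sketch, not part of the diff:

```julia
# Read the Arrow splits written by data_split.jl back into DataFrames
# (paths assume the same case_file_path_input / case_name as above).
using Arrow, DataFrames
train_back = DataFrame(Arrow.Table(joinpath(case_file_path_input, "train", case_name * "_train_input.arrow")))
test_back  = DataFrame(Arrow.Table(joinpath(case_file_path_input, "test", case_name * "_test_input.arrow")))
# The test split carries the extra Boolean column `in_train_convex_hull` computed above.
```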
97 changes: 0 additions & 97 deletions examples/powermodels/flux_forecaster_script.jl

This file was deleted.

84 changes: 84 additions & 0 deletions examples/powermodels/jls2jld2.jl
@@ -0,0 +1,84 @@
using Arrow
using CSV
using MLJFlux
using MLUtils
using Flux
using MLJ
using CUDA
using DataFrames
using PowerModels
using L2O
using Random
using JLD2
using Wandb, Dates, Logging

include(joinpath(dirname(dirname(@__FILE__)), "training_utils.jl")) # include("../training_utils.jl")

##############
# Parameters
##############
case_name = ARGS[1] # case_name="pglib_opf_case300_ieee" # pglib_opf_case5_pjm
network_formulation = ARGS[2] # network_formulation=ACPPowerModel SOCWRConicPowerModel DCPPowerModel
icnn = parse(Bool, ARGS[3]) # icnn=true # false
filetype = ArrowFile # ArrowFile # CSVFile
layers = [512, 256, 64] # [256, 64, 32]
path_dataset = joinpath(dirname(@__FILE__), "data")
case_file_path = joinpath(path_dataset, case_name)
case_file_path_output = joinpath(case_file_path, "output", string(network_formulation))
case_file_path_input = joinpath(case_file_path, "input", "train")
save_file = if icnn
    "$(case_name)_$(network_formulation)_$(replace(string(layers), ", " => "_"))_icnn"
else
    "$(case_name)_$(network_formulation)_$(replace(string(layers), ", " => "_"))_dnn"
end
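# For example, case_name = "pglib_opf_case300_ieee", network_formulation = "ACPPowerModel", icnn = true
# and the default layers give save_file = "pglib_opf_case300_ieee_ACPPowerModel_[512_256_64]_icnn".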

##############
# Load Data
##############

iter_files_in = readdir(joinpath(case_file_path_input))
iter_files_in = filter(x -> occursin(string(filetype), x), iter_files_in)
file_ins = [
    joinpath(case_file_path_input, file) for file in iter_files_in if occursin("input", file)
]
iter_files_out = readdir(joinpath(case_file_path_output))
iter_files_out = filter(x -> occursin(string(filetype), x), iter_files_out)
file_outs = [
    joinpath(case_file_path_output, file) for file in iter_files_out if occursin("output", file)
]
# batch_ids = [split(split(file, "_")[end], ".")[1] for file in file_ins]

# Load input and output data tables
if filetype === ArrowFile
    input_table_train = Arrow.Table(file_ins)
    output_table_train = Arrow.Table(file_outs)
else
    input_table_train = CSV.read(file_ins, DataFrame)
    output_table_train = CSV.read(file_outs, DataFrame)
end

# Convert to dataframes
input_data = DataFrame(input_table_train)
output_data = DataFrame(output_table_train)

# Filter out rows whose operational_cost is essentially zero (indicative of numerical issues); 10 acts as a loose tolerance
output_data = output_data[output_data.operational_cost .> 10, :]

# Match inputs to outputs on :id, keeping operational_cost as the target
train_table = innerjoin(input_data, output_data[!, [:id, :operational_cost]]; on=:id)

input_features = names(train_table[!, Not([:id, :operational_cost])])

##############
# Load JLS Model
##############
num = 3 # suffix of the serialized machine file to load (save_file * "$num.jls")
model_dir = joinpath(dirname(@__FILE__), "models")
mach = machine(joinpath(model_dir, save_file * "$num.jls"))

##############
# Save JLD2 Model
##############
model = mach.fitresult[1]
model_state = Flux.state(model)
jldsave(joinpath(model_dir, save_file * ".jld2"), model_state=model_state, input_features=input_features, layers=mach.model.builder.hidden_sizes)
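For reference, the saved state can later be restored into a freshly built network; a minimal sketch, where `build_model(n_in, hidden_sizes)` is a hypothetical helper that must recreate the trained architecture:

```julia
# Minimal sketch of reloading the state saved above.
using JLD2, Flux
saved = JLD2.load(joinpath(model_dir, save_file * ".jld2"))
model = build_model(length(saved["input_features"]), saved["layers"])  # hypothetical builder
Flux.loadmodel!(model, saved["model_state"])
```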