-
Notifications
You must be signed in to change notification settings - Fork 282
Refactor workflow to stateless run manifest architecture and SA input design coordination #3708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
2b08818
efc6a33
ceb3573
31b4c12
5902fdb
16499f4
6d234f9
7e62bef
a223998
8223f1d
770814f
20d14e0
dd525d1
ceb53cc
982cd35
fc86545
78bf963
3ce205b
20643d5
502622e
c9be470
d06704d
1918d46
60d80b3
9a6c512
0aa313e
d8682e3
7cc8762
80fd907
708254b
af8f89e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,6 +40,7 @@ Imports: | |
| PEcAn.uncertainty, | ||
| PEcAn.utils, | ||
| purrr (>= 0.2.3), | ||
| rlang, | ||
| XML | ||
| Suggests: | ||
| mockery, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,11 +8,12 @@ | |
| #' | ||
| #' @param settings a PEcAn settings list | ||
| #' @param ensemble.size number of ensemble runs | ||
| #' @param input_design input indices for samples | ||
| #' @param write should the runs be written to the database? | ||
| #' @param posterior.files Filenames for posteriors for drawing samples for ensemble and sensitivity | ||
| #' analysis (e.g. post.distns.Rdata, or prior.distns.Rdata) | ||
| #' @param overwrite logical: Replace output files that already exist? | ||
| #' @param input_design Input design specification. A list with \code{ensemble} and/or | ||
| #' \code{sensitivity} entries, each containing a data.frame of input indices. | ||
| #' | ||
| #' @details The default value for \code{posterior.files} is NA, in which case the | ||
| #' most recent posterior or prior (in that order) for the workflow is used. | ||
|
|
@@ -23,11 +24,16 @@ | |
| #' @return an updated settings list, which includes ensemble IDs for SA and ensemble analysis | ||
| #' @export | ||
| #' | ||
| #' @author David LeBauer, Shawn Serbin, Ryan Kelly, Mike Dietze | ||
| #' @author David LeBauer, Shawn Serbin, Ryan Kelly, Mike Dietze, Akash B V | ||
|
|
||
| run.write.configs <- function(settings, ensemble.size, input_design, write = TRUE, | ||
| posterior.files = rep(NA, length(settings$pfts)), | ||
| overwrite = TRUE) { | ||
|
|
||
| # extract designs from input_design list | ||
| input_design_ens <- if (!is.null(input_design)) input_design$ensemble else NULL | ||
| input_design_sa <- if (!is.null(input_design)) input_design$sensitivity else NULL | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Per earlier comments, I'd recommend only having one input_design, not separate ens and sa designs |
||
|
|
||
| ## Skip database connection only when write is not TRUE and settings$database is NULL | ||
| if (!isTRUE(write) && is.null(settings$database)) { | ||
| PEcAn.logger::logger.info("Not writing this run to database, so database connection skipped") | ||
|
|
@@ -105,10 +111,10 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU | |
|
|
||
| samples.file <- file.path(settings$outdir, "samples.Rdata") | ||
| if (file.exists(samples.file)) { | ||
| samples <- new.env() | ||
| load(samples.file, envir = samples) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples | ||
| trait.samples <- samples$trait.samples | ||
| trait_sample_indices <- input_design[["param"]] | ||
| existing_data <- new.env() | ||
| load(samples.file, envir = existing_data) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples | ||
| trait.samples <- existing_data$trait.samples | ||
| trait_sample_indices <- input_design_ens[["param"]] | ||
| ensemble.samples <- list() | ||
| for (pft in names(trait.samples)) { | ||
| pft_traits <- trait.samples[[pft]] | ||
|
|
@@ -120,9 +126,9 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU | |
| ) | ||
| names(ensemble.samples[[pft]]) <- names(pft_traits) | ||
| } | ||
| sa.samples <- samples$sa.samples | ||
| runs.samples <- samples$runs.samples | ||
| ## env.samples <- samples$env.samples | ||
| sa.samples <- existing_data$sa.samples | ||
| ## runs.samples <- existing_data$runs.samples | ||
| ## env.samples <- existing_data$env.samples | ||
| } else { | ||
| PEcAn.logger::logger.error(samples.file, "not found, this file is required by the run.write.configs function") | ||
| } | ||
|
|
@@ -159,6 +165,9 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU | |
| pft.names <- names(trait.samples) | ||
| trait.names <- lapply(trait.samples, names) | ||
|
|
||
| # Initialize the Manifest Dataframe | ||
| run_manifest_df <- data.frame() | ||
|
|
||
| ### NEED TO IMPLEMENT: Load Environmental Priors and Posteriors | ||
|
|
||
| ### Sensitivity Analysis | ||
|
|
@@ -170,11 +179,17 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU | |
| quantile.samples = sa.samples, | ||
| settings = settings, | ||
| model = model, | ||
| input_design = input_design_sa, | ||
| write.to.db = write | ||
| ) | ||
|
|
||
| # collect manifest data | ||
| if ("manifest" %in% names(sa.runs)) { | ||
| run_manifest_df <- rbind(run_manifest_df, sa.runs$manifest) | ||
| } | ||
|
|
||
| # Store output in settings and output variables | ||
| runs.samples$sa <- sa.run.ids <- sa.runs$runs | ||
| sa.run.ids <- sa.runs$runs | ||
| settings$sensitivity.analysis$ensemble.id <- sa.ensemble.id <- sa.runs$ensemble.id | ||
|
|
||
| # Save sensitivity analysis info | ||
|
|
@@ -192,12 +207,17 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU | |
| ensemble.samples = ensemble.samples, | ||
| settings = settings, | ||
| model = model, | ||
| input_design = input_design, | ||
| input_design = input_design_ens, | ||
| write.to.db = write | ||
| ) | ||
|
|
||
| # collect manifest data | ||
| if ("manifest" %in% names(ens.runs)) { | ||
| run_manifest_df <- rbind(run_manifest_df, ens.runs$manifest) | ||
| } | ||
|
|
||
| # Store output in settings and output variables | ||
| runs.samples$ensemble <- ens.run.ids <- ens.runs$runs | ||
| ens.run.ids <- ens.runs$runs | ||
| settings$ensemble$ensemble.id <- ens.ensemble.id <- ens.runs$ensemble.id | ||
| ens.samples <- ensemble.samples # rename just for consistency | ||
|
|
||
|
|
@@ -211,13 +231,19 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU | |
| PEcAn.logger::logger.info("###### Finished writing model run config files #####") | ||
| PEcAn.logger::logger.info("config files samples in ", file.path(settings$outdir, "run")) | ||
|
|
||
| ### Save output from SA/Ensemble runs | ||
| # A lot of this is duplicate with the ensemble/sa specific output above, but kept for backwards compatibility. | ||
| save(ensemble.samples, trait.samples, sa.samples, runs.samples, pft.names, trait.names, | ||
| file = file.path(settings$outdir, "samples.Rdata") | ||
| ) | ||
| PEcAn.logger::logger.info("parameter values for runs in ", file.path(settings$outdir, "samples.RData")) | ||
| # write runs manifest | ||
| manifest.file <- file.path(settings$outdir, "runs_manifest.csv") | ||
|
|
||
| # always write manifest (even if empty) so downstream knows workflow completed | ||
| utils::write.table(run_manifest_df, | ||
| file = manifest.file, | ||
| sep = ",", | ||
| row.names = FALSE, | ||
| col.names = overwrite || !file.exists(manifest.file), | ||
| append = !overwrite) | ||
|
|
||
| PEcAn.logger::logger.info("Run manifest written to ", manifest.file) | ||
|
|
||
| options(scipen = scipen) | ||
| invisible(settings) | ||
| return(settings) | ||
| } | ||
| return(invisible(settings)) | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: Please don't remove terminating newlines. These days the rule is mostly just to avoid needless extra changes, but there do exist some (mostly ancient 😅 ) tools that complain if it isn't there. |
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -2,47 +2,63 @@ | |||
| #' | ||||
| #' @param settings a PEcAn Settings or MultiSettings object | ||||
| #' @param overwrite logical: Replace config files if they already exist? | ||||
| #' @param input_design the input indices for samples | ||||
| #' @param input_design Optional. Input design specification. Can be: | ||||
| #' \itemize{ | ||||
| #' \item A list with \code{ensemble} and/or \code{sensitivity} entries | ||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| #' \item A single data.frame (interpreted as ensemble design) | ||||
| #' \item NULL to auto-generate designs based on settings | ||||
| #' } | ||||
| #' | ||||
| #' @return A modified settings object, invisibly | ||||
| #' | ||||
| #' @details | ||||
| #' This function serves as the orchestration layer between PEcAn workflows and | ||||
| #' the config-writing machinery. It generates appropriate input designs | ||||
| #' (ensemble and/or SA) if not provided. For MultiSettings, it generates designs once | ||||
| #' from the first site then shares across all sites for consistent sampling. Finally, | ||||
| #' it delegates to \code{\link{run.write.configs}} for actual config generation. | ||||
| #' The input design determines how parameter samples and input files (met, soil, | ||||
| #' etc.) are coordinated across runs. Ensemble designs typically use random or | ||||
| #' quasi-random sampling, while SA designs hold non-parameter inputs constant | ||||
| #' (OAT methodology). | ||||
| #' | ||||
| #' @importFrom dplyr %>% | ||||
| #' @importFrom rlang %||% | ||||
| #' @export | ||||
|
|
||||
|
|
||||
| runModule.run.write.configs <- function(settings, | ||||
| overwrite = TRUE, | ||||
| input_design = NULL) { | ||||
|
|
||||
| if (PEcAn.settings::is.MultiSettings(settings)) { | ||||
| if (overwrite && file.exists(file.path(settings$rundir, "runs.txt"))) { | ||||
| PEcAn.logger::logger.warn("Existing runs.txt file will be removed.") | ||||
| unlink(file.path(settings$rundir, "runs.txt")) | ||||
| } | ||||
| if (is.null(input_design)) { | ||||
| ensemble_size <- settings$ensemble$size | ||||
| design_result <- PEcAn.uncertainty::generate_joint_ensemble_design( | ||||
| settings = settings[1], | ||||
| ensemble_size = ensemble_size | ||||
| ) | ||||
| input_design <- design_result$X | ||||
| } | ||||
|
|
||||
| # prepare designs once for all sites (consistent sampling) | ||||
| designs <- .prepare_input_designs(settings[1], input_design) | ||||
|
|
||||
| return(PEcAn.settings::papply(settings, | ||||
| runModule.run.write.configs, | ||||
| overwrite = FALSE, | ||||
| input_design = input_design)) | ||||
| input_design = designs)) | ||||
|
|
||||
| } else if (PEcAn.settings::is.Settings(settings)) { | ||||
| # double check making sure we have method for parameter sampling | ||||
| if (is.null(settings$ensemble$samplingspace$parameters$method)) { | ||||
| settings$ensemble$samplingspace$parameters$method <- "uniform" | ||||
| } | ||||
| if (is.null(input_design)) { | ||||
| ensemble_size <- settings$ensemble$size | ||||
| design_result <- PEcAn.uncertainty::generate_joint_ensemble_design( | ||||
| settings = settings, | ||||
| ensemble_size = ensemble_size | ||||
| ) | ||||
| input_design <- design_result$X | ||||
| } | ||||
| ensemble_size <- nrow(input_design) | ||||
|
|
||||
| # prepare designs (may already be normalized from MultiSettings) | ||||
| designs <- .prepare_input_designs(settings, input_design) | ||||
|
|
||||
| # determine ensemble size from design | ||||
| ensemble_size <- if (!is.null(designs$ensemble)) { | ||||
| nrow(designs$ensemble) | ||||
| } else { | ||||
| settings$ensemble$size %||% 1 | ||||
| } | ||||
|
|
||||
| # check to see if there are posterior.files tags under pft | ||||
| posterior.files <- settings$pfts %>% | ||||
|
|
@@ -54,9 +70,68 @@ runModule.run.write.configs <- function(settings, | |||
| write = isTRUE(settings$database$bety$write), # treat null as FALSE | ||||
| posterior.files = posterior.files, | ||||
| overwrite = overwrite, | ||||
| input_design = input_design | ||||
| input_design = designs | ||||
| )) | ||||
| } else { | ||||
| stop("runModule.run.write.configs only works with Settings or MultiSettings") | ||||
| } | ||||
| } | ||||
|
|
||||
|
|
||||
| #' Prepare input designs for ensemble and sensitivity analysis | ||||
| #' | ||||
| #' Normalizes and generates input design matrices. This helper ensures | ||||
| #' consistent handling of the various input_design formats and | ||||
| #' auto-generates designs when needed. | ||||
| #' | ||||
| #' @param settings A single PEcAn settings object | ||||
| #' @param input_design Input design specification (see \code{runModule.run.write.configs}) | ||||
| #' @return A list with \code{ensemble} and \code{sensitivity} entries (each a data.frame or NULL) | ||||
| #' | ||||
| #' @details | ||||
| #' Input normalization rules: | ||||
| #' \itemize{ | ||||
| #' \item If \code{input_design} is already a list with \code{ensemble}/\code{sensitivity} | ||||
| #' keys, return as-is | ||||
| #' \item If \code{input_design} is a single data.frame, interpret as ensemble design | ||||
| #' \item If NULL and \code{settings$ensemble} exists, generate via | ||||
| #' \code{generate_joint_ensemble_design} | ||||
| #' \item If no sensitivity design was supplied (\code{input_design} is NULL or a | ||||
| #' single data.frame) and \code{settings$sensitivity.analysis} exists, generate via | ||||
| #' \code{generate_OAT_SA_design} | ||||
| #' } | ||||
| #' | ||||
| #' @keywords internal | ||||
|
|
||||
| .prepare_input_designs <- function(settings, input_design) { | ||||
|
|
||||
| # already normalized? return as-is | ||||
| if (is.list(input_design) && | ||||
| any(c("ensemble", "sensitivity") %in% names(input_design))) { | ||||
| return(input_design) | ||||
| } | ||||
|
|
||||
| designs <- list(ensemble = NULL, sensitivity = NULL) | ||||
|
|
||||
| # single data.frame = ensemble design | ||||
| if (is.data.frame(input_design)) { | ||||
| designs$ensemble <- input_design | ||||
| } | ||||
|
|
||||
| # generate ensemble design if needed | ||||
| if (is.null(designs$ensemble) && "ensemble" %in% names(settings)) { | ||||
| ensemble_size <- settings$ensemble$size %||% 1 | ||||
| design_result <- PEcAn.uncertainty::generate_joint_ensemble_design( | ||||
| settings = settings, | ||||
| ensemble_size = ensemble_size | ||||
| ) | ||||
| designs$ensemble <- design_result$X | ||||
| } | ||||
|
|
||||
| # generate SA design if needed | ||||
| if (is.null(designs$sensitivity) && "sensitivity.analysis" %in% names(settings)) { | ||||
| design_result <- PEcAn.uncertainty::generate_OAT_SA_design(settings) | ||||
| designs$sensitivity <- design_result$X | ||||
| } | ||||
|
|
||||
| return(designs) | ||||
| } | ||||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Once 1.10.0 is released this will need moving to go under the next unreleased heading, but need to wait until then