diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R index 18756b4555..849efd3c92 100644 --- a/base/workflow/R/run.write.configs.R +++ b/base/workflow/R/run.write.configs.R @@ -104,10 +104,15 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU options(scipen = 12) samples.file <- file.path(settings$outdir, "samples.Rdata") + existing_data <- NULL if (file.exists(samples.file)) { - samples <- new.env() - load(samples.file, envir = samples) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples - trait.samples <- samples$trait.samples + existing_data <- new.env() + load(samples.file, envir = existing_data) + trait.samples <- existing_data$trait.samples + sa.samples <- existing_data$sa.samples + runs.samples <- existing_data$runs.samples + + # Generate ensemble.samples for current site trait_sample_indices <- input_design[["param"]] ensemble.samples <- list() for (pft in names(trait.samples)) { @@ -120,11 +125,9 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU ) names(ensemble.samples[[pft]]) <- names(pft_traits) } - sa.samples <- samples$sa.samples - runs.samples <- samples$runs.samples - ## env.samples <- samples$env.samples } else { - PEcAn.logger::logger.error(samples.file, "not found, this file is required by the run.write.configs function") + # cannot proceed without samples.Rdata + PEcAn.logger::logger.severe(samples.file, "not found, this file is required by the run.write.configs function") } ## remove previous runs.txt @@ -170,6 +173,7 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU quantile.samples = sa.samples, settings = settings, model = model, + input_design = input_design, write.to.db = write ) @@ -211,6 +215,80 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU PEcAn.logger::logger.info("###### Finished writing model run config files #####") 
PEcAn.logger::logger.info("config files samples in ", file.path(settings$outdir, "run")) + ## Use existing_data from earlier load + if (!is.null(existing_data)) { + # Merge ensemble.samples + if (!is.null(existing_data$ensemble.samples)) { + for (pft_name in names(ensemble.samples)) { + if (pft_name %in% names(existing_data$ensemble.samples)) { + # combine parameter samples for same PFT across sites + ensemble.samples[[pft_name]] <- rbind( + existing_data$ensemble.samples[[pft_name]], + ensemble.samples[[pft_name]] + ) + } + # New PFTs from current site are automatically preserved + } + # preserves existing PFTs -- add PFTs that exist in file but not current site + for (existing_pft in names(existing_data$ensemble.samples)) { + if (!existing_pft %in% names(ensemble.samples)) { + ensemble.samples[[existing_pft]] <- existing_data$ensemble.samples[[existing_pft]] + } + } + } + # Merge trait.samples + if (!is.null(existing_data$trait.samples)) { + trait.samples <- utils::modifyList(existing_data$trait.samples, trait.samples) + } + # Merge sa.samples + if (!is.null(existing_data$sa.samples)) { + sa.samples <- utils::modifyList(existing_data$sa.samples, sa.samples) + } + # Merge runs.samples + if (!is.null(existing_data$runs.samples)) { + # Merge ensemble runs + if (!is.null(runs.samples$ensemble)) { + if (!is.null(existing_data$runs.samples$ensemble)) { + runs.samples$ensemble <- rbind( + existing_data$runs.samples$ensemble, + runs.samples$ensemble + ) + } + } else if (!is.null(existing_data$runs.samples$ensemble)) { + # Current site has no ensemble, preserve existing + runs.samples$ensemble <- existing_data$runs.samples$ensemble + } + # Merge SA runs + if (!is.null(runs.samples$sa)) { + if (!is.null(existing_data$runs.samples$sa)) { + for (pft_name in names(runs.samples$sa)) { + if (pft_name %in% names(existing_data$runs.samples$sa)) { + runs.samples$sa[[pft_name]] <- rbind( + existing_data$runs.samples$sa[[pft_name]], + runs.samples$sa[[pft_name]] + ) + } + } + for 
(existing_pft in names(existing_data$runs.samples$sa)) { + if (!existing_pft %in% names(runs.samples$sa)) { + runs.samples$sa[[existing_pft]] <- existing_data$runs.samples$sa[[existing_pft]] + } + } + } + } else if (!is.null(existing_data$runs.samples$sa)) { + # Current site has no SA, preserve existing + runs.samples$sa <- existing_data$runs.samples$sa + } + } + # Merge pft.names + if (!is.null(existing_data$pft.names)) { + pft.names <- unique(c(existing_data$pft.names, pft.names)) + } + # Merge trait.names + if (!is.null(existing_data$trait.names)) { + trait.names <- utils::modifyList(existing_data$trait.names, trait.names) + } + } ### Save output from SA/Ensemble runs # A lot of this is duplicate with the ensemble/sa specific output above, but kept for backwards compatibility. save(ensemble.samples, trait.samples, sa.samples, runs.samples, pft.names, trait.names, @@ -221,3 +299,4 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU invisible(settings) return(settings) } + diff --git a/modules/uncertainty/R/sensitivity.R b/modules/uncertainty/R/sensitivity.R index 7caa635b8b..e7114e3cb2 100644 --- a/modules/uncertainty/R/sensitivity.R +++ b/modules/uncertainty/R/sensitivity.R @@ -94,6 +94,7 @@ read.sa.output <- function(traits, quantiles, pecandir, outdir, pft.name = "", #' by \code{settings$rundir} before writing to it? #' @param write.to.db logical: Record this run to BETY? If TRUE, uses connection #' settings specified in \code{settings$database} +#' @param input_design data.frame coordinating input files across runs #' #' @return list, containing $runs = data frame of runids, #' and $ensemble.id = the ensemble ID for these runs. 
@@ -101,7 +102,7 @@ read.sa.output <- function(traits, quantiles, pecandir, outdir, pft.name = "", #' @export #' @author David LeBauer, Carl Davidson write.sa.configs <- function(defaults, quantile.samples, settings, model, - clean = FALSE, write.to.db = TRUE) { + clean = FALSE, write.to.db = TRUE, input_design = NULL) { scipen <- getOption("scipen") options(scipen = 12) my.write.config <- paste("write.config.", model, sep = "") @@ -223,6 +224,50 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, dir.create(file.path(settings$rundir, run.id), recursive = TRUE) dir.create(file.path(settings$modeloutdir, run.id), recursive = TRUE) + ## select single met path for SA runs without input_design (backward compatibility) + ## when input_design is provided (recommended), it supersedes this selection + ## and allows different met files per SA run + # I check to make sure the path under the met is a list. + # if it's specified what met needs to be used in 'met.id' under sensitivity + # analysis of pecan xml we used that otherwise, I use the first met. + if (is.list(settings$run$inputs$met$path)) { + # This checks for met.id tag in the settings under sensitivity analysis - + # if it's not there it creates it. Then it's gonna use what it created. 
+ if (is.null(settings$sensitivity.analysis$met.id)) { + settings$sensitivity.analysis$met.id <- 1 + } + settings$run$inputs$met$path <- settings$run$inputs$met$path[[settings$sensitivity.analysis$met.id]] + } + + # Apply input design coordination for median run + median_settings <- settings + if (!is.null(input_design)) { + # Coordinate inputs for median run (use first row) + for (input_tag in colnames(input_design)) { + if (input_tag != "param" && !is.null(median_settings$run$inputs[[input_tag]]$path)) { + input_paths <- median_settings$run$inputs[[input_tag]]$path + # Assume list structure (consistent with write.ensemble.configs) + if (length(input_paths) > 1) { + input_index <- input_design[[input_tag]][1] + median_settings$run$inputs[[input_tag]]$path <- input_paths[[input_index]] + } + } + } + } + + median_input_info <- "" + for (input_tag in names(median_settings$run$inputs)) { + input_data <- median_settings$run$inputs[[input_tag]] + # At SA stage, path is ALWAYS a resolved string (thanks to input design) + if (!is.null(input_data) && !is.null(input_data$path)) { + median_input_info <- paste0(median_input_info, + format(input_tag, width = 12, justify = "left"), + ": ", + input_data$path, + "\n") + } + } + # write run information to disk TODO need to print list of pft names and trait # names cat("runtype : sensitivity analysis\n", @@ -236,7 +281,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "model id : ", settings$model$id, "\n", "site : ", settings$run$site$name, "\n", "site id : ", settings$run$site$id, "\n", - "met data : ", settings$run$site$met, "\n", + median_input_info, "start date : ", settings$run$start.date, "\n", "end date : ", settings$run$end.date, "\n", "hostname : ", settings$host$name, "\n", @@ -246,24 +291,10 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, sep = "") - # I check to make sure the path under the met is a list. 
- # if it's specified what met needs to be used in 'met.id' under sensitivity - # analysis of pecan xml we used that otherwise, I use the first met. - if (is.list(settings$run$inputs$met$path)) { - # This checks for met.id tag in the settings under sensitivity analysis - - # if it's not there it creates it. Then it's gonna use what it created. - if (is.null(settings$sensitivity.analysis$met.id)) { - settings$sensitivity.analysis$met.id <- 1 - } - settings$run$inputs$met$path <- settings$run$inputs$met$path[[settings$sensitivity.analysis$met.id]] - - } - - # write configuration do.call(my.write.config, args = list(defaults = defaults, trait.values = median.samples, - settings = settings, + settings = median_settings, run.id = run.id)) cat( run.id, @@ -272,16 +303,18 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, append = TRUE ) + run_index <- 1 + ## loop over pfts runs <- list() - for (i in seq_along(names(quantile.samples))) { - pftname <- names(quantile.samples)[i] + for (pft_idx in seq_along(names(quantile.samples))) { + pftname <- names(quantile.samples)[pft_idx] if (pftname == "env") { next } - traits <- colnames(quantile.samples[[i]]) - quantiles.str <- rownames(quantile.samples[[i]]) + traits <- colnames(quantile.samples[[pft_idx]]) + quantiles.str <- rownames(quantile.samples[[pft_idx]]) runs[[pftname]] <- data.frame() @@ -293,7 +326,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } else { quantile <- as.numeric(quantile.str) / 100 trait.samples <- median.samples - trait.samples[[i]][trait] <- quantile.samples[[i]][quantile.str, trait, drop = FALSE] + trait.samples[[pft_idx]][trait] <- quantile.samples[[pft_idx]][quantile.str, trait, drop = FALSE] if (!is.null(con)) { paramlist <- paste0("quantile=", quantile.str, ",trait=", trait, ",pft=", pftname) @@ -343,12 +376,15 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, run.type = "SA", index = round(quantile, 3), trait = 
trait, - pft.name = names(trait.samples)[i], + pft.name = names(trait.samples)[pft_idx], site.id = settings$run$site$id ) } runs[[pftname]][quantile.str, trait] <- run.id + # Increment run counter + run_index <- run_index + 1 + # create folders (cleaning up old ones if needed) if (clean) { unlink(file.path(settings$rundir, run.id)) @@ -357,11 +393,38 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, dir.create(file.path(settings$rundir, run.id), recursive = TRUE) dir.create(file.path(settings$modeloutdir, run.id), recursive = TRUE) - # write run information to disk + # Apply input design coordination for SA runs + settings_copy <- settings + if (!is.null(input_design)) { + for (input_tag in colnames(input_design)) { + if (input_tag != "param" && !is.null(settings_copy$run$inputs[[input_tag]]$path)) { + input_paths <- settings_copy$run$inputs[[input_tag]]$path + if (length(input_paths) > 1) { + input_index <- input_design[[input_tag]][run_index] + settings_copy$run$inputs[[input_tag]]$path <- input_paths[[input_index]] + } + } + } + } + + # Build dynamic input info string for SA run README + sa_input_info <- "" + for (input_tag in names(settings_copy$run$inputs)) { + input_data <- settings_copy$run$inputs[[input_tag]] + if (!is.null(input_data) && !is.null(input_data$path)) { + sa_input_info <- paste0(sa_input_info, + format(input_tag, width = 12, justify = "left"), + ": ", + input_data$path, + "\n") + } + } + + # write SA run information to disk cat("runtype : sensitivity analysis\n", "workflow id : ", workflow.id, "\n", "ensemble id : ", ensemble.id, "\n", - "pft name : ", names(trait.samples)[i], "\n", + "pft name : ", names(trait.samples)[pft_idx], "\n", "quantile : ", quantile.str, "\n", "trait : ", trait, "\n", "run id : ", run.id, "\n", @@ -369,7 +432,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "model id : ", settings$model$id, "\n", "site : ", settings$run$site$name, "\n", "site id : ", 
settings$run$site$id, "\n", - "met data : ", settings$run$site$met, "\n", + sa_input_info, "start date : ", settings$run$start.date, "\n", "end date : ", settings$run$end.date, "\n", "hostname : ", settings$host$name, "\n", @@ -382,7 +445,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, # write configuration do.call(my.write.config, args = list(defaults = defaults, trait.values = trait.samples, - settings = settings, + settings = settings_copy, run.id)) cat( run.id, diff --git a/modules/uncertainty/man/write.sa.configs.Rd b/modules/uncertainty/man/write.sa.configs.Rd index 545970713c..87227cff18 100644 --- a/modules/uncertainty/man/write.sa.configs.Rd +++ b/modules/uncertainty/man/write.sa.configs.Rd @@ -10,7 +10,8 @@ write.sa.configs( settings, model, clean = FALSE, - write.to.db = TRUE + write.to.db = TRUE, + input_design = NULL ) } \arguments{ @@ -27,6 +28,8 @@ by \code{settings$rundir} before writing to it?} \item{write.to.db}{logical: Record this run to BETY? If TRUE, uses connection settings specified in \code{settings$database}} + +\item{input_design}{data.frame coordinating input files across runs} } \value{ list, containing $runs = data frame of runids,