From 0c3d0fc642c597a27fc66a170ab07e1edebeb6ea Mon Sep 17 00:00:00 2001 From: divne7022 Date: Mon, 10 Nov 2025 14:02:34 +0000 Subject: [PATCH 1/6] pass input_design param to write.sa.config and resolve the overwriting issue of sample.Rdata for multisite setting --- base/workflow/R/run.write.configs.R | 133 +++++++++++++++++++++++----- 1 file changed, 109 insertions(+), 24 deletions(-) diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R index 18756b4555..a2ac4ee381 100644 --- a/base/workflow/R/run.write.configs.R +++ b/base/workflow/R/run.write.configs.R @@ -51,7 +51,7 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU } ) } - + ## Which posterior to use? for (i in seq_along(settings$pfts)) { ## if posterior.files is specified us that @@ -60,9 +60,9 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU if (!is.null(settings$pfts[[i]]$posteriorid)) { # TODO: sometimes `files` is a 0x0 tibble and other operations with it fail. files <- PEcAn.DB::dbfile.check("Posterior", - settings$pfts[[i]]$posteriorid, - con, settings$host$name, - return.all = TRUE + settings$pfts[[i]]$posteriorid, + con, settings$host$name, + return.all = TRUE ) pid <- grep("post.distns.*Rdata", files$file_name) ## is there a posterior file? if (length(pid) == 0) { @@ -97,12 +97,12 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU } } ## end else } ## end for loop over pfts - + ## Sample parameters model <- settings$model$type scipen <- getOption("scipen") options(scipen = 12) - + samples.file <- file.path(settings$outdir, "samples.Rdata") if (file.exists(samples.file)) { samples <- new.env() @@ -126,17 +126,17 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU } else { PEcAn.logger::logger.error(samples.file, "not found, this file is required by the run.write.configs function") } - + ## remove previous runs.txt if (overwrite && file.exists(file.path(settings$rundir, "runs.txt"))) { PEcAn.logger::logger.warn("Existing runs.txt file will be removed.") unlink(file.path(settings$rundir, "runs.txt")) } - + PEcAn.utils::load.modelpkg(model) - + ## Check for model-specific write configs - + my.write.config <- paste0("write.config.", model) if (!exists(my.write.config)) { PEcAn.logger::logger.error( @@ -145,22 +145,22 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU my.write.config ) } - + ## Prepare for model output. Clean up any old config files (if exists) # TODO: shouldn't this check if the files exist before removing them? my.remove.config <- paste0("remove.config.", model) if (exists(my.remove.config)) { do.call(my.remove.config, args = list(settings$rundir, settings)) } - + # TODO RK : need to write to runs_inputs table - + # Save names pft.names <- names(trait.samples) trait.names <- lapply(trait.samples, names) - + ### NEED TO IMPLEMENT: Load Environmental Priors and Posteriors - + ### Sensitivity Analysis if ("sensitivity.analysis" %in% names(settings)) { ### Write out SA config files @@ -170,20 +170,21 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU quantile.samples = sa.samples, settings = settings, model = model, + input_design = input_design, write.to.db = write ) - + # Store output in settings and output variables runs.samples$sa <- sa.run.ids <- sa.runs$runs settings$sensitivity.analysis$ensemble.id <- sa.ensemble.id <- sa.runs$ensemble.id - + # Save sensitivity analysis info fname <- PEcAn.uncertainty::sensitivity.filename(settings, "sensitivity.samples", "Rdata", - all.var.yr = TRUE, pft = NULL + all.var.yr = TRUE, pft = NULL ) save(sa.run.ids, sa.ensemble.id, sa.samples, pft.names, trait.names, file = fname) } ### End of SA - + ### Write ENSEMBLE if ("ensemble" %in% names(settings)) { ens.runs <- PEcAn.uncertainty::write.ensemble.configs( @@ -195,29 +196,113 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU input_design = input_design, write.to.db = write ) - + # Store output in settings and output variables runs.samples$ensemble <- ens.run.ids <- ens.runs$runs settings$ensemble$ensemble.id <- ens.ensemble.id <- ens.runs$ensemble.id ens.samples <- ensemble.samples # rename just for consistency - + # Save ensemble analysis info fname <- PEcAn.uncertainty::ensemble.filename(settings, "ensemble.samples", "Rdata", all.var.yr = TRUE) save(ens.run.ids, ens.ensemble.id, ens.samples, pft.names, trait.names, file = fname) } else { PEcAn.logger::logger.info("not writing config files for ensemble, settings are NULL") } ### End of Ensemble - + PEcAn.logger::logger.info("###### Finished writing model run config files #####") PEcAn.logger::logger.info("config files samples in ", file.path(settings$outdir, "run")) + + samples.file <- file.path(settings$outdir, "samples.Rdata") + + if (file.exists(samples.file)) { + existing_env <- new.env() + load(samples.file, envir = existing_env) + if (!is.null(existing_env$ensemble.samples)) { + for (pft_name in names(ensemble.samples)) { + if (pft_name %in% names(existing_env$ensemble.samples)) { + # combine parameter samples for same PFT across sites + ensemble.samples[[pft_name]] <- rbind( + existing_env$ensemble.samples[[pft_name]], + ensemble.samples[[pft_name]] + ) + } + # New PFTs from current site are automatically preserved + } + + # preserves existing PFTs -- add PFTs that exist in file but not current site + for (existing_pft in names(existing_env$ensemble.samples)) { + if (!existing_pft %in% names(ensemble.samples)) { + ensemble.samples[[existing_pft]] <- existing_env$ensemble.samples[[existing_pft]] + } + } + } + + if (!is.null(existing_env$trait.samples)) { + trait.samples <- modifyList(existing_env$trait.samples, trait.samples) + } + + if (!is.null(existing_env$sa.samples)) { + sa.samples <- modifyList(existing_env$sa.samples, sa.samples) + } + + if (!is.null(existing_env$runs.samples)) { + if (!is.null(runs.samples$ensemble)) { + if (!is.null(existing_env$runs.samples$ensemble)) { + runs.samples$ensemble <- rbind( + existing_env$runs.samples$ensemble, + runs.samples$ensemble + ) + } + # If existing is NULL, current ensemble data is preserved + } else if (!is.null(existing_env$runs.samples$ensemble)) { + # Current site has no ensemble, preserve existing + runs.samples$ensemble <- existing_env$runs.samples$ensemble + } + + # Merge SA runs + if (!is.null(runs.samples$sa)) { + if (!is.null(existing_env$runs.samples$sa)) { + for (pft_name in names(runs.samples$sa)) { + if (pft_name %in% names(existing_env$runs.samples$sa)) { + runs.samples$sa[[pft_name]] <- rbind( + existing_env$runs.samples$sa[[pft_name]], + runs.samples$sa[[pft_name]] + ) + } + } + + for (existing_pft in names(existing_env$runs.samples$sa)) { + if (!existing_pft %in% names(runs.samples$sa)) { + runs.samples$sa[[existing_pft]] <- existing_env$runs.samples$sa[[existing_pft]] + } + } + } + # If existing SA is NULL, current SA data is preserved + } else if (!is.null(existing_env$runs.samples$sa)) { + # Current site has no SA, preserve existing + runs.samples$sa <- existing_env$runs.samples$sa + } + } + if (!is.null(existing_env$pft.names)) { + pft.names <- unique(c(existing_env$pft.names, pft.names)) + } + + if (!is.null(existing_env$trait.names)) { + trait.names <- modifyList(existing_env$trait.names, trait.names) + } + + } + ### Save output from SA/Ensemble runs # A lot of this is duplicate with the ensemble/sa specific output above, but kept for backwards compatibility. save(ensemble.samples, trait.samples, sa.samples, runs.samples, pft.names, trait.names, - file = file.path(settings$outdir, "samples.Rdata") + file = samples.file ) - PEcAn.logger::logger.info("parameter values for runs in ", file.path(settings$outdir, "samples.RData")) + + PEcAn.logger::logger.info("parameter values for runs in ", samples.file) options(scipen = scipen) invisible(settings) return(settings) } + From 18a35641c3f195e6d3c196bda8951ba7e26962a1 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Mon, 10 Nov 2025 14:05:41 +0000 Subject: [PATCH 2/6] add input_design param for input coordination and enhancing README.txt to track all inputs used per run --- modules/uncertainty/R/sensitivity.R | 203 ++++++++++++++++++---------- 1 file changed, 132 insertions(+), 71 deletions(-) diff --git a/modules/uncertainty/R/sensitivity.R b/modules/uncertainty/R/sensitivity.R index 7caa635b8b..c94881f374 100644 --- a/modules/uncertainty/R/sensitivity.R +++ b/modules/uncertainty/R/sensitivity.R @@ -94,6 +94,7 @@ read.sa.output <- function(traits, quantiles, pecandir, outdir, pft.name = "", #' by \code{settings$rundir} before writing to it? #' @param write.to.db logical: Record this run to BETY? If TRUE, uses connection #' settings specified in \code{settings$database} +#' @param input_design data.frame coordinating input files across runs #' #' @return list, containing $runs = data frame of runids, #' and $ensemble.id = the ensemble ID for these runs. @@ -101,11 +102,11 @@ read.sa.output <- function(traits, quantiles, pecandir, outdir, pft.name = "", #' @export #' @author David LeBauer, Carl Davidson write.sa.configs <- function(defaults, quantile.samples, settings, model, - clean = FALSE, write.to.db = TRUE) { + clean = FALSE, write.to.db = TRUE, input_design = NULL) { scipen <- getOption("scipen") options(scipen = 12) my.write.config <- paste("write.config.", model, sep = "") - + if (write.to.db) { con <- try(PEcAn.DB::db.open(settings$database$bety), silent = TRUE) if (inherits(con, "try-error")) { @@ -116,23 +117,23 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } else { con <- NULL } - + # Get the workflow id if ("workflow" %in% names(settings)) { workflow.id <- settings$workflow$id } else { workflow.id <- -1 } - + # find all inputs that have an id inputs <- names(settings$run$inputs) inputs <- inputs[grepl(".id$", inputs)] - + runs <- data.frame() - + # Reading the site.pft specific tags from xml site.pfts.vec <- as.character(unlist(settings$run$site$site.pft)) - + if (!is.null(site.pfts.vec)) { # find the name of pfts defined in the body of pecan.xml defined.pfts <- as.character(unlist(purrr::map(settings$pfts, "name"))) @@ -151,8 +152,8 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, ) } } - - + + ## write median run MEDIAN <- "50" median.samples <- list() @@ -160,14 +161,14 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, median.samples[[i]] <- quantile.samples[[i]][MEDIAN, , drop = FALSE] } names(median.samples) <- names(quantile.samples) - + if (!is.null(con)) { # Note: ignores any existing run or ensemble ids in settings ensemble.id <- PEcAn.DB::db.query(paste0( "INSERT INTO ensembles (runtype, workflow_id) ", "VALUES ('sensitivity analysis', ", format(workflow.id, scientific = FALSE), ") ", "RETURNING id"), con = con)[["id"]] - + paramlist <- paste0( "quantile=MEDIAN,trait=all,pft=", paste(lapply(settings$pfts, function(x) x[["name"]]), sep = ",") @@ -176,15 +177,15 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "INSERT INTO runs ", "(model_id, site_id, start_time, finish_time, outdir, ensemble_id, parameter_list) ", "values ('", - settings$model$id, "', '", - settings$run$site$id, "', '", - settings$run$start.date, "', '", - settings$run$end.date, "', '", - settings$run$outdir, "', ", - ensemble.id, ", '", - paramlist, "') ", + settings$model$id, "', '", + settings$run$site$id, "', '", + settings$run$start.date, "', '", + settings$run$end.date, "', '", + settings$run$outdir, "', ", + ensemble.id, ", '", + paramlist, "') ", "RETURNING id"), con = con)[["id"]] - + # associate posteriors with ensembles for (pft in defaults) { PEcAn.DB::db.query( @@ -194,7 +195,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, con = con ) } - + # associate inputs with runs if (!is.null(inputs)) { for (x in inputs) { @@ -214,7 +215,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, ensemble.id <- settings$sensitivity.analysis$ensemble.id %||% rlang::hash(settings) } medianrun <- run.id - + # create folders (cleaning up old ones if needed) if (clean) { unlink(file.path(settings$rundir, run.id)) @@ -222,7 +223,49 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } dir.create(file.path(settings$rundir, run.id), recursive = TRUE) dir.create(file.path(settings$modeloutdir, run.id), recursive = TRUE) - + + # I check to make sure the path under the met is a list. + # if it's specified what met needs to be used in 'met.id' under sensitivity + # analysis of pecan xml we used that otherwise, I use the first met. + if (is.list(settings$run$inputs$met$path)) { + # This checks for met.id tag in the settings under sensitivity analysis - + # if it's not there it creates it. Then it's gonna use what it created. + if (is.null(settings$sensitivity.analysis$met.id)) { + settings$sensitivity.analysis$met.id <- 1 + } + settings$run$inputs$met$path <- settings$run$inputs$met$path[[settings$sensitivity.analysis$met.id]] + } + + # Apply input design coordination for median run + median_settings <- settings + if (!is.null(input_design)) { + # Coordinate inputs for median run (use first row) + for (input_tag in colnames(input_design)) { + if (input_tag != "param" && !is.null(median_settings$run$inputs[[input_tag]]$path)) { + input_paths <- median_settings$run$inputs[[input_tag]]$path + # Assume list structure (consistent with write.ensemble.configs) + if (length(input_paths) > 1) { + input_index <- input_design[[input_tag]][1] + median_settings$run$inputs[[input_tag]]$path <- input_paths[[input_index]] + } + } + } + } + + median_input_info <- "" + for (input_tag in names(median_settings$run$inputs)) { + input_data <- median_settings$run$inputs[[input_tag]] + + # At SA stage, path is ALWAYS a resolved string (thanks to input design) + if (!is.null(input_data) && !is.null(input_data$path)) { + median_input_info <- paste0(median_input_info, + format(input_tag, width = 12, justify = "left"), + ": ", + input_data$path, + "\n") + } + } + # write run information to disk TODO need to print list of pft names and trait # names cat("runtype : sensitivity analysis\n", @@ -236,7 +279,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "model id : ", settings$model$id, "\n", "site : ", settings$run$site$name, "\n", "site id : ", settings$run$site$id, "\n", - "met data : ", settings$run$site$met, "\n", + median_input_info, "start date : ", settings$run$start.date, "\n", "end date : ", settings$run$end.date, "\n", "hostname : ", settings$host$name, "\n", @@ -244,26 +287,11 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "outdir : ", file.path(settings$host$outdir, run.id), "\n", file = file.path(settings$rundir, run.id, "README.txt"), sep = "") - - - # I check to make sure the path under the met is a list. - # if it's specified what met needs to be used in 'met.id' under sensitivity - # analysis of pecan xml we used that otherwise, I use the first met. - if (is.list(settings$run$inputs$met$path)) { - # This checks for met.id tag in the settings under sensitivity analysis - - # if it's not there it creates it. Then it's gonna use what it created. - if (is.null(settings$sensitivity.analysis$met.id)) { - settings$sensitivity.analysis$met.id <- 1 - } - settings$run$inputs$met$path <- settings$run$inputs$met$path[[settings$sensitivity.analysis$met.id]] - - } - - + # write configuration do.call(my.write.config, args = list(defaults = defaults, trait.values = median.samples, - settings = settings, + settings = median_settings, run.id = run.id)) cat( run.id, @@ -271,20 +299,22 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, sep = "\n", append = TRUE ) - + + run_index <- 1 + ## loop over pfts runs <- list() - for (i in seq_along(names(quantile.samples))) { - pftname <- names(quantile.samples)[i] + for (pft_idx in seq_along(names(quantile.samples))) { + pftname <- names(quantile.samples)[pft_idx] if (pftname == "env") { next } - - traits <- colnames(quantile.samples[[i]]) - quantiles.str <- rownames(quantile.samples[[i]]) - + + traits <- colnames(quantile.samples[[pft_idx]]) + quantiles.str <- rownames(quantile.samples[[pft_idx]]) + runs[[pftname]] <- data.frame() - + ## loop over variables for (trait in traits) { for (quantile.str in quantiles.str) { @@ -293,29 +323,29 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } else { quantile <- as.numeric(quantile.str) / 100 trait.samples <- median.samples - trait.samples[[i]][trait] <- quantile.samples[[i]][quantile.str, trait, drop = FALSE] - + trait.samples[[pft_idx]][trait] <- quantile.samples[[pft_idx]][quantile.str, trait, drop = FALSE] + if (!is.null(con)) { paramlist <- paste0("quantile=", quantile.str, ",trait=", trait, ",pft=", pftname) insert_result <- PEcAn.DB::db.query( paste0( "INSERT INTO runs (", - "model_id, site_id, start_time, finish_time, outdir,", - " ensemble_id, parameter_list) ", + "model_id, site_id, start_time, finish_time, outdir,", + " ensemble_id, parameter_list) ", "values ('", - settings$model$id, "', '", - settings$run$site$id, "', '", - settings$run$start.date, "', '", - settings$run$end.date, "', '", - settings$run$outdir, "', ", - ensemble.id, ", '", - paramlist, + settings$model$id, "', '", + settings$run$site$id, "', '", + settings$run$start.date, "', '", + settings$run$end.date, "', '", + settings$run$outdir, "', ", + ensemble.id, ", '", + paramlist, "') ", "RETURNING id"), con = con ) run.id <- insert_result[["id"]] - + # associate posteriors with ensembles for (pft in defaults) { PEcAn.DB::db.query( @@ -326,7 +356,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, con = con ) } - + # associate inputs with runs if (!is.null(inputs)) { for (x in inputs) { @@ -343,12 +373,15 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, run.type = "SA", index = round(quantile, 3), trait = trait, - pft.name = names(trait.samples)[i], + pft.name = names(trait.samples)[pft_idx], site.id = settings$run$site$id ) } runs[[pftname]][quantile.str, trait] <- run.id - + + # Increment run counter + run_index <- run_index + 1 + # create folders (cleaning up old ones if needed) if (clean) { unlink(file.path(settings$rundir, run.id)) @@ -356,12 +389,40 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } dir.create(file.path(settings$rundir, run.id), recursive = TRUE) dir.create(file.path(settings$modeloutdir, run.id), recursive = TRUE) - - # write run information to disk + + # Apply input design coordination for SA runs + settings_copy <- settings + if (!is.null(input_design)) { + for (input_tag in colnames(input_design)) { + if (input_tag != "param" && !is.null(settings_copy$run$inputs[[input_tag]]$path)) { + input_paths <- settings_copy$run$inputs[[input_tag]]$path + if (length(input_paths) > 1) { + input_index <- input_design[[input_tag]][run_index] + settings_copy$run$inputs[[input_tag]]$path <- input_paths[[input_index]] + } + } + } + } + + + # Build dynamic input info string for SA run README + sa_input_info <- "" + for (input_tag in names(settings_copy$run$inputs)) { + input_data <- settings_copy$run$inputs[[input_tag]] + if (!is.null(input_data) && !is.null(input_data$path)) { + sa_input_info <- paste0(sa_input_info, + format(input_tag, width = 12, justify = "left"), + ": ", + input_data$path, + "\n") + } + } + + # write SA run information to disk cat("runtype : sensitivity analysis\n", "workflow id : ", workflow.id, "\n", "ensemble id : ", ensemble.id, "\n", - "pft name : ", names(trait.samples)[i], "\n", + "pft name : ", names(trait.samples)[pft_idx], "\n", "quantile : ", quantile.str, "\n", "trait : ", trait, "\n", "run id : ", run.id, "\n", @@ -369,7 +430,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "model id : ", settings$model$id, "\n", "site : ", settings$run$site$name, "\n", "site id : ", settings$run$site$id, "\n", - "met data : ", settings$run$site$met, "\n", + sa_input_info, "start date : ", settings$run$start.date, "\n", "end date : ", settings$run$end.date, "\n", "hostname : ", settings$host$name, "\n", @@ -377,12 +438,12 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "outdir : ", file.path(settings$host$outdir, run.id), "\n", file = file.path(settings$rundir, run.id, "README.txt"), sep = "") - - + + # write configuration do.call(my.write.config, args = list(defaults = defaults, trait.values = trait.samples, - settings = settings, + settings = settings_copy, run.id)) cat( run.id, @@ -394,7 +455,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } } } - + options(scipen = scipen) return(invisible(list(runs = runs, ensemble.id = ensemble.id))) } # write.sa.configs From f075e4d7d0f9ad85ee340bbe01f7dca464988677 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Mon, 10 Nov 2025 14:06:24 +0000 Subject: [PATCH 3/6] update .Rd --- modules/uncertainty/man/write.sa.configs.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/uncertainty/man/write.sa.configs.Rd b/modules/uncertainty/man/write.sa.configs.Rd index 545970713c..87227cff18 100644 --- a/modules/uncertainty/man/write.sa.configs.Rd +++ b/modules/uncertainty/man/write.sa.configs.Rd @@ -10,7 +10,8 @@ write.sa.configs( settings, model, clean = FALSE, - write.to.db = TRUE + write.to.db = TRUE, + input_design = NULL ) } \arguments{ @@ -27,6 +28,8 @@ by \code{settings$rundir} before writing to it?} \item{write.to.db}{logical: Record this run to BETY? If TRUE, uses connection settings specified in \code{settings$database}} + +\item{input_design}{data.frame coordinating input files across runs} } \value{ list, containing $runs = data frame of runids, From f7b0c3a5870afb729bb91e07eaf502fa9cbc2643 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Wed, 12 Nov 2025 09:16:52 +0000 Subject: [PATCH 4/6] fix indent, remove end-of-line whitespace and consolidated to a single env load --- base/workflow/R/run.write.configs.R | 144 +++++++++++++--------------- 1 file changed, 69 insertions(+), 75 deletions(-) diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R index a2ac4ee381..17fd706cea 100644 --- a/base/workflow/R/run.write.configs.R +++ b/base/workflow/R/run.write.configs.R @@ -51,7 +51,7 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU } ) } - + ## Which posterior to use? for (i in seq_along(settings$pfts)) { ## if posterior.files is specified us that @@ -60,9 +60,9 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU if (!is.null(settings$pfts[[i]]$posteriorid)) { # TODO: sometimes `files` is a 0x0 tibble and other operations with it fail. files <- PEcAn.DB::dbfile.check("Posterior", - settings$pfts[[i]]$posteriorid, - con, settings$host$name, - return.all = TRUE + settings$pfts[[i]]$posteriorid, + con, settings$host$name, + return.all = TRUE ) pid <- grep("post.distns.*Rdata", files$file_name) ## is there a posterior file? if (length(pid) == 0) { @@ -97,17 +97,22 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU } } ## end else } ## end for loop over pfts - + ## Sample parameters model <- settings$model$type scipen <- getOption("scipen") options(scipen = 12) - + samples.file <- file.path(settings$outdir, "samples.Rdata") + existing_data <- NULL if (file.exists(samples.file)) { - samples <- new.env() - load(samples.file, envir = samples) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples - trait.samples <- samples$trait.samples + existing_data <- new.env() + load(samples.file, envir = existing_data) + trait.samples <- existing_data$trait.samples + sa.samples <- existing_data$sa.samples + runs.samples <- existing_data$runs.samples + + # Generate ensemble.samples for current site trait_sample_indices <- input_design[["param"]] ensemble.samples <- list() for (pft in names(trait.samples)) { @@ -120,23 +125,21 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU ) names(ensemble.samples[[pft]]) <- names(pft_traits) } - sa.samples <- samples$sa.samples - runs.samples <- samples$runs.samples - ## env.samples <- samples$env.samples } else { - PEcAn.logger::logger.error(samples.file, "not found, this file is required by the run.write.configs function") + # cannot proceed without sample.Rdata + PEcAn.logger::logger.severe(samples.file, "not found, this file is required by the run.write.configs function") } - + ## remove previous runs.txt if (overwrite && file.exists(file.path(settings$rundir, "runs.txt"))) { PEcAn.logger::logger.warn("Existing runs.txt file will be removed.") unlink(file.path(settings$rundir, "runs.txt")) } - + PEcAn.utils::load.modelpkg(model) - + ## Check for model-specific write configs - + my.write.config <- paste0("write.config.", model) if (!exists(my.write.config)) { PEcAn.logger::logger.error( @@ -145,22 +148,22 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU my.write.config ) } - + ## Prepare for model output. Clean up any old config files (if exists) # TODO: shouldn't this check if the files exist before removing them? my.remove.config <- paste0("remove.config.", model) if (exists(my.remove.config)) { do.call(my.remove.config, args = list(settings$rundir, settings)) } - + # TODO RK : need to write to runs_inputs table - + # Save names pft.names <- names(trait.samples) trait.names <- lapply(trait.samples, names) - + ### NEED TO IMPLEMENT: Load Environmental Priors and Posteriors - + ### Sensitivity Analysis if ("sensitivity.analysis" %in% names(settings)) { ### Write out SA config files @@ -173,18 +176,18 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU input_design = input_design, write.to.db = write ) - + # Store output in settings and output variables runs.samples$sa <- sa.run.ids <- sa.runs$runs settings$sensitivity.analysis$ensemble.id <- sa.ensemble.id <- sa.runs$ensemble.id - + # Save sensitivity analysis info fname <- PEcAn.uncertainty::sensitivity.filename(settings, "sensitivity.samples", "Rdata", - all.var.yr = TRUE, pft = NULL + all.var.yr = TRUE, pft = NULL ) save(sa.run.ids, sa.ensemble.id, sa.samples, pft.names, trait.names, file = fname) } ### End of SA - + ### Write ENSEMBLE if ("ensemble" %in% names(settings)) { ens.runs <- PEcAn.uncertainty::write.ensemble.configs( @@ -196,111 +199,102 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU input_design = input_design, write.to.db = write ) - + # Store output in settings and output variables runs.samples$ensemble <- ens.run.ids <- ens.runs$runs settings$ensemble$ensemble.id <- ens.ensemble.id <- ens.runs$ensemble.id ens.samples <- ensemble.samples # rename just for consistency - + # Save ensemble analysis info fname <- PEcAn.uncertainty::ensemble.filename(settings, "ensemble.samples", "Rdata", all.var.yr = TRUE) save(ens.run.ids, ens.ensemble.id, ens.samples, pft.names, trait.names, file = fname) } else { PEcAn.logger::logger.info("not writing config files for ensemble, settings are NULL") } ### End of Ensemble - + PEcAn.logger::logger.info("###### Finished writing model run config files #####") PEcAn.logger::logger.info("config files samples in ", file.path(settings$outdir, "run")) - - samples.file <- file.path(settings$outdir, "samples.Rdata") - - if (file.exists(samples.file)) { - existing_env <- new.env() - load(samples.file, envir = existing_env) - if (!is.null(existing_env$ensemble.samples)) { + + ## Use existing_data from earlier load + if (!is.null(existing_data)) { + # Merge ensemble.samples + if (!is.null(existing_data$ensemble.samples)) { for (pft_name in names(ensemble.samples)) { - if (pft_name %in% names(existing_env$ensemble.samples)) { + if (pft_name %in% names(existing_data$ensemble.samples)) { # combine parameter samples for same PFT across sites ensemble.samples[[pft_name]] <- rbind( - existing_env$ensemble.samples[[pft_name]], + existing_data$ensemble.samples[[pft_name]], ensemble.samples[[pft_name]] ) } # New PFTs from current site are automatically preserved } - # preserves existing PFTs -- add PFTs that exist in file but not current site - for (existing_pft in names(existing_env$ensemble.samples)) { + for (existing_pft in names(existing_data$ensemble.samples)) { if (!existing_pft %in% names(ensemble.samples)) { - ensemble.samples[[existing_pft]] <- existing_env$ensemble.samples[[existing_pft]] + ensemble.samples[[existing_pft]] <- existing_data$ensemble.samples[[existing_pft]] } } } - - if (!is.null(existing_env$trait.samples)) { - trait.samples <- modifyList(existing_env$trait.samples, trait.samples) + # Merge trait.samples + if (!is.null(existing_data$trait.samples)) { + trait.samples <- modifyList(existing_data$trait.samples, trait.samples) } - - if (!is.null(existing_env$sa.samples)) { - sa.samples <- modifyList(existing_env$sa.samples, sa.samples) + # Merge sa.samples + if (!is.null(existing_data$sa.samples)) { + sa.samples <- modifyList(existing_data$sa.samples, sa.samples) } - - if (!is.null(existing_env$runs.samples)) { + # Merge runs.samples + if (!is.null(existing_data$runs.samples)) { + # Merge ensemble runs if (!is.null(runs.samples$ensemble)) { - if (!is.null(existing_env$runs.samples$ensemble)) { + if (!is.null(existing_data$runs.samples$ensemble)) { runs.samples$ensemble <- rbind( - existing_env$runs.samples$ensemble, + existing_data$runs.samples$ensemble, runs.samples$ensemble ) } - # If existing is NULL, current ensemble data is preserved - } else if (!is.null(existing_env$runs.samples$ensemble)) { + } else if (!is.null(existing_data$runs.samples$ensemble)) { # Current site has no ensemble, preserve existing - runs.samples$ensemble <- existing_env$runs.samples$ensemble + runs.samples$ensemble <- existing_data$runs.samples$ensemble } - # Merge SA runs if (!is.null(runs.samples$sa)) { - if (!is.null(existing_env$runs.samples$sa)) { + if (!is.null(existing_data$runs.samples$sa)) { for (pft_name in names(runs.samples$sa)) { - if (pft_name %in% names(existing_env$runs.samples$sa)) { + if (pft_name %in% names(existing_data$runs.samples$sa)) { runs.samples$sa[[pft_name]] <- rbind( - existing_env$runs.samples$sa[[pft_name]], + existing_data$runs.samples$sa[[pft_name]], runs.samples$sa[[pft_name]] ) } } - - for (existing_pft in names(existing_env$runs.samples$sa)) { + for (existing_pft in names(existing_data$runs.samples$sa)) { if (!existing_pft %in% names(runs.samples$sa)) { - runs.samples$sa[[existing_pft]] <- existing_env$runs.samples$sa[[existing_pft]] + runs.samples$sa[[existing_pft]] <- existing_data$runs.samples$sa[[existing_pft]] } } } - # If existing SA is NULL, current SA data is preserved - } else if (!is.null(existing_env$runs.samples$sa)) { + } else if (!is.null(existing_data$runs.samples$sa)) { # Current site has no SA, preserve existing - runs.samples$sa <- existing_env$runs.samples$sa + runs.samples$sa <- existing_data$runs.samples$sa } } - - if (!is.null(existing_env$pft.names)) { - pft.names <- unique(c(existing_env$pft.names, pft.names)) + # Merge pft.names + if (!is.null(existing_data$pft.names)) { + pft.names <- unique(c(existing_data$pft.names, pft.names)) } - - if (!is.null(existing_env$trait.names)) { - trait.names <- modifyList(existing_env$trait.names, trait.names) + # Merge trait.names + if (!is.null(existing_data$trait.names)) { + trait.names <- modifyList(existing_data$trait.names, trait.names) } - } - ### Save output from SA/Ensemble runs # A lot of this is duplicate with the ensemble/sa specific output above, but kept for backwards compatibility. save(ensemble.samples, trait.samples, sa.samples, runs.samples, pft.names, trait.names, - file = samples.file + file = file.path(settings$outdir, "samples.Rdata") ) - - PEcAn.logger::logger.info("parameter values for runs in ", samples.file) + PEcAn.logger::logger.info("parameter values for runs in ", file.path(settings$outdir, "samples.RData")) options(scipen = scipen) invisible(settings) return(settings) From 81f50f6ce6f91dcfcf2fff69c71ab5d88c7119b6 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Wed, 12 Nov 2025 09:18:12 +0000 Subject: [PATCH 5/6] fix indent, remove end-of-line whitespace, remove auto linter and addcomments --- modules/uncertainty/R/sensitivity.R | 122 ++++++++++++++-------------- 1 file changed, 62 insertions(+), 60 deletions(-) diff --git a/modules/uncertainty/R/sensitivity.R b/modules/uncertainty/R/sensitivity.R index c94881f374..e7114e3cb2 100644 --- a/modules/uncertainty/R/sensitivity.R +++ b/modules/uncertainty/R/sensitivity.R @@ -106,7 +106,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, scipen <- getOption("scipen") options(scipen = 12) my.write.config <- paste("write.config.", model, sep = "") - + if (write.to.db) { con <- try(PEcAn.DB::db.open(settings$database$bety), silent = TRUE) if (inherits(con, "try-error")) { @@ -117,23 +117,23 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } else { con <- NULL } - + # Get the workflow id if ("workflow" %in% names(settings)) { workflow.id <- settings$workflow$id } else { workflow.id <- -1 } - + # find all inputs that have an id inputs <- names(settings$run$inputs) inputs <- inputs[grepl(".id$", inputs)] - + runs <- data.frame() - + # Reading the site.pft specific tags from xml site.pfts.vec <- as.character(unlist(settings$run$site$site.pft)) - + if (!is.null(site.pfts.vec)) { # find the name of pfts defined in the body of pecan.xml defined.pfts <- as.character(unlist(purrr::map(settings$pfts, "name"))) @@ -152,8 +152,8 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, ) } } - - + + ## write median run MEDIAN <- "50" median.samples <- list() @@ -161,14 +161,14 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, median.samples[[i]] <- quantile.samples[[i]][MEDIAN, , drop = FALSE] } names(median.samples) <- names(quantile.samples) - + if (!is.null(con)) { # Note: ignores any existing run or ensemble ids in settings ensemble.id <- PEcAn.DB::db.query(paste0( "INSERT INTO ensembles (runtype, workflow_id) ", "VALUES ('sensitivity analysis', ", format(workflow.id, scientific = FALSE), ") ", "RETURNING id"), con = con)[["id"]] - + paramlist <- paste0( "quantile=MEDIAN,trait=all,pft=", paste(lapply(settings$pfts, function(x) x[["name"]]), sep = ",") @@ -177,15 +177,15 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "INSERT INTO runs ", "(model_id, site_id, start_time, finish_time, outdir, ensemble_id, parameter_list) ", "values ('", - settings$model$id, "', '", - settings$run$site$id, "', '", - settings$run$start.date, "', '", - settings$run$end.date, "', '", - settings$run$outdir, "', ", - ensemble.id, ", '", - paramlist, "') ", + settings$model$id, "', '", + settings$run$site$id, "', '", + settings$run$start.date, "', '", + settings$run$end.date, "', '", + settings$run$outdir, "', ", + ensemble.id, ", '", + paramlist, "') ", "RETURNING id"), con = con)[["id"]] - + # associate posteriors with ensembles for (pft in defaults) { PEcAn.DB::db.query( @@ -195,7 +195,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, con = con ) } - + # associate inputs with runs if (!is.null(inputs)) { for (x in inputs) { @@ -215,7 +215,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, ensemble.id <- settings$sensitivity.analysis$ensemble.id %||% rlang::hash(settings) } medianrun <- run.id - + # create folders (cleaning up old ones if needed) if (clean) { unlink(file.path(settings$rundir, run.id)) @@ -223,7 +223,10 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } dir.create(file.path(settings$rundir, run.id), recursive = TRUE) dir.create(file.path(settings$modeloutdir, run.id), recursive = TRUE) - + + ## select single met path for SA runs without input_design (backward compatibility) + ## when input_design is provided (recommended), it supersedes this selection + ## and allows different met files per SA run # I check to make sure the path under the met is a list. # if it's specified what met needs to be used in 'met.id' under sensitivity # analysis of pecan xml we used that otherwise, I use the first met. @@ -235,7 +238,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } settings$run$inputs$met$path <- settings$run$inputs$met$path[[settings$sensitivity.analysis$met.id]] } - + # Apply input design coordination for median run median_settings <- settings if (!is.null(input_design)) { @@ -251,21 +254,20 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } } } - + median_input_info <- "" for (input_tag in names(median_settings$run$inputs)) { input_data <- median_settings$run$inputs[[input_tag]] - # At SA stage, path is ALWAYS a resolved string (thanks to input design) if (!is.null(input_data) && !is.null(input_data$path)) { - median_input_info <- paste0(median_input_info, - format(input_tag, width = 12, justify = "left"), - ": ", - input_data$path, + median_input_info <- paste0(median_input_info, + format(input_tag, width = 12, justify = "left"), + ": ", + input_data$path, "\n") } } - + # write run information to disk TODO need to print list of pft names and trait # names cat("runtype : sensitivity analysis\n", @@ -287,7 +289,8 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "outdir : ", file.path(settings$host$outdir, run.id), "\n", file = file.path(settings$rundir, run.id, "README.txt"), sep = "") - + + # write configuration do.call(my.write.config, args = list(defaults = defaults, trait.values = median.samples, @@ -299,9 +302,9 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, sep = "\n", append = TRUE ) - + run_index <- 1 - + ## loop over pfts runs <- list() for (pft_idx in seq_along(names(quantile.samples))) { @@ -309,12 +312,12 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, if (pftname == "env") { next } - + traits <- colnames(quantile.samples[[pft_idx]]) quantiles.str <- rownames(quantile.samples[[pft_idx]]) - + runs[[pftname]] <- data.frame() - + ## loop over variables for (trait in traits) { for (quantile.str in quantiles.str) { @@ -324,28 +327,28 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, quantile <- as.numeric(quantile.str) / 100 trait.samples <- median.samples trait.samples[[pft_idx]][trait] <- quantile.samples[[pft_idx]][quantile.str, trait, drop = FALSE] - + if (!is.null(con)) { paramlist <- paste0("quantile=", quantile.str, ",trait=", trait, ",pft=", pftname) insert_result <- PEcAn.DB::db.query( paste0( "INSERT INTO runs (", - "model_id, site_id, start_time, finish_time, outdir,", - " ensemble_id, parameter_list) ", + "model_id, site_id, start_time, finish_time, outdir,", + " ensemble_id, parameter_list) ", "values ('", - settings$model$id, "', '", - settings$run$site$id, "', '", - settings$run$start.date, "', '", - settings$run$end.date, "', '", - settings$run$outdir, "', ", - ensemble.id, ", '", - paramlist, + settings$model$id, "', '", + settings$run$site$id, "', '", + settings$run$start.date, "', '", + settings$run$end.date, "', '", + settings$run$outdir, "', ", + ensemble.id, ", '", + paramlist, "') ", "RETURNING id"), con = con ) run.id <- insert_result[["id"]] - + # associate posteriors with ensembles for (pft in defaults) { PEcAn.DB::db.query( @@ -356,7 +359,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, con = con ) } - + # associate inputs with runs if (!is.null(inputs)) { for (x in inputs) { @@ -378,10 +381,10 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, ) } runs[[pftname]][quantile.str, trait] <- run.id - + # Increment run counter run_index <- run_index + 1 - + # create folders (cleaning up old ones if needed) if (clean) { unlink(file.path(settings$rundir, run.id)) @@ -389,7 +392,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } dir.create(file.path(settings$rundir, run.id), recursive = TRUE) dir.create(file.path(settings$modeloutdir, run.id), recursive = TRUE) - + # Apply input design coordination for SA runs settings_copy <- settings if (!is.null(input_design)) { @@ -403,21 +406,20 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } } } - - + # Build dynamic input info string for SA run README sa_input_info <- "" for (input_tag in names(settings_copy$run$inputs)) { input_data <- settings_copy$run$inputs[[input_tag]] if (!is.null(input_data) && !is.null(input_data$path)) { - sa_input_info <- paste0(sa_input_info, - format(input_tag, width = 12, justify = "left"), - ": ", - input_data$path, + sa_input_info <- paste0(sa_input_info, + format(input_tag, width = 12, justify = "left"), + ": ", + input_data$path, "\n") } } - + # write SA run information to disk cat("runtype : sensitivity analysis\n", "workflow id : ", workflow.id, "\n", @@ -438,8 +440,8 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, "outdir : ", file.path(settings$host$outdir, run.id), "\n", file = file.path(settings$rundir, run.id, "README.txt"), sep = "") - - + + # write configuration do.call(my.write.config, args = list(defaults = defaults, trait.values = trait.samples, @@ -455,7 +457,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model, } } } - + options(scipen = scipen) return(invisible(list(runs = runs, ensemble.id = ensemble.id))) } # write.sa.configs From 5dcfe5956565a9718f4b7882f7b49c4eea770c67 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Wed, 12 Nov 2025 09:30:19 +0000 Subject: [PATCH 6/6] add explicit namespace to modifyList calls --- base/workflow/R/run.write.configs.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R index 17fd706cea..849efd3c92 100644 --- a/base/workflow/R/run.write.configs.R +++ b/base/workflow/R/run.write.configs.R @@ -238,11 +238,11 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU } # Merge trait.samples if (!is.null(existing_data$trait.samples)) { - trait.samples <- modifyList(existing_data$trait.samples, trait.samples) + trait.samples <- utils::modifyList(existing_data$trait.samples, trait.samples) } # Merge sa.samples if (!is.null(existing_data$sa.samples)) { - sa.samples <- modifyList(existing_data$sa.samples, sa.samples) + sa.samples <- utils::modifyList(existing_data$sa.samples, sa.samples) } # Merge runs.samples if (!is.null(existing_data$runs.samples)) { @@ -286,7 +286,7 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU } # Merge trait.names if (!is.null(existing_data$trait.names)) { - trait.names <- modifyList(existing_data$trait.names, trait.names) + trait.names <- utils::modifyList(existing_data$trait.names, trait.names) } } ### Save output from SA/Ensemble runs