PecanProject · divine7022 · Dec 22, 2025 · Dec 22, 2025 · Dec 22, 2025 · Dec 22, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -35,6 +35,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
  - Support for inspecting and plotting NetCDF output variables within the notebook workflow.
 - added support for soil temperature, relative humidity, soil moisture, and PPFD downscaling to `met_temporal_downscale.Gaussian_ensemble`
 - The PEcAn uncertainty analysis tutorial ("Demo 2") has been updated and reimplemented as a Quarto notebook at `documentation/tutorials/Demo_02_Uncertainty_Analysis/uncertainty.qmd`. (#3570)
+- Added `generate_OAT_SA_design()` function for sensitivity analysis input design. Unlike ensemble design which randomizes all inputs, SA design holds non-parameter inputs constant to isolate parameter effects (#3729)
 
 ### Fixed
 

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
@@ -701,6 +701,7 @@
 "withr","*","modules/allometry","Suggests",FALSE
 "withr","*","modules/data.atmosphere","Suggests",FALSE
 "withr","*","modules/data.land","Suggests",FALSE
+"withr","*","modules/uncertainty","Suggests",FALSE
 "xgboost","*","modules/assim.sequential","Suggests",FALSE
 "XML","*","base/workflow","Imports",FALSE
 "XML","*","models/biocro","Imports",FALSE

diff --git a/modules/uncertainty/DESCRIPTION b/modules/uncertainty/DESCRIPTION
@@ -45,7 +45,8 @@ Imports:
     sensitivity
 Suggests:
     testthat (>= 1.0.2),
-    mockery
+    mockery,
+    withr
 License: BSD_3_clause + file LICENSE
 Copyright: Authors
 LazyLoad: yes

diff --git a/modules/uncertainty/NAMESPACE b/modules/uncertainty/NAMESPACE
@@ -4,6 +4,7 @@ export(compute_sobol_indices)
 export(ensemble.filename)
 export(ensemble.ts)
 export(flux.uncertainty)
+export(generate_OAT_SA_design)
 export(generate_joint_ensemble_design)
 export(get.change)
 export(get.coef.var)

diff --git a/modules/uncertainty/NEWS.md b/modules/uncertainty/NEWS.md
@@ -1,3 +1,10 @@
+# PEcAn.uncertainty 1.8.3
+
+* Added `generate_OAT_SA_design()` for creating input design matrices for
+sensitivity analysis. This function ensures non-parameter inputs
+(met, IC, soil) are held constant, which is required for valid 
+variance decomposition in one-at-a-time (OAT) sensitivity analysis (#3729)
+
 # PEcAn.uncertainty 1.8.2
 
 * PEcAn.uncertainty is now distributed under the BSD 3-clause license instead of the NCSA Open Source license.

diff --git a/modules/uncertainty/R/generate_OAT_SA_design.R b/modules/uncertainty/R/generate_OAT_SA_design.R
@@ -0,0 +1,151 @@
+#' Generate One-At-a-Time (OAT) input design for sensitivity analysis
+#'
+#' Creates an input design matrix for sensitivity analysis where non-parameter
+#' inputs (met, IC, soil, etc.) are held constant while parameters vary
+#' one-at-a-time across quantiles. This differs from ensemble design where
+#' all inputs vary together.
+#'
+#' @param settings PEcAn settings object. This function directly uses:
+#'   \itemize{
+#'     \item \code{settings$outdir} - Output directory path for samples.Rdata
+#'     \item \code{settings$pfts} - List of PFTs (extracts \code{posterior.files})
+#'     \item \code{settings$ensemble$samplingspace} - Input types to include in design
+#'   }
+#'   When \code{sa_samples = NULL}, settings is passed to
+#'   \code{\link{get.parameter.samples}} which additionally requires:
+#'   \itemize{
+#'     \item \code{settings$sensitivity.analysis} - SA quantile configuration
+#'     \item \code{settings$database$bety} - Database connection (optional)
+#'     \item \code{settings$host$name} - Host name for dbfile.check (optional)
+#'   }
+#' @param sa_samples Optional. Pre-loaded SA samples (named list with one
+#'   element per PFT, each a matrix with quantiles as rows and traits as columns).
+#'   If NULL (default), samples are generated via \code{get.parameter.samples}.
+#'
+#' @return list with component X: a data.frame with columns for each input type
+#'   and one row per SA run. Non-parameter columns are all 1 (constant).
+#'
+#' @details 
+#' For sensitivity analysis, we must isolate the effect of each 
+#' parameter by holding all other inputs constant. The param column contains
+#' sequential indices (1, 2, 3, ...) matching the SA run order in 
+#' \code{write.sa.configs}. All other columns (met, ic, soil, etc.) are set to 1,
+#' meaning the first input file is always used.
+#'
+#' Note on internal dependencies
+#'
+#' If sa_samples is NULL we hand off to get.parameter.samples(), which does
+#' the work of finding and loading parameter distributions.
+#'
+#' In practice it:
+#' - uses pft$posterior.files directly when it is defined (an Rdata file with
+#'   post.distns or prior.distns),
+#' - otherwise figures out an output directory from pft$outdir or, if needed,
+#'   via pft$posteriorid in the database,
+#' - then looks in that directory for post.distns.Rdata, falling back to
+#'   prior.distns.Rdata,
+#' - and, for MCMC posteriors, looks up trait.mcmc*.Rdata linked to the same
+#'   posteriorid or a trait.mcmc.Rdata file in that directory.
+#'
+#' @examples
+#' \dontrun{
+#' # Generate SA design for a multi-site run
+#' sa_design <- generate_OAT_SA_design(settings)
+#' 
+#' # View the design matrix
+#' print(sa_design$X)
+#' #   param met ic soil
+#' # 1     1   1  1    1   # Median run
+#' # 2     2   1  1    1   # trait1 @ q=2.3%
+#' # 3     3   1  1    1   # trait1 @ q=15.9%
+#' # 4     4   1  1    1   # trait1 @ q=84.1%
+#' # ...
+#' 
+#' # With pre-loaded sa_samples (skips get.parameter.samples call)
+#' load("samples.Rdata")
+#' sa_design <- generate_OAT_SA_design(settings, sa_samples = sa.samples)
+#' }
+#' @export
+#' @author Akash B V
+#' @importFrom rlang %||%
+generate_OAT_SA_design <- function(settings, sa_samples = NULL) {
+
+  samples_file <- file.path(settings$outdir, "samples.Rdata")
+
+  if (is.null(sa_samples)) {
+
+    posterior.files <- settings$pfts %>%
+      purrr::map_chr("posterior.files", .default = NA_character_)
+
+    # generate parameter samples - sa.samples created from quantiles
+    PEcAn.uncertainty::get.parameter.samples(
+      settings,
+      posterior.files = posterior.files
+    )
+
+    samples_env <- new.env()
+    load(samples_file, envir = samples_env)
+    sa_samples <- samples_env$sa.samples
+
+    if (is.null(sa_samples)) {
+      PEcAn.logger::logger.severe(
+        "sa.samples not found in samples.Rdata.",
+        "Ensure sensitivity.analysis is configured in settings."
+      )
+    }
+  }
+
+  # calculate total number of SA runs
+  # 1 median + (traits * non-median quantiles) per PFT
+  MEDIAN <- "50"
+  num_sa_runs <- 1 # start with median run
+
+  for (pft_name in names(sa_samples)) {
+    if (pft_name == "env") next
+
+    pft_samples <- sa_samples[[pft_name]]
+    n_traits <- ncol(pft_samples)
+    quantile_names <- rownames(pft_samples)
+    n_non_median <- sum(quantile_names != MEDIAN)
+
+    # add runs for this pft: (traits) * (non-median quantiles)
+    num_sa_runs <- num_sa_runs + (n_traits * n_non_median)
+  }
+
+  # get input types from samplingspace
+  samp <- settings$ensemble$samplingspace
+  input_types <- names(samp)
+  input_types[input_types == "parameters"] <- "param"
+
+  if (!"param" %in% input_types) {
+    input_types <- c("param", input_types)
+  }
+
+  # build design matrix
+  # key difference from ensemble design:
+  # - ensemble: all columns get random/quasi-random indices
+  # - SA (OAT): param column = sequential index, ALL other columns = 1
+  #
+  # the "1" means: use the FIRST (and only) input file for that type.
+  # this ensures all SA runs use the SAME met, same ic, etc.
+
+  design_list <- list()
+
+  for (input_type in input_types) {
+    if (input_type == "param") {
+      # sequential indices map to SA run order
+      #   1 = median run
+      #   2 = first (pft, trait, quantile) combination
+      #   3 = second (pft, trait, quantile) combination
+      #   ...
+      design_list[[input_type]] <- seq_len(num_sa_runs)
+    } else {
+      # all other inputs constant(always use first input file)
+      design_list[[input_type]] <- rep(1L, num_sa_runs)
+    }
+  }
+
+  design_matrix <- data.frame(design_list)
+
+  return(list(X = design_matrix))
+}
diff --git a/modules/uncertainty/R/generate_joint_ensemble_design.R b/modules/uncertainty/R/generate_joint_ensemble_design.R
@@ -3,17 +3,54 @@
 #' all sites in a multi-site run. This function generates sample indices that
 #' are shared across sites to ensure consistent parameter sampling.
 #'
-#' @param settings A PEcAn settings object containing ensemble configuration
-#' @param ensemble_size Integer specifying the number of ensemble members
-#' Since the `input_design` will only be generated once for the entire model run,
-#' the only situation, where we might want to recycle the existing `ensemble_samples`,
-#' is when we split and submit the larger SDA runs (e.g., 8,000 sites) into 
-#' smaller SDA experiments (e.g., 100 sites per job), where we want to keep using 
-#' the same parameters rather than creating new parameters for each job.
-#' @param sobol for activating sobol
-#' @return  A list containing ensemble samples and indices
-#'   If `sobol = TRUE`, the list will be a `sensitivity::soboljansen()` 
-#'   result and will contain the components documented therein.
+#' @param settings PEcAn settings object. This function directly uses:
+#'   \itemize{
+#'     \item \code{settings$outdir} - Output directory path for samples.Rdata
+#'     \item \code{settings$pfts} - List of PFTs (extracts \code{posterior.files})
+#'     \item \code{settings$ensemble$samplingspace} - Input sampling configuration
+#'     \item \code{settings$run$inputs} - Input paths for each input type
+#'   }
+#'   When samples.Rdata doesn't exist, settings is passed to
+#'   \code{\link{get.parameter.samples}} which additionally requires:
+#'   \itemize{
+#'     \item \code{settings$ensemble} - Ensemble configuration
+#'     \item \code{settings$database$bety} - Database connection (optional)
+#'     \item \code{settings$host$name} - Host name for dbfile.check (optional)
+#'   }
+#' @param ensemble_size Integer specifying the number of ensemble members.
+#'   The input_design is generated once for the entire model run. You might
+#'   want to recycle existing ensemble_samples when splitting larger runs
+#'   into smaller jobs while keeping the same parameters.
+#' @param sobol Logical. If TRUE, returns a \code{sensitivity::soboljansen}
+#'   object for Sobol sensitivity analysis.
+#'
+#' @return A list containing ensemble samples and indices.
+#'   If \code{sobol = FALSE}, returns \code{list(X = design_matrix)}.
+#'   If \code{sobol = TRUE}, returns a \code{sensitivity::soboljansen()}
+#'   result object with the design matrix in \code{$X} plus additional
+#'   components for Sobol index calculations.
+#'
+#' @details
+#' Note on internal dependencies
+#'
+#' If samples.Rdata doesn't exist we call get.parameter.samples(), which loads
+#' parameter distributions.
+#'
+#' In practice it:
+#' - uses pft$posterior.files directly when it is defined (an Rdata file with
+#'   post.distns or prior.distns),
+#' - otherwise figures out an output directory from pft$outdir or, if needed,
+#'   via pft$posteriorid in the database,
+#' - then looks in that directory for post.distns.Rdata, falling back to
+#'   prior.distns.Rdata,
+#' - and, for MCMC posteriors, looks up trait.mcmc*.Rdata linked to the same
+#'   posteriorid or a trait.mcmc.Rdata file in that directory.
+#'
+#' Difference from generate_OAT_SA_design: This function samples inputs
+#' randomly or quasi-randomly, while generate_OAT_SA_design holds all
+#' non-parameter inputs constant to isolate parameter effects.
+#'
+#'
 #' @export
 
 generate_joint_ensemble_design <- function(settings,

diff --git a/modules/uncertainty/man/generate_OAT_SA_design.Rd b/modules/uncertainty/man/generate_OAT_SA_design.Rd