From c1f6aa70d1e6d3d79d7c10cee5b96209ae2226e1 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 1 Dec 2025 15:31:14 -0500 Subject: [PATCH 1/9] Update computation config. --- .../assim.sequential/inst/anchor/NA_downscale_script.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R index a066bebbed..2b915a28de 100644 --- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R +++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R @@ -244,7 +244,7 @@ for (i in seq_along(date)) { for (j in seq_along(variables)) { # setup folder. variable <- variables[j] - folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts_noGEDI_rf"), paste0(variables[j], "_", date[i])) + folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts_noGEDI_debias_rf"), paste0(variables[j], "_", date[i])) dir.create(folder.path) saveRDS(list(settings = settings, analysis.yr = analysis.yr, @@ -254,8 +254,8 @@ for (i in seq_along(date)) { folder.path = folder.path, base.map.dir = base.map.dir, method = method, - cores = cores, - outdir = file.path(outdir, "downscale_maps_analysis_lc_ts_noGEDI_rf")), + cores = cores-1, + outdir = file.path(outdir, "downscale_maps_analysis_lc_ts_noGEDI_debias_rf")), file = file.path(folder.path, "dat.rds")) # prepare for qsub. jobsh <- c("#!/bin/bash -l", @@ -268,7 +268,7 @@ for (i in seq_along(date)) { jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh) writeLines(jobsh, con = file.path(folder.path, "job.sh")) # qsub command. - qsub <- "qsub -l h_rt=24:00:00 -l mem_per_core=4G -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash" + qsub <- "qsub -l h_rt=10:00:00 -l mem_per_core=8G -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash" qsub <- gsub("@CORES@", cores, qsub) qsub <- gsub("@NAME@", paste0("ds_", i, "_", j), qsub) qsub <- gsub("@STDOUT@", file.path(folder.path, "stdout.log"), qsub) From 1eeaeff0f13f9b385dd37e859f5fd119512f0e12 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 1 Dec 2025 20:00:10 -0500 Subject: [PATCH 2/9] Add a flag to decide whether to generate the samples file. --- .../assim.sequential/R/sda.enkf_MultiSite.R | 38 ++++++++++++++----- .../assim.sequential/R/sda.enkf_parallel.R | 20 +++++++--- .../R/generate_joint_ensemble_design.R | 4 +- .../man/generate_joint_ensemble_design.Rd | 11 +++++- 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index f037fc96bc..0e611cf4c8 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -317,22 +317,40 @@ sda.enkf.multisite <- function(settings, # weight matrix wt.mat <- matrix(NA, nrow = nens, ncol = nt) # Reading param samples------------------------------- - #create params object using samples generated from TRAITS functions + # create params object using samples generated from TRAITS functions if(restart_flag){ new.params <- new.params } else { - if(!file.exists(file.path(settings$outdir, "samples.Rdata"))) PEcAn.logger::logger.severe("samples.Rdata cannot be found. Make sure you generate samples by running the get.parameter.samples function before running SDA.") - #Generate parameter needs to be run before this to generate the samples. This is hopefully done in the main workflow. - if(is.null(ensemble.samples)){ + gen.samples <- FALSE + if (is.null(ensemble.samples)) { + if (file.exists(file.path(settings$outdir, "samples.Rdata"))) { + load(file.path(settings$outdir, "samples.Rdata")) + } else { + gen.samples <- TRUE + } + } + # get the joint input design. + # here we are looping over sites + # to make sure we are grabbing the complete input lists. + for (i in seq_along(settings)) { + # get the input names that are registered for sampling. + names.sampler <- names(settings$ensemble$samplingspace) + # get the input names for the current site. + names.site.input <- names(settings[[i]]$run$inputs) + # remove parameters field from the list. + names.sampler <- names.sampler[-which(names.sampler == "parameters")] + # find a site that has all registered inputs except for the parameter field. + if (all(names.sampler %in% names.site.input)) { + input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[i]], + ensemble_size = nens)[[1]] + break + } + } + # if we generated new samples file within the `generate_joint_ensemble_design` function. + if (gen.samples) { load(file.path(settings$outdir, "samples.Rdata")) } - #reformatting params new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) - # if it's not a restart run, we will generate the joint input design. - # get the joint input design. - input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[1]], - ensemble_samples = ensemble.samples, - ensemble_size = nens)[[1]] } ###------------------------------------------------------------------------------------------------### ### loop over time ### diff --git a/modules/assim.sequential/R/sda.enkf_parallel.R b/modules/assim.sequential/R/sda.enkf_parallel.R index 937989f1a3..590497f6a4 100644 --- a/modules/assim.sequential/R/sda.enkf_parallel.R +++ b/modules/assim.sequential/R/sda.enkf_parallel.R @@ -194,12 +194,15 @@ sda.enkf_local <- function(settings, ### set up for data assimilation ### ###-------------------------------------------------------------------###---- # Reading param samples------------------------------- - #create params object using samples generated from TRAITS functions + # create params object using samples generated from TRAITS functions + gen.samples <- FALSE if (is.null(ensemble.samples)) { - load(file.path(settings$outdir, "samples.Rdata")) + if (file.exists(file.path(settings$outdir, "samples.Rdata"))) { + load(file.path(settings$outdir, "samples.Rdata")) + } else { + gen.samples <- TRUE + } } - #reformatting params - new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) # get the joint input design. for (i in seq_along(settings)) { # get the input names that are registered for sampling. @@ -210,12 +213,17 @@ sda.enkf_local <- function(settings, names.sampler <- names.sampler[-which(names.sampler == "parameters")] # find a site that has all registered inputs except for the parameter field. if (all(names.sampler %in% names.site.input)) { - input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[i]], - ensemble_samples = ensemble.samples, + input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[i]], ensemble_size = nens)[[1]] break } } + # reformatting params. + # if we generated new samples file within the `generate_joint_ensemble_design` function. + if (gen.samples) { + load(file.path(settings$outdir, "samples.Rdata")) + } + new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) ###------------------------------------------------------------------------------------------------### ### loop over time ### ###------------------------------------------------------------------------------------------------### diff --git a/modules/uncertainty/R/generate_joint_ensemble_design.R b/modules/uncertainty/R/generate_joint_ensemble_design.R index 02c9289871..a80c8782fa 100644 --- a/modules/uncertainty/R/generate_joint_ensemble_design.R +++ b/modules/uncertainty/R/generate_joint_ensemble_design.R @@ -5,6 +5,7 @@ #' #' @param settings A PEcAn settings object containing ensemble configuration #' @param ensemble_size Integer specifying the number of ensemble members +#' @param gen.samples Logical: logical variable determine if we want to generate the samples. #' Since the `input_design` will only be generated once for the entire model run, #' the only situation, where we might want to recycle the existing `ensemble_samples`, #' is when we split and submit the larger SDA runs (e.g., 8,000 sites) into @@ -18,6 +19,7 @@ generate_joint_ensemble_design <- function(settings, ensemble_size, + gen.samples = FALSE, sobol = FALSE) { if (sobol) { ensemble_size <- as.numeric(ensemble_size) * 2 @@ -58,7 +60,7 @@ generate_joint_ensemble_design <- function(settings, design_list[[input_tag]] <- input_result$ids } # Sample parameters if we don't have it. - if (!file.exists(file.path(settings$outdir, "samples.Rdata"))) { + if (gen.samples) { PEcAn.uncertainty::get.parameter.samples( settings, ensemble.size = ensemble_size, diff --git a/modules/uncertainty/man/generate_joint_ensemble_design.Rd b/modules/uncertainty/man/generate_joint_ensemble_design.Rd index 717c96103f..e2e796b243 100644 --- a/modules/uncertainty/man/generate_joint_ensemble_design.Rd +++ b/modules/uncertainty/man/generate_joint_ensemble_design.Rd @@ -7,12 +7,19 @@ Creates a joint ensemble design that maintains parameter correlations across all sites in a multi-site run. This function generates sample indices that are shared across sites to ensure consistent parameter sampling.} \usage{ -generate_joint_ensemble_design(settings, ensemble_size, sobol = FALSE) +generate_joint_ensemble_design( + settings, + ensemble_size, + gen.samples = FALSE, + sobol = FALSE +) } \arguments{ \item{settings}{A PEcAn settings object containing ensemble configuration} -\item{ensemble_size}{Integer specifying the number of ensemble members +\item{ensemble_size}{Integer specifying the number of ensemble members} + +\item{gen.samples}{Logical: logical variable determine if we want to generate the samples. Since the `input_design` will only be generated once for the entire model run, the only situation, where we might want to recycle the existing `ensemble_samples`, is when we split and submit the larger SDA runs (e.g., 8,000 sites) into From 6f92baa322aaed0b6dc92f0e33beadf4fc8ce96a Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 1 Dec 2025 20:17:33 -0500 Subject: [PATCH 3/9] Update the change log for the previous PR (#3634). --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10c7650918..c53e58531b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,8 @@ For more information about this file see also [Keep a Changelog](http://keepacha ## Unreleased ### Added - +- Add function `qsub_sda()` for submitting SDA batch jobs by splitting a large number of sites into multiple small groups of sites (#3634). +- Add function `generate_joint_ensemble_design()` into the current SDA workflows to maintain the joint input sampling (#3634). - Add function `clip_and_save_raster_file()` for subsetting rasters to match a polygon of interest (#3537). - Add CH4 and N2O to standard_vars in PEcAn.utils - New function `sat_vapor_pressure()` added for computing saturation vapor pressure from temperature using various methods. From 19e8ef4cf4328580a26fc82245c48a1631a42057 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 1 Dec 2025 20:17:51 -0500 Subject: [PATCH 4/9] Update the document for the continental SDA settings. --- book_source/03_topical_pages/03_pecan_xml.Rmd | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/book_source/03_topical_pages/03_pecan_xml.Rmd b/book_source/03_topical_pages/03_pecan_xml.Rmd index f99455cf36..4f115f3662 100644 --- a/book_source/03_topical_pages/03_pecan_xml.Rmd +++ b/book_source/03_topical_pages/03_pecan_xml.Rmd @@ -766,6 +766,13 @@ The following tags can be used for state data assimilation. More detailed inform Local.support 1 5 + + + 28 + 40 + + qsub -l h_rt=24:00:00 -l mem_per_core=4G -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash + AbvGrndWood @@ -860,6 +867,8 @@ The following tags can be used for state data assimilation. More detailed inform * **scalef** : [optional] The scale parameter used for the localization operation, the smaller the value is, the sites are more isolated. * **chains** : [optional] The number of chains needed to be estimated during the MCMC sampling process. * **_NOTE:_** If TRUE, you must also assign a vector of trait names to pick.trait.params within the sda.enkf function. +* **batch.settings** : [optional] The configurations are used to set up batch job submissions. It will be used only if you use the `qsub_sda` function for the SDA job submissions with a large number of sites (>500). The `general.job` contains the number of CPUs per job and the number of jobs you would like to submit to the entire SDA experiment (e.g., 8,000 sites with 40 folders will end up with 200 sites per job). +The `qsub.cmd` contains the string for configuring extra qsub arguments. * **state.variable** : [required] State variable that is to be assimilated (in PEcAn standard format, with pre-specified variable name, unit, and range). Four variables can be assimilated so far: including Aboveground biomass (AbvGrndWood), LAI, SoilMoistFrac, and Soil carbon (TotSoilCarb). * **Obs_Prep** : [required] This section will be handled through the SDA_Obs_Assembler function, if you want to proceed with this function, this section is required. * **spin.up** : [required] start.date and end.date for model spin up. From 0a06b57e6c0f5949399a05e0c97822b6922ba6d1 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 2 Dec 2025 10:10:35 -0500 Subject: [PATCH 5/9] Switch argument name. --- modules/assim.sequential/R/sda.enkf_MultiSite.R | 6 +++--- modules/assim.sequential/R/sda.enkf_parallel.R | 6 +++--- modules/uncertainty/R/generate_joint_ensemble_design.R | 5 +++-- modules/uncertainty/man/generate_joint_ensemble_design.Rd | 5 +++-- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 0e611cf4c8..4567169043 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -321,12 +321,12 @@ sda.enkf.multisite <- function(settings, if(restart_flag){ new.params <- new.params } else { - gen.samples <- FALSE + generate_samples <- FALSE if (is.null(ensemble.samples)) { if (file.exists(file.path(settings$outdir, "samples.Rdata"))) { load(file.path(settings$outdir, "samples.Rdata")) } else { - gen.samples <- TRUE + generate_samples <- TRUE } } # get the joint input design. @@ -347,7 +347,7 @@ sda.enkf.multisite <- function(settings, } } # if we generated new samples file within the `generate_joint_ensemble_design` function. - if (gen.samples) { + if (generate_samples) { load(file.path(settings$outdir, "samples.Rdata")) } new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) diff --git a/modules/assim.sequential/R/sda.enkf_parallel.R b/modules/assim.sequential/R/sda.enkf_parallel.R index 590497f6a4..f770547297 100644 --- a/modules/assim.sequential/R/sda.enkf_parallel.R +++ b/modules/assim.sequential/R/sda.enkf_parallel.R @@ -195,12 +195,12 @@ sda.enkf_local <- function(settings, ###-------------------------------------------------------------------###---- # Reading param samples------------------------------- # create params object using samples generated from TRAITS functions - gen.samples <- FALSE + generate_samples <- FALSE if (is.null(ensemble.samples)) { if (file.exists(file.path(settings$outdir, "samples.Rdata"))) { load(file.path(settings$outdir, "samples.Rdata")) } else { - gen.samples <- TRUE + generate_samples <- TRUE } } # get the joint input design. @@ -220,7 +220,7 @@ sda.enkf_local <- function(settings, } # reformatting params. # if we generated new samples file within the `generate_joint_ensemble_design` function. - if (gen.samples) { + if (generate_samples) { load(file.path(settings$outdir, "samples.Rdata")) } new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) diff --git a/modules/uncertainty/R/generate_joint_ensemble_design.R b/modules/uncertainty/R/generate_joint_ensemble_design.R index a80c8782fa..12cb372bea 100644 --- a/modules/uncertainty/R/generate_joint_ensemble_design.R +++ b/modules/uncertainty/R/generate_joint_ensemble_design.R @@ -5,7 +5,8 @@ #' #' @param settings A PEcAn settings object containing ensemble configuration #' @param ensemble_size Integer specifying the number of ensemble members -#' @param gen.samples Logical: logical variable determine if we want to generate the samples. +#' @param generate_samples Logical: logical variable determine if we want to generate the samples. +#' Default is TRUE. #' Since the `input_design` will only be generated once for the entire model run, #' the only situation, where we might want to recycle the existing `ensemble_samples`, #' is when we split and submit the larger SDA runs (e.g., 8,000 sites) into @@ -19,7 +20,7 @@ generate_joint_ensemble_design <- function(settings, ensemble_size, - gen.samples = FALSE, + generate_samples = TRUE, sobol = FALSE) { if (sobol) { ensemble_size <- as.numeric(ensemble_size) * 2 diff --git a/modules/uncertainty/man/generate_joint_ensemble_design.Rd b/modules/uncertainty/man/generate_joint_ensemble_design.Rd index e2e796b243..5d92d29c83 100644 --- a/modules/uncertainty/man/generate_joint_ensemble_design.Rd +++ b/modules/uncertainty/man/generate_joint_ensemble_design.Rd @@ -10,7 +10,7 @@ are shared across sites to ensure consistent parameter sampling.} generate_joint_ensemble_design( settings, ensemble_size, - gen.samples = FALSE, + generate_samples = TRUE, sobol = FALSE ) } @@ -19,7 +19,8 @@ generate_joint_ensemble_design( \item{ensemble_size}{Integer specifying the number of ensemble members} -\item{gen.samples}{Logical: logical variable determine if we want to generate the samples. +\item{generate_samples}{Logical: logical variable determine if we want to generate the samples. +Default is TRUE. Since the `input_design` will only be generated once for the entire model run, the only situation, where we might want to recycle the existing `ensemble_samples`, is when we split and submit the larger SDA runs (e.g., 8,000 sites) into From cd8128711a9b7f2d9610a5aa8a30dbb80a9f8754 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 2 Dec 2025 15:20:03 -0500 Subject: [PATCH 6/9] Change the logic of determining whether to load the samples.Rdata file. --- modules/assim.sequential/R/sda.enkf_MultiSite.R | 9 ++++----- modules/assim.sequential/R/sda.enkf_parallel.R | 12 +++++++----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 4567169043..5a141bc463 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -321,12 +321,11 @@ sda.enkf.multisite <- function(settings, if(restart_flag){ new.params <- new.params } else { - generate_samples <- FALSE if (is.null(ensemble.samples)) { - if (file.exists(file.path(settings$outdir, "samples.Rdata"))) { - load(file.path(settings$outdir, "samples.Rdata")) - } else { + if (!file.exists(file.path(settings$outdir, "samples.Rdata"))) { generate_samples <- TRUE + } else { + generate_samples <- FALSE } } # get the joint input design. @@ -347,7 +346,7 @@ sda.enkf.multisite <- function(settings, } } # if we generated new samples file within the `generate_joint_ensemble_design` function. - if (generate_samples) { + if (is.null(ensemble.samples)) { load(file.path(settings$outdir, "samples.Rdata")) } new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) diff --git a/modules/assim.sequential/R/sda.enkf_parallel.R b/modules/assim.sequential/R/sda.enkf_parallel.R index f770547297..fc3985bdd5 100644 --- a/modules/assim.sequential/R/sda.enkf_parallel.R +++ b/modules/assim.sequential/R/sda.enkf_parallel.R @@ -197,10 +197,12 @@ sda.enkf_local <- function(settings, # create params object using samples generated from TRAITS functions generate_samples <- FALSE if (is.null(ensemble.samples)) { - if (file.exists(file.path(settings$outdir, "samples.Rdata"))) { - load(file.path(settings$outdir, "samples.Rdata")) - } else { - generate_samples <- TRUE + if (is.null(ensemble.samples)) { + if (!file.exists(file.path(settings$outdir, "samples.Rdata"))) { + generate_samples <- TRUE + } else { + generate_samples <- FALSE + } } } # get the joint input design. @@ -220,7 +222,7 @@ sda.enkf_local <- function(settings, } # reformatting params. # if we generated new samples file within the `generate_joint_ensemble_design` function. - if (generate_samples) { + if (is.null(ensemble.samples)) { load(file.path(settings$outdir, "samples.Rdata")) } new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) From 3d4af807cd0f128ba781dfea913a544531b4df68 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 2 Dec 2025 18:20:49 -0500 Subject: [PATCH 7/9] Revert changes. --- .../assim.sequential/R/sda.enkf_MultiSite.R | 37 +++++-------------- .../assim.sequential/R/sda.enkf_parallel.R | 22 +++-------- .../R/generate_joint_ensemble_design.R | 15 +++----- .../man/generate_joint_ensemble_design.Rd | 12 +----- 4 files changed, 24 insertions(+), 62 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 5a141bc463..f037fc96bc 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -317,39 +317,22 @@ sda.enkf.multisite <- function(settings, # weight matrix wt.mat <- matrix(NA, nrow = nens, ncol = nt) # Reading param samples------------------------------- - # create params object using samples generated from TRAITS functions + #create params object using samples generated from TRAITS functions if(restart_flag){ new.params <- new.params } else { - if (is.null(ensemble.samples)) { - if (!file.exists(file.path(settings$outdir, "samples.Rdata"))) { - generate_samples <- TRUE - } else { - generate_samples <- FALSE - } - } - # get the joint input design. - # here we are looping over sites - # to make sure we are grabbing the complete input lists. - for (i in seq_along(settings)) { - # get the input names that are registered for sampling. - names.sampler <- names(settings$ensemble$samplingspace) - # get the input names for the current site. - names.site.input <- names(settings[[i]]$run$inputs) - # remove parameters field from the list. - names.sampler <- names.sampler[-which(names.sampler == "parameters")] - # find a site that has all registered inputs except for the parameter field. - if (all(names.sampler %in% names.site.input)) { - input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[i]], - ensemble_size = nens)[[1]] - break - } - } - # if we generated new samples file within the `generate_joint_ensemble_design` function. - if (is.null(ensemble.samples)) { + if(!file.exists(file.path(settings$outdir, "samples.Rdata"))) PEcAn.logger::logger.severe("samples.Rdata cannot be found. Make sure you generate samples by running the get.parameter.samples function before running SDA.") + #Generate parameter needs to be run before this to generate the samples. This is hopefully done in the main workflow. + if(is.null(ensemble.samples)){ load(file.path(settings$outdir, "samples.Rdata")) } + #reformatting params new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) + # if it's not a restart run, we will generate the joint input design. + # get the joint input design. + input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[1]], + ensemble_samples = ensemble.samples, + ensemble_size = nens)[[1]] } ###------------------------------------------------------------------------------------------------### ### loop over time ### diff --git a/modules/assim.sequential/R/sda.enkf_parallel.R b/modules/assim.sequential/R/sda.enkf_parallel.R index fc3985bdd5..937989f1a3 100644 --- a/modules/assim.sequential/R/sda.enkf_parallel.R +++ b/modules/assim.sequential/R/sda.enkf_parallel.R @@ -194,17 +194,12 @@ sda.enkf_local <- function(settings, ### set up for data assimilation ### ###-------------------------------------------------------------------###---- # Reading param samples------------------------------- - # create params object using samples generated from TRAITS functions - generate_samples <- FALSE + #create params object using samples generated from TRAITS functions if (is.null(ensemble.samples)) { - if (is.null(ensemble.samples)) { - if (!file.exists(file.path(settings$outdir, "samples.Rdata"))) { - generate_samples <- TRUE - } else { - generate_samples <- FALSE - } - } + load(file.path(settings$outdir, "samples.Rdata")) } + #reformatting params + new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) # get the joint input design. for (i in seq_along(settings)) { # get the input names that are registered for sampling. @@ -215,17 +210,12 @@ sda.enkf_local <- function(settings, names.sampler <- names.sampler[-which(names.sampler == "parameters")] # find a site that has all registered inputs except for the parameter field. if (all(names.sampler %in% names.site.input)) { - input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[i]], + input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[i]], + ensemble_samples = ensemble.samples, ensemble_size = nens)[[1]] break } } - # reformatting params. - # if we generated new samples file within the `generate_joint_ensemble_design` function. - if (is.null(ensemble.samples)) { - load(file.path(settings$outdir, "samples.Rdata")) - } - new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) ###------------------------------------------------------------------------------------------------### ### loop over time ### ###------------------------------------------------------------------------------------------------### diff --git a/modules/uncertainty/R/generate_joint_ensemble_design.R b/modules/uncertainty/R/generate_joint_ensemble_design.R index 12cb372bea..f6af9c8750 100644 --- a/modules/uncertainty/R/generate_joint_ensemble_design.R +++ b/modules/uncertainty/R/generate_joint_ensemble_design.R @@ -5,8 +5,6 @@ #' #' @param settings A PEcAn settings object containing ensemble configuration #' @param ensemble_size Integer specifying the number of ensemble members -#' @param generate_samples Logical: logical variable determine if we want to generate the samples. -#' Default is TRUE. #' Since the `input_design` will only be generated once for the entire model run, #' the only situation, where we might want to recycle the existing `ensemble_samples`, #' is when we split and submit the larger SDA runs (e.g., 8,000 sites) into @@ -20,7 +18,6 @@ generate_joint_ensemble_design <- function(settings, ensemble_size, - generate_samples = TRUE, sobol = FALSE) { if (sobol) { ensemble_size <- as.numeric(ensemble_size) * 2 @@ -37,18 +34,18 @@ generate_joint_ensemble_design <- function(settings, unlist() ] samp.ordered <- samp[c(order, names(samp)[!(names(samp) %in% order)])] - + # loop over inputs. for (i in seq_along(samp.ordered)) { input_tag <- names(samp.ordered)[i] parent_name <- samp.ordered[[i]]$parent - + parent_ids <- if (!is.null(parent_name)) { sampled_inputs[[parent_name]] } else { NULL } - + input_result <- PEcAn.uncertainty::input.ens.gen( settings = settings, ensemble_size = ensemble_size, @@ -56,12 +53,12 @@ generate_joint_ensemble_design <- function(settings, method = samp.ordered[[i]]$method, parent_ids = parent_ids ) - + sampled_inputs[[input_tag]] <- input_result$ids design_list[[input_tag]] <- input_result$ids } # Sample parameters if we don't have it. - if (gen.samples) { + if (!file.exists(file.path(settings$outdir, "samples.Rdata"))) { PEcAn.uncertainty::get.parameter.samples( settings, ensemble.size = ensemble_size, @@ -73,7 +70,7 @@ generate_joint_ensemble_design <- function(settings, # parameters with replacement. design_list[["param"]] <- seq_len(ensemble_size) design_matrix <- data.frame(design_list) - + if (sobol) { half <- floor(ensemble_size / 2) X1 <- design_matrix[1:half, ] diff --git a/modules/uncertainty/man/generate_joint_ensemble_design.Rd b/modules/uncertainty/man/generate_joint_ensemble_design.Rd index 5d92d29c83..717c96103f 100644 --- a/modules/uncertainty/man/generate_joint_ensemble_design.Rd +++ b/modules/uncertainty/man/generate_joint_ensemble_design.Rd @@ -7,20 +7,12 @@ Creates a joint ensemble design that maintains parameter correlations across all sites in a multi-site run. This function generates sample indices that are shared across sites to ensure consistent parameter sampling.} \usage{ -generate_joint_ensemble_design( - settings, - ensemble_size, - generate_samples = TRUE, - sobol = FALSE -) +generate_joint_ensemble_design(settings, ensemble_size, sobol = FALSE) } \arguments{ \item{settings}{A PEcAn settings object containing ensemble configuration} -\item{ensemble_size}{Integer specifying the number of ensemble members} - -\item{generate_samples}{Logical: logical variable determine if we want to generate the samples. -Default is TRUE. +\item{ensemble_size}{Integer specifying the number of ensemble members Since the `input_design` will only be generated once for the entire model run, the only situation, where we might want to recycle the existing `ensemble_samples`, is when we split and submit the larger SDA runs (e.g., 8,000 sites) into From 06b78e0acef3119ce60ca006f53c995d9288bda2 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 2 Dec 2025 18:25:42 -0500 Subject: [PATCH 8/9] Revert change. --- modules/uncertainty/R/generate_joint_ensemble_design.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/uncertainty/R/generate_joint_ensemble_design.R b/modules/uncertainty/R/generate_joint_ensemble_design.R index f6af9c8750..02c9289871 100644 --- a/modules/uncertainty/R/generate_joint_ensemble_design.R +++ b/modules/uncertainty/R/generate_joint_ensemble_design.R @@ -34,18 +34,18 @@ generate_joint_ensemble_design <- function(settings, unlist() ] samp.ordered <- samp[c(order, names(samp)[!(names(samp) %in% order)])] - + # loop over inputs. for (i in seq_along(samp.ordered)) { input_tag <- names(samp.ordered)[i] parent_name <- samp.ordered[[i]]$parent - + parent_ids <- if (!is.null(parent_name)) { sampled_inputs[[parent_name]] } else { NULL } - + input_result <- PEcAn.uncertainty::input.ens.gen( settings = settings, ensemble_size = ensemble_size, @@ -53,7 +53,7 @@ generate_joint_ensemble_design <- function(settings, method = samp.ordered[[i]]$method, parent_ids = parent_ids ) - + sampled_inputs[[input_tag]] <- input_result$ids design_list[[input_tag]] <- input_result$ids } @@ -70,7 +70,7 @@ generate_joint_ensemble_design <- function(settings, # parameters with replacement. design_list[["param"]] <- seq_len(ensemble_size) design_matrix <- data.frame(design_list) - + if (sobol) { half <- floor(ensemble_size / 2) X1 <- design_matrix[1:half, ] From 90da0959832ac0632b68ccf630cb591ac6896db4 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 2 Dec 2025 20:55:34 -0500 Subject: [PATCH 9/9] Add the input check. --- .../assim.sequential/R/sda.enkf_MultiSite.R | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index f037fc96bc..33d3913ee6 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -329,6 +329,30 @@ sda.enkf.multisite <- function(settings, #reformatting params new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) # if it's not a restart run, we will generate the joint input design. + # following code tries to catch if there is any mismatch between + # samplingspace and site inputs. + # get the input names that are registered for sampling. + names.sampler <- names(settings$ensemble$samplingspace) + # remove parameters field from the list. + names.sampler <- names.sampler[-which(names.sampler == "parameters")] + mis.match.table <- settings %>% purrr::map(function(s){ + # get the input names for the current site. + names.site.input <- names(s$run$inputs) + # check if there is any mismatch. + inds <- which(!names.sampler %in% names.site.input) + # if this site has missing inputs. + if (length(inds) > 0) { + return(data.frame(site_id = s$run$site$id, missed.input = names.sampler[inds])) + } + }) %>% dplyr::bind_rows() + # if we see any site that has missing inputs. + if (nrow(mis.match.table) > 0) { + PEcAn.logger::logger.info("There are sites that have missing inputs than the sampling space.") + return(mis.match.table) + } + + # find a site that has all registered inputs except for the parameter field. + if (all(names.sampler %in% names.site.input)) {} # get the joint input design. input_design <- PEcAn.uncertainty::generate_joint_ensemble_design(settings = settings[[1]], ensemble_samples = ensemble.samples,