93 changes: 86 additions & 7 deletions base/workflow/R/run.write.configs.R
@@ -104,10 +104,15 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU
options(scipen = 12)

samples.file <- file.path(settings$outdir, "samples.Rdata")
existing_data <- NULL
if (file.exists(samples.file)) {
samples <- new.env()
load(samples.file, envir = samples) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples
trait.samples <- samples$trait.samples
existing_data <- new.env()
load(samples.file, envir = existing_data)
trait.samples <- existing_data$trait.samples
sa.samples <- existing_data$sa.samples
runs.samples <- existing_data$runs.samples

# Generate ensemble.samples for current site
trait_sample_indices <- input_design[["param"]]
ensemble.samples <- list()
for (pft in names(trait.samples)) {
@@ -120,11 +125,9 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU
)
names(ensemble.samples[[pft]]) <- names(pft_traits)
}
sa.samples <- samples$sa.samples
runs.samples <- samples$runs.samples
## env.samples <- samples$env.samples
} else {
PEcAn.logger::logger.error(samples.file, "not found, this file is required by the run.write.configs function")
# cannot proceed without sample.Rdata
PEcAn.logger::logger.severe(samples.file, "not found, this file is required by the run.write.configs function")
}

## remove previous runs.txt
@@ -170,6 +173,7 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU
quantile.samples = sa.samples,
settings = settings,
model = model,
input_design = input_design,
write.to.db = write
)

Expand Down Expand Up @@ -211,6 +215,80 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU
PEcAn.logger::logger.info("###### Finished writing model run config files #####")
PEcAn.logger::logger.info("config files samples in ", file.path(settings$outdir, "run"))

## Use existing_data from earlier load
if (!is.null(existing_data)) {
# Merge ensemble.samples
if (!is.null(existing_data$ensemble.samples)) {
for (pft_name in names(ensemble.samples)) {
if (pft_name %in% names(existing_data$ensemble.samples)) {
# combine parameter samples for same PFT across sites
ensemble.samples[[pft_name]] <- rbind(
existing_data$ensemble.samples[[pft_name]],
ensemble.samples[[pft_name]]

Member:

I'm not seeing how one figures out after the fact which samples were used for which site

Collaborator Author:

The site information is preserved through the run IDs; each run ID contains the site ID
(e.g., ENS-00001-1000004925 -> site 1000004925), and this becomes the output folder name. Downstream functions don't parse the site ID; they just iterate through all run IDs and read from the folders named with those IDs.

# site 1:
runs.samples$ensemble <- data.frame(
  id = c("ENS-00001-1000004925",  # site id
         "ENS-00002-1000004925")
)
ensemble.samples$temperate <- data.frame(
  growth_resp = c(0.28, 0.31)  # rows 1-2
)

# after site 2 merging:
runs.samples$ensemble <- data.frame(
  id = c("ENS-00001-1000004925",  # site 1, run 1
         "ENS-00002-1000004925",  # site 1, run 2
         "ENS-00001-1000004928",  # site 2, run 1 <-- different site id
         "ENS-00002-1000004928")  # site 2, run 2
)
ensemble.samples$temperate <- data.frame(
  growth_resp = c(0.28, 0.31, 0.33, 0.30)  # rows match run ids
)

Member:

In that case I don't think ensemble.samples should grow at all -- The point of passing in an input design is to enforce that every run with a given ensemble member uses the same value of growth_resp (and every other parameter too), so ensemble.samples needs to stay n_ens lines long. I guess one could repeat the same set of values for each site, but that seems VERY inefficient.

Collaborator Author:

@infotroph @mdietze @dlebauer want to make sure that I got the merging logic right?

Explained in the comments below:

  PEcAn.logger::logger.info("###### Finished writing model run config files #####")
  PEcAn.logger::logger.info("config files samples in ", file.path(settings$outdir, "run"))
  
  # -----------------------------------------------------------------------
  # multisite merging logic
  # -----------------------------------------------------------------------
  # In multisite ensemble analysis, we maintain parameter
  # consistency across sites while allowing run tracking to grow. 
  # This ensures that the same parameter samples are applied across all sites,
  # enabling valid comparison of model performance under different environmental
  # conditions while tracking site-specific run execution.
  # -----------------------------------------------------------------------
  
  samples.file <- file.path(settings$outdir, "samples.Rdata")
  
  if (file.exists(samples.file)) {
    PEcAn.logger::logger.info("Merging with existing samples.Rdata to support multisite run...")
    
    # load existing data
    existing_env <- new.env()
    load(samples.file, envir = existing_env)
    
    # Ensemble parameters must remain identical across sites to answer:
    # How do the SAME parameter sets perform under DIFFERENT conditions?
    
    if (!is.null(existing_env$ensemble.samples)) {
      # does the existing list have names?
      # this validates we have a properly structured ensemble.samples object
      if (length(names(existing_env$ensemble.samples)) > 0) {
        # use existing parameter set to maintain ensemble consistency
        # all sites use identical parameter samples for ensemble analysis
        ensemble.samples <- existing_env$ensemble.samples
      } else {
        # existing parameter structure is invalid - use current site's parameters
        PEcAn.logger::logger.warn("Existing ensemble.samples structure is invalid. Using current site parameters as new baseline.")
      }
    }
    
    # maintain consistency in trait and sensitivity analysis samples
    if (!is.null(existing_env$trait.samples)) {
      if (length(names(existing_env$trait.samples)) > 0) {
        trait.samples <- existing_env$trait.samples
      }
    }
    if (!is.null(existing_env$sa.samples)) {
      if (length(names(existing_env$sa.samples)) > 0) {
        sa.samples <- existing_env$sa.samples
      }
    }
    
    # Run IDs append (this SHOULD grow)
    # ---------------------------------------------------------------------
    # Run IDs accumulate to track which parameter sets execute at which sites
    # e.g. with 2 ensemble members across 2 sites:
    # - site 1: ENS-00001-1000004925, ENS-00002-1000004925
    # - site 2: ENS-00001-1000004928, ENS-00002-1000004928  
    # - final: 4 run IDs tracking same 2 parameter sets across 2 sites
    # ---------------------------------------------------------------------
    
    if (!is.null(existing_env$runs.samples)) {
      
      # ensemble runs tracking (data frame -> rbind)
      # rbind here because run IDs must accumulate across sites
      if (!is.null(runs.samples$ensemble)) {
        if (!is.null(existing_env$runs.samples$ensemble)) {
          # check we are not rbinding garbage. Check for 'id' column.
          if("id" %in% names(existing_env$runs.samples$ensemble)) {
            runs.samples$ensemble <- rbind(existing_env$runs.samples$ensemble, runs.samples$ensemble)
            # run IDs grow to track which sites use which ensemble members
          }
        }
      } else if (!is.null(existing_env$runs.samples$ensemble)) {
        runs.samples$ensemble <- existing_env$runs.samples$ensemble
      }
      
      # SA runs tracking (list structure -> modifyList)
      if (!is.null(runs.samples$sa)) {
        if (!is.null(existing_env$runs.samples$sa)) {
          runs.samples$sa <- utils::modifyList(existing_env$runs.samples$sa, runs.samples$sa)
        }
      } else if (!is.null(existing_env$runs.samples$sa)) {
        runs.samples$sa <- existing_env$runs.samples$sa
      }
    }
    
    # metadata (pft names, trait names) should merge to create 
    # coverage across all sites while avoiding duplication
    # ---------------------------------------------------------------------
    
    if (!is.null(existing_env$pft.names)) {
      pft.names <- unique(c(existing_env$pft.names, pft.names))
    }
    if (!is.null(existing_env$trait.names)) {
      if (length(names(existing_env$trait.names)) > 0) {
        trait.names <- utils::modifyList(existing_env$trait.names, trait.names)
      }
    }
  }
  
  save(ensemble.samples, trait.samples, sa.samples, runs.samples, pft.names, trait.names,
       file = samples.file
  )
  
  PEcAn.logger::logger.info("parameter values for runs in ", samples.file)
  options(scipen = scipen)
  invisible(settings)
  return(settings)
}

Member:

This seems overly complicated. get.param.samples should generate one matrix of parameters per PFT and currently there is not within-PFT variability in parameter samples across sites (though this may change in the future). For a particular site, the PFT and input_design should determine which parameter matrix you use, and which row in that matrix you select, respectively. There should be no need to rbind matrices.
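
For illustration, a minimal sketch of that indexing approach (hypothetical object shapes; it assumes input_design[["param"]] holds, for each of this site's runs, a row index into the per-PFT parameter matrix that get.parameter.samples generated once up front):

# ensemble.samples stays one fixed matrix (n_ens x n_traits) per PFT;
# input_design picks rows, so nothing ever needs to be rbind-ed
site_rows <- input_design[["param"]]        # e.g. c(1, 2) for a 2-member ensemble
site_params <- lapply(ensemble.samples, function(pft_mat) {
  pft_mat[site_rows, , drop = FALSE]        # rows this site's runs should use
})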

Member:

I agree with Mike here and am leaning toward an even stronger claim that run.write.configs should not be rewriting samples.Rdata at all. Its contents are all either generated once by get.parameter.samples before run.write.configs starts (ensemble.samples, sa.samples, trait.samples) or are not actually samples and can be taken from runs.txt (runs.samples)

Collaborator Author:

> not actually samples and can be taken from runs.txt (runs.samples)

Does the downstream analysis expect runs.samples to be in samples.Rdata?

Collaborator Author (@divine7022, Nov 28, 2025):

I looked into read.ensemble.output and read.sa.output to verify whether we can stop writing samples.Rdata, but those functions explicitly load samples.Rdata to find the run IDs:

ens.run.ids <- samples$runs.samples$ensemble
sa.run.ids <- samples$runs.samples$sa

They do not currently have logic to parse runs.txt.

So here we can either:
1) reload the existing parameters (avoiding duplication/growth) but still append the new run IDs to runs.samples and save the file (run.write.configs), or
2) refactor downstream to read runs.txt (does that feel like a separate PR?)

Member:

Don't know what you mean by "stop writing samples.Rdata". No one is saying this isn't needed, we're saying it should already exist before run.write.configs is called (it is now generated by the new input design function, which should be called before the write configs functions) and it shouldn't need to be modified.

Only start model runs should rely on runs.txt. There's not enough information in there for anything else downstream

Collaborator Author:

> Don't know what you mean by "stop writing samples.Rdata".

I was responding specifically to Chris's suggestion that runs.samples might be redundant because that info "can be taken from runs.txt".

I was verifying whether run.write.configs could stop appending even the new run IDs (runs.samples) to samples.Rdata (relying on runs.txt instead), but downstream (read.ensemble.output, ...) depends on finding those IDs in samples.Rdata.

But it's clear now:

> Only start model runs should rely on runs.txt.

)
}
# New PFTs from current site are automatically preserved
}
# preserves existing PFTs -- add PFTs that exist in file but not current site
for (existing_pft in names(existing_data$ensemble.samples)) {
if (!existing_pft %in% names(ensemble.samples)) {
ensemble.samples[[existing_pft]] <- existing_data$ensemble.samples[[existing_pft]]
}
}
}
# Merge trait.samples
if (!is.null(existing_data$trait.samples)) {
trait.samples <- utils::modifyList(existing_data$trait.samples, trait.samples)
Comment on lines +240 to +241
Member:

Trait.samples ought to be generated exactly once at the beginning of setup (to define the set of posteriors that ensemble.samples is selected from) and not differ from site to site. Since it may contain MCMC results with potentially as many as millions of samples per trait, I really don't think we want to be duplicating it here.
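
A minimal sketch of that alternative (hypothetical; later sites would simply reuse the already-saved posteriors rather than merging copies of them):

# trait.samples is generated once by get.parameter.samples; reuse it as-is
if (!is.null(existing_data$trait.samples)) {
  trait.samples <- existing_data$trait.samples  # identical across sites, no modifyList copy
}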

}
# Merge sa.samples
if (!is.null(existing_data$sa.samples)) {
sa.samples <- utils::modifyList(existing_data$sa.samples, sa.samples)
}
# Merge runs.samples
if (!is.null(existing_data$runs.samples)) {
# Merge ensemble runs
if (!is.null(runs.samples$ensemble)) {
if (!is.null(existing_data$runs.samples$ensemble)) {
runs.samples$ensemble <- rbind(
existing_data$runs.samples$ensemble,
runs.samples$ensemble
)
}
} else if (!is.null(existing_data$runs.samples$ensemble)) {
# Current site has no ensemble, preserve existing
runs.samples$ensemble <- existing_data$runs.samples$ensemble
}
# Merge SA runs
if (!is.null(runs.samples$sa)) {
if (!is.null(existing_data$runs.samples$sa)) {
for (pft_name in names(runs.samples$sa)) {
if (pft_name %in% names(existing_data$runs.samples$sa)) {
runs.samples$sa[[pft_name]] <- rbind(
existing_data$runs.samples$sa[[pft_name]],
runs.samples$sa[[pft_name]]
)
}
}
for (existing_pft in names(existing_data$runs.samples$sa)) {
if (!existing_pft %in% names(runs.samples$sa)) {
runs.samples$sa[[existing_pft]] <- existing_data$runs.samples$sa[[existing_pft]]
}
}
}
} else if (!is.null(existing_data$runs.samples$sa)) {
# Current site has no SA, preserve existing
runs.samples$sa <- existing_data$runs.samples$sa
}
}
# Merge pft.names
if (!is.null(existing_data$pft.names)) {
pft.names <- unique(c(existing_data$pft.names, pft.names))
}
# Merge trait.names
if (!is.null(existing_data$trait.names)) {
trait.names <- utils::modifyList(existing_data$trait.names, trait.names)
}
}
### Save output from SA/Ensemble runs
# A lot of this is duplicate with the ensemble/sa specific output above, but kept for backwards compatibility.
save(ensemble.samples, trait.samples, sa.samples, runs.samples, pft.names, trait.names,
@@ -221,3 +299,4 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRU
invisible(settings)
return(settings)
}

117 changes: 90 additions & 27 deletions modules/uncertainty/R/sensitivity.R
@@ -94,14 +94,15 @@ read.sa.output <- function(traits, quantiles, pecandir, outdir, pft.name = "",
#' by \code{settings$rundir} before writing to it?
#' @param write.to.db logical: Record this run to BETY? If TRUE, uses connection
#' settings specified in \code{settings$database}
#' @param input_design data.frame coordinating input files across runs
#'
#' @return list, containing $runs = data frame of runids,
#' and $ensemble.id = the ensemble ID for these runs.
#' Also writes sensitivity analysis configuration files as a side effect
#' @export
#' @author David LeBauer, Carl Davidson
write.sa.configs <- function(defaults, quantile.samples, settings, model,
clean = FALSE, write.to.db = TRUE) {
clean = FALSE, write.to.db = TRUE, input_design = NULL) {
scipen <- getOption("scipen")
options(scipen = 12)
my.write.config <- paste("write.config.", model, sep = "")
@@ -223,6 +224,50 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model,
dir.create(file.path(settings$rundir, run.id), recursive = TRUE)
dir.create(file.path(settings$modeloutdir, run.id), recursive = TRUE)

## select single met path for SA runs without input_design (backward compatibility)
## when input_design is provided (recommended), it supersedes this selection
## and allows different met files per SA run
# I check to make sure the path under the met is a list.
# if it's specified what met needs to be used in 'met.id' under sensitivity
# analysis of pecan xml we used that otherwise, I use the first met.
if (is.list(settings$run$inputs$met$path)) {
# This checks for met.id tag in the settings under sensitivity analysis -
# if it's not there it creates it. Then it's gonna use what it created.
if (is.null(settings$sensitivity.analysis$met.id)) {
settings$sensitivity.analysis$met.id <- 1

Member:

👍 to the overall goal of picking one met (/other input) path to use across the whole SA.

But calling this "met.id" is confusing to me -- looks like this is basically an index into the ensemble, whereas I would usually expect "met ID" to mean an identifier for which source (e.g. CRUNCEP, ERA5, etc.) the met data came from.

Could it instead be something like settings$sensitivity.analysis$met_path and contain the actual filepath to be used by the SA instead of an index that needs to be extracted from the ensemble? That'd be clearer to me, and would also allow a user to pass an arbitrary path to the SA even if it doesn't exist in the ensemble (not that I expect most people to want that, but it could come in handy sometimes)

Member:

...But wait! Doesn't input_design handle this for us just below?

Collaborator Author:

Good question; it does duplicate input_design (the new approach):

  • one reason is to stay consistent with the ensemble pattern, so the SA using a different path selection would be inconsistent
  • it also works when input_design is NOT provided (single-site or legacy workflows)
  • the met.id resolution is indeed redundant when input_design is present; the met.id logic serves as a backward-compatibility fallback for workflows that don't use input_design. However, when input_design exists it supersedes met.id and handles ALL inputs (met, IC, soil, etc.)
  • alternatively, we could skip met.id resolution entirely when input_design exists

Short-term fix (this PR): keep the current approach and add a clarifying comment:

# select single met path for SA runs without input_design (backward compatibility)
# when input_design is provided (recommended), it supersedes this selection
# and allows different met files per SA run
if (is.list(settings$run$inputs$met$path)) {
  if (is.null(settings$sensitivity.analysis$met.id)) {
    settings$sensitivity.analysis$met.id <- 1
  }
  settings$run$inputs$met$path <- settings$run$inputs$met$path[[settings$sensitivity.analysis$met.id]]
}

Long-term (further iteration): document input_design as the recommended approach and consider deprecating met.id in favor of input_design.

Member:

Re met path: You've talked me out of the idea, especially since it would need to be site-specific -- so my proposal wouldn't need just one settings$sensitivity.analysis$met_path, it'd be at least a settings$sensitivity.analysis$<site>$met_path for every site. I do still suggest naming it something like met.index instead of met.id.

Member (@infotroph, Nov 12, 2025):

Re input design: I'll defer to @dlebauer but my sense is that since we've bought into making input_design mandatory for other parts of the workflow and it's specifically designed to be where we keep all the pieces of the sampling design together in one place, it may be sensible to make it mandatory here too rather than duplicate its functionality.

Collaborator Author:

> I do still suggest naming it something like met.index instead of met.id

Renaming to met.index would be a nice clarity gain, but I'm afraid of breaking backward compatibility: existing workflows may have <met.id>, and a direct rename would silently ignore those settings?

Member:

I'm in favor of having a consistent input_design object. It is hard to see an advantage of different approaches if one generalized solution is available.

Member:

I am curious - where is met.id used? I can't find the string in either the PEcAn repository or in xml settings files used by CCMMF or Dongchen's workflows.

Member:

@dlebauer it's only used immediately below to set the met$path.

I agree with the consensus to drop the met.id solution proposed here and just use input_design. If one wants to only vary parameters and hold everything else fixed it's really easy to generate an input_design that's all 1's for everything other than parameters. The met.id "solution" isn't really a solution anyways as it only fixes met ensemble members and not the other ensembled inputs (phenology, soil physics, initial conditions, etc.)
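
For instance, a minimal sketch of such a design (hypothetical column names; the non-param columns would be whichever inputs the workflow actually ensembles):

n_ens <- 10
input_design <- data.frame(
  param = seq_len(n_ens),  # every run gets its own parameter row
  met   = rep(1, n_ens),   # but all runs use met ensemble member 1
  ic    = rep(1, n_ens),   # ... the first initial-condition file
  soil  = rep(1, n_ens)    # ... and the first soil physics file
)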

}
settings$run$inputs$met$path <- settings$run$inputs$met$path[[settings$sensitivity.analysis$met.id]]
}

# Apply input design coordination for median run
median_settings <- settings
if (!is.null(input_design)) {
# Coordinate inputs for median run (use first row)
for (input_tag in colnames(input_design)) {
if (input_tag != "param" && !is.null(median_settings$run$inputs[[input_tag]]$path)) {
input_paths <- median_settings$run$inputs[[input_tag]]$path
# Assume list structure (consistent with write.ensemble.configs)
if (length(input_paths) > 1) {
input_index <- input_design[[input_tag]][1]
median_settings$run$inputs[[input_tag]]$path <- input_paths[[input_index]]
}
}
}
}

median_input_info <- ""
for (input_tag in names(median_settings$run$inputs)) {
input_data <- median_settings$run$inputs[[input_tag]]
# At SA stage, path is ALWAYS a resolved string (thanks to input design)
if (!is.null(input_data) && !is.null(input_data$path)) {
median_input_info <- paste0(median_input_info,
format(input_tag, width = 12, justify = "left"),
": ",
input_data$path,
"\n")
}
}

# write run information to disk TODO need to print list of pft names and trait
# names
cat("runtype : sensitivity analysis\n",
@@ -236,7 +281,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model,
"model id : ", settings$model$id, "\n",
"site : ", settings$run$site$name, "\n",
"site id : ", settings$run$site$id, "\n",
"met data : ", settings$run$site$met, "\n",
median_input_info,
"start date : ", settings$run$start.date, "\n",
"end date : ", settings$run$end.date, "\n",
"hostname : ", settings$host$name, "\n",
@@ -246,24 +291,10 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model,
sep = "")


# I check to make sure the path under the met is a list.
# if it's specified what met needs to be used in 'met.id' under sensitivity
# analysis of pecan xml we used that otherwise, I use the first met.
if (is.list(settings$run$inputs$met$path)) {
# This checks for met.id tag in the settings under sensitivity analysis -
# if it's not there it creates it. Then it's gonna use what it created.
if (is.null(settings$sensitivity.analysis$met.id)) {
settings$sensitivity.analysis$met.id <- 1
}
settings$run$inputs$met$path <- settings$run$inputs$met$path[[settings$sensitivity.analysis$met.id]]

}


# write configuration
do.call(my.write.config, args = list(defaults = defaults,
trait.values = median.samples,
settings = settings,
settings = median_settings,
run.id = run.id))
cat(
run.id,
@@ -272,16 +303,18 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model,
append = TRUE
)

run_index <- 1

## loop over pfts
runs <- list()
for (i in seq_along(names(quantile.samples))) {
pftname <- names(quantile.samples)[i]
for (pft_idx in seq_along(names(quantile.samples))) {
pftname <- names(quantile.samples)[pft_idx]
if (pftname == "env") {
next
}

traits <- colnames(quantile.samples[[i]])
quantiles.str <- rownames(quantile.samples[[i]])
traits <- colnames(quantile.samples[[pft_idx]])
quantiles.str <- rownames(quantile.samples[[pft_idx]])

runs[[pftname]] <- data.frame()

@@ -293,7 +326,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model,
} else {
quantile <- as.numeric(quantile.str) / 100
trait.samples <- median.samples
trait.samples[[i]][trait] <- quantile.samples[[i]][quantile.str, trait, drop = FALSE]
trait.samples[[pft_idx]][trait] <- quantile.samples[[pft_idx]][quantile.str, trait, drop = FALSE]

if (!is.null(con)) {
paramlist <- paste0("quantile=", quantile.str, ",trait=", trait, ",pft=", pftname)
Expand Down Expand Up @@ -343,12 +376,15 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model,
run.type = "SA",
index = round(quantile, 3),
trait = trait,
pft.name = names(trait.samples)[i],
pft.name = names(trait.samples)[pft_idx],
site.id = settings$run$site$id
)
}
runs[[pftname]][quantile.str, trait] <- run.id

# Increment run counter
run_index <- run_index + 1

# create folders (cleaning up old ones if needed)
if (clean) {
unlink(file.path(settings$rundir, run.id))
@@ -357,19 +393,46 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model,
dir.create(file.path(settings$rundir, run.id), recursive = TRUE)
dir.create(file.path(settings$modeloutdir, run.id), recursive = TRUE)

# write run information to disk
# Apply input design coordination for SA runs
settings_copy <- settings

Member:

It's confusing that below here some values are taken from settings and others from settings_copy. Is there actually a need for two separate objects or could these mutations be applied straight to settings?

Collaborator Author:

SA loop without settings_copy:

for (trait in traits) {
  for (quantile in quantiles) {
    settings$run$inputs$met$path <- input_paths[[run_index]]  # mutates
    ...
    # write config
    write.config.SIPNET(settings, ...)
    # next iteration uses MUTATED settings; 
    # quantile 2 sees met path from quantile 1
  }
}

  • so by having settings_copy we modify the copy only (settings_copy$run$inputs$met$path <- input_paths[[run_index]]) and write.config.* gets clean settings each iteration.
  • If we mutated settings directly, run 2's config would inherit run 1's modified input paths, causing incorrect configs.

The answer to the question of why settings is still used for some values:
cat("model id : ", settings$model$id, "\n") # never changes across runs
cat("site id : ", settings$run$site$id, "\n") # same for all SA runs

BUT inputs must come from settings_copy:
do.call(my.write.config, settings = settings_copy) # has resolved input paths

Member:

Right, but the path is updated earlier in the loop than the write.config call, so by the time write.config gets called for run 2 the path from run 1 has already been overwritten by the path from run 2. As long as you're sure that every iteration will update the same fields of the settings object, it doesn't matter whether it started from a "clean" object or not.

Similarly, if you are using settings_copy then it also contains all the fields from settings that aren't updated each iteration, so there's definitely no reason to interleave uses of both.

Member:

I agree that it makes sense to use settings_copy instead of settings. It seems this only concerns the README block that pulls fields from the original settings. If we just switch those cat() calls to read from settings_copy, it’ll be obvious that each iteration uses one clean settings object end-to-end.

I've proposed this change below in https://github.com/PecanProject/pecan/pull/3660/files/f075e4d7d0f9ad85ee340bbe01f7dca464988677#r2561591261

if (!is.null(input_design)) {
for (input_tag in colnames(input_design)) {
if (input_tag != "param" && !is.null(settings_copy$run$inputs[[input_tag]]$path)) {
input_paths <- settings_copy$run$inputs[[input_tag]]$path
if (length(input_paths) > 1) {
input_index <- input_design[[input_tag]][run_index]
settings_copy$run$inputs[[input_tag]]$path <- input_paths[[input_index]]
}
}
}
}

# Build dynamic input info string for SA run README
sa_input_info <- ""
for (input_tag in names(settings_copy$run$inputs)) {
input_data <- settings_copy$run$inputs[[input_tag]]
if (!is.null(input_data) && !is.null(input_data$path)) {
sa_input_info <- paste0(sa_input_info,
format(input_tag, width = 12, justify = "left"),
": ",
input_data$path,
"\n")
}
}

# write SA run information to disk
cat("runtype : sensitivity analysis\n",
"workflow id : ", workflow.id, "\n",
"ensemble id : ", ensemble.id, "\n",
"pft name : ", names(trait.samples)[i], "\n",
"pft name : ", names(trait.samples)[pft_idx], "\n",
"quantile : ", quantile.str, "\n",
"trait : ", trait, "\n",
"run id : ", run.id, "\n",
"model : ", model, "\n",
"model id : ", settings$model$id, "\n",
"site : ", settings$run$site$name, "\n",
"site id : ", settings$run$site$id, "\n",
"met data : ", settings$run$site$met, "\n",
sa_input_info,
"start date : ", settings$run$start.date, "\n",
"end date : ", settings$run$end.date, "\n",
"hostname : ", settings$host$name, "\n",
@@ -382,7 +445,7 @@ write.sa.configs <- function(defaults, quantile.samples, settings, model,
# write configuration
do.call(my.write.config, args = list(defaults = defaults,
trait.values = trait.samples,
settings = settings,
settings = settings_copy,
run.id))
cat(
run.id,
5 changes: 4 additions & 1 deletion modules/uncertainty/man/write.sa.configs.Rd

Some generated files are not rendered by default.