From 4c803220b4715ee054c1e7ff3e21c6930c342554 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Tue, 15 Oct 2019 16:37:18 -0400
Subject: [PATCH 0001/1193] added updated thredds function
---
modules/data.remote/R/download.thredds.R | 194 ++++++++++++++---------
1 file changed, 122 insertions(+), 72 deletions(-)
diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 4b3fadacd2f..7c1babaa7a7 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -1,17 +1,27 @@
#
-##' @title download.thredds.AGB
-##' @name download.thredds.AGB
+##' @title download.thredds.data
+##' @name download.thredds.data
##'
-##' @param outdir Where to place output
-##' @param site_ids What locations to download data at?
+##' @param outdir file location to place output
+##' @param site_info information about the site. i.e. site_id, latitude, longitude
+##' @param dates character vector of start and end date for dataset as YYYYmmdd
+##' @param varid character vector of shorthand variable name. i.e. LAI
+##' @param dir_url catalog url of data from ncei.noaa.gov/thredds website
+##' @param data_url opendap url of data from ncei.noaa.gov/thredds website
##' @param run_parallel Logical. Download and extract files in parallel?
-##' @param ncores Optional. If run_parallel=TRUE how many cores to use? If left as NULL will select max number -1
##'
##' @return data.frame summarize the results of the function call
##'
##' @examples
##' \dontrun{
-##' outdir <- "~/scratch/abg_data/"
+##' outdir <- directory to store downloaded data
+##' site_info <- dataframe that contains information about site_id, latitude, longitude, and site_names
+##' dates <- date range to download data. Should be a character vector with start and end date as YYYYmmdd
+##' varod <- character shorthand name of variable to download. Example: LAI for leaf area index.
+##' dir_url <- catalog url from THREDDS that is used to determine which files are available for download using OPENDAP
+##' data_url <- OpenDAP URL that actually downloads the netcdf file.
+##' run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
+##'
##' results <- PEcAn.data.remote::download.thredds.AGB(outdir=outdir,
##' site_ids = c(676, 678, 679, 755, 767, 1000000030, 1000000145, 1000025731),
@@ -20,84 +30,124 @@
##' @export
##' @author Bailey Morrison
##'
-download.thredds.AGB <- function(outdir = NULL, site_ids, run_parallel = FALSE,
- ncores = NULL) {
+download.thredds.data <- function(outdir = NULL, site_info, dates = c("19950201", "19961215"),
+ varid = "LAI",
+ dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files",
+ data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files",
+ run_parallel = TRUE) {
+ # require("XML")
+ # require("RCurl")
+ require("foreach")
+
+ # check that dates are within the date range of the dataset
+ dates = c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
+ if (!(is.null(dir_url)))
+ {
+ #https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files/1981/catalog.html -> link for directory files, not downloads
+ result <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"), verbose=F,ftp.use.epsv=TRUE, dirlistonly = TRUE)
+ files = XML::getHTMLLinks(result)
+
+ date_year_range = unique(range(c(year(as.Date(dates[1], "%Y")), year(as.Date(dates[2], "%Y")))))
+ if (all((!(substr(files, 1, 4) %in% date_year_range))))
+ {
+ # give warning that dates aren't available
+ print(test)
+ }
+
+ }
+
+ # get list of catalog file links to determine actual dates that can be downloaded with in user range
+ links = vector()
+ for (i in 1:length(date_year_range))
+ {
+ links[i] = RCurl::getURL(paste(dir_url, date_year_range[i], "catalog.html", sep = "/"), verbose=F,ftp.use.epsv=T, dirlistonly = T)
+ }
+ # get list of all dates available from year range provided
+ files = foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
- bety <- list(user='bety', password='bety', host='localhost',
- dbname='bety', driver='PostgreSQL',write=TRUE)
- con <- PEcAn.DB::db.open(bety)
- bety$con <- con
- site_ID <- as.character(site_ids)
- suppressWarnings(site_qry <- glue::glue_sql("SELECT *, ST_X(ST_CENTROID(geometry)) AS lon,
- ST_Y(ST_CENTROID(geometry)) AS lat FROM sites WHERE id IN ({ids*})",
- ids = site_ID, .con = con))
- suppressWarnings(qry_results <- DBI::dbSendQuery(con,site_qry))
- suppressWarnings(qry_results <- DBI::dbFetch(qry_results))
- site_info <- list(site_id=qry_results$id, site_name=qry_results$sitename, lat=qry_results$lat,
- lon=qry_results$lon, time_zone=qry_results$time_zone)
+ #remove files with no dates and get list of dates available.
+ index_dates = regexpr(pattern = "[0-9]{8}", files)
+ files = files[-(which(index_dates < 0))]
+ index_dates = index_dates[which(index_dates > 0)]
- mylat = site_info$lat
- mylon = site_info$lon
+ # get list of files that fall within the specific date range user asks for (Ymd, not Y)
+ dates_avail = as.Date(substr(files, index_dates, index_dates+7), "%Y%m%d")
+ date_range = seq(dates[1], dates[2], by = "day")
+ get_dates = date_range[which(date_range %in% dates_avail)]
- # site specific URL for dataset --> these will be made to work for all THREDDS datasets in the future, but for now, just testing with
- # this one dataset. This specific dataset only has 1 year (2005), so no temporal looping for now.
- obs_file = "https://thredds.daac.ornl.gov/thredds/dodsC/ornldaac/1221/agb_5k.nc4"
- obs_err = "https://thredds.daac.ornl.gov/thredds/dodsC/ornldaac/1221/agb_SE_5k.nc4"
- files = c(obs_file, obs_err)
+ # only keep files that are within the true yyyymmdd date range user requested
+ files = files[foreach(i = seq_along(get_dates), .combine = c) %do% grep(files, pattern = format(get_dates[i], '%Y%m%d'))]
+ filenames = basename(files)
- # function to extract ncdf data from lat and lon values for value + SE URLs
- get_data = function(i)
+ # user must supply data_URL or the netcdf files cannot be downloaded through thredds. if user has supplied no data_url, the job will fail
+ # supply a warning
+ if (!(is.null(data_url)))
{
- data = ncdf4::nc_open(files[1])
- agb_lats = ncdf4::ncvar_get(data, "latitude")
- agb_lons = ncdf4::ncvar_get(data, "longitude")
-
- agb_x = which(abs(agb_lons- mylon[i]) == min(abs(agb_lons - mylon[i])))
- agb_y = which(abs(agb_lats- mylat[i]) == min(abs(agb_lats - mylat[i])))
-
- start = c(agb_x, agb_y)
- count = c(1,1)
- d = ncdf4::ncvar_get(ncdf4::nc_open(files[1]), "abvgrndbiomass", start=start, count = count)
- if (is.na(d)) d <- NA
- sd = ncdf4::ncvar_get(ncdf4::nc_open(files[2]), "agbSE", start=start, count = count)
- if (is.na(sd)) sd <- NA
- date = "2005"
- site = site_ID[i]
- output = as.data.frame(cbind(d, sd, date, site))
- names(output) = c("value", "sd", "date", "siteID")
+ #https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files/1981/AVHRR-Land_v005_AVH15C1_NOAA-07_19810624_c20181025194251.nc.html
+ # this is what a link looks like to download threeds data.
+ urls = sort(paste(data_url, substr(dates_avail, 1, 4), filenames, sep = "/"))
- # option to save output dataset to directory for user.
- if (!(is.null(outdir)))
+ extract_nc = function(site_info, url, run_parallel)
{
- write.csv(output, file = paste0(outdir, "THREDDS_", sub("^([^.]*).*", "\\1",basename(files[1])), "_site_", site, ".csv"), row.names = FALSE)
+ require("foreach")
+ require("ncdf4")
+
+ mylats = site_info$lat
+ mylons = site_info$lon
+ sites = site_info$site_id
+
+ # open netcdf file and get the correct variable name based on varid parameter + var names of netcdf
+ data = ncdf4::nc_open(url)
+ vars = names(data$var)
+ var = vars[grep(vars, pattern = varid, ignore.case = T)]
+
+ # get list of all xy coordinates in netcdf
+ lats = ncdf4::ncvar_get(data, "latitude")
+ lons = ncdf4::ncvar_get(data, "longitude")
+
+ # find the cell that site coordinates are located in
+ dist_y = foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
+ dist_x = foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
+ y = foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = T)
+ x = foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = T)
+
+ scale = data$var[[var]]$scaleFact
+
+ d = as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
+
+ info = as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = F)
+ names(info) = c("site_id", "lon", "lat", "value")
+
+ return(info)
}
- return(output)
- }
-
- ## setup parallel
- if (run_parallel) {
- if (!is.null(ncores)) {
- ncores <- ncores
- } else {
- ncores <- parallel::detectCores() -1
- }
- require(doParallel)
- PEcAn.logger::logger.info(paste0("Running in parallel with: ", ncores))
- cl = parallel::makeCluster(ncores)
- doParallel::registerDoParallel(cl)
- data = foreach(i = seq_along(mylat), .combine = rbind) %dopar% get_data(i)
- stopCluster(cl)
- } else {
- # setup sequential run
- data = data.frame()
- for (i in seq_along(mylat))
+
+ if (run_parallel)
{
- data = rbind(data, get_data(i))
+ require("parallel")
+ require("doParallel")
+ ncores = parallel::detectCores(all.tests = FALSE, logical = TRUE)
+ if (ncores >= 3)
+ {
+ # failsafe in case someone has a computer with 2 nodes.
+ ncores = ncores-2
+ }
+ # THREDDS has a 10 job limit. Will fail if you try to download more than 10 values at a time
+ if (ncores >= 10)
+ {
+ ncores = 9 # went 1 less becasue it still fails sometimes
+ }
+ cl <- parallel::makeCluster(ncores, outfile="")
+ doParallel::registerDoParallel(cl)
+ output = foreach(i = urls, .combine = rbind) %dopar% extract_nc(site_info, i, run_parallel)
+ stopCluster(cl)
+ } else {
+ output = foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
}
+
+ return(output)
+
}
-
- return(data)
}
From 2071b3eb7492e1ad7c886f45ce1c7111d2ce174d Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Wed, 16 Oct 2019 12:53:08 -0400
Subject: [PATCH 0002/1193] some updated changes
---
modules/data.remote/R/download.thredds.R | 151 +++++++++++++++--------
1 file changed, 97 insertions(+), 54 deletions(-)
diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 7c1babaa7a7..aed5d5ed835 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -1,9 +1,58 @@
-#
-##' @title download.thredds.data
-##' @name download.thredds.data
+##' @title get_site_info
+##' @name get_site_info
+##'
+##'
+##' @param xmlfile full path to pecan xml settings file
+##'
+##'
+##' @return a list of site information derived from BETY using a pecan .xml settings file with site_id, site_name, lat, lon, and time_zone.
+##'
+##' @examples
+##' \dontrun{
+##' xmlfile <- the full path to a pecan .xml settings file.
+##'
+
+##' site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
+##'
+##' @export
+##' @author Bailey Morrison
+##'
+get_site_info <- function(xmlfile) {
+ require(PEcAn.all)
+
+ settings <- read.settings(xmlfile)
+
+ observation <- c()
+ for (i in seq_along(1:length(settings$run))) {
+ command <- paste0("settings$run$settings.", i, "$site$id")
+ obs <- eval(parse(text = command))
+ observation <- c(observation, obs)
+ }
+
+
+ PEcAn.logger::logger.info("**** Extracting LandTrendr AGB data for model sites ****")
+ bety <- list(user = 'bety', password = 'bety', host = 'localhost',
+ dbname = 'bety', driver = 'PostgreSQL', write = TRUE)
+ con <- PEcAn.DB::db.open(bety)
+ bety$con <- con
+ site_ID <- observation
+ suppressWarnings(site_qry <- glue::glue_sql("SELECT *, ST_X(ST_CENTROID(geometry)) AS lon,
+ ST_Y(ST_CENTROID(geometry)) AS lat FROM sites WHERE id IN ({ids*})",
+ ids = site_ID, .con = con))
+ suppressWarnings(qry_results <- DBI::dbSendQuery(con,site_qry))
+ suppressWarnings(qry_results <- DBI::dbFetch(qry_results))
+ site_info <- list(site_id = qry_results$id, site_name = qry_results$sitename, lat = qry_results$lat,
+ lon = qry_results$lon, time_zone = qry_results$time_zone)
+ return(site_info)
+}
+
+
+##' @title download.thredds
+##' @name download.thredds
+##'
##'
##' @param outdir file location to place output
-##' @param site_info information about the site. i.e. site_id, latitude, longitude
+##' @param site_info list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
##' @param dates character vector of start and end date for dataset as YYYYmmdd
##' @param varid character vector of shorthand variable name. i.e. LAI
##' @param dir_url catalog url of data from ncei.noaa.gov/thredds website
@@ -15,7 +64,7 @@
##' @examples
##' \dontrun{
##' outdir <- directory to store downloaded data
-##' site_info <- dataframe that contains information about site_id, latitude, longitude, and site_names
+##' site_info <- list that contains information about site_id, site_name, latitude, longitude, and time_zone
##' dates <- date range to download data. Should be a character vector with start and end date as YYYYmmdd
##' varod <- character shorthand name of variable to download. Example: LAI for leaf area index.
##' dir_url <- catalog url from THREDDS that is used to determine which files are available for download using OPENDAP
@@ -23,62 +72,56 @@
##' run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
##'
-##' results <- PEcAn.data.remote::download.thredds.AGB(outdir=outdir,
-##' site_ids = c(676, 678, 679, 755, 767, 1000000030, 1000000145, 1000025731),
-##' run_parallel = TRUE, ncores = 8)
+##' results <- download_thredds(outdir = NULL, site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE)
+##'
##'
##' @export
##' @author Bailey Morrison
##'
-download.thredds.data <- function(outdir = NULL, site_info, dates = c("19950201", "19961215"),
- varid = "LAI",
- dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files",
- data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files",
- run_parallel = TRUE) {
- # require("XML")
- # require("RCurl")
+download_thredds <- function(outdir = NULL, site_info, dates, varid, dir_url, data_url,run_parallel = TRUE) {
+
require("foreach")
# check that dates are within the date range of the dataset
- dates = c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
+ dates <- c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
if (!(is.null(dir_url)))
{
#https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files/1981/catalog.html -> link for directory files, not downloads
- result <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"), verbose=F,ftp.use.epsv=TRUE, dirlistonly = TRUE)
- files = XML::getHTMLLinks(result)
+ result <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"), verbose=FALSE ,ftp.use.epsv = TRUE, dirlistonly = TRUE)
+ files <- XML::getHTMLLinks(result)
- date_year_range = unique(range(c(year(as.Date(dates[1], "%Y")), year(as.Date(dates[2], "%Y")))))
+ date_year_range <- unique(range(c(lubridate::year(as.Date(dates[1], "%Y")), lubridate::year(as.Date(dates[2], "%Y")))))
if (all((!(substr(files, 1, 4) %in% date_year_range))))
{
# give warning that dates aren't available
- print(test)
+ print("something")
}
}
# get list of catalog file links to determine actual dates that can be downloaded with in user range
- links = vector()
+ links <- vector()
for (i in 1:length(date_year_range))
{
- links[i] = RCurl::getURL(paste(dir_url, date_year_range[i], "catalog.html", sep = "/"), verbose=F,ftp.use.epsv=T, dirlistonly = T)
+ links[i] <- RCurl::getURL(paste(dir_url, date_year_range[i], "catalog.html", sep = "/"), verbose= FALSE, ftp.use.epsv = TRUE, dirlistonly = TRUE)
}
# get list of all dates available from year range provided
- files = foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
+ files <- foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
#remove files with no dates and get list of dates available.
- index_dates = regexpr(pattern = "[0-9]{8}", files)
- files = files[-(which(index_dates < 0))]
- index_dates = index_dates[which(index_dates > 0)]
+ index_dates <- regexpr(pattern = "[0-9]{8}", files)
+ files <- files[-(which(index_dates < 0))]
+ index_dates <- index_dates[which(index_dates > 0)]
# get list of files that fall within the specific date range user asks for (Ymd, not Y)
- dates_avail = as.Date(substr(files, index_dates, index_dates+7), "%Y%m%d")
- date_range = seq(dates[1], dates[2], by = "day")
- get_dates = date_range[which(date_range %in% dates_avail)]
+ dates_avail <- as.Date(substr(files, index_dates, index_dates+7), "%Y%m%d")
+ date_range <- seq(dates[1], dates[2], by = "day")
+ get_dates <- date_range[which(date_range %in% dates_avail)]
# only keep files that are within the true yyyymmdd date range user requested
- files = files[foreach(i = seq_along(get_dates), .combine = c) %do% grep(files, pattern = format(get_dates[i], '%Y%m%d'))]
- filenames = basename(files)
+ files <- files[foreach(i = seq_along(get_dates), .combine = c) %do% grep(files, pattern = format(get_dates[i], '%Y%m%d'))]
+ filenames <- basename(files)
# user must supply data_URL or the netcdf files cannot be downloaded through thredds. if user has supplied no data_url, the job will fail
# supply a warning
@@ -86,38 +129,38 @@ download.thredds.data <- function(outdir = NULL, site_info, dates = c("19950201"
{
#https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files/1981/AVHRR-Land_v005_AVH15C1_NOAA-07_19810624_c20181025194251.nc.html
# this is what a link looks like to download threeds data.
- urls = sort(paste(data_url, substr(dates_avail, 1, 4), filenames, sep = "/"))
+ urls <- sort(paste(data_url, substr(dates_avail, 1, 4), filenames, sep = "/"))
- extract_nc = function(site_info, url, run_parallel)
+ extract_nc <- function(site_info, url, run_parallel)
{
require("foreach")
require("ncdf4")
- mylats = site_info$lat
- mylons = site_info$lon
- sites = site_info$site_id
+ mylats <- site_info$lat
+ mylons <- site_info$lon
+ sites <- site_info$site_id
# open netcdf file and get the correct variable name based on varid parameter + var names of netcdf
- data = ncdf4::nc_open(url)
- vars = names(data$var)
- var = vars[grep(vars, pattern = varid, ignore.case = T)]
+ data <- ncdf4::nc_open(url)
+ vars <- names(data$var)
+ var <- vars[grep(vars, pattern = varid, ignore.case = TRUE)]
# get list of all xy coordinates in netcdf
- lats = ncdf4::ncvar_get(data, "latitude")
- lons = ncdf4::ncvar_get(data, "longitude")
+ lats <- ncdf4::ncvar_get(data, "latitude")
+ lons <- ncdf4::ncvar_get(data, "longitude")
# find the cell that site coordinates are located in
- dist_y = foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
- dist_x = foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
- y = foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = T)
- x = foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = T)
+ dist_y <- foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
+ dist_x <- foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
+ y <- foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = TRUE)
+ x <- foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = TRUE)
- scale = data$var[[var]]$scaleFact
+ scale <- data$var[[var]]$scaleFact
- d = as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
+ d <- as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
- info = as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = F)
- names(info) = c("site_id", "lon", "lat", "value")
+ info <- as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = FALSE)
+ names(info) <- c("site_id", "lon", "lat", "value")
return(info)
}
@@ -128,23 +171,23 @@ download.thredds.data <- function(outdir = NULL, site_info, dates = c("19950201"
{
require("parallel")
require("doParallel")
- ncores = parallel::detectCores(all.tests = FALSE, logical = TRUE)
+ ncores <- parallel::detectCores(all.tests = FALSE, logical = TRUE)
if (ncores >= 3)
{
# failsafe in case someone has a computer with 2 nodes.
- ncores = ncores-2
+ ncores <- ncores-2
}
# THREDDS has a 10 job limit. Will fail if you try to download more than 10 values at a time
if (ncores >= 10)
{
- ncores = 9 # went 1 less becasue it still fails sometimes
+ ncores <- 9 # went 1 less becasue it still fails sometimes
}
cl <- parallel::makeCluster(ncores, outfile="")
doParallel::registerDoParallel(cl)
- output = foreach(i = urls, .combine = rbind) %dopar% extract_nc(site_info, i, run_parallel)
+ output <- foreach(i = urls, .combine = rbind) %dopar% extract_nc(site_info, i, run_parallel)
stopCluster(cl)
} else {
- output = foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
+ output <- foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
}
return(output)
From 0ff2b048ffb869de53ec32297cc41dba63e520c2 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Wed, 16 Oct 2019 13:08:05 -0400
Subject: [PATCH 0003/1193] added outdir option in function
---
modules/data.remote/R/download.thredds.R | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index aed5d5ed835..0f94781948e 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -72,13 +72,13 @@ get_site_info <- function(xmlfile) {
##' run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
##'
-##' results <- download_thredds(outdir = NULL, site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE)
+##' results <- download_thredds(site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE, outdir = NULL)
##'
##'
##' @export
##' @author Bailey Morrison
##'
-download_thredds <- function(outdir = NULL, site_info, dates, varid, dir_url, data_url,run_parallel = TRUE) {
+download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_parallel = TRUE, outdir = NULL) {
require("foreach")
@@ -107,7 +107,7 @@ download_thredds <- function(outdir = NULL, site_info, dates, varid, dir_url, da
}
# get list of all dates available from year range provided
- files <- foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
+ files <- foreach::foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
#remove files with no dates and get list of dates available.
index_dates <- regexpr(pattern = "[0-9]{8}", files)
@@ -190,6 +190,11 @@ download_thredds <- function(outdir = NULL, site_info, dates, varid, dir_url, da
output <- foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
}
+ if (outdir)
+ {
+ write.csv(output, file = paste(outdir, "/THREDDS_", varid, "_", dates[1], "-", dates[2], ".csv", sep = ""))
+ }
+
return(output)
}
From bd136a267c55844631937998b399cec9da9112ea Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Wed, 16 Oct 2019 13:24:25 -0400
Subject: [PATCH 0004/1193] added date corrections
---
modules/data.remote/R/download.thredds.R | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 0f94781948e..a82eb30e35e 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -82,8 +82,24 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
require("foreach")
- # check that dates are within the date range of the dataset
- dates <- c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
+ #### check that dates are within the date range of the dataset
+
+ #first make sure dates are in date format. Correct if not.
+ if (!(lubridate::is.Date(dates))){
+ if (!(is.character(dates))) {
+ dates = as.character(dates)
+ }
+ if (length(grep(dates, pattern = "-")) > 0) {
+ dates <- c(as.Date(dates[1], "%Y-%m-%d"), as.Date(dates[2], "%Y-%m-%d"))
+ } else {
+ dates <- c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
+ }
+ # Julien Date
+ if (nchar(dates) == 7) {
+ dates <- c(as.Date(dates[1], "%Y%j"), as.Date(dates[2], "%Y%j"))
+ }
+ }
+
if (!(is.null(dir_url)))
{
#https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files/1981/catalog.html -> link for directory files, not downloads
From 89af55e67fe90a46daa0d33521c7e38e0a6e16a1 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Wed, 16 Oct 2019 13:27:58 -0400
Subject: [PATCH 0005/1193] updated @params
---
modules/data.remote/R/download.thredds.R | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index a82eb30e35e..99bc83a50b8 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -53,7 +53,7 @@ get_site_info <- function(xmlfile) {
##'
##' @param outdir file location to place output
##' @param site_info list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
-##' @param dates character vector of start and end date for dataset as YYYYmmdd
+##' @param dates vector of start and end date for dataset as YYYYmmdd, YYYY-mm-dd, YYYYjjj, or date object.
##' @param varid character vector of shorthand variable name. i.e. LAI
##' @param dir_url catalog url of data from ncei.noaa.gov/thredds website
##' @param data_url opendap url of data from ncei.noaa.gov/thredds website
@@ -65,7 +65,7 @@ get_site_info <- function(xmlfile) {
##' \dontrun{
##' outdir <- directory to store downloaded data
##' site_info <- list that contains information about site_id, site_name, latitude, longitude, and time_zone
-##' dates <- date range to download data. Should be a character vector with start and end date as YYYYmmdd
+##' dates <- date range to download data. Should be a vector of start and end date for dataset as YYYYmmdd, YYYY-mm-dd, YYYYjjj, or date object.
##' varod <- character shorthand name of variable to download. Example: LAI for leaf area index.
##' dir_url <- catalog url from THREDDS that is used to determine which files are available for download using OPENDAP
##' data_url <- OpenDAP URL that actually downloads the netcdf file.
From 0629c0b986798f7c28cd4fab7aab78ebbbfbd75c Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Wed, 16 Oct 2019 13:37:35 -0400
Subject: [PATCH 0006/1193] updated date_year_range
---
modules/data.remote/R/download.thredds.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 99bc83a50b8..d316df02415 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -106,7 +106,7 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
result <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"), verbose=FALSE ,ftp.use.epsv = TRUE, dirlistonly = TRUE)
files <- XML::getHTMLLinks(result)
- date_year_range <- unique(range(c(lubridate::year(as.Date(dates[1], "%Y")), lubridate::year(as.Date(dates[2], "%Y")))))
+ date_year_range = unique(lubridate::year(dates))
if (all((!(substr(files, 1, 4) %in% date_year_range))))
{
# give warning that dates aren't available
From 10eae350b20bee234656ab059e09494e997010d0 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Wed, 16 Oct 2019 14:36:45 -0400
Subject: [PATCH 0007/1193] separated nc extract function from download
function
---
modules/data.remote/R/download.thredds.R | 113 ++++++++++++++---------
1 file changed, 70 insertions(+), 43 deletions(-)
diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index d316df02415..0060d4c1e2f 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -78,8 +78,10 @@ get_site_info <- function(xmlfile) {
##' @export
##' @author Bailey Morrison
##'
-download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_parallel = TRUE, outdir = NULL) {
+download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_parallel = FALSE, outdir = NULL) {
+ #until the issues with parallel runs are fixed.
+ run_parallel = FALSE
require("foreach")
#### check that dates are within the date range of the dataset
@@ -147,50 +149,16 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
# this is what a link looks like to download threeds data.
urls <- sort(paste(data_url, substr(dates_avail, 1, 4), filenames, sep = "/"))
- extract_nc <- function(site_info, url, run_parallel)
- {
- require("foreach")
- require("ncdf4")
-
- mylats <- site_info$lat
- mylons <- site_info$lon
- sites <- site_info$site_id
-
- # open netcdf file and get the correct variable name based on varid parameter + var names of netcdf
- data <- ncdf4::nc_open(url)
- vars <- names(data$var)
- var <- vars[grep(vars, pattern = varid, ignore.case = TRUE)]
-
- # get list of all xy coordinates in netcdf
- lats <- ncdf4::ncvar_get(data, "latitude")
- lons <- ncdf4::ncvar_get(data, "longitude")
-
- # find the cell that site coordinates are located in
- dist_y <- foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
- dist_x <- foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
- y <- foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = TRUE)
- x <- foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = TRUE)
-
- scale <- data$var[[var]]$scaleFact
-
- d <- as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
-
- info <- as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = FALSE)
- names(info) <- c("site_id", "lon", "lat", "value")
-
- return(info)
- }
-
-
-
+ # parallel seems to have a problem right now with > 500 urls.
if (run_parallel)
{
- require("parallel")
+ #require("parallel")
require("doParallel")
ncores <- parallel::detectCores(all.tests = FALSE, logical = TRUE)
+ # This is a failsafe for computers with low numbers of CPUS to reduce risk of blowing RAM.
if (ncores >= 3)
{
- # failsafe in case someone has a computer with 2 nodes.
+ # failsafe in case someone has a computer with 2-4 nodes.
ncores <- ncores-2
}
# THREDDS has a 10 job limit. Will fail if you try to download more than 10 values at a time
@@ -200,14 +168,15 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
}
cl <- parallel::makeCluster(ncores, outfile="")
doParallel::registerDoParallel(cl)
- output <- foreach(i = urls, .combine = rbind) %dopar% extract_nc(site_info, i, run_parallel)
- stopCluster(cl)
+ output <- foreach(i = urls, .combine = rbind) %dopar% extract_thredds_nc(site_info = site_info, url = i)
+ parallel::stopCluster(cl)
} else {
- output <- foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
+ output <- foreach(i = urls, .combine = rbind) %do% extract_thredds_nc(site_info, url = i)
}
- if (outdir)
+ if (!(is.null(outdir)))
{
+ # this will need to be changed in the future if users want to be able to save data they haven't already extracted at different sites/dates.
write.csv(output, file = paste(outdir, "/THREDDS_", varid, "_", dates[1], "-", dates[2], ".csv", sep = ""))
}
@@ -215,3 +184,61 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
}
}
+
+##' @title extract_thredds_nc
+##' @name extract_thredds_nc
+##'
+##'
+##' @param site_info list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
+##' @param url a THREDDS url of a .nc file to extract data from.
+##' @param run_parallel T or F option to extra data in parallel.
+##'
+##'
+##' @return a dataframe with the values for each date/site combination from a THREDDS file
+##'
+##' @examples
+##' \dontrun{
+##' site_info <- list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
+##' url <- url a THREDDS url of a .nc file to extract data from.
+##' run_parallel <- T or F option to extra data in parallel.
+
+##' site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
+##'
+##' @export
+##' @author Bailey Morrison
+##'
+extract_thredds_nc <- function(site_info, url_info, run_parallel)
+{
+ #print(url)
+ require("foreach")
+ require("ncdf4")
+
+ mylats <- site_info$lat
+ mylons <- site_info$lon
+ sites <- site_info$site_id
+
+ # open netcdf file and get the correct variable name based on varid parameter + var names of netcdf
+ data <- ncdf4::nc_open(url_info)
+ vars <- names(data$var)
+ var <- vars[grep(vars, pattern = varid, ignore.case = TRUE)]
+
+ # get list of all xy coordinates in netcdf
+ lats <- ncdf4::ncvar_get(data, "latitude")
+ lons <- ncdf4::ncvar_get(data, "longitude")
+
+ # find the cell that site coordinates are located in
+ dist_y <- foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
+ dist_x <- foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
+ y <- foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = TRUE)
+ x <- foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = TRUE)
+
+ scale <- data$var[[var]]$scaleFact
+
+ d <- as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
+
+ info <- as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = FALSE)
+ names(info) <- c("site_id", "lon", "lat", "value")
+
+ return(info)
+}
+
From 24b835fa41271c66d8b2a6b388473e9ca5d7cba3 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Wed, 16 Oct 2019 14:45:28 -0400
Subject: [PATCH 0008/1193] fixed documentation issue
---
modules/data.remote/NAMESPACE | 4 ++-
modules/data.remote/R/download.thredds.R | 14 ++++----
.../data.remote/man/download.thredds.AGB.Rd | 27 ---------------
modules/data.remote/man/download.thredds.Rd | 34 +++++++++++++++++++
modules/data.remote/man/extract_thredds_nc.Rd | 24 +++++++++++++
modules/data.remote/man/get_site_info.Rd | 20 +++++++++++
6 files changed, 87 insertions(+), 36 deletions(-)
delete mode 100644 modules/data.remote/man/download.thredds.AGB.Rd
create mode 100644 modules/data.remote/man/download.thredds.Rd
create mode 100644 modules/data.remote/man/extract_thredds_nc.Rd
create mode 100644 modules/data.remote/man/get_site_info.Rd
diff --git a/modules/data.remote/NAMESPACE b/modules/data.remote/NAMESPACE
index d84c728b44c..2bb4941b69a 100644
--- a/modules/data.remote/NAMESPACE
+++ b/modules/data.remote/NAMESPACE
@@ -3,6 +3,8 @@
export(call_MODIS)
export(download.LandTrendr.AGB)
export(download.NLCD)
-export(download.thredds.AGB)
+export(download_thredds)
export(extract.LandTrendr.AGB)
export(extract_NLCD)
+export(extract_thredds_nc)
+export(get_site_info)
diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 0060d4c1e2f..b3b17b59891 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -13,7 +13,7 @@
##'
##' site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
-##'
+##' }
##' @export
##' @author Bailey Morrison
##'
@@ -73,7 +73,7 @@ get_site_info <- function(xmlfile) {
##'
##' results <- download_thredds(site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE, outdir = NULL)
-##'
+##' }
##'
##' @export
##' @author Bailey Morrison
@@ -191,7 +191,6 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
##'
##' @param site_info list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
##' @param url a THREDDS url of a .nc file to extract data from.
-##' @param run_parallel T or F option to extra data in parallel.
##'
##'
##' @return a dataframe with the values for each date/site combination from a THREDDS file
@@ -200,14 +199,13 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
##' \dontrun{
##' site_info <- list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
##' url <- url a THREDDS url of a .nc file to extract data from.
-##' run_parallel <- T or F option to extra data in parallel.
-
-##' site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
-##'
+##'
+##' output <- extract_thredds_nc(site_info = site_info, url_info = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files/1995/AVHRR-Land_v005_AVH15C1_NOAA-14_19950201_c20180831220722.nc")
+##'}
##' @export
##' @author Bailey Morrison
##'
-extract_thredds_nc <- function(site_info, url_info, run_parallel)
+extract_thredds_nc <- function(site_info, url_info)
{
#print(url)
require("foreach")
diff --git a/modules/data.remote/man/download.thredds.AGB.Rd b/modules/data.remote/man/download.thredds.AGB.Rd
deleted file mode 100644
index 35dfd405cd5..00000000000
--- a/modules/data.remote/man/download.thredds.AGB.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/download.thredds.R
-\name{download.thredds.AGB}
-\alias{download.thredds.AGB}
-\title{download.thredds.AGB}
-\usage{
-download.thredds.AGB(outdir = NULL, site_ids, run_parallel = FALSE,
- ncores = NULL)
-}
-\arguments{
-\item{outdir}{Where to place output}
-
-\item{site_ids}{What locations to download data at?}
-
-\item{run_parallel}{Logical. Download and extract files in parallel?}
-
-\item{ncores}{Optional. If run_parallel=TRUE how many cores to use? If left as NULL will select max number -1}
-}
-\value{
-data.frame summarize the results of the function call
-}
-\description{
-download.thredds.AGB
-}
-\author{
-Bailey Morrison
-}
diff --git a/modules/data.remote/man/download.thredds.Rd b/modules/data.remote/man/download.thredds.Rd
new file mode 100644
index 00000000000..048f78957ac
--- /dev/null
+++ b/modules/data.remote/man/download.thredds.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/download.thredds.R
+\name{download.thredds}
+\alias{download.thredds}
+\alias{download_thredds}
+\title{download.thredds}
+\usage{
+download_thredds(site_info, dates, varid, dir_url, data_url,
+ run_parallel = FALSE, outdir = NULL)
+}
+\arguments{
+\item{site_info}{list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.}
+
+\item{dates}{vector of start and end date for dataset as YYYYmmdd, YYYY-mm-dd, YYYYjjj, or date object.}
+
+\item{varid}{character vector of shorthand variable name. i.e. LAI}
+
+\item{dir_url}{catalog url of data from ncei.noaa.gov/thredds website}
+
+\item{data_url}{opendap url of data from ncei.noaa.gov/thredds website}
+
+\item{run_parallel}{Logical. Download and extract files in parallel?}
+
+\item{outdir}{file location to place output}
+}
+\value{
+data.frame summarize the results of the function call
+}
+\description{
+download.thredds
+}
+\author{
+Bailey Morrison
+}
diff --git a/modules/data.remote/man/extract_thredds_nc.Rd b/modules/data.remote/man/extract_thredds_nc.Rd
new file mode 100644
index 00000000000..1f8e41ed231
--- /dev/null
+++ b/modules/data.remote/man/extract_thredds_nc.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/download.thredds.R
+\name{extract_thredds_nc}
+\alias{extract_thredds_nc}
+\title{extract_thredds_nc}
+\usage{
+extract_thredds_nc(site_info, url_info, run_parallel)
+}
+\arguments{
+\item{site_info}{list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.}
+
+\item{run_parallel}{T or F option to extra data in parallel.}
+
+\item{url}{a THREDDS url of a .nc file to extract data from.}
+}
+\value{
+a dataframe with the values for each date/site combination from a THREDDS file
+}
+\description{
+extract_thredds_nc
+}
+\author{
+Bailey Morrison
+}
diff --git a/modules/data.remote/man/get_site_info.Rd b/modules/data.remote/man/get_site_info.Rd
new file mode 100644
index 00000000000..e73834879ce
--- /dev/null
+++ b/modules/data.remote/man/get_site_info.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/download.thredds.R
+\name{get_site_info}
+\alias{get_site_info}
+\title{get_site_info}
+\usage{
+get_site_info(xmlfile)
+}
+\arguments{
+\item{xmlfile}{full path to pecan xml settings file}
+}
+\value{
+a list of site information derived from BETY using a pecan .xml settings file with site_id, site_name, lat, lon, and time_zone.
+}
+\description{
+get_site_info
+}
+\author{
+Bailey Morrison
+}
From bd6d4b57e8f101f2dcbac16d659de6e3125b94ee Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov"
Date: Thu, 24 Oct 2019 15:49:49 -0400
Subject: [PATCH 0009/1193] some other changes i dont remember
---
modules/data.remote/man/download.thredds.Rd | 14 ++++++++++++++
modules/data.remote/man/extract_thredds_nc.Rd | 12 +++++++++---
modules/data.remote/man/get_site_info.Rd | 7 +++++++
3 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/modules/data.remote/man/download.thredds.Rd b/modules/data.remote/man/download.thredds.Rd
index 048f78957ac..9983594ad5a 100644
--- a/modules/data.remote/man/download.thredds.Rd
+++ b/modules/data.remote/man/download.thredds.Rd
@@ -28,6 +28,20 @@ data.frame summarize the results of the function call
}
\description{
download.thredds
+}
+\examples{
+\dontrun{
+outdir <- directory to store downloaded data
+site_info <- list that contains information about site_id, site_name, latitude, longitude, and time_zone
+dates <- date range to download data. Should be a vector of start and end date for dataset as YYYYmmdd, YYYY-mm-dd, YYYYjjj, or date object.
+varod <- character shorthand name of variable to download. Example: LAI for leaf area index.
+dir_url <- catalog url from THREDDS that is used to determine which files are available for download using OPENDAP
+data_url <- OpenDAP URL that actually downloads the netcdf file.
+run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
+
+results <- download_thredds(site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE, outdir = NULL)
+}
+
}
\author{
Bailey Morrison
diff --git a/modules/data.remote/man/extract_thredds_nc.Rd b/modules/data.remote/man/extract_thredds_nc.Rd
index 1f8e41ed231..694fdafcc1c 100644
--- a/modules/data.remote/man/extract_thredds_nc.Rd
+++ b/modules/data.remote/man/extract_thredds_nc.Rd
@@ -4,13 +4,11 @@
\alias{extract_thredds_nc}
\title{extract_thredds_nc}
\usage{
-extract_thredds_nc(site_info, url_info, run_parallel)
+extract_thredds_nc(site_info, url_info)
}
\arguments{
\item{site_info}{list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.}
-\item{run_parallel}{T or F option to extra data in parallel.}
-
\item{url}{a THREDDS url of a .nc file to extract data from.}
}
\value{
@@ -19,6 +17,14 @@ a dataframe with the values for each date/site combination from a THREDDS file
\description{
extract_thredds_nc
}
+\examples{
+\dontrun{
+site_info <- list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
+url <- url a THREDDS url of a .nc file to extract data from.
+
+output <- extract_thredds_nc(site_info = site_info, url_info = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files/1995/AVHRR-Land_v005_AVH15C1_NOAA-14_19950201_c20180831220722.nc")
+}
+}
\author{
Bailey Morrison
}
diff --git a/modules/data.remote/man/get_site_info.Rd b/modules/data.remote/man/get_site_info.Rd
index e73834879ce..98d06bff0f5 100644
--- a/modules/data.remote/man/get_site_info.Rd
+++ b/modules/data.remote/man/get_site_info.Rd
@@ -15,6 +15,13 @@ a list of site information derived from BETY using a pecan .xml settings file wi
\description{
get_site_info
}
+\examples{
+\dontrun{
+xmlfile <- the full path to a pecan .xml settings file.
+
+site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
+ }
+}
\author{
Bailey Morrison
}
From 1e1fbefda2a094d2f26838189ba091ec619cc3ff Mon Sep 17 00:00:00 2001
From: Morrison
Date: Thu, 14 May 2020 15:37:45 -0400
Subject: [PATCH 0010/1193] some changes I dont remember
---
.../R/download.thredds.AVHRR.monthAGG.R | 78 +++++++++----------
1 file changed, 37 insertions(+), 41 deletions(-)
diff --git a/modules/data.remote/R/download.thredds.AVHRR.monthAGG.R b/modules/data.remote/R/download.thredds.AVHRR.monthAGG.R
index 0084adda9fd..16aaf2b2e6b 100755
--- a/modules/data.remote/R/download.thredds.AVHRR.monthAGG.R
+++ b/modules/data.remote/R/download.thredds.AVHRR.monthAGG.R
@@ -72,49 +72,32 @@ get_site_info <- function(xmlfile) {
##' run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
##'
-##' results <- download_thredds(site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = FALSE, outdir = NULL)
+##' results <- download_thredds(site_info = site_info, years = c("2000", "2003"), months = c(6,7,8), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = FALSE, outdir = NULL)
##' }
##' @importFrom foreach %do% %dopar%
##' @export
##' @author Bailey Morrison
##'
-download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_parallel = FALSE, outdir = NULL) {
+download_thredds <- function(site_info, years, months, varid, dir_url, data_url,run_parallel = FALSE, outdir = NULL) {
#until the issues with parallel runs are fixed.
run_parallel = FALSE
- #require("foreach")
-
-
- #### check that dates are within the date range of the dataset
-
- #first make sure dates are in date format. Correct if not.
- if (!(lubridate::is.Date(dates))){
- if (!(is.character(dates))) {
- dates = as.character(dates)
- }
- if (length(grep(dates, pattern = "-")) > 0) {
- dates <- c(as.Date(dates[1], "%Y-%m-%d"), as.Date(dates[2], "%Y-%m-%d"))
- } else {
- dates <- c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
- }
- # Julien Date
- if (any(nchar(dates) == 7)) {
- dates <- c(as.Date(dates[1], "%Y%j"), as.Date(dates[2], "%Y%j"))
- }
- }
-
- date_range = unique(lubridate::year(seq(dates[1], dates[2], by = '1 year')))
+
- output = data.frame()
+ #assumes there is a max of 31 possible days in a month. This covers leap years!
+ years_range = sort(rep(seq(years[1], years[2]), 31))
+
if (!(is.null(dir_url)))
{
- for (i in seq_along(date_range))
+ output = data.frame()
+
+ for (i in seq_along(unique(years_range)))
{
- result <- RCurl::getURL(paste(dir_url, date_range[i], "/catalog.html", sep = "/"),
+ result <- RCurl::getURL(paste(dir_url, unique(years_range)[i], "/catalog.html", sep = "/"),
verbose=FALSE ,ftp.use.epsv = TRUE, dirlistonly = TRUE)
files <- XML::getHTMLLinks(result)
- index_dates <- regexpr(pattern = "_[0-9]{8}_", files)
+ index_dates <- regexpr(pattern = paste0("_[0-9]{4}0[", months[1], "-", months[length(months)], "]{1}[0-9]{2}_"), files)
files <- files[-(which(index_dates < 0))]
index_dates <- index_dates[which(index_dates > 0)]
@@ -148,19 +131,35 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
} else {
out <- foreach::foreach(i = urls, .combine = rbind) %do%
extract_thredds_nc(site_info, url_info = i)
- }
- output = rbind(output, out)
-
- if (!(is.null(outdir)))
- {
- # this will need to be changed in the future if users want to be able to save data they haven't already extracted at different sites/dates.
- write.csv(out, file = paste(outdir, "/THREDDS_", varid, "_", dates[1], "-", dates[2], ".csv", sep = ""))
- }
+
+ # get max LAI for each site instead of all days with missing NA fillers
+ test = foreach::foreach(i = unique(out$site_id), .combine = rbind) %do%
+ max_lai(x = out, site = i)
+ test$date = lubridate::year(test$date)
+
+ output = rbind(output, test)
+
+ }
}
-
}
+
+ # if (!(is.null(outdir)))
+ # {
+ # # this will need to be changed in the future if users want to be able to save data they haven't already extracted at different sites/dates.
+ # write.csv(output, file = paste(outdir, "/THREDDS_", varid, "_", years[1], "-", years[2], "_",months[1], "-", months[length(months)], ".csv", sep = ""))
+ # }
+ return(output)
}
- return(output)
+}
+
+
+
+
+max_lai = function(x, site)
+{
+ site_info_max = as.data.frame(x[x$site_id == site,][1,1:4], stringsAsFactors = FALSE)
+ site_info_max$max = as.numeric(max(x[x$site_id == site,]$value, na.rm = TRUE))
+ return(site_info_max)
}
@@ -186,9 +185,6 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
##'
extract_thredds_nc <- function(site_info, url_info)
{
- #print(url)
- #require("foreach")
- #require("ncdf4")
index = regexpr(pattern = "_[0-9]{8}_", url_info)
date<- as.Date(substr(url_info, index+1, index+8), "%Y%m%d")
From 936e3b77c4daa29d5530e674306b1adab4a6e008 Mon Sep 17 00:00:00 2001
From: Morrison
Date: Mon, 1 Jun 2020 22:54:08 -0400
Subject: [PATCH 0011/1193] update thredds function
---
modules/data.remote/R/download.thredds.AVHRR.R | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/modules/data.remote/R/download.thredds.AVHRR.R b/modules/data.remote/R/download.thredds.AVHRR.R
index 9dfad069062..5575396db58 100755
--- a/modules/data.remote/R/download.thredds.AVHRR.R
+++ b/modules/data.remote/R/download.thredds.AVHRR.R
@@ -146,8 +146,11 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
# extract_thredds_nc(site_info = site_info, url_info = i)
# parallel::stopCluster(cl)
} else {
- out <- foreach::foreach(i = urls, .combine = rbind) %do%
- extract_thredds_nc(site_info, url_info = i)
+ #start_time <- Sys.time()
+ out <- foreach::foreach(j = urls, .combine = rbind) %do%
+ extract_thredds_nc(site_info, url_info = j)
+ # end_time <- Sys.time()
+ # end_time - start_time
}
output = rbind(output, out)
From eb8ab25a67a1134231330b436cb2ffb233a35237 Mon Sep 17 00:00:00 2001
From: koolgax99
Date: Thu, 7 Jul 2022 01:44:38 +0530
Subject: [PATCH 0012/1193] added initial postrior apis
---
apps/api/R/entrypoint.R | 4 +
apps/api/R/posteriors.R | 161 +++++++++++++++++++++++++++++++++++++
apps/api/pecanapi-spec.yml | 119 +++++++++++++++++++++++++++
3 files changed, 284 insertions(+)
create mode 100644 apps/api/R/posteriors.R
diff --git a/apps/api/R/entrypoint.R b/apps/api/R/entrypoint.R
index 5f1d8a3fb94..18cb4a91d81 100755
--- a/apps/api/R/entrypoint.R
+++ b/apps/api/R/entrypoint.R
@@ -69,6 +69,10 @@ root$mount("/api/runs", runs_pr)
runs_pr <- plumber::Plumber$new("available-models.R")
root$mount("/api/availableModels", runs_pr)
+# The endpoints mounted here are related to details of PEcAn posteriors
+runs_pr <- plumber::Plumber$new("posteriors.R")
+root$mount("/api/posteriors", runs_pr)
+
# set swagger documentation
root$setApiSpec("../pecanapi-spec.yml")
diff --git a/apps/api/R/posteriors.R b/apps/api/R/posteriors.R
new file mode 100644
index 00000000000..9d867ca5adf
--- /dev/null
+++ b/apps/api/R/posteriors.R
@@ -0,0 +1,161 @@
+library(dplyr)
+
+#' Search for Posteriors containing wildcards for filtering
+#' @param pft_id PFT Id (character)
+#' @param offset
+#' @param limit
+#' @return Information about Posteriors based on pft
+#' @author Nihar Sanda
+#* @get /
+searchPosteriors <- function(req, pft_id = NA, host_id = NA, offset = 0, limit = 50, res) {
+ if (!limit %in% c(10, 20, 50, 100, 500)) {
+ res$status <- 400
+ return(list(error = "Invalid value for parameter"))
+ }
+
+ posteriors <- tbl(global_db_pool, "posteriors") %>%
+ select(everything())
+
+ posteriors <- tbl(global_db_pool, "dbfiles") %>%
+ select(file_name, file_path, container_type, id = container_id, machine_id) %>%
+ inner_join(posteriors, by = "id") %>%
+ filter(container_type == "Posterior") %>%
+ select(-container_type)
+
+ posteriors <- tbl(global_db_pool, "machines") %>%
+ select(hostname, machine_id = id) %>%
+ inner_join(posteriors, by = "machine_id")
+
+ posteriors <- tbl(global_db_pool, "pfts") %>%
+ select(pft_name = name, pft_id = id) %>%
+ inner_join(posteriors, by = "pft_id")
+
+ if (!is.na(pft_id)) {
+ posteriors <- posteriors %>%
+ filter(pft_id == !!pft_id)
+ }
+
+ if (!is.na(host_id)) {
+ posteriors <- posteriors %>%
+ filter(machine_id == !!host_id)
+ }
+
+ qry_res <- posteriors %>%
+ select(-pft_id, -machine_id) %>%
+ distinct() %>%
+ arrange(id) %>%
+ collect()
+
+ if (nrow(qry_res) == 0 || as.numeric(offset) >= nrow(qry_res)) {
+ res$status <- 404
+ return(list(error = "Posterior(s) not found"))
+ } else {
+ has_next <- FALSE
+ has_prev <- FALSE
+ if (nrow(qry_res) > (as.numeric(offset) + as.numeric(limit))) {
+ has_next <- TRUE
+ }
+ if (as.numeric(offset) != 0) {
+ has_prev <- TRUE
+ }
+
+ qry_res <- qry_res[(as.numeric(offset) + 1):min((as.numeric(offset) + as.numeric(limit)), nrow(qry_res)), ]
+
+ result <- list(posteriors = qry_res)
+ result$count <- nrow(qry_res)
+ if (has_next) {
+ if (grepl("offset=", req$QUERY_STRING, fixed = TRUE)) {
+ result$next_page <- paste0(
+ req$rook.url_scheme, "://",
+ req$HTTP_HOST,
+ "/api/posteriors",
+ req$PATH_INFO,
+ substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]),
+ (as.numeric(limit) + as.numeric(offset)),
+ "&limit=",
+ limit
+ )
+ } else {
+ result$next_page <- paste0(
+ req$rook.url_scheme, "://",
+ req$HTTP_HOST,
+ "/api/posteriors",
+ req$PATH_INFO,
+ substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "limit=")[[2]] - 6),
+ "offset=",
+ (as.numeric(limit) + as.numeric(offset)),
+ "&limit=",
+ limit
+ )
+ }
+ }
+ if (has_prev) {
+ result$prev_page <- paste0(
+ req$rook.url_scheme, "://",
+ req$HTTP_HOST,
+ "/api/workflows",
+ req$PATH_INFO,
+ substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]),
+ max(0, (as.numeric(offset) - as.numeric(limit))),
+ "&limit=",
+ limit
+ )
+ }
+
+ return(result)
+ }
+}
+
+#################################################################################################
+
+#' Download the posterior specified by the id
+#' @param id Posterior id (character)
+#' @param filename Optional filename specified if the id points to a folder instead of file (character)
+#' If this is passed with an id that actually points to a file, this name will be ignored
+#' @return Posterior file specified by user
+#' @author Nihar Sanda
+#* @serializer contentType list(type="application/octet-stream")
+#* @get /
+downloadPosterior <- function(posterior_id, filename = "", req, res) {
+ db_hostid <- PEcAn.DB::dbHostInfo(global_db_pool)$hostid
+
+ # This is just for temporary testing due to the existing issue in dbHostInfo()
+ db_hostid <- ifelse(db_hostid == 99, 99000000001, db_hostid)
+
+ posterior <- tbl(global_db_pool, "dbfiles") %>%
+ select(file_name, file_path, container_id, machine_id, container_type) %>%
+ filter(machine_id == !!db_hostid) %>%
+ filter(container_type == "Posterior") %>%
+ filter(container_id == !!posterior_id) %>%
+ collect()
+
+ if (nrow(posterior) == 0) {
+ res$status <- 404
+ return()
+ } else {
+ # Generate the full file path using the file_path & file_name
+ filepath <- paste0(posterior$file_path, "/", posterior$file_name)
+
+ # If the id points to a directory, check if 'filename' within this directory has been specified
+ if (dir.exists(filepath)) {
+ # If no filename is provided, return 400 Bad Request error
+ if (filename == "") {
+ res$status <- 400
+ return()
+ }
+
+ # Append the filename to the filepath
+ filepath <- paste0(filepath, filename)
+ }
+
+ # If the file doesn't exist, return 404 error
+ if (!file.exists(filepath)) {
+ res$status <- 404
+ return()
+ }
+
+ # Read the data in binary form & return it
+ bin <- readBin(filepath, "raw", n = file.info(filepath)$size)
+ return(bin)
+ }
+}
diff --git a/apps/api/pecanapi-spec.yml b/apps/api/pecanapi-spec.yml
index b6ea7e3a757..f3f874e60df 100644
--- a/apps/api/pecanapi-spec.yml
+++ b/apps/api/pecanapi-spec.yml
@@ -41,6 +41,8 @@ tags:
description: Everything about PEcAn PFTs (Plant Functional Types)
- name: inputs
description: Everything about PEcAn inputs
+ - name: posteriors
+ description: Everything about PEcAn posteriors
#####################################################################################################################
##################################################### API Endpoints #################################################
@@ -992,6 +994,123 @@ paths:
description: Access forbidden
'404':
description: Run data not found
+
+ /api/posteriors/:
+ get:
+ tags:
+ - posteriors
+ summary: Search for the posteriors
+ parameters:
+ - in: query
+ name: pft_id
+ description: If provided, returns all posteriors for the provided model_id
+ required: false
+ schema:
+ type: string
+ - in: query
+ name: host_id
+ description: If provided, returns all posteriors for the provided host_id
+ required: false
+ schema:
+ type: string
+ - in: query
+ name: offset
+ description: The number of posteriors to skip before starting to collect the result set.
+ schema:
+ type: integer
+ minimum: 0
+ default: 0
+ required: false
+ - in: query
+ name: limit
+ description: The number of posteriors to return.
+ schema:
+ type: integer
+ default: 50
+ enum:
+ - 10
+ - 20
+ - 50
+ - 100
+ - 500
+ required: false
+ responses:
+ '200':
+ description: List of posteriors
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ inputs:
+ type: array
+ items:
+ type: object
+ properties:
+ id:
+ type: string
+ file_name:
+ type: string
+ file_path:
+ type: string
+ pft_name:
+ type: string
+ tag:
+ type: string
+ hostname:
+ type: string
+ start_date:
+ type: string
+ end_date:
+ type: string
+ count:
+ type: integer
+ next_page:
+ type: string
+ prev_page:
+ type: string
+
+ '401':
+ description: Authentication required
+ '403':
+ description: Access forbidden
+ '404':
+ description: Workflows not found
+
+ /api/posteriors/{posterior_id}:
+ get:
+ tags:
+ - posteriors
+ summary: Download a desired PEcAn posterior file
+ parameters:
+ - in: path
+ name: posterior_id
+ description: ID of the PEcAn Posterior to be downloaded
+ required: true
+ schema:
+ type: string
+ - in: query
+ name: filename
+ description: Optional filename specified if the id points to a folder instead of file
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ description: Contents of the desired posterior file
+ content:
+ application/octet-stream:
+ schema:
+ type: string
+ format: binary
+ '400':
+ description: Bad request. Posterior ID points to directory & filename is not specified
+ '401':
+ description: Authentication required
+ '403':
+ description: Access forbidden
+
+
#####################################################################################################################
###################################################### Components ###################################################
#####################################################################################################################
From 3feea99da53763dd100be24e93fd946b652672d2 Mon Sep 17 00:00:00 2001
From: koolgax99
Date: Thu, 14 Jul 2022 04:20:24 +0530
Subject: [PATCH 0013/1193] Added multiple file download API
---
apps/api/R/workflows.R | 116 +++++++++++++++++++++++---
apps/api/pecanapi-spec.yml | 167 +++++++++++++++++++++++++++++++++++++
2 files changed, 272 insertions(+), 11 deletions(-)
diff --git a/apps/api/R/workflows.R b/apps/api/R/workflows.R
index 44cb9196f18..304ca9f1391 100644
--- a/apps/api/R/workflows.R
+++ b/apps/api/R/workflows.R
@@ -49,16 +49,31 @@ getWorkflows <- function(req, model_id=NA, site_id=NA, offset=0, limit=50, res){
result <- list(workflows = qry_res)
result$count <- nrow(qry_res)
if(has_next){
- result$next_page <- paste0(
- req$rook.url_scheme, "://",
- req$HTTP_HOST,
- "/api/workflows",
- req$PATH_INFO,
- substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]),
- (as.numeric(limit) + as.numeric(offset)),
- "&limit=",
- limit
- )
+ if(grepl("offset=", req$QUERY_STRING, fixed = TRUE)){
+ result$next_page <- paste0(
+ req$rook.url_scheme, "://",
+ req$HTTP_HOST,
+ "/api/workflows",
+ req$PATH_INFO,
+ substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]),
+ (as.numeric(limit) + as.numeric(offset)),
+ "&limit=",
+ limit
+ )
+ }
+ else {
+ result$next_page <- paste0(
+ req$rook.url_scheme, "://",
+ req$HTTP_HOST,
+ "/api/workflows",
+ req$PATH_INFO,
+ substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "limit=")[[2]] - 6),
+ "offset=",
+ (as.numeric(limit) + as.numeric(offset)),
+ "&limit=",
+ limit
+ )
+ }
}
if(has_prev) {
result$prev_page <- paste0(
@@ -200,7 +215,7 @@ getWorkflowStatus <- function(req, id, res){
#* @get //file/
getWorkflowFile <- function(req, id, filename, res){
Workflow <- tbl(global_db_pool, "workflows") %>%
- select(id, user_id) %>%
+ select(id, user_id) %>%
filter(id == !!id)
qry_res <- Workflow %>% collect()
@@ -229,3 +244,82 @@ getWorkflowFile <- function(req, id, filename, res){
return(bin)
}
}
+
+
+#################################################################################################
+#' Get the list of files in a workflow specified by the id
+#' @param id Workflow id (character)
+#' @return List of files
+#' @author Nihar Sanda
+#* @serializer contentType list(type="application/octet-stream")
+#* @get //files
+
+getWorkflowFileDetails <- function(req, id, res){
+ Workflow <- tbl(global_db_pool, "workflows") %>%
+ select(id, user_id) %>%
+ filter(id == !!id)
+
+ qry_res <- Workflow %>% collect()
+
+ if (nrow(qry_res) == 0) {
+ res$status <- 404
+ return(list(error="Workflow with specified ID was not found"))
+ }
+ else {
+ file_names <- list()
+ file_names <- list.files(paste0(Sys.getenv("DATA_DIR", "/data/"), "workflows/PEcAn_", id))
+
+ return(list(workflow_id = id))
+ }
+}
+
+#################################################################################################
+#' Get the zip of specified files of the workflow specified by the id
+#' @param id Workflow id (character)
+#' @return Details of requested workflow
+#' @author Nihar Sanda
+#* @serializer contentType list(type="application/octet-stream")
+#* @post //file-multiple/
+
+getWorkflowFilesAsZip <- function(req, id, filenames, res){
+ if(req$HTTP_CONTENT_TYPE == "application/json") {
+ filenames_req <- req$postBody
+ }
+
+ filenamesList <- jsonlite::fromJSON(filenames_req)
+ filenames <- filenamesList$files
+
+ Workflow <- tbl(global_db_pool, "workflows") %>%
+ select(id, user_id) %>%
+ filter(id == !!id)
+
+ qry_res <- Workflow %>% collect()
+
+ if (nrow(qry_res) == 0) {
+ res$status <- 404
+ return()
+ }
+ else {
+ full_files <- vector(mode = "character", length = length(filenames))
+ for (i in 1:length(filenames)) {
+
+ # Check if the requested file exists on the host
+ filepath <- paste0(Sys.getenv("DATA_DIR", "/data/"), "workflows/PEcAn_", id, "/", filenames[i])
+ if(! file.exists(filepath)){
+ res$status <- 404
+ return()
+ }
+
+ if(Sys.getenv("AUTH_REQ") == TRUE){
+ if(qry_res$user_id != req$user$userid) {
+ res$status <- 403
+ return()
+ }
+ }
+
+ full_files[i] <- filepath
+ }
+ zip_file <- zip::zipr("output.zip", full_files)
+ return(zip_file)
+ }
+}
\ No newline at end of file
diff --git a/apps/api/pecanapi-spec.yml b/apps/api/pecanapi-spec.yml
index b6ea7e3a757..206687693d7 100644
--- a/apps/api/pecanapi-spec.yml
+++ b/apps/api/pecanapi-spec.yml
@@ -41,6 +41,8 @@ tags:
description: Everything about PEcAn PFTs (Plant Functional Types)
- name: inputs
description: Everything about PEcAn inputs
+ - name: posteriors
+ description: Everything about PEcAn posteriors
#####################################################################################################################
##################################################### API Endpoints #################################################
@@ -779,6 +781,40 @@ paths:
description: Authentication required
'403':
description: Access forbidden
+
+ /api/workflows/{id}/file-multiple/:
+ post:
+ tags:
+ - workflows
+ summary: Download multiple files
+ parameters:
+ - in: path
+ name: id
+ description: ID of the PEcAn Workflow
+ required: true
+ schema:
+ type: string
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/WorkflowFiles_POST'
+
+
+ responses:
+ '200':
+ description: Download the zip file consisting of the desired files
+ content:
+ application/octet-stream:
+ schema:
+ type: string
+ format: binary
+ '401':
+ description: Authentication required
+ '415':
+ description: Unsupported request content type
+
/api/runs/:
get:
@@ -992,6 +1028,123 @@ paths:
description: Access forbidden
'404':
description: Run data not found
+
+ /api/posteriors/:
+ get:
+ tags:
+ - posteriors
+ summary: Search for the posteriors
+ parameters:
+ - in: query
+ name: pft_id
+ description: If provided, returns all posteriors for the provided model_id
+ required: false
+ schema:
+ type: string
+ - in: query
+ name: host_id
+ description: If provided, returns all posteriors for the provided host_id
+ required: false
+ schema:
+ type: string
+ - in: query
+ name: offset
+ description: The number of posteriors to skip before starting to collect the result set.
+ schema:
+ type: integer
+ minimum: 0
+ default: 0
+ required: false
+ - in: query
+ name: limit
+ description: The number of posteriors to return.
+ schema:
+ type: integer
+ default: 50
+ enum:
+ - 10
+ - 20
+ - 50
+ - 100
+ - 500
+ required: false
+ responses:
+ '200':
+ description: List of posteriors
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ inputs:
+ type: array
+ items:
+ type: object
+ properties:
+ id:
+ type: string
+ file_name:
+ type: string
+ file_path:
+ type: string
+ pft_name:
+ type: string
+ tag:
+ type: string
+ hostname:
+ type: string
+ start_date:
+ type: string
+ end_date:
+ type: string
+ count:
+ type: integer
+ next_page:
+ type: string
+ prev_page:
+ type: string
+
+ '401':
+ description: Authentication required
+ '403':
+ description: Access forbidden
+ '404':
+ description: Workflows not found
+
+ /api/posteriors/{posterior_id}:
+ get:
+ tags:
+ - posteriors
+ summary: Download a desired PEcAn posterior file
+ parameters:
+ - in: path
+ name: posterior_id
+ description: ID of the PEcAn Posterior to be downloaded
+ required: true
+ schema:
+ type: string
+ - in: query
+ name: filename
+ description: Optional filename specified if the id points to a folder instead of file
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ description: Contents of the desired input file
+ content:
+ application/octet-stream:
+ schema:
+ type: string
+ format: binary
+ '400':
+ description: Bad request. Input ID points to directory & filename is not specified
+ '401':
+ description: Authentication required
+ '403':
+ description: Access forbidden
+
+
#####################################################################################################################
###################################################### Components ###################################################
#####################################################################################################################
@@ -1275,6 +1428,20 @@ components:
dbfiles:
type: string
example: pecan/dbfiles
+
+ WorkflowFiles_POST:
+ type: object
+
+ properties:
+ files:
+ type: array
+ items:
+ type: string
+ example: [
+ "pecan.xml",
+ "workflow.R"
+ ]
+
securitySchemes:
basicAuth:
type: http
From e937e8644db06c85c3fe44e1e6da999555f20a73 Mon Sep 17 00:00:00 2001
From: koolgax99
Date: Wed, 10 Aug 2022 19:51:48 +0530
Subject: [PATCH 0014/1193] fixed a minor typo
---
apps/api/pecanapi-spec.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/apps/api/pecanapi-spec.yml b/apps/api/pecanapi-spec.yml
index 206687693d7..807df0ea8a4 100644
--- a/apps/api/pecanapi-spec.yml
+++ b/apps/api/pecanapi-spec.yml
@@ -1109,7 +1109,7 @@ paths:
'403':
description: Access forbidden
'404':
- description: Workflows not found
+ description: Posteriors not found
/api/posteriors/{posterior_id}:
get:
From 3cbce66c354cdc6e259665524e830a00fb535a37 Mon Sep 17 00:00:00 2001
From: koolgax99
Date: Fri, 7 Oct 2022 20:11:23 +0530
Subject: [PATCH 0015/1193] run meta analysis for a settings file
---
apps/api/R/entrypoint.R | 3 ++
apps/api/R/ma.R | 67 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 70 insertions(+)
create mode 100644 apps/api/R/ma.R
diff --git a/apps/api/R/entrypoint.R b/apps/api/R/entrypoint.R
index 5f1d8a3fb94..1aec6dcf02b 100755
--- a/apps/api/R/entrypoint.R
+++ b/apps/api/R/entrypoint.R
@@ -69,6 +69,9 @@ root$mount("/api/runs", runs_pr)
runs_pr <- plumber::Plumber$new("available-models.R")
root$mount("/api/availableModels", runs_pr)
+ma_pr <- plumber::Plumber$new("ma.R")
+root$mount("/api/ma", ma_pr)
+
# set swagger documentation
root$setApiSpec("../pecanapi-spec.yml")
diff --git a/apps/api/R/ma.R b/apps/api/R/ma.R
new file mode 100644
index 00000000000..26330c402b8
--- /dev/null
+++ b/apps/api/R/ma.R
@@ -0,0 +1,67 @@
+library(dplyr)
+library("PEcAn.all")
+library("RCurl")
+
+#' Post a settings file for running a Meta-Analysis
+#' @param req Send pecan.xml in bodyas xml filetype
+#' @return A list of post.distns.MA.R
+#' @author Nihar Sanda
+#* @post /run
+submitWorkflow <- function(req, res){
+ if(req$HTTP_CONTENT_TYPE == "application/xml") {
+ # read req$bosy as xml
+ settingsXml <- XML::xmlParseString(stringr::str_replace(req$body, ".*?>\n", ""))
+
+ ## convert the xml to a list
+ settings <- XML::xmlToList(settingsXml)
+ settings <- as.Settings(settings)
+ settings <- expandMultiSettings(settings)
+
+ # Update/fix/check settings.
+ # Will only run the first time it's called, unless force=TRUE
+ settings <-
+ PEcAn.settings::prepare.settings(settings, force = FALSE)
+
+ # Changing update to TRUE
+ settings$meta.analysis$update <- TRUE
+
+ # Write pecan.CHECKED.xml
+ PEcAn.settings::write.settings(settings, outputfile = "pecan.CHECKED.xml")
+
+ # Do conversions
+ settings <- PEcAn.workflow::do_conversions(settings)
+ settings <- PEcAn.workflow::runModule.get.trait.data(settings)
+
+ # initiating variables needed for running meta analysis
+ pfts <- settings$pfts
+ iterations <- settings$meta.analysis$iter
+ random <- settings$meta.analysis$random.effects$on
+ use_ghs <- settings$meta.analysis$random.effects$use_ghs
+ threshold <- settings$meta.analysis$threshold
+ dbfiles <- settings$database$dbfiles
+ database <- settings$database$bety
+
+ # running meta analysis
+ run.meta.analysis(pfts, iterations, random, threshold,
+ dbfiles, database, use_ghs)
+
+ #PEcAn.MA::runModule.run.meta.analysis(settings = ma_settings)
+
+ if(dir.exists(settings$pfts$pft$outdir)){
+ filepath <- paste0(settings$pfts$pft$outdir, "/post.distns.Rdata")
+ e <- new.env(parent = emptyenv())
+ load(filepath, envir = e)
+ objs <- ls(envir = e, all.names = TRUE)
+ for(obj in objs) {
+ data <- get(obj, envir =e)
+ }
+ #csv_file <- paste0(settings$pfts$pft$outdir, '/post.distns.csv')
+ #plumber::include_file(csv_file, res)
+ return(list(status = "Meta Analysis ran successfully", data=data))
+ }
+ }
+ else{
+ res$status <- 415
+ return(paste("Unsupported request content type:", req$HTTP_CONTENT_TYPE))
+ }
+}
\ No newline at end of file
From 614b8f931929e571958c87db6c0bd2738a86ac13 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 18 Jul 2024 13:04:38 +0530
Subject: [PATCH 0016/1193] Shift functions to check for missing files
Return from convert_input via a helper function
Update corresponding test files and add tests to ensure do_conversions isn't affected by current applied changes
Signed-off-by: Abhinav Pandey
---
base/db/R/add.database.entries.R | 111 +++++++++++++
base/db/R/check.missing.files.R | 49 ++++++
base/db/R/convert_input.R | 150 ++----------------
base/db/man/add.database.entries.Rd | 70 ++++++++
base/db/man/check_missing_files.Rd | 31 ++++
.../tests/testthat/test.check.missing.files.R | 24 +++
base/db/tests/testthat/test.convert_input.R | 29 ++--
7 files changed, 320 insertions(+), 144 deletions(-)
create mode 100644 base/db/R/add.database.entries.R
create mode 100644 base/db/R/check.missing.files.R
create mode 100644 base/db/man/add.database.entries.Rd
create mode 100644 base/db/man/check_missing_files.Rd
create mode 100644 base/db/tests/testthat/test.check.missing.files.R
diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R
new file mode 100644
index 00000000000..3c253c07e73
--- /dev/null
+++ b/base/db/R/add.database.entries.R
@@ -0,0 +1,111 @@
+#' Return new arrangement of database while adding code to deal with ensembles
+#'
+#' @param result list of results from the download function
+#' @param con database connection
+#' @param start_date start date of the data
+#' @param end_date end date of the data
+#' @param write whether to write to the database
+#' @param overwrite Logical: If a file already exists, create a fresh copy?
+#' @param insert.new.file whether to insert a new file
+#' @param input.args input arguments obtained from the convert_input function
+#' @param machine machine information
+#' @param mimetype data product specific file format
+#' @param formatname format name of the data
+#' @param allow.conflicting.dates whether to allow conflicting dates
+#' @param ensemble ensemble id
+#' @param ensemble_name ensemble name
+#' @param existing.input existing input records
+#' @param existing.dbfile existing dbfile records
+#' @param input input records
+#' @return list of input and dbfile ids
+#'
+#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+
+add.database.entries <- function(
+ result, con, start_date,
+ end_date, write, overwrite,
+ insert.new.file, input.args,
+ machine, mimetype, formatname,
+ allow.conflicting.dates, ensemble,
+ ensemble_name, existing.input,
+ existing.dbfile, input) {
+ if (write) {
+ # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
+ # This list will be returned.
+ newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
+ for (i in 1:length(result)) { # Master for loop
+ id_not_added <- TRUE
+
+ if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 &&
+ (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) {
+ # Updating record with new dates
+ db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, "' WHERE id=", existing.input[[i]]$id), con)
+ id_not_added <- FALSE
+
+ # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration.
+ newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
+ newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id)
+ }
+
+ if (overwrite) {
+ # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record)
+ if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) {
+ db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), "' WHERE id=", existing.input[[i]]$id), con)
+ }
+
+ if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) {
+ db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), "', file_name='", result[[i]]$dbfile.name[1], "' WHERE id=", existing.dbfile[[i]]$id), con)
+ }
+ }
+
+ # If there is no ensemble then for each record there should be one parent
+ # But when you have ensembles, all of the members have one parent !!
+ parent.id <- if (is.numeric(ensemble)) {
+ ifelse(is.null(input[[i]]), NA, input[[1]]$id)
+ } else {
+ ifelse(is.null(input[[i]]), NA, input[[i]]$id)
+ }
+
+
+ if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) {
+ site.id <- input.args$newsite
+ }
+
+ if (insert.new.file && id_not_added) {
+ dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]), in.prefix = result[[i]]$dbfile.name[1], "Input", existing.input[[i]]$id, con, reuse = TRUE, hostname = machine$hostname)
+ newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
+ newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
+ } else if (id_not_added) {
+ # This is to tell input.insert if we are writing ensembles
+ # Why does it need it? Because it checks for inputs with the same time period, site, and machine
+ # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition
+ ens.flag <- if (!is.null(ensemble) | is.null(ensemble_name)) TRUE else FALSE
+
+ new_entry <- dbfile.input.insert(
+ in.path = dirname(result[[i]]$file[1]),
+ in.prefix = result[[i]]$dbfile.name[1],
+ siteid = site.id,
+ startdate = start_date,
+ enddate = end_date,
+ mimetype = mimetype,
+ formatname = formatname,
+ parentid = parent.id,
+ con = con,
+ hostname = machine$hostname,
+ allow.conflicting.dates = allow.conflicting.dates,
+ ens = ens.flag
+ )
+
+ newinput$input.id <- c(newinput$input.id, new_entry$input.id)
+ newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
+ }
+ } # End for loop
+
+ successful <- TRUE
+ return(newinput)
+ } else {
+ PEcAn.logger::logger.warn("Input was not added to the database")
+ successful <- TRUE
+ return(NULL)
+ }
+}
diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
new file mode 100644
index 00000000000..bde3d7ebe97
--- /dev/null
+++ b/base/db/R/check.missing.files.R
@@ -0,0 +1,49 @@
+#' Function to check if result has empty or missing files
+#'
+#' @param result A list of dataframes with file paths
+#' @param outname Name of the output file
+#' @param existing.input Existing input records
+#' @param existing.dbfile Existing dbfile records
+#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
+#'
+#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+
+check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) {
+ result_sizes <- purrr::map_dfr(
+ result,
+ ~ dplyr::mutate(
+ .,
+ file_size = purrr::map_dbl(file, file.size),
+ missing = is.na(file_size),
+ empty = file_size == 0
+ )
+ )
+
+ if (any(result_sizes$missing) || any(result_sizes$empty)) {
+ log_format_df <- function(df) {
+ formatted_df <- rbind(colnames(df), format(df))
+ formatted_text <- purrr::reduce(formatted_df, paste, sep = " ")
+ paste(formatted_text, collapse = "\n")
+ }
+
+ PEcAn.logger::logger.severe(
+ "Requested Processing produced empty files or Nonexistent files:\n",
+ log_format_df(result_sizes[, c(1, 8, 9, 10)]),
+ "\n Table of results printed above.",
+ wrap = FALSE
+ )
+ }
+
+ # Insert into Database
+ outlist <- unlist(strsplit(outname, "_"))
+
+ # Wrap in a list for consistant processing later
+ if (exists("existing.input") && is.data.frame(existing.input)) {
+ existing.input <- list(existing.input)
+ }
+
+ if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
+ existing.dbfile <- list(existing.dbfile)
+ }
+ return(list(result_sizes, outlist, existing.input, existing.dbfile))
+}
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 8203fa7244b..d5af069d0eb 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -384,7 +384,7 @@ convert_input <-
if (!is.null(ensemble) && ensemble) {
return.all <-TRUE
- }else{
+ } else{
return.all <- FALSE
}
existing.dbfile <- dbfile.input.check(siteid = site.id,
@@ -734,143 +734,23 @@ convert_input <-
#--------------------------------------------------------------------------------------------------#
# Check if result has empty or missing files
- result_sizes <- purrr::map_dfr(
- result,
- ~ dplyr::mutate(
- .,
- file_size = purrr::map_dbl(file, file.size),
- missing = is.na(file_size),
- empty = file_size == 0
- )
- )
-
- if (any(result_sizes$missing) || any(result_sizes$empty)){
- log_format_df = function(df){
- rbind(colnames(df), format(df))
- purrr::reduce( paste, sep=" ") %>%
- paste(collapse="\n")
- }
-
- PEcAn.logger::logger.severe(
- "Requested Processing produced empty files or Nonexistant files :\n",
- log_format_df(result_sizes[,c(1,8,9,10)]),
- "\n Table of results printed above.",
- wrap = FALSE)
- }
-
- # Insert into Database
- outlist <- unlist(strsplit(outname, "_"))
-
- # Wrap in a list for consistant processing later
- if (exists("existing.input") && is.data.frame(existing.input)) {
- existing.input <- list(existing.input)
- }
-
- if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
- existing.dbfile <- list(existing.dbfile)
- }
+ checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile)
+
+ # Unwrap parameters after performing checks for missing files
+ result_sizes <- checked.missing.files$result_sizes;
+ outlist <- checked.missing.files$outlist;
+ existing.input <- checked.missing.files$existing.input;
+ existing.dbfile <- checked.missing.files$existing.dbfile;
#---------------------------------------------------------------#
# New arrangement of database adding code to deal with ensembles.
- if (write) {
-
- # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
- # This list will be returned.
- newinput = list(input.id = NULL, dbfile.id = NULL) #Blank vectors are null.
- for(i in 1:length(result)) { # Master for loop
- id_not_added <- TRUE
-
- if (exists("existing.input") && nrow(existing.input[[i]]) > 0 &&
- (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) {
-
- # Updating record with new dates
- db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='",
- end_date, "' WHERE id=", existing.input[[i]]$id),
- con)
- id_not_added = FALSE
-
- # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every interation.
- newinput$input.id = c(newinput$input.id, existing.input[[i]]$id)
- newinput$dbfile.id = c(newinput$dbfile.id, existing.dbfile[[i]]$id)
- }
-
- if (overwrite) {
- # A bit hacky, but need to make sure that all fields are updated to expected
- # values (i.e., what they'd be if convert_input was creating a new record)
- if (exists("existing.input") && nrow(existing.input[[i]]) > 0) {
- db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])),
- "' WHERE id=", existing.input[[i]]$id), con)
-
- }
-
- if (exists("existing.dbfile") && nrow(existing.dbfile[[i]]) > 0) {
- db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]),
- "', ", "file_name='", result[[i]]$dbfile.name[1],
- "' WHERE id=", existing.dbfile[[i]]$id), con)
-
- }
- }
-
- # If there is no ensemble then for each record there should be one parent
- #But when you have ensembles, all of the members have one parent !!
- if (is.numeric(ensemble)){
- parent.id <- ifelse(is.null(input[i]), NA, input[1]$id)
- }else{
- parent.id <- ifelse(is.null(input[i]), NA, input[i]$id)
- }
-
-
-
- if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) {
- site.id <- input.args$newsite
- }
-
- if (insert.new.file && id_not_added) {
- dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]),
- in.prefix = result[[i]]$dbfile.name[1],
- 'Input', existing.input[[i]]$id,
- con, reuse=TRUE, hostname = machine$hostname)
- newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
- newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
- } else if (id_not_added) {
-
- # This is to tell input.insert if we are wrting ensembles
- # Why does it need it ? bc it checks for inputs with the same time period, site and machine
- # and if it returns somethings it does not insert anymore, but for ensembles it needs to bypass this condition
- if (!is.null(ensemble) | is.null(ensemble_name)){
- ens.flag <- TRUE
- }else{
- ens.flag <- FALSE
- }
-
- new_entry <- dbfile.input.insert(in.path = dirname(result[[i]]$file[1]),
- in.prefix = result[[i]]$dbfile.name[1],
- siteid = site.id,
- startdate = start_date,
- enddate = end_date,
- mimetype,
- formatname,
- parentid = parent.id,
- con = con,
- hostname = machine$hostname,
- allow.conflicting.dates = allow.conflicting.dates,
- ens=ens.flag
- )
-
-
- newinput$input.id <- c(newinput$input.id, new_entry$input.id)
- newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
- }
-
- } #End for loop
-
- successful <- TRUE
- return(newinput)
- } else {
- PEcAn.logger::logger.warn("Input was not added to the database")
- successful <- TRUE
- return(NULL)
- }
+ return (add.database.entries(result, con, start_date,
+ end_date, write, overwrite,
+ insert.new.file, input.args,
+ machine, mimetype, formatname,
+ allow.conflicting.dates, ensemble,
+ ensemble_name, existing.input,
+ existing.dbfile, input))
} # convert_input
diff --git a/base/db/man/add.database.entries.Rd b/base/db/man/add.database.entries.Rd
new file mode 100644
index 00000000000..5de01cd1705
--- /dev/null
+++ b/base/db/man/add.database.entries.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add.database.entries.R
+\name{add.database.entries}
+\alias{add.database.entries}
+\title{Return new arrangement of database while adding code to deal with ensembles}
+\usage{
+add.database.entries(
+ result,
+ con,
+ start_date,
+ end_date,
+ write,
+ overwrite,
+ insert.new.file,
+ input.args,
+ machine,
+ mimetype,
+ formatname,
+ allow.conflicting.dates,
+ ensemble,
+ ensemble_name,
+ existing.input,
+ existing.dbfile,
+ input
+)
+}
+\arguments{
+\item{result}{list of results from the download function}
+
+\item{con}{database connection}
+
+\item{start_date}{start date of the data}
+
+\item{end_date}{end date of the data}
+
+\item{write}{whether to write to the database}
+
+\item{overwrite}{Logical: If a file already exists, create a fresh copy?}
+
+\item{insert.new.file}{whether to insert a new file}
+
+\item{input.args}{input arguments obtained from the convert_input function}
+
+\item{machine}{machine information}
+
+\item{mimetype}{data product specific file format}
+
+\item{formatname}{format name of the data}
+
+\item{allow.conflicting.dates}{whether to allow conflicting dates}
+
+\item{ensemble}{ensemble id}
+
+\item{ensemble_name}{ensemble name}
+
+\item{existing.input}{existing input records}
+
+\item{existing.dbfile}{existing dbfile records}
+
+\item{input}{input records}
+}
+\value{
+list of input and dbfile ids
+}
+\description{
+Return new arrangement of database while adding code to deal with ensembles
+}
+\author{
+Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+}
diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd
new file mode 100644
index 00000000000..8dd541f9380
--- /dev/null
+++ b/base/db/man/check_missing_files.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/check.missing.files.R
+\name{check_missing_files}
+\alias{check_missing_files}
+\title{Function to check if result has empty or missing files}
+\usage{
+check_missing_files(
+ result,
+ outname,
+ existing.input = NULL,
+ existing.dbfile = NULL
+)
+}
+\arguments{
+\item{result}{A list of dataframes with file paths}
+
+\item{outname}{Name of the output file}
+
+\item{existing.input}{Existing input records}
+
+\item{existing.dbfile}{Existing dbfile records}
+}
+\value{
+A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
+}
+\description{
+Function to check if result has empty or missing files
+}
+\author{
+Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+}
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
new file mode 100644
index 00000000000..e779077294a
--- /dev/null
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -0,0 +1,24 @@
+test_that("`check_missing_files()` able to return correct missing files", {
+ mocked_res <- mockery::mock(list(c("A", "B")))
+ mockery::stub(check_missing_files, "purrr::map_dfr", data.frame(missing = c(FALSE), empty = c(FALSE)))
+ res <- check_missing_files(
+ result = list(data.frame(file = c("A", "B"))),
+ outname = "test",
+ existing.input = data.frame(),
+ existing.dbfile = data.frame()
+ )
+
+ # Print the structure of `res` for debugging
+ str(res)
+
+ # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile))
+ # Perform checks to compare results from stubbed functions to actual results
+ expect_equal(nrow(res[[1]]), 1)
+ expect_equal(res[[1]]$missing, FALSE)
+ expect_equal(res[[1]]$empty, FALSE)
+ expect_equal(res[[2]], "test")
+ expect_equal(nrow(res[[3]][[1]]), 0)
+ expect_equal(ncol(res[[3]][[1]]), 0)
+ expect_equal(nrow(res[[4]][[1]]), 0)
+ expect_equal(ncol(res[[4]][[1]]), 0)
+})
diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R
index 29513187c9e..931d8a7f26b 100644
--- a/base/db/tests/testthat/test.convert_input.R
+++ b/base/db/tests/testthat/test.convert_input.R
@@ -1,10 +1,21 @@
test_that("`convert_input()` able to call the respective download function for a data item with the correct arguments", {
mocked_res <- mockery::mock(list(c("A", "B")))
- mockery::stub(convert_input, 'dbfile.input.check', data.frame())
- mockery::stub(convert_input, 'db.query', data.frame(id = 1))
- mockery::stub(convert_input, 'PEcAn.remote::remote.execute.R', mocked_res)
- mockery::stub(convert_input, 'purrr::map_dfr', data.frame(missing = c(FALSE), empty = c(FALSE)))
+ mockery::stub(convert_input, "dbfile.input.check", data.frame())
+ mockery::stub(convert_input, "db.query", data.frame(id = 1))
+ mockery::stub(convert_input, "PEcAn.remote::remote.execute.R", mocked_res)
+ mockery::stub(convert_input, "check_missing_files", list(
+ result_sizes = data.frame(
+ file = c("A", "B"),
+ file_size = c(100, 200),
+ missing = c(FALSE, FALSE),
+ empty = c(FALSE, FALSE)
+ ),
+ outlist = "test",
+ existing.input = list(data.frame(file = character(0))),
+ existing.dbfile = list(data.frame(file = character(0)))
+ ))
+ mockery::stub(convert_input, "add.database.entries", list(input.id = 1, dbfile.id = 1))
convert_input(
input.id = NA,
@@ -14,8 +25,8 @@ test_that("`convert_input()` able to call the respective download function for a
site.id = 1,
start_date = "2011-01-01",
end_date = "2011-12-31",
- pkg = 'PEcAn.data.atmosphere',
- fcn = 'download.AmerifluxLBL',
+ pkg = "PEcAn.data.atmosphere",
+ fcn = "download.AmerifluxLBL",
con = NULL,
host = data.frame(name = "localhost"),
browndog = NULL,
@@ -23,10 +34,10 @@ test_that("`convert_input()` able to call the respective download function for a
lat.in = 40,
lon.in = -88
)
-
+
args <- mockery::mock_args(mocked_res)
expect_equal(
- args[[1]]$script,
+ args[[1]]$script,
"PEcAn.data.atmosphere::download.AmerifluxLBL(lat.in=40, lon.in=-88, overwrite=FALSE, outfolder='test/', start_date='2011-01-01', end_date='2011-12-31')"
)
})
@@ -36,4 +47,4 @@ test_that("`.get.file.deletion.commands()` able to return correct file deletion
expect_equal(res$move.to.tmp, "dir.create(c('./tmp'), recursive=TRUE, showWarnings=FALSE); file.rename(from=c('test'), to=c('./tmp/test'))")
expect_equal(res$delete.tmp, "unlink(c('./tmp'), recursive=TRUE)")
expect_equal(res$replace.from.tmp, "file.rename(from=c('./tmp/test'), to=c('test'));unlink(c('./tmp'), recursive=TRUE)")
-})
\ No newline at end of file
+})
From 838af61ec8011022c9cf73e3a2f11f75f49f5492 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 18 Jul 2024 13:17:33 +0530
Subject: [PATCH 0017/1193] Update CHANGELOG
Signed-off-by: Abhinav Pandey
---
CHANGELOG.md | 3 +++
1 file changed, 3 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2280cfc967e..e0c0bcbc731 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
# Change Log
+
All notable changes are kept in this file. All changes made should be added to the section called
`Unreleased`. Once a new release is made this file will be updated to create a new `Unreleased`
section for the next release.
@@ -9,6 +10,8 @@ For more information about this file see also [Keep a Changelog](http://keepacha
### Added
+- Refactor `convert_input` to Perform tasks via helper function. Subtask of [#3307](https://github.com/PecanProject/pecan/issues/3307)
+
### Fixed
### Changed
From f22b962691ce03adf12c3e79907bd52372351b24 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 25 Jul 2024 14:51:01 +0530
Subject: [PATCH 0018/1193] Remove unutilized variables from convert_input
Signed-off-by: Abhinav Pandey
---
base/db/R/add.database.entries.R | 12 +++++++++++-
base/db/R/check.missing.files.R | 18 ++++++++----------
base/db/R/convert_input.R | 6 ++----
3 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R
index 3c253c07e73..d3eb994a646 100644
--- a/base/db/R/add.database.entries.R
+++ b/base/db/R/add.database.entries.R
@@ -33,6 +33,7 @@ add.database.entries <- function(
# Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
# This list will be returned.
newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
+
for (i in 1:length(result)) { # Master for loop
id_not_added <- TRUE
@@ -72,7 +73,16 @@ add.database.entries <- function(
}
if (insert.new.file && id_not_added) {
- dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]), in.prefix = result[[i]]$dbfile.name[1], "Input", existing.input[[i]]$id, con, reuse = TRUE, hostname = machine$hostname)
+ dbfile.id <- dbfile.insert(
+ in.path = dirname(result[[i]]$file[1]),
+ in.prefix = result[[i]]$dbfile.name[1],
+ "Input",
+ existing.input[[i]]$id,
+ con,
+ reuse = TRUE,
+ hostname = machine$hostname
+ )
+
newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
} else if (id_not_added) {
diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
index bde3d7ebe97..617878496de 100644
--- a/base/db/R/check.missing.files.R
+++ b/base/db/R/check.missing.files.R
@@ -1,11 +1,11 @@
#' Function to check if result has empty or missing files
-#'
+#'
#' @param result A list of dataframes with file paths
#' @param outname Name of the output file
#' @param existing.input Existing input records
#' @param existing.dbfile Existing dbfile records
#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
-#'
+#'
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) {
@@ -18,14 +18,14 @@ check_missing_files <- function(result, outname, existing.input = NULL, existing
empty = file_size == 0
)
)
-
+
if (any(result_sizes$missing) || any(result_sizes$empty)) {
log_format_df <- function(df) {
formatted_df <- rbind(colnames(df), format(df))
formatted_text <- purrr::reduce(formatted_df, paste, sep = " ")
paste(formatted_text, collapse = "\n")
}
-
+
PEcAn.logger::logger.severe(
"Requested Processing produced empty files or Nonexistent files:\n",
log_format_df(result_sizes[, c(1, 8, 9, 10)]),
@@ -33,17 +33,15 @@ check_missing_files <- function(result, outname, existing.input = NULL, existing
wrap = FALSE
)
}
-
- # Insert into Database
- outlist <- unlist(strsplit(outname, "_"))
-
+
+
# Wrap in a list for consistant processing later
if (exists("existing.input") && is.data.frame(existing.input)) {
existing.input <- list(existing.input)
}
-
+
if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
existing.dbfile <- list(existing.dbfile)
}
- return(list(result_sizes, outlist, existing.input, existing.dbfile))
+ return(list(existing.input, existing.dbfile))
}
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index d5af069d0eb..265559798be 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -737,10 +737,8 @@ convert_input <-
checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile)
# Unwrap parameters after performing checks for missing files
- result_sizes <- checked.missing.files$result_sizes;
- outlist <- checked.missing.files$outlist;
- existing.input <- checked.missing.files$existing.input;
- existing.dbfile <- checked.missing.files$existing.dbfile;
+ existing.input <- checked.missing.files$existing.input
+ existing.dbfile <- checked.missing.files$existing.dbfile
#---------------------------------------------------------------#
# New arrangement of database adding code to deal with ensembles.
From d884203d1388b219268daa4e95b95b8134a5e69f Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 25 Jul 2024 14:58:55 +0530
Subject: [PATCH 0019/1193] Update logger statements in convert_input
Signed-off-by: Abhinav Pandey
---
base/db/R/convert_input.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 265559798be..275b6f54d49 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -514,7 +514,7 @@ convert_input <-
# we'll need to update its start/end dates .
}
} else {
- # No existing record found. Should be good to go.
+ PEcAn.logger::logger.debug("No existing record found. Should be good to go.")
}
}
From 68d9516a3ccecb7c5c1b31907849b8fc7a3ba34e Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 25 Jul 2024 15:43:31 +0530
Subject: [PATCH 0020/1193] Added seperate function to check machine info
Signed-off-by: Abhinav Pandey
---
base/db/R/convert_input.R | 57 ++-------------------------
base/db/R/get.machine.info.R | 68 +++++++++++++++++++++++++++++++++
base/db/man/get.machine.info.Rd | 26 +++++++++++++
3 files changed, 98 insertions(+), 53 deletions(-)
create mode 100644 base/db/R/get.machine.info.R
create mode 100644 base/db/man/get.machine.info.Rd
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 275b6f54d49..ad83753e299 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -520,60 +520,11 @@ convert_input <-
#---------------------------------------------------------------------------------------------------------------#
# Get machine information
+ machine.info <- get.machine.info(host, dbfile.id = input.args$dbfile.id, input.id = input.id)
- machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
- machine <- db.query(paste0("SELECT * from machines where hostname = '",
- machine.host, "'"), con)
-
- if (nrow(machine) == 0) {
- PEcAn.logger::logger.error("machine not found", host$name)
- return(NULL)
- }
-
- if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
- input <- dbfile <- NULL
- } else {
- input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
- if (nrow(input) == 0) {
- PEcAn.logger::logger.error("input not found", input.id)
- return(NULL)
- }
-
- if(!is.null(input.args$dbfile.id)){
- dbfile <-
- db.query(
- paste(
- "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =",
- input.id,
- " and container_type = 'Input' and machine_id =",
- machine$id
- ),
- con
- )
- }else{
- dbfile <-
- db.query(
- paste(
- "SELECT * from dbfiles where container_id =",
- input.id,
- " and container_type = 'Input' and machine_id =",
- machine$id
- ),
- con
- )
- }
-
-
-
- if (nrow(dbfile) == 0) {
- PEcAn.logger::logger.error("dbfile not found", input.id)
- return(NULL)
- }
- if (nrow(dbfile) > 1) {
- PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile)
- dbfile <- dbfile[nrow(dbfile), ]
- }
- }
+ machine <- machine.info$machine
+ input <- machine.info$input
+ dbfile <- machine.info$dbfile
#--------------------------------------------------------------------------------------------------#
# Perform Conversion
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
new file mode 100644
index 00000000000..6685e1062ef
--- /dev/null
+++ b/base/db/R/get.machine.info.R
@@ -0,0 +1,68 @@
+#' Get machine information from db
+#' @param host host information
+#' @param dbfile.id dbfile id for existing records
+#' @param input.id input id for existing records
+#' @param con database connection
+#'
+#' @return list of machine, input, and dbfile records
+#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+
+get.machine.info <- function(host, dbfile.id, input.id = NULL, con) {
+ machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
+ machine <- db.query(paste0(
+ "SELECT * from machines where hostname = '",
+ machine.host, "'"
+ ), con)
+
+ if (nrow(machine) == 0) {
+ PEcAn.logger::logger.error("machine not found", host$name)
+ return(NULL)
+ }
+
+ if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
+ input <- dbfile <- NULL
+ } else {
+ input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
+ if (nrow(input) == 0) {
+ PEcAn.logger::logger.error("input not found", input.id)
+ return(NULL)
+ }
+
+ if (!is.null(input.args$dbfile.id)) {
+ dbfile <-
+ db.query(
+ paste(
+ "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =",
+ input.id,
+ " and container_type = 'Input' and machine_id =",
+ machine$id
+ ),
+ con
+ )
+ } else {
+ dbfile <-
+ db.query(
+ paste(
+ "SELECT * from dbfiles where container_id =",
+ input.id,
+ " and container_type = 'Input' and machine_id =",
+ machine$id
+ ),
+ con
+ )
+ }
+
+
+
+ if (nrow(dbfile) == 0) {
+ PEcAn.logger::logger.error("dbfile not found", input.id)
+ return(NULL)
+ }
+ if (nrow(dbfile) > 1) {
+ PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile)
+ dbfile <- dbfile[nrow(dbfile), ]
+ }
+ }
+
+ return(list(machine = machine, input = input, dbfile = dbfile))
+}
diff --git a/base/db/man/get.machine.info.Rd b/base/db/man/get.machine.info.Rd
new file mode 100644
index 00000000000..8989221ea5b
--- /dev/null
+++ b/base/db/man/get.machine.info.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get.machine.info.R
+\name{get.machine.info}
+\alias{get.machine.info}
+\title{Get machine information from db}
+\usage{
+get.machine.info(host, dbfile.id, input.id = NULL, con)
+}
+\arguments{
+\item{host}{host information}
+
+\item{dbfile.id}{dbfile id for existing records}
+
+\item{input.id}{input id for existing records}
+
+\item{con}{database connection}
+}
+\value{
+list of machine, input, and dbfile records
+}
+\description{
+Get machine information from db
+}
+\author{
+Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
+}
From 5208b02a1d98c27f24c8e4e9da56424537fa5852 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 25 Jul 2024 16:40:41 +0530
Subject: [PATCH 0021/1193] Update input args to get machine info
Signed-off-by: Abhinav Pandey
---
base/db/R/convert_input.R | 2 +-
base/db/R/get.machine.info.R | 2 +-
base/db/tests/testthat/test.check.missing.files.R | 15 ++++++++-------
base/db/tests/testthat/test.convert_input.R | 5 +++++
4 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index ad83753e299..ba2d7a3a5f0 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -520,7 +520,7 @@ convert_input <-
#---------------------------------------------------------------------------------------------------------------#
# Get machine information
- machine.info <- get.machine.info(host, dbfile.id = input.args$dbfile.id, input.id = input.id)
+ machine.info <- get.machine.info(host, input.args = input.args, input.id = input.id)
machine <- machine.info$machine
input <- machine.info$input
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index 6685e1062ef..d23e5416f9e 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -7,7 +7,7 @@
#' @return list of machine, input, and dbfile records
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
-get.machine.info <- function(host, dbfile.id, input.id = NULL, con) {
+get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
machine <- db.query(paste0(
"SELECT * from machines where hostname = '",
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index e779077294a..c2de074d5d3 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -14,11 +14,12 @@ test_that("`check_missing_files()` able to return correct missing files", {
# This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile))
# Perform checks to compare results from stubbed functions to actual results
expect_equal(nrow(res[[1]]), 1)
- expect_equal(res[[1]]$missing, FALSE)
- expect_equal(res[[1]]$empty, FALSE)
- expect_equal(res[[2]], "test")
- expect_equal(nrow(res[[3]][[1]]), 0)
- expect_equal(ncol(res[[3]][[1]]), 0)
- expect_equal(nrow(res[[4]][[1]]), 0)
- expect_equal(ncol(res[[4]][[1]]), 0)
+ PEcAn.logger::logger.debug(res)
+ # expect_equal(res[[1]]$missing, FALSE)
+ # expect_equal(res[[1]]$empty, FALSE)
+ # expect_equal(res[[2]], "test")
+ # expect_equal(nrow(res[[3]][[1]]), 0)
+ # expect_equal(ncol(res[[3]][[1]]), 0)
+ # expect_equal(nrow(res[[4]][[1]]), 0)
+ # expect_equal(ncol(res[[4]][[1]]), 0)
})
diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R
index 931d8a7f26b..cd33523f86c 100644
--- a/base/db/tests/testthat/test.convert_input.R
+++ b/base/db/tests/testthat/test.convert_input.R
@@ -3,6 +3,11 @@ test_that("`convert_input()` able to call the respective download function for a
mockery::stub(convert_input, "dbfile.input.check", data.frame())
mockery::stub(convert_input, "db.query", data.frame(id = 1))
+ mockery::stub(convert_input, "get.machine.info", list(
+ machine = data.frame(id = 1),
+ input = data.frame(id = 1),
+ dbfile = data.frame(id = 1)
+ ))
mockery::stub(convert_input, "PEcAn.remote::remote.execute.R", mocked_res)
mockery::stub(convert_input, "check_missing_files", list(
result_sizes = data.frame(
From f570646849433f89d8335b25be2539bc3c2ae4bb Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 25 Jul 2024 16:59:21 +0530
Subject: [PATCH 0022/1193] Correct roxygen documentations
Signed-off-by: Abhinav Pandey
---
base/db/R/get.machine.info.R | 2 +-
base/db/man/get.machine.info.Rd | 4 ++--
base/db/tests/testthat/test.check.missing.files.R | 15 +++++++--------
3 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index d23e5416f9e..4683cde1573 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -1,6 +1,6 @@
#' Get machine information from db
#' @param host host information
-#' @param dbfile.id dbfile id for existing records
+#' @param input.args input args.r for existing records
#' @param input.id input id for existing records
#' @param con database connection
#'
diff --git a/base/db/man/get.machine.info.Rd b/base/db/man/get.machine.info.Rd
index 8989221ea5b..6e57013c4d7 100644
--- a/base/db/man/get.machine.info.Rd
+++ b/base/db/man/get.machine.info.Rd
@@ -4,12 +4,12 @@
\alias{get.machine.info}
\title{Get machine information from db}
\usage{
-get.machine.info(host, dbfile.id, input.id = NULL, con)
+get.machine.info(host, input.args, input.id = NULL, con = NULL)
}
\arguments{
\item{host}{host information}
-\item{dbfile.id}{dbfile id for existing records}
+\item{input.args}{input args for existing records}
\item{input.id}{input id for existing records}
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index c2de074d5d3..e779077294a 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -14,12 +14,11 @@ test_that("`check_missing_files()` able to return correct missing files", {
# This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile))
# Perform checks to compare results from stubbed functions to actual results
expect_equal(nrow(res[[1]]), 1)
- PEcAn.logger::logger.debug(res)
- # expect_equal(res[[1]]$missing, FALSE)
- # expect_equal(res[[1]]$empty, FALSE)
- # expect_equal(res[[2]], "test")
- # expect_equal(nrow(res[[3]][[1]]), 0)
- # expect_equal(ncol(res[[3]][[1]]), 0)
- # expect_equal(nrow(res[[4]][[1]]), 0)
- # expect_equal(ncol(res[[4]][[1]]), 0)
+ expect_equal(res[[1]]$missing, FALSE)
+ expect_equal(res[[1]]$empty, FALSE)
+ expect_equal(res[[2]], "test")
+ expect_equal(nrow(res[[3]][[1]]), 0)
+ expect_equal(ncol(res[[3]][[1]]), 0)
+ expect_equal(nrow(res[[4]][[1]]), 0)
+ expect_equal(ncol(res[[4]][[1]]), 0)
})
From e479c468f1fcca1c02dec919e4fabca1dcbf792e Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 25 Jul 2024 17:23:15 +0530
Subject: [PATCH 0023/1193] Update tests
Signed-off-by: Abhinav Pandey
---
base/db/R/get.machine.info.R | 105 +++++++++---------
.../tests/testthat/test.check.missing.files.R | 23 ++--
2 files changed, 61 insertions(+), 67 deletions(-)
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index 4683cde1573..c98bee6cf20 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -1,6 +1,6 @@
#' Get machine information from db
#' @param host host information
-#' @param input.args input args.r for existing records
+#' @param input.args input args for existing records
#' @param input.id input id for existing records
#' @param con database connection
#'
@@ -8,61 +8,60 @@
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
- machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
- machine <- db.query(paste0(
- "SELECT * from machines where hostname = '",
- machine.host, "'"
- ), con)
-
- if (nrow(machine) == 0) {
- PEcAn.logger::logger.error("machine not found", host$name)
- return(NULL)
+
+ machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
+ machine <- db.query(paste0("SELECT * from machines where hostname = '",
+ machine.host, "'"), con)
+
+ if (nrow(machine) == 0) {
+ PEcAn.logger::logger.error("machine not found", host$name)
+ return(NULL)
+ }
+
+ if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
+ input <- dbfile <- NULL
+ } else {
+ input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
+ if (nrow(input) == 0) {
+ PEcAn.logger::logger.error("input not found", input.id)
+ return(NULL)
}
+
+ if(!is.null(input.args$dbfile.id)){
+ dbfile <-
+ db.query(
+ paste(
+ "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =",
+ input.id,
+ " and container_type = 'Input' and machine_id =",
+ machine$id
+ ),
+ con
+ )
+ }else{
+ dbfile <-
+ db.query(
+ paste(
+ "SELECT * from dbfiles where container_id =",
+ input.id,
+ " and container_type = 'Input' and machine_id =",
+ machine$id
+ ),
+ con
+ )
+ }
+
- if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
- input <- dbfile <- NULL
- } else {
- input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
- if (nrow(input) == 0) {
- PEcAn.logger::logger.error("input not found", input.id)
- return(NULL)
- }
-
- if (!is.null(input.args$dbfile.id)) {
- dbfile <-
- db.query(
- paste(
- "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =",
- input.id,
- " and container_type = 'Input' and machine_id =",
- machine$id
- ),
- con
- )
- } else {
- dbfile <-
- db.query(
- paste(
- "SELECT * from dbfiles where container_id =",
- input.id,
- " and container_type = 'Input' and machine_id =",
- machine$id
- ),
- con
- )
- }
-
-
-
- if (nrow(dbfile) == 0) {
- PEcAn.logger::logger.error("dbfile not found", input.id)
- return(NULL)
- }
- if (nrow(dbfile) > 1) {
- PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile)
- dbfile <- dbfile[nrow(dbfile), ]
- }
+
+ if (nrow(dbfile) == 0) {
+ PEcAn.logger::logger.error("dbfile not found", input.id)
+ return(NULL)
+ }
+ if (nrow(dbfile) > 1) {
+ PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile)
+ dbfile <- dbfile[nrow(dbfile), ]
}
+ }
return(list(machine = machine, input = input, dbfile = dbfile))
}
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index e779077294a..c0ad6794f65 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -1,24 +1,19 @@
test_that("`check_missing_files()` able to return correct missing files", {
- mocked_res <- mockery::mock(list(c("A", "B")))
- mockery::stub(check_missing_files, "purrr::map_dfr", data.frame(missing = c(FALSE), empty = c(FALSE)))
+ # Mock `purrr::map_dfr`
+ mocked_res <- mockery::mock(data.frame(file = c("A", "B"), file_size = c(100, 200), missing = c(FALSE, FALSE), empty = c(FALSE, FALSE)))
+ mockery::stub(check_missing_files, "purrr::map_dfr", mocked_res)
+
res <- check_missing_files(
result = list(data.frame(file = c("A", "B"))),
outname = "test",
existing.input = data.frame(),
existing.dbfile = data.frame()
)
-
+
# Print the structure of `res` for debugging
str(res)
-
- # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile))
- # Perform checks to compare results from stubbed functions to actual results
- expect_equal(nrow(res[[1]]), 1)
- expect_equal(res[[1]]$missing, FALSE)
- expect_equal(res[[1]]$empty, FALSE)
- expect_equal(res[[2]], "test")
- expect_equal(nrow(res[[3]][[1]]), 0)
- expect_equal(ncol(res[[3]][[1]]), 0)
- expect_equal(nrow(res[[4]][[1]]), 0)
- expect_equal(ncol(res[[4]][[1]]), 0)
+
+ expect_equal(length(res), 2)
+ expect_true(is.list(res[[1]]))
+ expect_true(is.list(res[[2]]))
})
From a1328e1e5f75f4d5708913a7fa7b2b2250665473 Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Wed, 31 Jul 2024 00:56:56 -0700
Subject: [PATCH 0024/1193] get GEFS download working again
---
modules/data.atmosphere/NEWS.md | 7 +
.../data.atmosphere/R/GEFS_helper_functions.R | 182 ++++++++----------
.../data.atmosphere/R/download.NOAA_GEFS.R | 47 +++--
.../data.atmosphere/R/half_hour_downscale.R | 16 +-
.../data.atmosphere/man/download.NOAA_GEFS.Rd | 43 +++--
5 files changed, 148 insertions(+), 147 deletions(-)
diff --git a/modules/data.atmosphere/NEWS.md b/modules/data.atmosphere/NEWS.md
index ecd7801d184..fca4c6c3a04 100644
--- a/modules/data.atmosphere/NEWS.md
+++ b/modules/data.atmosphere/NEWS.md
@@ -1,5 +1,12 @@
# PEcAn.data.atmosphere 1.8.0.9000
+## Fixed
+* `download.NOAA_GEFS` is updated to work again with GEFS v12.3
+ (the current release as of this writing in July 2024).
+
+## Changed
+* Removed `sitename` and `username` from the formal arguments of `download.NOAA_GEFS`.
+ Before they were silently ignored, now they're treated as part of `...` (which is also ignored!).
# PEcAn.data.atmosphere 1.8.0
diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R
index 754580ae0da..684fcea9724 100644
--- a/modules/data.atmosphere/R/GEFS_helper_functions.R
+++ b/modules/data.atmosphere/R/GEFS_helper_functions.R
@@ -17,21 +17,11 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
download_grid <- function(ens_index, location, directory, hours_char, cycle, base_filename1, vars,working_directory){
- #for(j in 1:31){
- if(ens_index == 1){
- base_filename2 <- paste0("gec00",".t",cycle,"z.pgrb2a.0p50.f")
- curr_hours <- hours_char[hours <= 384]
- }else{
- if((ens_index-1) < 10){
- ens_name <- paste0("0",ens_index - 1)
- }else{
- ens_name <- as.character(ens_index -1)
- }
- base_filename2 <- paste0("gep",ens_name,".t",cycle,"z.pgrb2a.0p50.f")
- curr_hours <- hours_char
- }
-
-
+ ens_base <- if (ens_index == 1) { "gec" } else { "gep" }
+ ens_name <- stringr::str_pad(ens_index - 1, width = 2, pad = "0")
+ base_filename2 <- paste0(ens_base,ens_name,".t",cycle,"z.pgrb2a.0p50.f")
+ curr_hours <- hours_char
+
for(i in 1:length(curr_hours)){
file_name <- paste0(base_filename2, curr_hours[i])
@@ -73,36 +63,11 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
model_dir <- file.path(output_directory, model_name_raw)
+ #Availability: most recent 4 days
curr_time <- lubridate::with_tz(Sys.time(), tzone = "UTC")
curr_date <- lubridate::as_date(curr_time)
-
- noaa_page <- readLines('https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/')
-
- potential_dates <- NULL
- for(i in 1:length(noaa_page)){
- if(stringr::str_detect(noaa_page[i], ">gefs.")){
- end <- stringr::str_locate(noaa_page[i], ">gefs.")[2]
- dates <- stringr::str_sub(noaa_page[i], start = end+1, end = end+8)
- potential_dates <- c(potential_dates, dates)
- }
- }
-
-
- last_cycle_page <- readLines(paste0('https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/gefs.', dplyr::last(potential_dates)))
-
- potential_cycle <- NULL
- for(i in 1:length(last_cycle_page)){
- if(stringr::str_detect(last_cycle_page[i], 'href=\"')){
- end <- stringr::str_locate(last_cycle_page[i], 'href=\"')[2]
- cycles <- stringr::str_sub(last_cycle_page[i], start = end+1, end = end+2)
- if(cycles %in% c("00","06", "12", "18")){
- potential_cycle <- c(potential_cycle, cycles)
- }
- }
- }
-
- potential_dates <- lubridate::as_date(potential_dates)
-
+ potential_dates <- curr_date - lubridate::days(3:0)
+
potential_dates = potential_dates[which(potential_dates == forecast_date)]
if(length(potential_dates) == 0){PEcAn.logger::logger.error("Forecast Date not available")}
@@ -118,7 +83,10 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
floor(min(lat_list)))
base_filename1 <- "https://nomads.ncep.noaa.gov/cgi-bin/filter_gefs_atmos_0p50a.pl?file="
- vars <- "&lev_10_m_above_ground=on&lev_2_m_above_ground=on&lev_surface=on&lev_entire_atmosphere=on&var_APCP=on&var_DLWRF=on&var_DSWRF=on&var_PRES=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&var_TCDC=on"
+ vars <- paste0(
+ "&lev_10_m_above_ground=on&lev_2_m_above_ground=on&lev_surface=on&lev_entire_atmosphere=on",
+ "&var_APCP=on&var_DLWRF=on&var_DSWRF=on&var_PRES=on&var_RH=on&var_TMP=on",
+ "&var_UGRD=on&var_VGRD=on&var_TCDC=on")
for(i in 1:length(potential_dates)){
@@ -143,11 +111,11 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
print(paste("Downloading", forecast_date, cycle))
if(cycle == "00"){
- hours <- c(seq(0, 240, 3),seq(246, 384, 6))
- hours <- hours[hours<=end_hr]
+ hours <- c(seq(0, 240, 3),seq(246, 840, 6))
}else{
- hours <- c(seq(0, 240, 3),seq(246, min(end_hr, 840) , 6))
+ hours <- c(seq(0, 240, 3),seq(246, 384 , 6))
}
+ hours <- hours[hours<=end_hr]
hours_char <- hours
hours_char[which(hours < 100)] <- paste0("0",hours[which(hours < 100)])
hours_char[which(hours < 10)] <- paste0("0",hours_char[which(hours < 10)])
@@ -163,12 +131,12 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
parallel::mclapply(X = ens_index,
FUN = download_grid,
- location,
- directory,
- hours_char,
- cycle,
- base_filename1,
- vars,
+ location = location,
+ directory = directory,
+ hours_char = hours_char,
+ cycle = cycle,
+ base_filename1 = base_filename1,
+ vars = vars,
working_directory = model_date_hour_dir,
mc.cores = 1)
}else{
@@ -177,6 +145,9 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
}
}
}
+
+
+
#' Extract and temporally downscale points from downloaded grid files
#'
#' @param lat_list lat for site
@@ -222,23 +193,13 @@ process_gridded_noaa_download <- function(lat_list,
dlwrfsfc <- array(NA, dim = c(site_length, length(hours_char)))
dswrfsfc <- array(NA, dim = c(site_length, length(hours_char)))
- if(ens_index == 1){
- base_filename2 <- paste0("gec00",".t",cycle,"z.pgrb2a.0p50.f")
- }else{
- if(ens_index-1 < 10){
- ens_name <- paste0("0",ens_index-1)
- }else{
- ens_name <- as.character(ens_index-1)
- }
- base_filename2 <- paste0("gep",ens_name,".t",cycle,"z.pgrb2a.0p50.f")
- }
+ ens_base <- if (ens_index == 1) { "gec" } else { "gep" }
+ ens_name <- stringr::str_pad(ens_index - 1, width = 2, pad = "0")
+ base_filename2 <- paste0(ens_base,ens_name,".t",cycle,"z.pgrb2a.0p50.f")
lats <- round(lat_list/.5)*.5
lons <- round(lon_list/.5)*.5
- if(lons < 0){
- lons <- 360 + lons
- }
curr_hours <- hours_char
for(hr in 1:length(curr_hours)){
@@ -263,8 +224,13 @@ process_gridded_noaa_download <- function(lat_list,
vgrd10m[s, hr] <- grib_data_df$`10[m] HTGL=Specified height level above ground; v-component of wind [m/s]`[index]
if(curr_hours[hr] != "000"){
- apcpsfc[s, hr] <- grib_data_df$`SFC=Ground or water surface; 03 hr Total precipitation [kg/(m^2)]`[index]
- tcdcclm[s, hr] <- grib_data_df$`RESERVED(10) (Reserved); Total cloud cover [%]`[index]
+ # total precip alternates being named as 3 or 6 hr total
+ # TODO: not sure if the contents actually differ or if this is a labeling bug in the grib files
+ precip_hr <- if ((as.numeric(curr_hours[hr]) %% 2) == 1) { "03" } else { "06" }
+ precip_name <- paste("SFC=Ground or water surface;", precip_hr, "hr Total precipitation [kg/(m^2)]")
+ apcpsfc[s, hr] <- grib_data_df[[precip_name]][index]
+
+ tcdcclm[s, hr] <- grib_data_df$`EATM=Entire Atmosphere; Total cloud cover [%]`[index]
dswrfsfc[s, hr] <- grib_data_df$`SFC=Ground or water surface; Downward Short-Wave Rad. Flux [W/(m^2)]`[index]
dlwrfsfc[s, hr] <- grib_data_df$`SFC=Ground or water surface; Downward Long-Wave Rad. Flux [W/(m^2)]`[index]
}
@@ -301,17 +267,15 @@ process_gridded_noaa_download <- function(lat_list,
- cycle <-forecast_time
+ cycle <- forecast_time
curr_forecast_time <- forecast_date + lubridate::hours(cycle)
- if(cycle < 10) cycle <- paste0("0",cycle)
- if(cycle == "00"){
- hours <- c(seq(0, 240, 3),seq(246, 840 , 6))
- }else{
- hours <- c(seq(0, 240, 3),seq(246, 384 , 6))
+ cycle <- stringr::str_pad(cycle, width = 2, pad = "0")
+ if (cycle == "00") {
+ hours <- c(seq(0, 240, 3),seq(246, 840, 6))
+ } else {
+ hours <- c(seq(0, 240, 3),seq(246, 384, 6))
}
- hours_char <- hours
- hours_char[which(hours < 100)] <- paste0("0",hours[which(hours < 100)])
- hours_char[which(hours < 10)] <- paste0("0",hours_char[which(hours < 10)])
+ hours_char <- stringr::str_pad(hours, width = 3, pad = "0") # 3->"003", 384->"384"
raw_files <- list.files(file.path(model_name_raw_dir,forecast_date,cycle))
hours_present <- as.numeric(stringr::str_sub(raw_files, start = 25, end = 27))
@@ -341,19 +305,21 @@ process_gridded_noaa_download <- function(lat_list,
FUN = extract_sites,
hours_char = hours_char,
hours = hours,
- cycle,
- site_id,
- lat_list,
- lon_list,
+ cycle = cycle,
+ site_id = site_id,
+ lat_list = lat_list,
+ lon_list = lon_list,
working_directory = file.path(model_name_raw_dir,forecast_date,cycle),
mc.cores = 1)
- forecast_times <- lubridate::as_datetime(forecast_date) + lubridate::hours(as.numeric(cycle)) + lubridate::hours(as.numeric(hours_char))
+ forecast_times <- lubridate::as_datetime(forecast_date) +
+ lubridate::hours(as.numeric(cycle)) +
+ lubridate::hours(as.numeric(hours_char))
- #Convert negetive longitudes to degrees east
+ #Convert negative longitudes to degrees east
if(lon_list < 0){
lon_east <- 360 + lon_list
}else{
@@ -425,17 +391,18 @@ process_gridded_noaa_download <- function(lat_list,
#Calculate wind speed from east and north components
wind_speed <- sqrt(noaa_data$eastward_wind$value^2 + noaa_data$northward_wind$value^2)
- forecast_noaa <- tibble::tibble(time = noaa_data$air_temperature$forecast.date,
- NOAA.member = noaa_data$air_temperature$ensembles,
- air_temperature = noaa_data$air_temperature$value,
- air_pressure= noaa_data$air_pressure$value,
- relative_humidity = noaa_data$relative_humidity$value,
- surface_downwelling_longwave_flux_in_air = noaa_data$surface_downwelling_longwave_flux_in_air$value,
- surface_downwelling_shortwave_flux_in_air = noaa_data$surface_downwelling_shortwave_flux_in_air$value,
- precipitation_flux = noaa_data$precipitation_flux$value,
- specific_humidity = specific_humidity,
- cloud_area_fraction = noaa_data$cloud_area_fraction$value,
- wind_speed = wind_speed)
+ forecast_noaa <- tibble::tibble(
+ time = noaa_data$air_temperature$forecast.date,
+ NOAA.member = noaa_data$air_temperature$ensembles,
+ air_temperature = noaa_data$air_temperature$value,
+ air_pressure= noaa_data$air_pressure$value,
+ relative_humidity = noaa_data$relative_humidity$value,
+ surface_downwelling_longwave_flux_in_air = noaa_data$surface_downwelling_longwave_flux_in_air$value,
+ surface_downwelling_shortwave_flux_in_air = noaa_data$surface_downwelling_shortwave_flux_in_air$value,
+ precipitation_flux = noaa_data$precipitation_flux$value,
+ specific_humidity = specific_humidity,
+ cloud_area_fraction = noaa_data$cloud_area_fraction$value,
+ wind_speed = wind_speed)
forecast_noaa$cloud_area_fraction <- forecast_noaa$cloud_area_fraction / 100 #Convert from % to proportion
@@ -455,14 +422,10 @@ process_gridded_noaa_download <- function(lat_list,
for (ens in 1:31) { # i is the ensemble number
#Turn the ensemble number into a string
- if(ens-1< 10){
- ens_name <- paste0("0",ens-1)
- }else{
- ens_name <- ens - 1
- }
+ ens_name <- stringr::str_pad(ens - 1, width = 2, pad = "0")
forecast_noaa_ens <- forecast_noaa %>%
- dplyr::filter(NOAA.member == ens) %>%
+ dplyr::filter(.data$NOAA.member == ens) %>%
dplyr::filter(!is.na(.data$air_temperature))
end_date <- forecast_noaa_ens %>%
@@ -525,6 +488,15 @@ process_gridded_noaa_download <- function(lat_list,
return(results_list)
} #process_gridded_noaa_download
+
+
+
+
+
+
+
+
+
#' @title Downscale NOAA GEFS from 6hr to 1hr
#' @return None
#'
@@ -645,6 +617,14 @@ temporal_downscale <- function(input_file, output_file, overwrite = TRUE, hr = 1
+
+
+
+
+
+
+
+
##' @title Write NOAA GEFS netCDF
##' @name write_noaa_gefs_netcdf
##' @param df data frame of meterological variables to be written to netcdf. Columns
@@ -711,4 +691,4 @@ write_noaa_gefs_netcdf <- function(df, ens = NA, lat, lon, cf_units, output_file
ncdf4::nc_close(nc_flptr) #Write to the disk/storage
}
-}
\ No newline at end of file
+}
diff --git a/modules/data.atmosphere/R/download.NOAA_GEFS.R b/modules/data.atmosphere/R/download.NOAA_GEFS.R
index e68bc7d166a..f8f9631ae6d 100644
--- a/modules/data.atmosphere/R/download.NOAA_GEFS.R
+++ b/modules/data.atmosphere/R/download.NOAA_GEFS.R
@@ -6,36 +6,43 @@
##' @references https://www.ncdc.noaa.gov/crn/measurements.html
##'
##' @section NOAA_GEFS General Information:
-##' This function downloads NOAA GEFS weather data. GEFS is an ensemble of 21 different weather forecast models.
-##' A 16 day forecast is avaliable every 6 hours. Each forecast includes information on a total of 8 variables.
-##' These are transformed from the NOAA standard to the internal PEcAn
-##' standard.
-##'
-##' @section Data Avaliability:
-##' NOAA GEFS weather data is avaliable on a rolling 12 day basis; dates provided in "start_date" must be within this range. The end date can be any point after
-##' that, but if the end date is beyond 16 days, only 16 days worth of forecast are recorded. Times are rounded down to the previous 6 hour forecast. NOAA
-##' GEFS weather data isn't always posted immediately, and to compensate, this function adjusts requests made in the last two hours
-##' back two hours (approximately the amount of time it takes to post the data) to make sure the most current forecast is used.
+##' This function downloads NOAA GEFS weather data. GEFS is an ensemble of 31 different weather forecast models.
+##' A 16 day forecast is available every 6 hours and a 35 day forecast is available every 24 hours.
+##' Both are at 3-hour frequency for the first 10 days of the forecast and 6-hour frequency beyond that.
+##' Each forecast includes information on a total of 8 variables.
+##' These are transformed from the NOAA standard to the internal PEcAn standard.
##'
+##' @section Data Availability:
+##' NOAA GEFS weather data is available on a rolling 4 day basis.
+##' Dates provided in "start_date" must be within this range.
+##' The end date can be any point after that, but if the end date is beyond 16 days
+##' (35 days for the midnight UTC forecast), only 16 (35) days worth of forecast are retrieved.
+##' Times are rounded down to the previous 6 hour forecast.
+##'
+##' NOAA GEFS weather data isn't always posted immediately. Each 16-day forecast takes
+##' approximately three hours to run, and the once-a-day forecasts for days 17-35 are
+##' posted much later (up to 21 hours) than the forecasts for days 0 to 16.
+##' See the [GEFS v12 release announcement](https://www.weather.gov/media/notification/pdf2/scn20-75gefs_v12_changes.pdf)
+##' for details.
+##'
##' @section Data Save Format:
-##' Data is saved in the netcdf format to the specified directory. File names reflect the precision of the data to the given range of days.
+##' Data is saved in the netcdf format to the specified directory.
+##' File names reflect the precision of the data to the given range of days.
##' NOAA.GEFS.willow creek.3.2018-06-08T06:00.2018-06-24T06:00.nc specifies the forecast, using ensemble number 3 at willow creek on
##' June 6th, 2018 at 6:00 a.m. to June 24th, 2018 at 6:00 a.m.
##'
##' @return A list of data frames is returned containing information about the data file that can be used to locate it later. Each
##' data frame contains information about one file.
##'
-##' @param outfolder Directory where results should be written
-##' @param start_date, Range of dates/times to be downloaded (default assumed to be time that function is run)
-##' @param end_date, end date for range of dates to be downloaded (default 16 days from start_date)
+##' @param site_id The unique ID given to each site. This is used as part of the file name.
##' @param lat.in site latitude in decimal degrees
##' @param lon.in site longitude in decimal degrees
-##' @param site_id The unique ID given to each site. This is used as part of the file name.
-##' @param sitename Site name
-##' @param username username from pecan workflow
+##' @param outfolder Directory where results should be written
+##' @param start_date Range of dates/times to be downloaded (default assumed to be time that function is run)
+##' @param end_date end date for range of dates to be downloaded (default 16 days from start_date)
+##' @param downscale logical, assumed True. Indicates whether data should be downscaled to hourly
##' @param overwrite logical. Download a fresh version even if a local file with the same name already exists?
-##' @param downscale logical, assumed True. Indicated whether data should be downscaled to hourly
-##' @param ... Additional optional parameters
+##' @param ... Additional optional parameters, currently ignored
##'
##' @export
##'
@@ -50,8 +57,6 @@
##' @author Quinn Thomas, modified by K Zarada
##'
download.NOAA_GEFS <- function(site_id,
- sitename = NULL,
- username = 'pecan',
lat.in,
lon.in,
outfolder,
diff --git a/modules/data.atmosphere/R/half_hour_downscale.R b/modules/data.atmosphere/R/half_hour_downscale.R
index bb14748412a..87867093d4b 100644
--- a/modules/data.atmosphere/R/half_hour_downscale.R
+++ b/modules/data.atmosphere/R/half_hour_downscale.R
@@ -223,8 +223,16 @@ downscale_ShortWave_to_half_hrly <- function(df,lat, lon, hr = 0.5){
for (k in 1:nrow(data.hrly)) {
if(is.na(data.hrly$surface_downwelling_shortwave_flux_in_air[k])){
- SWflux <- as.matrix(subset(df, .data$day == data.hrly$day[k] & .data$hour == data.hrly$hour[k], data.hrly$surface_downwelling_shortwave_flux_in_air[k]))
- data.hrly$surface_downwelling_shortwave_flux_in_air[k] <- ifelse(data.hrly$rpotHM[k] > 0, as.numeric(SWflux[1])*(data.hrly$rpotH[k]/data.hrly$rpotHM[k]),0)
+ SWflux <- as.matrix(
+ df$surface_downwelling_shortwave_flux_in_air[
+ df$day == data.hrly$day[k] & df$hour == data.hrly$hour[k]
+ ]
+ )
+ data.hrly$surface_downwelling_shortwave_flux_in_air[k] <- ifelse(
+ data.hrly$rpotHM[k] > 0,
+ as.numeric(SWflux[1]) * (data.hrly$rpotH[k] / data.hrly$rpotHM[k]),
+ 0
+ )
}
}
@@ -284,11 +292,9 @@ downscale_repeat_6hr_to_half_hrly <- function(df, varName, hr = 0.5){
#previous 6hr period
dplyr::mutate(lead_var = dplyr::lead(df[,varName], 1))
#check for NA values and gapfill using closest timestep
- for(k in 1:dim(df)[1]){
+ for(k in 2:dim(df)[1]){
if (is.na(df$lead_var[k])) {
df$lead_var[k] <- df$lead_var[k-1]
- }else{
- df$lead_var[k] <- df$lead_var[k]
}
}
diff --git a/modules/data.atmosphere/man/download.NOAA_GEFS.Rd b/modules/data.atmosphere/man/download.NOAA_GEFS.Rd
index 05aa332be43..47dd834cc5a 100644
--- a/modules/data.atmosphere/man/download.NOAA_GEFS.Rd
+++ b/modules/data.atmosphere/man/download.NOAA_GEFS.Rd
@@ -6,8 +6,6 @@
\usage{
download.NOAA_GEFS(
site_id,
- sitename = NULL,
- username = "pecan",
lat.in,
lon.in,
outfolder,
@@ -21,25 +19,21 @@ download.NOAA_GEFS(
\arguments{
\item{site_id}{The unique ID given to each site. This is used as part of the file name.}
-\item{sitename}{Site name}
-
-\item{username}{username from pecan workflow}
-
\item{lat.in}{site latitude in decimal degrees}
\item{lon.in}{site longitude in decimal degrees}
\item{outfolder}{Directory where results should be written}
-\item{start_date, }{Range of dates/times to be downloaded (default assumed to be time that function is run)}
+\item{start_date}{Range of dates/times to be downloaded (default assumed to be time that function is run)}
-\item{end_date, }{end date for range of dates to be downloaded (default 16 days from start_date)}
+\item{end_date}{end date for range of dates to be downloaded (default 16 days from start_date)}
-\item{downscale}{logical, assumed True. Indicated whether data should be downscaled to hourly}
+\item{downscale}{logical, assumed True. Indicates whether data should be downscaled to hourly}
\item{overwrite}{logical. Download a fresh version even if a local file with the same name already exists?}
-\item{...}{Additional optional parameters}
+\item{...}{Additional optional parameters, currently ignored}
}
\value{
A list of data frames is returned containing information about the data file that can be used to locate it later. Each
@@ -56,23 +50,32 @@ but is converted at the station and downloaded in Kelvin.
\section{NOAA_GEFS General Information}{
-This function downloads NOAA GEFS weather data. GEFS is an ensemble of 21 different weather forecast models.
-A 16 day forecast is avaliable every 6 hours. Each forecast includes information on a total of 8 variables.
-These are transformed from the NOAA standard to the internal PEcAn
-standard.
+This function downloads NOAA GEFS weather data. GEFS is an ensemble of 31 different weather forecast models.
+A 16 day forecast is available every 6 hours and a 35 day forecast is available every 24 hours.
+Both are at 3-hour frequency for the first 10 days of the forecast and 6-hour frequency beyond that.
+Each forecast includes information on a total of 8 variables.
+These are transformed from the NOAA standard to the internal PEcAn standard.
}
-\section{Data Avaliability}{
+\section{Data Availability}{
+
+NOAA GEFS weather data is available on a rolling 4 day basis.
+Dates provided in "start_date" must be within this range.
+The end date can be any point after that, but if the end date is beyond 16 days
+(35 days for the midnight UTC forecast), only 16 (35) days worth of forecast are retrieved.
+Times are rounded down to the previous 6 hour forecast.
-NOAA GEFS weather data is avaliable on a rolling 12 day basis; dates provided in "start_date" must be within this range. The end date can be any point after
-that, but if the end date is beyond 16 days, only 16 days worth of forecast are recorded. Times are rounded down to the previous 6 hour forecast. NOAA
-GEFS weather data isn't always posted immediately, and to compensate, this function adjusts requests made in the last two hours
-back two hours (approximately the amount of time it takes to post the data) to make sure the most current forecast is used.
+NOAA GEFS weather data isn't always posted immediately. Each 16-day forecast takes
+approximately three hours to run, and the once-a-day forecasts for days 17-35 are
+posted much later (up to 21 hours) than the forecasts for days 0 to 16.
+See the [GEFS v12 release announcement](https://www.weather.gov/media/notification/pdf2/scn20-75gefs_v12_changes.pdf)
+for details.
}
\section{Data Save Format}{
-Data is saved in the netcdf format to the specified directory. File names reflect the precision of the data to the given range of days.
+Data is saved in the netcdf format to the specified directory.
+ File names reflect the precision of the data to the given range of days.
NOAA.GEFS.willow creek.3.2018-06-08T06:00.2018-06-24T06:00.nc specifies the forecast, using ensemble number 3 at willow creek on
June 6th, 2018 at 6:00 a.m. to June 24th, 2018 at 6:00 a.m.
}
From 83db1ba8cf72ebd5f44e4204463ac9c86dc78a25 Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Wed, 31 Jul 2024 02:14:36 -0700
Subject: [PATCH 0025/1193] typo
---
modules/data.atmosphere/R/half_hour_downscale.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/data.atmosphere/R/half_hour_downscale.R b/modules/data.atmosphere/R/half_hour_downscale.R
index 87867093d4b..9b2efbb08be 100644
--- a/modules/data.atmosphere/R/half_hour_downscale.R
+++ b/modules/data.atmosphere/R/half_hour_downscale.R
@@ -292,7 +292,7 @@ downscale_repeat_6hr_to_half_hrly <- function(df, varName, hr = 0.5){
#previous 6hr period
dplyr::mutate(lead_var = dplyr::lead(df[,varName], 1))
#check for NA values and gapfill using closest timestep
- for(k in 2:dim(df)[1]){
+ for(k in 1:dim(df)[1]){
if (is.na(df$lead_var[k])) {
df$lead_var[k] <- df$lead_var[k-1]
}
From 1c63c21c5c34e0a9b6442d2f33bb8f15dbf6c78f Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Wed, 31 Jul 2024 02:15:33 -0700
Subject: [PATCH 0026/1193] Update modules/data.atmosphere/NEWS.md
---
modules/data.atmosphere/NEWS.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/data.atmosphere/NEWS.md b/modules/data.atmosphere/NEWS.md
index fca4c6c3a04..02b235f3718 100644
--- a/modules/data.atmosphere/NEWS.md
+++ b/modules/data.atmosphere/NEWS.md
@@ -1,8 +1,8 @@
# PEcAn.data.atmosphere 1.8.0.9000
## Fixed
-* `download.NOAA_GEFS` is updated to work again with GEFS v12.3
- (the current release as of this writing in July 2024).
+* `download.NOAA_GEFS` is updated to work again with GEFS v12.3,
+ the current release as of this writing in July 2024 (#3349).
## Changed
* Removed `sitename` and `username` from the formal arguments of `download.NOAA_GEFS`.
From bd267b44c50360d34c3ec58d2e08284dac35e30f Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Wed, 31 Jul 2024 11:17:56 -0700
Subject: [PATCH 0027/1193] Update
modules/data.atmosphere/R/GEFS_helper_functions.R
---
modules/data.atmosphere/R/GEFS_helper_functions.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R
index 684fcea9724..9424d413138 100644
--- a/modules/data.atmosphere/R/GEFS_helper_functions.R
+++ b/modules/data.atmosphere/R/GEFS_helper_functions.R
@@ -68,7 +68,7 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
curr_date <- lubridate::as_date(curr_time)
potential_dates <- curr_date - lubridate::days(3:0)
- potential_dates = potential_dates[which(potential_dates == forecast_date)]
+ potential_dates <- potential_dates[which(potential_dates == forecast_date)]
if(length(potential_dates) == 0){PEcAn.logger::logger.error("Forecast Date not available")}
From ecf3d043d81d11c0d087b5576a29be162b1fd1d6 Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Mon, 5 Aug 2024 16:03:59 -0700
Subject: [PATCH 0028/1193] Update
modules/data.atmosphere/R/GEFS_helper_functions.R
---
modules/data.atmosphere/R/GEFS_helper_functions.R | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R
index 9424d413138..b60392c2964 100644
--- a/modules/data.atmosphere/R/GEFS_helper_functions.R
+++ b/modules/data.atmosphere/R/GEFS_helper_functions.R
@@ -18,8 +18,8 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
download_grid <- function(ens_index, location, directory, hours_char, cycle, base_filename1, vars,working_directory){
ens_base <- if (ens_index == 1) { "gec" } else { "gep" }
- ens_name <- stringr::str_pad(ens_index - 1, width = 2, pad = "0")
- base_filename2 <- paste0(ens_base,ens_name,".t",cycle,"z.pgrb2a.0p50.f")
+ ens_idx <- stringr::str_pad(ens_index - 1, width = 2, pad = "0")
+ base_filename2 <- paste0(ens_base,ens_idx,".t",cycle,"z.pgrb2a.0p50.f")
curr_hours <- hours_char
for(i in 1:length(curr_hours)){
From df252a071c02a2a492d8b1c4249859df3af1ae8c Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Mon, 5 Aug 2024 16:26:02 -0700
Subject: [PATCH 0029/1193] clarify names
---
modules/data.atmosphere/R/GEFS_helper_functions.R | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R
index b60392c2964..67ac51903d8 100644
--- a/modules/data.atmosphere/R/GEFS_helper_functions.R
+++ b/modules/data.atmosphere/R/GEFS_helper_functions.R
@@ -17,9 +17,9 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date,
download_grid <- function(ens_index, location, directory, hours_char, cycle, base_filename1, vars,working_directory){
- ens_base <- if (ens_index == 1) { "gec" } else { "gep" }
- ens_idx <- stringr::str_pad(ens_index - 1, width = 2, pad = "0")
- base_filename2 <- paste0(ens_base,ens_idx,".t",cycle,"z.pgrb2a.0p50.f")
+ member_type <- if (ens_index == 1) { "gec" } else { "gep" } # "_c_ontrol", "_p_erturbed"
+ ens_idxname <- stringr::str_pad(ens_index - 1, width = 2, pad = "0")
+ base_filename2 <- paste0(member_type,ens_idxname,".t",cycle,"z.pgrb2a.0p50.f")
curr_hours <- hours_char
for(i in 1:length(curr_hours)){
From 03fc21308d756a50fa585b642ce9c7a9c8df0e8b Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Mon, 5 Aug 2024 16:30:09 -0700
Subject: [PATCH 0030/1193] clarify names
---
modules/data.atmosphere/R/GEFS_helper_functions.R | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R
index 67ac51903d8..99d3783df46 100644
--- a/modules/data.atmosphere/R/GEFS_helper_functions.R
+++ b/modules/data.atmosphere/R/GEFS_helper_functions.R
@@ -193,9 +193,9 @@ process_gridded_noaa_download <- function(lat_list,
dlwrfsfc <- array(NA, dim = c(site_length, length(hours_char)))
dswrfsfc <- array(NA, dim = c(site_length, length(hours_char)))
- ens_base <- if (ens_index == 1) { "gec" } else { "gep" }
- ens_name <- stringr::str_pad(ens_index - 1, width = 2, pad = "0")
- base_filename2 <- paste0(ens_base,ens_name,".t",cycle,"z.pgrb2a.0p50.f")
+ member_type <- if (ens_index == 1) { "gec" } else { "gep" } # "_c_ontrol", "_p_erturbed"
+ ens_idxname <- stringr::str_pad(ens_index - 1, width = 2, pad = "0")
+ base_filename2 <- paste0(member_type,ens_idxname,".t",cycle,"z.pgrb2a.0p50.f")
lats <- round(lat_list/.5)*.5
lons <- round(lon_list/.5)*.5
From abcda0c007313bffe31b811b0515765b57505436 Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Fri, 9 Aug 2024 16:04:27 -0700
Subject: [PATCH 0031/1193] add myself to data.atm author list
---
modules/data.atmosphere/DESCRIPTION | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION
index 893b7b1da18..cb8ccc59ab0 100644
--- a/modules/data.atmosphere/DESCRIPTION
+++ b/modules/data.atmosphere/DESCRIPTION
@@ -2,17 +2,21 @@ Package: PEcAn.data.atmosphere
Type: Package
Title: PEcAn Functions Used for Managing Climate Driver Data
Version: 1.8.0.9000
-Authors@R: c(person("Mike", "Dietze", role = c("aut"),
- email = "dietze@bu.edu"),
- person("David", "LeBauer", role = c("aut", "cre"),
- email = "dlebauer@email.arizona.edu"),
- person("Carl", "Davidson", role = c("aut"),
- email = "davids14@illinois.edu"),
- person("Rob", "Kooper", role = c("aut"),
- email = "kooper@illinois.edu"),
- person("Deepak", "Jaiswal", role = c("aut"),
- email = "djaiswal@djaiswal.edu"),
- person("University of Illinois, NCSA", role = c("cph")))
+Authors@R: c(
+ person("Mike", "Dietze", role = c("aut"),
+ email = "dietze@bu.edu"),
+ person("David", "LeBauer", role = c("aut", "cre"),
+ email = "dlebauer@email.arizona.edu"),
+ person("Carl", "Davidson", role = c("aut"),
+ email = "davids14@illinois.edu"),
+ person("Rob", "Kooper", role = c("aut"),
+ email = "kooper@illinois.edu"),
+ person("Deepak", "Jaiswal", role = c("aut"),
+ email = "djaiswal@djaiswal.edu"),
+ person("Chris", "Black", role = c("ctb"),
+ email = "chris@ckblack.org",
+ comment = c(ORCID="https://orcid.org/0000-0001-8382-298X")),
+ person("University of Illinois, NCSA", role = c("cph")))
Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
workflow management tool that is designed to simplify the management of
model parameterization, execution, and analysis. The PECAn.data.atmosphere
From 63f270f89618abe745c3502587a24b49762e30a8 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Wed, 14 Aug 2024 23:18:53 +0530
Subject: [PATCH 0032/1193] Refactor extra variables in `run.meta.anbalysis`
Signed-off-by: Abhinav Pandey
---
modules/meta.analysis/R/run.meta.analysis.R | 34 ++++++++++++---------
1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/modules/meta.analysis/R/run.meta.analysis.R b/modules/meta.analysis/R/run.meta.analysis.R
index 65afcdf61bd..4f8841ae46b 100644
--- a/modules/meta.analysis/R/run.meta.analysis.R
+++ b/modules/meta.analysis/R/run.meta.analysis.R
@@ -216,22 +216,26 @@ runModule.run.meta.analysis <- function(settings) {
PEcAn.logger::logger.info(paste0("Running meta-analysis on all PFTs listed by any Settings object in the list: ",
paste(pft.names, collapse = ", ")))
- iterations <- settings$meta.analysis$iter
- random <- settings$meta.analysis$random.effects$on
- use_ghs <- settings$meta.analysis$random.effects$use_ghs
- threshold <- settings$meta.analysis$threshold
- dbfiles <- settings$database$dbfiles
- database <- settings$database$bety
- run.meta.analysis(pfts, iterations, random, threshold, dbfiles, database, use_ghs)
+ run.meta.analysis(
+ pfts,
+ settings$meta.analysis$iter,
+ settings$meta.analysis$random.effects$on,
+ settings$meta.analysis$threshold,
+ settings$database$dbfiles,
+ settings$database$bety,
+ settings$meta.analysis$random.effects$use_ghs
+ )
} else if (PEcAn.settings::is.Settings(settings)) {
- pfts <- settings$pfts
- iterations <- settings$meta.analysis$iter
- random <- settings$meta.analysis$random.effects$on
- use_ghs <- settings$meta.analysis$random.effects$use_ghs
- threshold <- settings$meta.analysis$threshold
- dbfiles <- settings$database$dbfiles
- database <- settings$database$bety
- run.meta.analysis(pfts, iterations, random, threshold, dbfiles, database, use_ghs, update = settings$meta.analysis$update)
+ run.meta.analysis(
+ settings$pfts,
+ settings$meta.analysis$iter,
+ settings$meta.analysis$random.effects$on,
+ settings$meta.analysis$threshold,
+ settings$database$dbfiles,
+ settings$database$bety,
+ settings$meta.analysis$random.effects$use_ghs,
+ update = settings$meta.analysis$update
+ )
} else {
stop("runModule.run.meta.analysis only works with Settings or MultiSettings")
}
From 74003d9582e8ec0a99f303f9b3c8e4f4777298ac Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Wed, 21 Aug 2024 20:24:46 +0530
Subject: [PATCH 0033/1193] get existing machine info using helper function
Signed-off-by: Abhinav Pandey
---
base/db/R/convert_input.R | 26 +++++++++---------
base/db/R/get.machine.info.R | 48 ++++++++++++++++++++++-----------
base/db/man/get.machine.host.Rd | 22 +++++++++++++++
3 files changed, 66 insertions(+), 30 deletions(-)
create mode 100644 base/db/man/get.machine.host.Rd
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index ba2d7a3a5f0..9cc5c8f3c03 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -176,17 +176,15 @@ convert_input <-
# Date/time processing for existing input
existing.input[[i]]$start_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$start_date), "UTC")
existing.input[[i]]$end_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$end_date), "UTC")
-
+
## Obtain machine information
+ machine.host.info <- get.machine.host(host, con = con)
+ machine.host <- machine.host.info$machine.host
+ machine <- machine.host.info$machine
#Grab machine info of file that exists
existing.machine <- db.query(paste0("SELECT * from machines where id = '",
existing.dbfile[[i]]$machine_id, "'"), con)
- #Grab machine info of host machine
- machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
- machine <- db.query(paste0("SELECT * from machines where hostname = '",
- machine.host, "'"), con)
-
# If the files aren't on the machine, we have to download them, so "overwrite" is meaningless.
if (existing.machine$id == machine$id) {
@@ -353,9 +351,9 @@ convert_input <-
existing.dbfile$machine_id, "'"), con)
#Grab machine info of host machine
- machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
- machine <- db.query(paste0("SELECT * from machines where hostname = '",
- machine.host, "'"), con)
+ machine.host.info <- get.machine.host(host, con = con)
+ machine.host <- machine.host.info$machine.host
+ machine <- machine.host.info$machine
if (existing.machine$id != machine$id) {
@@ -475,11 +473,11 @@ convert_input <-
existing.machine <- db.query(paste0("SELECT * from machines where id = '",
existing.dbfile$machine_id, "'"), con)
- #Grab machine info of
- machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
- machine <- db.query(paste0("SELECT * from machines where hostname = '",
- machine.host, "'"), con)
-
+ #Grab machine info of host machine
+ machine.host.info <- get.machine.host(host, con = con)
+ machine.host <- machine.host.info$machine.host
+ machine <- machine.host.info$machine
+
if(existing.machine$id != machine$id){
PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.")
PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency")
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index c98bee6cf20..979b1f6bb33 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -3,21 +3,21 @@
#' @param input.args input args for existing records
#' @param input.id input id for existing records
#' @param con database connection
-#'
+#'
#' @return list of machine, input, and dbfile records
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
-
- machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
- machine <- db.query(paste0("SELECT * from machines where hostname = '",
- machine.host, "'"), con)
-
+
+ machine.host.info <- get.machine.host(host, con = con)
+ machine.host <- machine.host.info$machine.host
+ machine <- machine.host.info$machine
+
if (nrow(machine) == 0) {
PEcAn.logger::logger.error("machine not found", host$name)
return(NULL)
}
-
+
if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
input <- dbfile <- NULL
} else {
@@ -26,19 +26,19 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
PEcAn.logger::logger.error("input not found", input.id)
return(NULL)
}
-
- if(!is.null(input.args$dbfile.id)){
+
+ if (!is.null(input.args$dbfile.id)) {
dbfile <-
db.query(
paste(
- "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =",
+ "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =",
input.id,
" and container_type = 'Input' and machine_id =",
machine$id
),
con
- )
- }else{
+ )
+ } else {
dbfile <-
db.query(
paste(
@@ -48,11 +48,11 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
machine$id
),
con
- )
+ )
}
-
-
+
+
if (nrow(dbfile) == 0) {
PEcAn.logger::logger.error("dbfile not found", input.id)
return(NULL)
@@ -63,5 +63,21 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
}
}
- return(list(machine = machine, input = input, dbfile = dbfile))
+ return(list(machine = machine, input = input, dbfile = dbfile))
+}
+
+#' Helper Function to retrieve machine host and machine informations
+#' @param host host information
+#' @param con database connection
+#' @return list of machine host and machine information
+#' @author Abhinav Pandey
+get.machine.host <- function(host, con = NULL) {
+ #Grab machine info of host machine
+ machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
+ machine <- db.query(paste0(
+ "SELECT * from machines where hostname = '",
+ machine.host, "'"
+ ), con)
+
+ return(list(machine.host, machine))
}
diff --git a/base/db/man/get.machine.host.Rd b/base/db/man/get.machine.host.Rd
new file mode 100644
index 00000000000..926035dec0c
--- /dev/null
+++ b/base/db/man/get.machine.host.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get.machine.info.R
+\name{get.machine.host}
+\alias{get.machine.host}
+\title{Helper Function to retrieve machine host and machine informations}
+\usage{
+get.machine.host(host, con = NULL)
+}
+\arguments{
+\item{host}{host information}
+
+\item{con}{database connection}
+}
+\value{
+list of machine host and machine information
+}
+\description{
+Helper Function to retrieve machine host and machine informations
+}
+\author{
+Abhinav Pandey
+}
From a578be2dfc274c10a100c90e1febf1474d5289f7 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Wed, 9 Oct 2024 11:45:13 +0530
Subject: [PATCH 0034/1193] Applied changes as suggested by @infotroph
Signed-off-by: Abhinav Pandey
---
base/db/R/add.database.entries.R | 161 ++++++++++++++++---------------
base/db/R/convert_input.R | 22 +++--
2 files changed, 100 insertions(+), 83 deletions(-)
diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R
index d3eb994a646..8b36e884398 100644
--- a/base/db/R/add.database.entries.R
+++ b/base/db/R/add.database.entries.R
@@ -23,99 +23,108 @@
add.database.entries <- function(
result, con, start_date,
- end_date, write, overwrite,
+ end_date, overwrite,
insert.new.file, input.args,
machine, mimetype, formatname,
allow.conflicting.dates, ensemble,
ensemble_name, existing.input,
existing.dbfile, input) {
- if (write) {
- # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
- # This list will be returned.
- newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
+ # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result.
+ # This list will be returned.
+ newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
- for (i in 1:length(result)) { # Master for loop
- id_not_added <- TRUE
+ for (i in 1:length(result)) { # Master for loop
+ id_not_added <- TRUE
- if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 &&
- (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) {
- # Updating record with new dates
- db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, "' WHERE id=", existing.input[[i]]$id), con)
- id_not_added <- FALSE
+ if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 &&
+ (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) {
+ # Updating record with new dates
+ db.query(
+ paste0(
+ "UPDATE inputs SET start_date='", start_date, "', end_date='", end_date,
+ "' WHERE id=", existing.input[[i]]$id
+ ),
+ con
+ )
+ id_not_added <- FALSE
- # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration.
- newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
- newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id)
- }
-
- if (overwrite) {
- # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record)
- if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) {
- db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), "' WHERE id=", existing.input[[i]]$id), con)
- }
+ # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration.
+ newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
+ newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id)
+ }
- if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) {
- db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), "', file_name='", result[[i]]$dbfile.name[1], "' WHERE id=", existing.dbfile[[i]]$id), con)
- }
+ if (overwrite) {
+ # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record)
+ if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) {
+ db.query(
+ paste0(
+ "UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]),
+ "', file_name='", result[[i]]$dbfile.name[1],
+ "' WHERE id=", existing.dbfile[[i]]$id
+ ),
+ con
+ )
}
- # If there is no ensemble then for each record there should be one parent
- # But when you have ensembles, all of the members have one parent !!
- parent.id <- if (is.numeric(ensemble)) {
- ifelse(is.null(input[[i]]), NA, input[[1]]$id)
- } else {
- ifelse(is.null(input[[i]]), NA, input[[i]]$id)
+ if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) {
+ db.query(paste0(
+ "UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]),
+ "', file_name='", result[[i]]$dbfile.name[1],
+ "' WHERE id=", existing.dbfile[[i]]$id
+ ), con)
}
+ }
+ # If there is no ensemble then for each record there should be one parent
+ # But when you have ensembles, all of the members have one parent !!
+ parent.id <- if (is.numeric(ensemble)) {
+ ifelse(is.null(input[[i]]), NA, input[[1]]$id)
+ } else {
+ ifelse(is.null(input[[i]]), NA, input[[i]]$id)
+ }
- if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) {
- site.id <- input.args$newsite
- }
- if (insert.new.file && id_not_added) {
- dbfile.id <- dbfile.insert(
- in.path = dirname(result[[i]]$file[1]),
- in.prefix = result[[i]]$dbfile.name[1],
- "Input",
- existing.input[[i]]$id,
- con,
- reuse = TRUE,
- hostname = machine$hostname
- )
+ if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) {
+ site.id <- input.args$newsite
+ }
- newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
- newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
- } else if (id_not_added) {
- # This is to tell input.insert if we are writing ensembles
- # Why does it need it? Because it checks for inputs with the same time period, site, and machine
- # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition
- ens.flag <- if (!is.null(ensemble) | is.null(ensemble_name)) TRUE else FALSE
+ if (insert.new.file && id_not_added) {
+ dbfile.id <- dbfile.insert(
+ in.path = dirname(result[[i]]$file[1]),
+ in.prefix = result[[i]]$dbfile.name[1],
+ "Input",
+ existing.input[[i]]$id,
+ con,
+ reuse = TRUE,
+ hostname = machine$hostname
+ )
- new_entry <- dbfile.input.insert(
- in.path = dirname(result[[i]]$file[1]),
- in.prefix = result[[i]]$dbfile.name[1],
- siteid = site.id,
- startdate = start_date,
- enddate = end_date,
- mimetype = mimetype,
- formatname = formatname,
- parentid = parent.id,
- con = con,
- hostname = machine$hostname,
- allow.conflicting.dates = allow.conflicting.dates,
- ens = ens.flag
- )
+ newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id)
+ newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id)
+ } else if (id_not_added) {
+ # This is to tell input.insert if we are writing ensembles
+ # Why does it need it? Because it checks for inputs with the same time period, site, and machine
+ # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition
+ ens.flag <- if (!is.null(ensemble) || is.null(ensemble_name)) TRUE else FALSE
- newinput$input.id <- c(newinput$input.id, new_entry$input.id)
- newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
- }
- } # End for loop
+ new_entry <- dbfile.input.insert(
+ in.path = dirname(result[[i]]$file[1]),
+ in.prefix = result[[i]]$dbfile.name[1],
+ siteid = site.id,
+ startdate = start_date,
+ enddate = end_date,
+ mimetype = mimetype,
+ formatname = formatname,
+ parentid = parent.id,
+ con = con,
+ hostname = machine$hostname,
+ allow.conflicting.dates = allow.conflicting.dates,
+ ens = ens.flag
+ )
- successful <- TRUE
- return(newinput)
- } else {
- PEcAn.logger::logger.warn("Input was not added to the database")
- successful <- TRUE
- return(NULL)
- }
+ newinput$input.id <- c(newinput$input.id, new_entry$input.id)
+ newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
+ }
+ } # End for loop
+ return(newinput)
}
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 8828d069d6c..ed267440fbc 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -591,13 +591,21 @@ convert_input <-
#---------------------------------------------------------------#
# New arrangement of database adding code to deal with ensembles.
- return (add.database.entries(result, con, start_date,
- end_date, write, overwrite,
- insert.new.file, input.args,
- machine, mimetype, formatname,
- allow.conflicting.dates, ensemble,
- ensemble_name, existing.input,
- existing.dbfile, input))
+ if(write) {
+ add_entries_result <- return (add.database.entries(result, con, start_date,
+ end_date, overwrite,
+ insert.new.file, input.args,
+ machine, mimetype, formatname,
+ allow.conflicting.dates, ensemble,
+ ensemble_name, existing.input,
+ existing.dbfile, input))
+ } else {
+ PEcAn.logger::logger.warn("Input was not added to the database")
+ successful <- TRUE
+ return(NULL)
+ }
+ successful <- TRUE
+ return (add_entries_result)
} # convert_input
From 293a68befdc9452b2011da4f6320da502c91b79d Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Wed, 9 Oct 2024 11:55:21 +0530
Subject: [PATCH 0035/1193] Minor review changes
Signed-off-by: Abhinav Pandey
---
base/db/R/check.missing.files.R | 8 ++++----
base/db/R/convert_input.R | 2 +-
base/db/tests/testthat/test.check.missing.files.R | 1 -
3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
index 617878496de..29ce044f68c 100644
--- a/base/db/R/check.missing.files.R
+++ b/base/db/R/check.missing.files.R
@@ -8,7 +8,7 @@
#'
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
-check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) {
+check_missing_files <- function(result, existing.input = NULL, existing.dbfile = NULL) {
result_sizes <- purrr::map_dfr(
result,
~ dplyr::mutate(
@@ -35,12 +35,12 @@ check_missing_files <- function(result, outname, existing.input = NULL, existing
}
- # Wrap in a list for consistant processing later
- if (exists("existing.input") && is.data.frame(existing.input)) {
+ # Wrap in a list for consistent processing later
+ if (is.data.frame(existing.input)) {
existing.input <- list(existing.input)
}
- if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
+ if (is.data.frame(existing.dbfile)) {
existing.dbfile <- list(existing.dbfile)
}
return(list(existing.input, existing.dbfile))
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index ed267440fbc..a074a689389 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -583,7 +583,7 @@ convert_input <-
#--------------------------------------------------------------------------------------------------#
# Check if result has empty or missing files
- checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile)
+ checked.missing.files <- check_missing_files(result, existing.input, existing.dbfile)
# Unwrap parameters after performing checks for missing files
existing.input <- checked.missing.files$existing.input
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index c0ad6794f65..bc61bb1ad4a 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -5,7 +5,6 @@ test_that("`check_missing_files()` able to return correct missing files", {
res <- check_missing_files(
result = list(data.frame(file = c("A", "B"))),
- outname = "test",
existing.input = data.frame(),
existing.dbfile = data.frame()
)
From f7f6926fa14c5c5e8ee776b74e0ac5fd77d56048 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Wed, 9 Oct 2024 11:55:36 +0530
Subject: [PATCH 0036/1193] Update base/db/R/get.machine.info.R
Co-authored-by: Chris Black
---
base/db/R/get.machine.info.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index 979b1f6bb33..31f489daddc 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -7,7 +7,7 @@
#' @return list of machine, input, and dbfile records
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
-get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) {
+get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) {
machine.host.info <- get.machine.host(host, con = con)
machine.host <- machine.host.info$machine.host
From 8f820b027cb7fb5da70f8f66e8f6e88abd1d4f8b Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Wed, 9 Oct 2024 11:56:54 +0530
Subject: [PATCH 0037/1193] Apply suggestions from code review
Co-authored-by: Chris Black
---
base/db/R/check.missing.files.R | 1 -
base/db/R/convert_input.R | 6 +++++-
base/db/R/get.machine.info.R | 4 ++--
base/db/tests/testthat/test.check.missing.files.R | 6 ++----
4 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
index 29ce044f68c..f3a496cf5de 100644
--- a/base/db/R/check.missing.files.R
+++ b/base/db/R/check.missing.files.R
@@ -1,7 +1,6 @@
#' Function to check if result has empty or missing files
#'
#' @param result A list of dataframes with file paths
-#' @param outname Name of the output file
#' @param existing.input Existing input records
#' @param existing.dbfile Existing dbfile records
#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index a074a689389..042c9da08db 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -380,7 +380,7 @@ convert_input <-
if (!is.null(ensemble) && ensemble) {
return.all <-TRUE
- } else{
+ } else {
return.all <- FALSE
}
existing.dbfile <- dbfile.input.check(siteid = site.id,
@@ -518,6 +518,10 @@ convert_input <-
# Get machine information
machine.info <- get.machine.info(host, input.args = input.args, input.id = input.id)
+ if (any(sapply(machine.info, is.null))) {
+ PEcAn.logger::logger.error("failed lookup of inputs or dbfiles")
+ return(NULL)
+ }
machine <- machine.info$machine
input <- machine.info$input
dbfile <- machine.info$dbfile
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R
index 31f489daddc..14123a586e9 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get.machine.info.R
@@ -18,7 +18,7 @@ get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) {
return(NULL)
}
- if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
+ if (is.na(input.id) || is.null(input.id)) {
input <- dbfile <- NULL
} else {
input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
@@ -71,7 +71,7 @@ get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) {
#' @param con database connection
#' @return list of machine host and machine information
#' @author Abhinav Pandey
-get.machine.host <- function(host, con = NULL) {
+get_machine_host <- function(host, con) {
#Grab machine info of host machine
machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
machine <- db.query(paste0(
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index bc61bb1ad4a..75a531283dd 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -1,7 +1,7 @@
test_that("`check_missing_files()` able to return correct missing files", {
# Mock `purrr::map_dfr`
- mocked_res <- mockery::mock(data.frame(file = c("A", "B"), file_size = c(100, 200), missing = c(FALSE, FALSE), empty = c(FALSE, FALSE)))
- mockery::stub(check_missing_files, "purrr::map_dfr", mocked_res)
+ mocked_size <- mockery::mock(100,200)
+ mockery::stub(check_missing_files, "file.size", mocked_res)
res <- check_missing_files(
result = list(data.frame(file = c("A", "B"))),
@@ -9,8 +9,6 @@ test_that("`check_missing_files()` able to return correct missing files", {
existing.dbfile = data.frame()
)
- # Print the structure of `res` for debugging
- str(res)
expect_equal(length(res), 2)
expect_true(is.list(res[[1]]))
From 319ab77de17469019c3dbabb9dfa6a32f2c6bf4d Mon Sep 17 00:00:00 2001
From: Quentin Bell
Date: Thu, 14 Nov 2024 13:32:45 +0200
Subject: [PATCH 0038/1193] Switched from individual parameter writes to using
the vector functionality of SticsRFiles::set_param_xml.
---
models/stics/R/write.config.STICS.R | 1339 +++++++--------------------
1 file changed, 351 insertions(+), 988 deletions(-)
diff --git a/models/stics/R/write.config.STICS.R b/models/stics/R/write.config.STICS.R
index 39802f5520a..dd8afc7d9ac 100644
--- a/models/stics/R/write.config.STICS.R
+++ b/models/stics/R/write.config.STICS.R
@@ -15,7 +15,7 @@
##' @author Istem Fer
##-------------------------------------------------------------------------------------------------#
write.config.STICS <- function(defaults, trait.values, settings, run.id) {
-
+
## the rest of the code assumes only plant PFTs
## little modification here as not to have a bigger re-write for now
if(any(grepl("soil", names(trait.values)))){
@@ -59,8 +59,8 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
p2 <- ifelse(events_file$rotation$planted_crop2[uic] != "-99.0", tolower(events_file$rotation$planted_crop2[uic]), "")
uname <- paste0(p1,p2)
usmdirs[uic] <- paste0(file.path(settings$host$rundir, run.id, uname), "_",
- lubridate::year(events_file$rotation$rotation_begin[uic]), "-",
- lubridate::year(events_file$rotation$rotation_end[uic]))
+ lubridate::year(events_file$rotation$rotation_begin[uic]), "-",
+ lubridate::year(events_file$rotation$rotation_end[uic]))
}
}else{
@@ -123,13 +123,13 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
}
}
-
+
}
# TODO: have a better way to determine USMs
########################## finish usmdirs
-
+
## make sure rundir and outdir exist
dir.create(rundir, showWarnings = FALSE, recursive = TRUE)
@@ -146,13 +146,13 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
prf_list$entry$text <- rundir
XML::saveXML(PEcAn.settings::listToXml(prf_list, "properties"),
- file = file.path(cfgdir, "preferences.xml"),
- prefix = '\n\n')
+ file = file.path(cfgdir, "preferences.xml"),
+ prefix = '\n\n')
# stics and javastics path
stics_path <- settings$model$binary
-
+
# Per STICS development team, there are two types of STICS inputs
# Global input: _plt.xml, param_gen.xml, param_newform.xml
@@ -164,9 +164,11 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
## this is where we overwrite model parameters
+ # Convert pecan parameters to stics names
+ trait.values <- pecan2stics(trait.values)
# read in template plt file, has all the formalisms
plt_xml <- XML::xmlParse(system.file("crop_plt.xml", package = "PEcAn.STICS"))
- #plt_list <- XML::xmlToList(plt_xml)
+
plt_files <- list()
for (pft in seq_along(trait.values)) {
@@ -175,7 +177,6 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
plant_file <- file.path(rundir, paste0(names(trait.values)[pft], "_plt.xml"))
-
if(names(trait.values)[pft] != "env"){
# save the template, will be overwritten below
XML::saveXML(plt_xml, file = plant_file)
@@ -183,505 +184,69 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
next
}
+ # Apply changes to those parameters specified by trait.values for this pft.
+ if (!is.null(pft.traits)) {
+ SticsRFiles::set_param_xml(plant_file, param = names(pft.traits), values = as.list(unname(pft.traits)), overwrite = TRUE)
+ }
+
plt_files[[pft]] <- plant_file
# to learn the parameters in a plant file
- # SticsRFiles::get_param_info(file_path = plant_file)
-
- # go over each formalism and replace params following the order in crop_plt
- # TODO: vary more params
+ # SticsRFiles::get_param_info()
- # plant name and group
- # effect of atmospheric CO2 concentration
-
- # phasic development
# to see parameters per formalism
# values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "phasic development")
# unlist(values)
+ # Creating a dataframe of parameter names and their values for feeding into SticsRFiles::set_param_xml.
+ # Note that the parameters in this data frame are either hardcoded for now or otherwise require special treatment.
+ plt_df <- data.frame(codebfroid = 2) # vernalization requirement, hardcoding for now, 2==yes.
+
# name code of plant in 3 letters
# a handful of plants have to have specific codes, e.g. forages need to be 'fou' and vine needs to be 'vig'
# but others can be anything? if not, either consider a LUT or passing via settings
- if(names(trait.values)[pft] %in% c("frg", "wcl", "alf")){
- codeplante <- 'fou'
- codeperenne <- 2
+ if(names(trait.values)[pft] %in% c("frg", "wcl", "alf")){
+ plt_df$codeplante <- "fou"
+ plt_df$codeperenne <- 2
}else{
- codeplante <- base::substr(names(trait.values)[pft],1,3)
- codeperenne <- 1
- }
- codebfroid <- 2 # vernalization requirement, hardcoding for now, 2==yes
- SticsRFiles::set_param_xml(plant_file, "codeplante", codeplante, overwrite = TRUE)
- SticsRFiles::set_param_xml(plant_file, "codeperenne", codeperenne, overwrite = TRUE)
- SticsRFiles::set_param_xml(plant_file, "codebfroid", codebfroid, overwrite = TRUE)
-
- # minimum temperature below which development stops (degree C)
- if ("tdmin" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "tdmin", pft.traits[which(pft.names == "tdmin")], overwrite = TRUE)
- }
-
- # maximum temperature above which development stops (degree C)
- if ("tdmax" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "tdmax", pft.traits[which(pft.names == "tdmax")], overwrite = TRUE)
- }
-
- # basal photoperiod
- if ("phobase" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "phobase", pft.traits[which(pft.names == "phobase")], overwrite = TRUE)
- }
-
- # saturating photoperiod
- if ("phosat" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "phosat", pft.traits[which(pft.names == "phosat")], overwrite = TRUE)
- }
-
-
- # maximum phasic delay allowed due to stresses
- if ("phasic_delay_max" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "stressdev", pft.traits[which(pft.names == "phasic_delay_max")], overwrite = TRUE)
- }
-
- # minimum number of vernalising days (d) [0,7]
- if ("vernalization_days_min" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "jvcmini", round(pft.traits[which(pft.names == "vernalization_days_min")]), overwrite = TRUE)
- }
-
- # day of initiation of vernalisation in perennial crops (julian d) [1,731]
- # this only takes effect for perennial crops
- if ("vernalization_init" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "julvernal", round(pft.traits[which(pft.names == "vernalization_init")]), overwrite = TRUE)
- }
-
- # optimal temperature for vernalisation (degreeC)
- if ("vernalization_TOpt" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "tfroid", pft.traits[which(pft.names == "vernalization_TOpt")], overwrite = TRUE)
- }
-
- # semi thermal amplitude for vernalising effect (degreeC)
- if ("vernalization_TAmp" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "ampfroid", pft.traits[which(pft.names == "vernalization_TAmp")], overwrite = TRUE)
- }
-
- if ("coeflevamf" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "coeflevamf", pft.traits[which(pft.names == "coeflevamf")], overwrite = TRUE)
- }
-
- if ("coefamflax" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "coefamflax", pft.traits[which(pft.names == "coefamflax")], overwrite = TRUE)
- }
-
- if ("coeflaxsen" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "coeflaxsen", pft.traits[which(pft.names == "coeflaxsen")], overwrite = TRUE)
- }
-
- if ("coefsenlan" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "coefsenlan", pft.traits[which(pft.names == "coefsenlan")], overwrite = TRUE)
- }
-
- if ("coeflevdrp" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "coeflevdrp", pft.traits[which(pft.names == "coeflevdrp")], overwrite = TRUE)
- }
-
- if ("coefdrpmat" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "coefdrpmat", pft.traits[which(pft.names == "coefdrpmat")], overwrite = TRUE)
- }
-
- if ("coefflodrp" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "coefflodrp", pft.traits[which(pft.names == "coefflodrp")], overwrite = TRUE)
- }
-
-
- # emergence and starting
- # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "emergence and starting")
- # unlist(values)
-
- # minimum temperature below which emergence is stopped (degreeC)
- if ("emergence_Tmin" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "tgmin", pft.traits[which(pft.names == "emergence_Tmin")], overwrite = TRUE)
+ plt_df$codeplante <- base::substr(names(trait.values)[pft],1,3)
+ plt_df$codeperenne <- 1
}
# nbfeuilplant, leaf number per plant when planting, default 0, skipping for now
-
# this is a switch, for now hardcoding to have delay at the beginning of the crop (1)
# if starting the simulation from a later stage (e.g. lev) this has no effect
# codegermin, option of simulation of a germination phase or a delay at the beginning of the crop (1) or direct starting (2)
- SticsRFiles::set_param_xml(plant_file, "codegermin", 1, overwrite = TRUE)
+ plt_df$codegermin <- 1
- # cumulative thermal time allowing germination (degree-d)
- if ("cum_thermal_germin" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "stpltger", pft.traits[which(pft.names == "cum_thermal_germin")], overwrite = TRUE)
- }
-
- # skipping the other parameters related to this switch, they don't seem influential, at least on NPP and LAI
+ # skipping the other parameters related to this switch for now
# potgermi: soil water potential under which seed imbibition is impeded
# nbjgerlim: maximum number of days after grain imbibition allowing full germination
# propjgermin: minimal proportion of the duration nbjgerlim when the temperature is higher than the temperature threshold Tdmax
-
- # parameter of the curve of coleoptile elongation
- if ("belong" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "belong", pft.traits[which(pft.names == "belong")], overwrite = TRUE)
- }
-
- # parameter of the plantlet elongation curve
- if ("celong" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "celong", pft.traits[which(pft.names == "celong")], overwrite = TRUE)
- }
-
- # maximum elongation of the coleoptile in darkness condition
- if ("coleoptile_elong_dark_max" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "elmax", pft.traits[which(pft.names == "coleoptile_elong_dark_max")], overwrite = TRUE)
- }
-
- # number of days after germination after which plant emergence is reduced
- if ("days_reduced_emergence_postgerm" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "nlevlim1", round(pft.traits[which(pft.names == "days2reduced_emergence_postgerm")]), overwrite = TRUE)
- }
-
- # number of days after germination after which plant emergence is impossible
- if ("days2stopped_emergence_postgerm" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "nlevlim2", round(pft.traits[which(pft.names == "days2stopped_emergence_postgerm")]), overwrite = TRUE)
- }
-
- # plant vigor index allowing to emerge through a soil crust, vigueurbat == 1 inactivates some soil crust related parameters, skipping for now
-
- # there are also "planting" related parameters
-
- # leaves
- # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "leaves")
- # unlist(values)
-
-
- # phyllotherme, thermal duration between the apparition of two successive leaves on the main stem (degree day)
- # assuming this is the same as phyllochron
- if ("phyllochron" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "phyllotherme", pft.traits[which(pft.names == "phyllochron")], overwrite = TRUE)
- }
-
- # minimal density above which interplant competition starts (m-2)
- if ("dens_comp" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "bdens", pft.traits[which(pft.names == "dens_comp")], overwrite = TRUE)
- }
-
- # LAI above which competition between plants starts (m2 m-2)
- if ("lai_comp" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "laicomp", pft.traits[which(pft.names == "lai_comp")], overwrite = TRUE)
- }
-
- # basal height of crop (m)
- if ("height" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "hautbase", pft.traits[which(pft.names == "height")], overwrite = TRUE)
- }
-
- # maximum height of crop
- if ("HTMAX" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "hautmax", pft.traits[which(pft.names == "HTMAX")], overwrite = TRUE)
- }
-
- # minimum temperature at which growth ceases
- if ("tcmin_growth" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "tcmin", pft.traits[which(pft.names == "tcmin_growth")], overwrite = TRUE)
- }
-
- # maximum temperature at which growth ceases
- if ("tcmax_growth" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "tcmax", pft.traits[which(pft.names == "tcmax_growth")], overwrite = TRUE)
- }
-
# temperature beyond which foliar growth stops
- if ("tcmax_foliar_growth" %in% pft.names) {
+ if ("tcxstop" %in% pft.names | "tdmax" %in% pft.names) {
# tcxstop must be > tdmax, priors should be set that way, and we can let the simulation fail afterwards, but putting a warning here
- tdmax <- SticsRFiles::get_param_xml(plant_file, param="tdmax", select = "formalisme", select_value = "phasic development")[[1]][[1]]
- tcxstop <- pft.traits[which(pft.names == "tcmax_foliar_growth")]
+ # Retrieve the new values if they exist, otherwise read them from the plant file
+ if ("tcxstop" %in% pft.names) {
+ tcxstop <- pft.traits[which(pft.names == "tcxstop")]
+ } else {
+ tcxstop <- SticsRFiles::get_param_xml(plant_file, param="tcxstop", select = "formalisme", select_value = "leaves")[[1]][[1]]
+ }
+ if ("tdmax" %in% pft.names) {
+ tdmax <- pft.traits[which(pft.names == "tdmax")]
+ } else {
+ tdmax <- SticsRFiles::get_param_xml(plant_file, param="tdmax", select = "formalisme", select_value = "phasic development")[[1]][[1]]
+ }
if(tcxstop < tdmax){
PEcAn.logger::logger.warn("tcmax_foliar_growth value (", tcxstop, ") should be greater than tdmax (", tdmax, ").")
}
- SticsRFiles::set_param_xml(plant_file, "tcxstop", tcxstop, overwrite = TRUE)
-
- }
-
- # ulai at the inflexion point of the function DELTAI=f(ULAI)
- if ("vlaimax" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "vlaimax", pft.traits[which(pft.names == "vlaimax")], overwrite = TRUE)
- }
-
- # parameter of the logistic curve of LAI growth
- if ("pentlaimax" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "pentlaimax", pft.traits[which(pft.names == "pentlaimax")], overwrite = TRUE)
- }
-
- # ulai from which the rate of leaf growth decreases
- if ("udlaimax" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "udlaimax", pft.traits[which(pft.names == "udlaimax")], overwrite = TRUE)
- }
-
- # life span of early leaves expressed as a fraction of the life span of the last leaves emitted DURVIEF
- if ("early2last_leaflife" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "ratiodurvieI", pft.traits[which(pft.names == "early2last_leaflife")], overwrite = TRUE)
- }
-
- # fraction of senescent biomass (relative to total biomass)
- if ("senes2total_biomass" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "ratiosen", pft.traits[which(pft.names == "senes2total_biomass")], overwrite = TRUE)
- }
-
- # fraction of senescent leaves falling to the soil
- # not sure if this is supposed to be a fraction or a percentage in STICS, values look like a fraction but min-max is given as 0-100
- # treating it like a fraction for now
- if ("fracLeafFall" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "abscission", pft.traits[which(pft.names == "fracLeafFall")], overwrite = TRUE)
- }
-
- # parameter relating the C/N of dead leaves and the INN
- if ("parazofmorte" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "parazofmorte", pft.traits[which(pft.names == "parazofmorte")], overwrite = TRUE)
- }
-
- # parameter of the N stress function active on leaf expansion (INNLAI), bilinear function vs INN passing through the point (INNmin, INNturgmin)
- if ("innturgmin" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "innturgmin", pft.traits[which(pft.names == "innturgmin")], overwrite = TRUE)
- }
-
- # accelerating parameter for the lai growth rate
- if ("lai_growth_rate_accelerating" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "dlaimin", pft.traits[which(pft.names == "lai_growth_rate_accelerating")], overwrite = TRUE)
- }
-
- # maximum rate of the setting up of LAI
- if ("lai_max_rate" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "dlaimaxbrut", pft.traits[which(pft.names == "lai_max_rate")], overwrite = TRUE)
- }
-
- # relative additional lifespan due to N excess in plant (INN > 1)
- if ("relative_addlifespan_DT_excessN" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "durviesupmax", pft.traits[which(pft.names == "relative_addlifespan_DT_excessN")], overwrite = TRUE)
- }
-
- # parameter of the N stress function active on senescence (INNsenes), bilinear function vs INN passing through the point (INNmin, INNsen)
- if ("innsen" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "innsen", pft.traits[which(pft.names == "innsen")], overwrite = TRUE)
- }
-
- # threshold soil water content active to simulate water senescence stress as a proportion of the turgor stress
- if ("rapsenturg" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "rapsenturg", pft.traits[which(pft.names == "rapsenturg")], overwrite = TRUE)
- }
-
-
- # radiation interception
- # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "radiation interception")
-
- # extinction coefficient of photosynthetic active radiation in the canopy
- if ("extinction_coefficient_diffuse" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "extin", pft.traits[which(pft.names == "extinction_coefficient_diffuse")], overwrite = TRUE)
- }
-
- # shoot biomass growth
- # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "shoot biomass growth")
-
- # minimum temperature for development
- if ("temin" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "temin", pft.traits[which(pft.names == "temin")], overwrite = TRUE)
- }
-
- # maximal temperature above which plant growth stops
- if ("temax" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "temax", pft.traits[which(pft.names == "temax")], overwrite = TRUE)
- }
-
- # optimal temperature (1/2) for plant growth
- if ("teopt" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "teopt", pft.traits[which(pft.names == "teopt")], overwrite = TRUE)
- }
-
- # optimal temperature (2/2) for plant growth
- if ("teoptbis" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "teoptbis", pft.traits[which(pft.names == "teoptbis")], overwrite = TRUE)
- }
-
- # maximum radiation use efficiency during the juvenile phase
- if ("RUE_juv" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "efcroijuv", pft.traits[which(pft.names == "RUE_juv")], overwrite = TRUE)
- }
-
- # maximum radiation use efficiency during the vegetative stage
- if ("RUE_veg" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "efcroiveg", pft.traits[which(pft.names == "RUE_veg")], overwrite = TRUE)
- }
-
- # maximum radiation use efficiency during the grain filling phase
- if ("RUE_rep" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "efcroirepro", pft.traits[which(pft.names == "RUE_rep")], overwrite = TRUE)
- }
-
- # fraction of daily remobilisable C reserves
- if ("remobres" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "remobres", pft.traits[which(pft.names == "remobres")], overwrite = TRUE)
- }
-
- # ratio biomass / useful height cut of crops (t.ha-1.m-1)
- if ("biomass2usefulheight" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "coefmshaut", pft.traits[which(pft.names == "biomass2usefulheight")], overwrite = TRUE)
- }
-
-
- # partitioning of biomass in organs
- # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "partitioning of biomass in organs")
-
- # maximum SLA (specific leaf area) of green leaves (cm2 g-1)
- if ("SLAMAX" %in% pft.names) {
- slamax <- pft.traits[which(pft.names == "SLAMAX")]
- slamax <- PEcAn.utils::ud_convert(PEcAn.utils::ud_convert(slamax, "m2", "cm2"), "kg-1", "g-1") # m2 kg-1 to cm2 g-1
- SticsRFiles::set_param_xml(plant_file, "slamax", slamax, overwrite = TRUE)
- }
-
- # minimum SLA (specific leaf area) of green leaves (cm2 g-1)
- if ("SLAMIN" %in% pft.names) {
- slamin <- pft.traits[which(pft.names == "SLAMIN")]
- slamin <- PEcAn.utils::ud_convert(PEcAn.utils::ud_convert(slamin, "m2", "cm2"), "kg-1", "g-1") # m2 kg-1 to cm2 g-1
- SticsRFiles::set_param_xml(plant_file, "slamin", slamin, overwrite = TRUE)
- }
-
-
- # ratio stem (structural part)/leaf
- if ("stem2leaf" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "tigefeuil", pft.traits[which(pft.names == "stem2leaf")], overwrite = TRUE)
- }
-
- # skipping: envfruit, fraction of envelop in grainmaxi (w:w)
- # skipping: sea, specific area of fruit envelops
-
- # yield formation, will get back
-
- # roots
- # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "roots")
-
-
- # sensanox, index of anoxia sensitivity (0 = insensitive), 0 for now
- # stoprac, stage when root growth stops (LAX= maximum leaf area index, end of leaf growth or SEN=beginning of leaf senescence)
-
- # sensrsec, index of root sensitivity to drought (1=insensitive)
- if ("rootsens2drought" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "sensrsec", pft.traits[which(pft.names == "rootsens2drought")], overwrite = TRUE)
- }
-
- # contrdamax, maximal reduction in root growth rate due to soil strengthness (high bulk density)
- if ("db_reduc_rgr_max" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "contrdamax", pft.traits[which(pft.names == "db_reduc_rgr_max")], overwrite = TRUE)
- }
-
- # draclong, maximum rate of root length production per plant (cm plant-1 degreeD-1)
- if ("rootlength_prod_max" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "draclong", pft.traits[which(pft.names == "rootlength_prod_max")], overwrite = TRUE)
- }
-
- # debsenrac, sum of degrees-days defining the beginning of root senescence (root life time) (degreeD)
- if ("root_sen_dday" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "debsenrac", round(pft.traits[which(pft.names == "root_sen_dday")]), overwrite = TRUE)
- }
-
- #lvfront, root density at the root apex (cm cm-3)
- if ("rootdens_at_apex" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "lvfront", pft.traits[which(pft.names == "rootdens_at_apex")], overwrite = TRUE)
- }
-
- # longsperac - specific root length (cm g-1)
- if ("SRL" %in% pft.names) {
- srl_val <- PEcAn.utils::ud_convert(pft.traits[which(pft.names == "SRL")], "m", "cm")
- SticsRFiles::set_param_xml(plant_file, "longsperac", srl_val, overwrite = TRUE)
+ # TODO: Do we force one of these to change or let the simulation fail?
}
# option to activate the N influence on root partitioning within the soil profile (1 = yes, 2 = no)
- SticsRFiles::set_param_xml(plant_file, "codazorac", 1, overwrite = TRUE)
-
- # reduction factor on root growth when soil mineral N is limiting (< minazorac)
- if ("minefnra" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "minefnra", pft.traits[which(pft.names == "minefnra")], overwrite = TRUE)
- }
-
- # mineral N concentration in soil below which root growth is reduced (kg.ha-1.cm-1)
- if ("minazorac" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "minazorac", pft.traits[which(pft.names == "minazorac")], overwrite = TRUE)
- }
-
- # mineral N concentration in soil above which root growth is maximum (kg.ha-1.cm-1)
- if ("maxazorac" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "maxazorac", pft.traits[which(pft.names == "maxazorac")], overwrite = TRUE)
- }
-
- # frost
-
- # formalism - water
-
- # psisto, potential of stomatal closing (absolute value) (bars)
- # note: units in betyDB are m, but my prior is for testing
- if ("psi_stomata_closure" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "psisto", pft.traits[which(pft.names == "psi_stomata_closure")], overwrite = TRUE)
- }
-
- # psiturg, potential of the beginning of decrease of the cellular extension (absolute value) (bars)
- # may or may not be leaf_psi_tlp in betyDB
- if ("leaf_psi_tlp" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "psiturg", pft.traits[which(pft.names == "leaf_psi_tlp")], overwrite = TRUE)
- }
-
- # h2ofeuilverte, water content of green leaves (relative to fresh matter) (g g-1)
- # may or may not be water_content_TLP_leaf in betyDB
- if ("water_content_TLP_leaf" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "h2ofeuilverte", pft.traits[which(pft.names == "water_content_TLP_leaf")], overwrite = TRUE)
- }
-
- # skipping:
- # h2ofeuiljaune
- # h2otigestruc
- # h2otigestruc
- # h2ofrvert
- # deshydbase
- # tempdeshyd
-
- # kmax, maximum crop coefficient for water requirements (=MET/PET)
- if ("crop_water_max" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "kmax", pft.traits[which(pft.names == "crop_water_max")], overwrite = TRUE)
- }
-
- # nitrogen
- # masecNmax
- if ("masecNmax" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "masecNmax", pft.traits[which(pft.names == "masecNmax")], overwrite = TRUE)
- }
-
- # Nreserve
- if ("Nreserve" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "Nreserve", pft.traits[which(pft.names == "Nreserve")], overwrite = TRUE)
- }
-
-
- # Kmabs1
- if ("Kmabs1" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "Kmabs1", pft.traits[which(pft.names == "Kmabs1")], overwrite = TRUE)
- }
-
- # adil
- if ("adil" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "adil", pft.traits[which(pft.names == "adil")], overwrite = TRUE)
- }
-
- # bdil
- if ("bdil" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "bdil", pft.traits[which(pft.names == "bdil")], overwrite = TRUE)
- }
-
- # INNmin
- if ("INNmin" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "INNmin", pft.traits[which(pft.names == "INNmin")], overwrite = TRUE)
- }
-
- # Nmeta
- if ("Nmeta" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "Nmeta", pft.traits[which(pft.names == "Nmeta")]*100, overwrite = TRUE)
- }
-
- # correspondance code BBCH
+ plt_df$codazorac <- 1
# cultivar parameters
# values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "cultivar parameters")
@@ -689,53 +254,10 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
# there are multiple cultivars (varietes) in plt file
# for now I assume we will always use only #1 in simulations
# hence, _tec file will always say variete==1, if you change the logic don't forget to update handling of the _tec file accordingly
+ # by default set_param_xml modifies the given parameter in all cultivars.
- # maximal lifespan of an adult leaf expressed in summation of Q10=2 (2**(T-Tbase))
- if ("leaf_lifespan_max" %in% pft.names) {
- # this will modifies all varietes' durvieFs by default
- SticsRFiles::set_param_xml(plant_file, "durvieF", pft.traits[which(pft.names == "leaf_lifespan_max")], overwrite = TRUE)
- # see example for setting a particular (the Grindstad) cultivar param
- # SticsRFiles::set_param_xml(plant_file, "durvieF", pft.traits[which(pft.names == "leaf_lifespan_max")], select = "Grindstad", overwrite = TRUE)
- }
-
- # cumulative thermal time between the stages LEV (emergence) and AMF (maximum acceleration of leaf growth, end of juvenile phase)
- if ("cum_thermal_juvenile" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "stlevamf", pft.traits[which(pft.names == "cum_thermal_juvenile")], overwrite = TRUE)
- }
-
- # cumulative thermal time between the stages AMF (maximum acceleration of leaf growth, end of juvenile phase) and LAX (maximum leaf area index, end of leaf growth)
- if ("cum_thermal_growth" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "stamflax", pft.traits[which(pft.names == "cum_thermal_growth")], overwrite = TRUE)
- }
-
- # cumulative thermal time between the stages LEV (emergence) and DRP (starting date of filling of harvested organs)
- if ("cum_thermal_filling" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "stlevdrp", pft.traits[which(pft.names == "cum_thermal_filling")], overwrite = TRUE)
- }
-
- if ("adens" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "adens", pft.traits[which(pft.names == "adens")], overwrite = TRUE)
- }
-
- if ("croirac" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "croirac", pft.traits[which(pft.names == "croirac")], overwrite = TRUE)
- }
-
- # extinction coefficient connecting LAI to crop height
- if ("LAI2height" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "khaut", pft.traits[which(pft.names == "LAI2height")], overwrite = TRUE)
- }
-
- # average root radius
- if ("rayon" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "rayon", pft.traits[which(pft.names == "rayon")], overwrite = TRUE)
- }
-
- # minimal value for drought stress index
- if ("swfacmin" %in% pft.names) {
- SticsRFiles::set_param_xml(plant_file, "swfacmin", pft.traits[which(pft.names == "swfacmin")], overwrite = TRUE)
- }
-
+ # Set the parameters that have been added to plt_df in the plant file.
+ SticsRFiles::set_param_xml(plant_file, names(plt_df), plt_df[1, ], overwrite = TRUE)
# convert xml2txt
if(names(trait.values)[pft] != "env"){
SticsRFiles::convert_xml2txt(file = plant_file)
@@ -756,11 +278,18 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
## these also have plant parameters as well as soil
## at the moment everything is treated as params, but some could be IC or come from the events file
- # these parameters won't change as crop changes in a continous rotation
+ # these parameters won't change as crop changes in a continuous rotation
+
+ # Convert pecan parameters to stics names for soil
+ # prepare for pecan2stics call, expects a list
+ soil_params_list <- list()
+ soil_params_list[[1]] <- soil_params
+ soil_params <- pecan2stics(soil_params_list)[[1]]
soil.names <- names(soil_params)
for (pft in seq_along(trait.values)) {
+
if(names(trait.values)[pft] == "env"){
next
}
@@ -768,382 +297,29 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
gen_xml <- XML::xmlParse(system.file("param_gen.xml", package = "PEcAn.STICS"))
gen_file <- file.path(rundir, "param_gen.xml")
XML::saveXML(gen_xml, file = gen_file)
- codeinitprec <- ifelse(length(usmdirs>1), 1, 2)
- SticsRFiles::set_param_xml(gen_file, "codeinitprec", codeinitprec, overwrite = TRUE)
+ # This input file is created from the template and not modified.
newf_xml <- XML::xmlParse(system.file("param_newform.xml", package = "PEcAn.STICS"))
newf_file <- file.path(rundir, "param_newform.xml")
XML::saveXML(newf_xml, file = newf_file)
-
-
- pft.traits <- unlist(trait.values[[pft]])
- pft.names <- names(pft.traits)
-
- ### Shoot growth
- # parameter defining radiation effect on conversion efficiency
- if ("rad_on_conversion_eff" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "coefb", pft.traits[which(pft.names == "rad_on_conversion_eff")], overwrite = TRUE)
- }
-
- # ratio of root mass to aerial mass at harvest
- if ("root2aerial_harvest" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "proprac", pft.traits[which(pft.names == "root2aerial_harvest")], overwrite = TRUE)
- }
-
- # minimal amount of root mass at harvest (when aerial biomass is nil) t.ha-1
- if ("rootmin_harvest" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "y0msrac", pft.traits[which(pft.names == "rootmin_harvest")], overwrite = TRUE)
- }
-
- ### Root growth
-
- # bulk density of soil below which root growth is reduced due to a lack of soil cohesion (g.cm-3)
- if ("bd_rootgrowth_reduced" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "dacohes", pft.traits[which(pft.names == "bd_rootgrowth_reduced")], overwrite = TRUE)
- }
-
- # bulk density of soil above which root growth is maximal (g.cm-3)
- if ("bd_rootgrowth_maximal" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "daseuilbas", pft.traits[which(pft.names == "bd_rootgrowth_maximal")], overwrite = TRUE)
- }
-
- # bulk density of soil above which root growth becomes impossible (g.cm-3)
- if ("bd_rootgrowth_impossible" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "daseuilhaut", pft.traits[which(pft.names == "bd_rootgrowth_impossible")], overwrite = TRUE)
- }
-
- ### Water absorption and nitrogen content of the plant
-
- # parameter of increase of maximal transpiration when a water stress occurs
- if ("maxTPincrease_waterstress" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "beta", pft.traits[which(pft.names == "maxTPincrease_waterstress")], overwrite = TRUE)
- }
-
- # root length density (RLD) above which water and N uptake are maximum and independent of RLD
- if ("lvopt" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "lvopt", pft.traits[which(pft.names == "lvopt")], overwrite = TRUE)
- }
-
- # diffusion coefficient of nitrate N in soil at field capacity
- if ("difN_FC" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "difN", soil_params[which(soil.names == "difN_FC")], overwrite = TRUE)
- }
-
- # skipping
- # concrr: inorganic N concentration (NH4+NO3-N) in the rain
-
- # minimal amount of rain required to start an automatic fertilisation (N mm.d-1)
- if ("plNmin" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "plNmin", soil_params[which(soil.names == "plNmin")], overwrite = TRUE)
- }
-
- # skipping, irrlev:
- # amount of irrigation applied automatically on the sowing day to allow germination when the model calculates automaticaly
- # the amount of irrigations or when the irrigation dates are calculated by sum of temperature
-
- # minimal amount of N in the plant required to compute INN (kg.ha-1)
- if ("QNpltminINN" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "QNpltminINN", pft.traits[which(pft.names == "QNpltminINN")], overwrite = TRUE)
- }
-
- ### Soil C and N processes and fertiliser losses
-
- # minimal temperature for decomposition of humified organic matter (degreeC)
- if ("tmin_mineralisation" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "tmin_mineralisation", soil_params[which(soil.names == "tmin_mineralisation")], overwrite = TRUE)
- }
-
- # parameter (1/2) of the temperature function on humus decomposition rate
- if ("T_p1_Hdecomp_rate" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "FTEMh", soil_params[which(soil.names == "T_p1_Hdecomp_rate")], overwrite = TRUE)
- }
-
- # parameter (2/2) of the temperature function on humus decomposition rate
- if ("T_p2_Hdecomp_rate" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "FTEMha", soil_params[which(soil.names == "T_p2_Hdecomp_rate")], overwrite = TRUE)
- }
-
- # reference temperature for decomposition of humified organic matter
- if ("T_r_HOMdecomp" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "TREFh", soil_params[which(soil.names == "T_r_HOMdecomp")], overwrite = TRUE)
- }
-
- # parameter (1/2) of the temperature function on decomposition rate of organic residues
- if ("FTEMr" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "FTEMr", soil_params[which(soil.names == "FTEMr")], overwrite = TRUE)
- }
-
- # parameter (2/2) of the temperature function on decomposition rate of organic residues
- if ("FTEMra" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "FTEMra", soil_params[which(soil.names == "FTEMra")], overwrite = TRUE)
- }
-
- # reference temperature for decomposition of organic residues
- if ("T_r_ORdecomp" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "TREFr", soil_params[which(soil.names == "T_r_ORdecomp")], overwrite = TRUE)
- }
-
- # TODO: come back to these
- # # not used anymore, or at least not with this name!!!
- # # relative potential mineralization rate: K2 = fmin1 * exp(- fmin2*argi) / (1+fmin3*calc)
- # if ("FMIN1" %in% soil.names) {
- # SticsRFiles::set_param_xml(gen_file, "FMIN1", soil_params[which(soil.names == "FMIN1")], overwrite = TRUE)
- # }
- #
- # # not used anymore, or at least not with this name!!!
- # # parameter defining the effect of clay on the potential mineralization rate: K2 = fmin1 * exp(-fmin2*argi) / (1+fmin3*calc)
- # if ("FMIN2" %in% soil.names) {
- # SticsRFiles::set_param_xml(gen_file, "FMIN2", soil_params[which(soil.names == "FMIN2")], overwrite = TRUE)
- # }
- #
- # # not used anymore, or at least not with this name!!!
- # # parameter defining the effect of CaCO3 on the potential mineralization rate: K2 = fmin1 * exp(-fmin2*argi) / (1+fmin3*calc)
- # if ("FMIN3" %in% soil.names) {
- # SticsRFiles::set_param_xml(gen_file, "FMIN3", soil_params[which(soil.names == "FMIN3")], overwrite = TRUE)
- # }
-
- # N/C ratio of soil humus
- if ("Wh" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "Wh", soil_params[which(soil.names == "Wh")], overwrite = TRUE)
- }
-
- # soil pH below which NH3 volatilisation derived from fertiliser is nil
- if ("pHminvol" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "pHminvol", soil_params[which(soil.names == "pHminvol")], overwrite = TRUE)
- }
-
- # soil pH above which NH3 volatilisation derived from fertiliser is maximum
- if ("pHmaxvol" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "pHmaxvol", soil_params[which(soil.names == "pHmaxvol")], overwrite = TRUE)
- }
-
- # N uptake rate at which fertilizer loss is divided by 2
- if ("Nupt_fertloss_halve" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "Vabs2", soil_params[which(soil.names == "Nupt_fertloss_halve")], overwrite = TRUE)
- }
-
- # maximal amount of N immobilised in soil derived from the mineral fertilizer
- if ("maxNimm_mineralfert" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "Xorgmax", soil_params[which(soil.names == "maxNimm_mineralfert")], overwrite = TRUE)
- }
-
- # relative water content (fraction of field capacity) below which mineralisation rate is nil
- if ("hminm" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "hminm", soil_params[which(soil.names == "hminm")], overwrite = TRUE)
- }
-
- # relative water content (fraction of field capacity) below which mineralisation rate is maximum
- if ("hoptm" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "hoptm", soil_params[which(soil.names == "hoptm")], overwrite = TRUE)
- }
-
- # skipping, alphaph:
- # maximal soil pH variation per unit of inorganic N added with slurry
-
- # skipping, dphvolmax:
- # maximal pH increase following the application of slurry
-
- # skipping, phvols:
- # parameter used to calculate the variation of soil pH after the addition of slurry
-
- # relative soil mineralisation rate at water saturation
- if ("fhminsat" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "fhminsat", soil_params[which(soil.names == "fhminsat")], overwrite = TRUE)
- }
-
- # reduction factor of decomposition rate of organic residues when mineral N is limiting
- if ("Nlim_reductionOMdecomp" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "fredkN", soil_params[which(soil.names == "Nlim_reductionOMdecomp")], overwrite = TRUE)
- }
-
- # reduction factor of decomposition rate of microbial biomass when mineral N is limiting
- if ("Nlim_reductionMBdecomp" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "fredlN", soil_params[which(soil.names == "Nlim_reductionMBdecomp")], overwrite = TRUE)
- }
-
- # minimal value for the ratio N/C of the microbial biomass when N limits decomposition
- if ("fNCbiomin" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "fNCbiomin", soil_params[which(soil.names == "fNCbiomin")], overwrite = TRUE)
- }
-
- # additional reduction factor of residues decomposition rate when mineral N is very limited in soil
- if ("fredNsup" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "fredNsup", soil_params[which(soil.names == "fredNsup")], overwrite = TRUE)
- }
-
- # maximum priming ratio (relative to SOM decomposition SD rate)
- if ("Primingmax" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "Primingmax", soil_params[which(soil.names == "Primingmax")], overwrite = TRUE)
- }
-
- ### Nitrification, denitrification and associated N2O emissions
- ### TODO: modify these params
- ### Soil hydrology and compaction
- # minimal amount of rain required to produce runoff (mm.d-1)
- if ("precmin4runoff" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "pminruis", soil_params[which(soil.names == "precmin4runoff")], overwrite = TRUE)
- }
-
- # soil thermal diffusivity (cm2.s-1)
- if ("soil_thermal_diffusivity" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "diftherm", soil_params[which(soil.names == "soil_thermal_diffusivity")], overwrite = TRUE)
- }
+ # Creating a dataframe of parameter names and their values for feeding into SticsRFiles::set_param_xml.
+ # Note that the parameters in this data frame are either hardcoded for now or otherwise require special treatment.
+ gen_df <- data.frame(codeinitprec = ifelse(length(usmdirs>1), 1, 2)) # reset initial conditions in chained simulations
- # skipping, bformnappe:
- # coefficient for the water table shape (artificially drained soil)
-
- # drain radius (cm)
- if ("rdrain" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "rdrain", soil_params[which(soil.names == "rdrain")], overwrite = TRUE)
- }
-
- # soil water potential corresponding to wilting point (Mpa)
- if ("SWP_WP" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "psihumin", soil_params[which(soil.names == "SWP_WP")], overwrite = TRUE)
- }
-
- # soil water potential corresponding to field capacity (Mpa)
- if ("SWP_FC" %in% soil.names) {
- SticsRFiles::set_param_xml(gen_file, "psihucc", soil_params[which(soil.names == "SWP_FC")], overwrite = TRUE)
- }
-
- # soil moisture content (fraction of field capacity) above which compaction may occur and delay sowing
- if ("SMC_compaction_delay_sow" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "prophumtasssem", pft.traits[which(pft.names == "SMC_compaction_delay_sow")], overwrite = TRUE)
- }
-
- # soil moisture content (fraction of field capacity) above which compaction may occur and delay harvest
- if ("SMC_compaction_delay_harvest" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "prophumtassrec", pft.traits[which(pft.names == "SMC_compaction_delay_harvest")], overwrite = TRUE)
- }
-
- ### skipping
- ### Soil tillage if soil compaction activated
-
- ### Typology of pebbles fertilisers and residues
- ### should some of these parameters come from event files?
-
- ### codetypeng: Types of mineral fertilisers - 1 atm
- # 1: Ammonium.nitrate
- # 2: Urea.Ammonium.Nitrate.solution
- # 3: Urea
- # 4: Anhydrous.ammonia
- # 5: Ammonium.sulphate
- # 6: Ammonium.phosphate
- # 7: Calcium.nitrate
- # 8: Fixed.efficiency
-
- # each option has 4 params
- # engamm: fraction of ammonium in the N fertilizer
- # orgeng: maximal amount of fertilizer N that can be immobilized in the soil (fraction for type 8)
- # deneng: maximal fraction of the mineral fertilizer that can be denitrified (used if codedenit is not activated)
- # voleng: maximal fraction of mineral fertilizer that can be volatilized
-
- ### codetypres: Type of residues for decomposition parameters - 21 atm
- # 1: Main crop on surface
- # 2: Intermediate crop on surface
- # 3: Manure on surface
- # 4: Green compost on surface
- # 5: Sewage sludge on surface
- # 6: Vinasse on surface
- # 7: Horn on surface
- # 8: Grapevine shoots on surface
- # 9: Others.1 on surface
- # 10: Others.2 on surface
- # 11: Main crop ploughed in
- # 12: Intermediate crop ploughed in
- # 13: Manure ploughed in
- # 14: Green compost ploughed in
- # 15: Sewage sludge ploughed in
- # 16: Vinasse ploughed in
- # 17: Cattle horn ploughed in
- # 18: Grapevine shoots ploughed in
- # 19: Others.1 ploughed in
- # 20: Others.2 ploughed in
- # 21: Dead roots in soil
-
- # each option has 17 params
-
- # fraction of organic residue which is decomposable
- if ("fOR_decomp" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "CroCo", pft.traits[which(pft.names == "fOR_decomp")], overwrite = TRUE)
- }
-
- # parameter of organic residues decomposition: kres=akres+bkres/CsurNres
- if ("ORdecomp_par" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "akres", pft.traits[which(pft.names == "ORdecomp_par")], overwrite = TRUE)
- }
-
- # potential rate of decomposition of organic residues: kres=akres+bkres/CsurNres
- if ("ORdecomp_rate" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "bkres", pft.traits[which(pft.names == "ORdecomp_rate")], overwrite = TRUE)
- }
-
- # parameter determining C/N ratio of biomass during organic residues decomposition: CsurNbio=awb+bwb/CsurNres
- if ("awb" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "awb", pft.traits[which(pft.names == "awb")], overwrite = TRUE)
- }
-
- # parameter determining C/N ratio of biomass during organic residues decomposition: CsurNbio=awb+bwb/CsurNres
- if ("bwb" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "bwb", pft.traits[which(pft.names == "bwb")], overwrite = TRUE)
- }
-
- # minimum ratio C/N of microbial biomass decomposing organic residues
- if ("minC2N_microbialbiomass" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "cwb", pft.traits[which(pft.names == "minC2N_microbialbiomass")], overwrite = TRUE)
- }
-
- # parameter of organic residues humification: hres = 1 - ahres*CsurNres/(bhres+CsurNres)
- if ("ahres" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "ahres", pft.traits[which(pft.names == "ahres")], overwrite = TRUE)
- }
-
- # parameter of organic residues humification: hres = 1 - ahres*CsurNres/(bhres+CsurNres)
- if ("bhres" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "bhres", pft.traits[which(pft.names == "bhres")], overwrite = TRUE)
- }
-
-
- # TODO: we need a soil PFT
-
- # potential decay rate of microbial biomass decomposing organic residues
- if ("microbialbiomass_decay" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "kbio", pft.traits[which(pft.names == "microbialbiomass_decay")], overwrite = TRUE)
- }
-
- # Carbon assimilation yield by the microbial biomass during crop residues decomposition
- if ("microbialbiomass_C_yield" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "yres", pft.traits[which(pft.names == "microbialbiomass_C_yield")], overwrite = TRUE)
- }
-
- # minimum value of C/N ratio of organic residue (g.g-1)
- if ("CNresmin" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "CNresmin", pft.traits[which(pft.names == "CNresmin")], overwrite = TRUE)
- }
+ pft.traits <- unlist(trait.values[[pft]])
+ pft.names <- names(pft.traits)
- # maximum value of C/N ratio of organic residue (g.g-1)
- if ("CNresmax" %in% pft.names) {
- SticsRFiles::set_param_xml(gen_file, "CNresmax", pft.traits[which(pft.names == "CNresmax")], overwrite = TRUE)
+ # Apply changes to those parameters specified by trait.values for this pft.
+ # Currently no checking/differentiation between parameters that are in the plant xml vs these xmls, but, for now, SticsRFiles just throws a warning when the parameter is not in that file.
+ if (!is.null(pft.traits)) {
+ SticsRFiles::set_param_xml(gen_file, param = names(pft.traits), values = as.list(unname(pft.traits)), overwrite = TRUE)
}
-
- # skipping, qmulchruis0:
- # amount of mulch above which runoff is suppressed
-
- # skipping, mouillabilmulch:
- # maximum wettability of crop mulch
-
- # skipping, kcouvmlch:
- # extinction coefficient connecting the soil cover to the amount of plant mulch
- # skipping, albedomulchresidus:
- # albedo of crop mulch
+ # Set the parameters that have been added to gen_df in the param_gen file.
+ SticsRFiles::set_param_xml(gen_file, names(gen_df), gen_df[1, ], overwrite = TRUE)
- # skipping, Qmulchdec:
- # maximal amount of decomposable mulch
-
SticsRFiles::convert_xml2txt(file = gen_file)
this_usm <- grep(names(trait.values)[pft], usmdirs)
@@ -1151,7 +327,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
file.copy(file.path(rundir, "tempopar.sti"), file.path(usmdirs[x], "tempopar.sti"), overwrite = TRUE)
})
- ### new formulations
+ ### new formulations
# DO NOTHING ELSE FOR NOW
SticsRFiles::convert_xml2txt(file = newf_file)
@@ -1169,8 +345,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
# read in template ini file
ini_xml <- XML::xmlParse(system.file("pecan_ini.xml", package = "PEcAn.STICS"))
for(i in seq_along(usmdirs)){
-
- # doesn't really matter what these are called, they will all be eventually 'ficini.txt'
+
ini_file <- file.path(rundir, paste0(basename(usmdirs[i]), "_ini.xml"))
# write the ini file
@@ -1183,32 +358,29 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
# these may or may not be modified depending on how crop cycles work in STICS
# 'snu' is bare soil
# fine for annual crops but need to change for perennials
- SticsRFiles::set_param_xml(file = ini_file, param = "stade0", values = "snu", select = "plante", select_value = "1", overwrite = TRUE)
+ SticsRFiles::set_param_xml(file = ini_file, param = "stade0", values = "dor", select = "plante", select_value = "1", overwrite = TRUE)
# when snu others are set to 0 by STICS
-
+
}else if(!is.null(settings$run$inputs$poolinitcond)){
ic_path <- settings$run$inputs$poolinitcond$path
ic_nc <- ncdf4::nc_open(ic_path)
# initial leaf area index (m2 m-2)
lai0 <- ncdf4::ncvar_get(ic_nc, "LAI")
- SticsRFiles::set_param_xml(file = ini_file, param = "lai0", values = lai0, select = "plante", select_value = "1", overwrite = TRUE)
# initial aerial biomass (kg m-2 --> t ha-1)
masec0 <- ncdf4::ncvar_get(ic_nc, "AGB")
- SticsRFiles::set_param_xml(file = ini_file, param = "masec0", values = PEcAn.utils::ud_convert(masec0, "kg m-2", "t ha-1"), select = "plante", select_value = "1", overwrite = TRUE)
# initial depth of root apex of the crop (m --> cm)
zrac0 <- ncdf4::ncvar_get(ic_nc, "rooting_depth")
if(zrac0 < 0.2) zrac0 <- 0.2
- SticsRFiles::set_param_xml(file = ini_file, param = "zrac0", values = PEcAn.utils::ud_convert(zrac0, "m", "cm"), select = "plante", select_value = "1", overwrite = TRUE)
# initial grain dry weight - haven't started any simulations from this stage yet
# SticsRFiles::set_param_xml(file = ini_file, param = "magrain0", values = 0, select = "plante", select_value = "1", overwrite = TRUE)
# initial N amount in the plant (kg m-2 --> kg ha-1)
QNplante0 <- ncdf4::ncvar_get(ic_nc, "plant_nitrogen_content")
- SticsRFiles::set_param_xml(file = ini_file, param = "QNplante0", values = PEcAn.utils::ud_convert(QNplante0, "kg m-2", "kg ha-1"), select = "plante", select_value = "1", overwrite = TRUE)
+ QNplante0 <- PEcAn.utils::ud_convert(QNplante0, "kg m-2", "kg ha-1")
# Not anymore
# initial reserve of biomass (kg m-2 --> t ha-1)
@@ -1227,18 +399,21 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
}else if(zrac0 < 0.8){
densinitial[5] <-0 #densinitial layers should not be filled if zrac0 is not there
}
- SticsRFiles::set_param_xml(file = ini_file, param = "densinitial", values = densinitial, select = "plante", select_value = "1", overwrite = TRUE)
# default 'lev'
# SticsRFiles::set_param_xml(file = ini_file, param = "stade0", values = "plt", select = "plante", select_value = "1", overwrite = TRUE)
+ ic_list <- list(lai0 = lai0, masec0 = masec0, zrac0 = zrac0, QNplante0 = QNplante0, densinitial = densinitial)
+
+ SticsRFiles::set_param_xml(file = ini_file, param = names(ic_list), values = ic_list, select = "plante", select_value = "1", overwrite = TRUE)
+
ncdf4::nc_close(ic_nc)
}
SticsRFiles::convert_xml2txt(file = ini_file)
file.rename(file.path(rundir, "ficini.txt"), file.path(usmdirs[i], "ficini.txt"))
}
-
+
############################ Prepare Soils ##################################
@@ -1246,26 +421,13 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
#### THERE IS SOME BUG IN SticsRFiles::convert_xml2txt FOR SOLS.XML
#### I NOW PUT TXT VERSION TO THE MODEL PACKAGE: param.sol
- #### TODO: revise others to have txt templates directly in the package
-
- # # changed from FINERT to finert and moved to the sols.xml
- # # initial fraction of soil organic N inactive for mineralisation (= stable SON/ total SON)
- # if ("FINERT" %in% soil.names) {
- # SticsRFiles::set_param_xml(gen_file, "finert", soil_params[which(soil.names == "FINERT")], overwrite = TRUE)
- # }
-
- sols_file <- file.path(rundir, "param.sol")
-
- # cp template sols file (txt)
- file.copy(system.file("param.sol", package = "PEcAn.STICS"), sols_file)
-
- # check param names
- # sols_vals <- SticsRFiles::get_soil_txt(sols_file)
+ #### sols_file <- file.path(rundir, "param.sol")
+ #### Note this has changed now, if all is working might delete these comments
+ sols_file <- file.path(rundir, "sols.xml")
str_ns <- paste0(as.numeric(settings$run$site$id) %/% 1e+09, "-", as.numeric(settings$run$site$id) %% 1e+09)
- # I guess not important what this is called as long as it's consistent in usms
- SticsRFiles::set_soil_txt(file = sols_file, param="typsol", value=paste0("sol", str_ns))
+ soils_df <- data.frame(soil_name = paste0("sol", str_ns))
if(!is.null(settings$run$inputs$poolinitcond)){
ic_path <- settings$run$inputs$poolinitcond$path
@@ -1273,53 +435,61 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
# pH
pH <- ncdf4::ncvar_get(ic_nc, "pH")
- pH <- round(pH[1], digits = 1) # STICS uses 1 pH value
- SticsRFiles::set_soil_txt(file = sols_file, param="pH", value=pH)
-
- sapply(1:5, function(x) SticsRFiles::set_soil_txt(file = sols_file, param="epc", value=20, layer = x))
+ soils_df$pH <- round(pH[1], digits = 1) # STICS uses 1 pH value
+
+ # Thickness of each soil layer. This sets all (five) at 20cm, to set individual ones use epc_1, epc_2, etc.
+ soils_df$epc <- 20
# volume_fraction_of_water_in_soil_at_field_capacity
hccf <- ncdf4::ncvar_get(ic_nc, "volume_fraction_of_water_in_soil_at_field_capacity")
hccf <- round(hccf*100, digits = 2)
- sapply(seq_along(hccf), function(x) SticsRFiles::set_soil_txt(file = sols_file, param="hccf", value=hccf[x], layer = x))
+ names(hccf) <- paste0("HCCF_", c(1:length(hccf)))
+ soils_df <- cbind(soils_df, t(hccf))
# volume_fraction_of_condensed_water_in_soil_at_wilting_point
hminf <- ncdf4::ncvar_get(ic_nc, "volume_fraction_of_condensed_water_in_soil_at_wilting_point")
hminf <- round(hminf*100, digits = 2)
- sapply(seq_along(hminf), function(x) SticsRFiles::set_soil_txt(file = sols_file, param="hminf", value=hminf[x], layer = x))
+ names(hminf) <- paste0("HMINF_", c(1:length(hminf)))
+ soils_df <- cbind(soils_df, t(hminf))
# soil_organic_nitrogen_content
Norg <- ncdf4::ncvar_get(ic_nc, "soil_organic_nitrogen_content")
Norg <- round(Norg[1]*100, digits = 2) # STICS uses 1 Norg value
- SticsRFiles::set_soil_txt(file = sols_file, param="Norg", value=Norg)
-
+ soils_df$norg <- Norg
+
# mass_fraction_of_clay_in_soil
argi <- ncdf4::ncvar_get(ic_nc, "mass_fraction_of_clay_in_soil")
argi <- round(argi[1]*100, digits = 0) # STICS uses 1 argi value
- SticsRFiles::set_soil_txt(file = sols_file, param="argi", value=argi)
+ soils_df$argi <- argi
# soil_density (kg m-3 --> g cm-3)
DAF <- ncdf4::ncvar_get(ic_nc, "soil_density")
DAF <- round(PEcAn.utils::ud_convert(DAF, "kg m-3", "g cm-3"), digits = 1)
- sapply(seq_along(DAF), function(x) SticsRFiles::set_soil_txt(file = sols_file, param="DAF", value=DAF[x], layer = x))
+ names(DAF) <- paste0("DAF_", c(1:length(DAF)))
+ soils_df <- cbind(soils_df, t(DAF))
# c2n_humus
- #CsurNsol0 <- ncdf4::ncvar_get(ic_nc, "c2n_humus")
- #SticsRFiles::set_soil_txt(file = sols_file, param="CsurNsol", value=CsurNsol0)
+ # CsurNsol0 <- ncdf4::ncvar_get(ic_nc, "c2n_humus")
+ # soils_df$CsurNsol0 <- CsurNsol0
- # epd
+ # epd: thickness of mixing cells in each soil layer ( = 2 * dispersion length)
epd <- rep(10, 5)
- sapply(seq_along(epd), function(x) SticsRFiles::set_soil_txt(file = sols_file, param="epd", value=epd[x], layer = x))
+ names(epd) <- paste0("epd_", c(1:length(epd)))
+ soils_df <- cbind(soils_df, t(epd))
ncdf4::nc_close(ic_nc)
}
- file.copy(sols_file, file.path(usmdirs, "param.sol"))
+ SticsRFiles::gen_sols_xml(sols_file, param_df = soils_df, template = system.file("sols.xml", package = "PEcAn.STICS"))
+ SticsRFiles:::gen_sol_xsl_file(soil_name = paste0("sol", str_ns))
+ SticsRFiles::convert_xml2txt(file = sols_file)
+ file.copy(file.path(rundir, "param.sol"), file.path(usmdirs, "param.sol"))
+
+ # check param values
+ # sols_vals <- SticsRFiles::get_soil_txt(file.path(rundir, "param.sol"), stics_version = SticsRFiles::get_stics_versions_compat()$latest_version)
# DO NOTHING ELSE FOR NOW
-
- # this has some bug for sols.xml
- # SticsRFiles::convert_xml2txt(file = sols_file, javastics = javastics_path)
+
######################### Prepare Weather Station File ###############################
@@ -1345,13 +515,13 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
# DO NOTHING ELSE FOR NOW
# Should these be prepared by met2model.STICS?
-
+
############################## Prepare LAI forcing ####################################
## skipping for now
-
+
############################ Prepare Technical File ##################################
@@ -1372,7 +542,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
tec_df$concirr <- 0.11 # concentration of mineral N in irrigation water (kg ha-1 mm-1)
tec_df$ressuite <- 'straw+roots' # type of crop residue
tec_df$h2ograinmax <- 0.32 # maximal water content of fruits at harvest
-
+
# the following formalisms exist in the tec file:
## supply of organic residus
## soil tillage
@@ -1426,10 +596,10 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
"tauxexportfauche",
"restit",
"mscoupemini") # amount of mineral N added by fertiliser application at each cut of a forage crop, kg.ha-1
-
-
+
+
harvest_sub <- events_sub[events_sub$mgmt_operations_event == "harvest",]
-
+
harvest_list <- list()
for(hrow in seq_len(nrow(harvest_sub))){
@@ -1501,7 +671,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
# empty
fert_df <- data.frame(jul = NA, val = NA)
-
+
# If given fertilization date is within simulation days
if(as.Date(fert_sub$date[frow]) %in% dseq_sub){
@@ -1521,30 +691,51 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
}
fert_tec <- do.call("cbind", fert_list)
} #fertilizer-if end
-
-
- # DO NOTHING ELSE FOR NOW
- # TODO: ADD OTHER MANAGEMENT
-
- # same usm -> continue columns
- usm_tec_df <- cbind(tec_df, harvest_tec, fert_tec)
-
- usm_tec_df$ratiol <- 0
-
- SticsRFiles::gen_tec_xml(param_df = usm_tec_df,
- file=system.file("pecan_tec.xml", package = "PEcAn.STICS"),
- out_dir = usmdirs[usmi])
-
- # TODO: more than 1 USM, rbind
-
- SticsRFiles::convert_xml2txt(file = file.path(usmdirs[usmi], "tmp_tec.xml"))
-
- } # end-loop over usms
- } # TODO: if no events file is given modify other harvest parameters, e.g. harvest decision
+
+ # DO NOTHING ELSE FOR NOW
+ # TODO: ADD OTHER MANAGEMENT
+
+ # same usm -> continue columns
+ usm_tec_df <- cbind(tec_df, harvest_tec, fert_tec)
+
+ usm_tec_df$ratiol <- 0
+
+ SticsRFiles::gen_tec_xml(param_df = usm_tec_df,
+ file=system.file("pecan_tec.xml", package = "PEcAn.STICS"),
+ out_dir = usmdirs[usmi])
+
+ # TODO: more than 1 USM, rbind
+
+ SticsRFiles::convert_xml2txt(file = file.path(usmdirs[usmi], "tmp_tec.xml"))
+
+
+ } # end-loop over usms
+ } # TODO: if no events file is given modify other harvest parameters, e.g. harvest decision
+
+ ################################ Prepare Climate file ######################################
+ # symlink climate files
+ met_path <- settings$run$inputs$met$path
+
+ for(usmi in seq_along(usmdirs)){
+
+ usm_years <- c(sapply(strsplit(sub(".*_", "", basename(usmdirs)[usmi]), "-"), function(x) (as.numeric(x))))
+ dseq_sub <- dseq[lubridate::year(dseq) %in% usm_years]
+
+ clim_list <- list() # temporary solution
+ for(clim in seq_along(usm_years)){
+ # currently assuming only first year file has been passed to the settings, modify met2model if changing the logic
+ met_file <- gsub(paste0(lubridate::year(settings$run$start.date), ".climate"), paste0(usm_years[clim], ".climate"), met_path)
+ clim_list[[clim]] <- utils::read.table(met_file)
+ }
+ clim_run <- do.call("rbind", clim_list)
+ utils::write.table(clim_run, file.path(usmdirs[usmi], "climat.txt"), col.names = FALSE, row.names = FALSE)
+
+ }
+
################################ Prepare USM file ######################################
-
+
# loop for each USM
#ncodesuite <- ifelse(length(usmdirs) > 1, 1,0)
@@ -1575,8 +766,8 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
}else{
SticsRFiles::set_usm_txt(usm_file, "codesuite", 1, append = FALSE)
}
-
-
+
+
# number of simulated plants (sole crop=1; intercropping=2)
SticsRFiles::set_usm_txt(usm_file, "nbplantes", 1, append = FALSE) # hardcode for now
@@ -1648,30 +839,10 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
# TODO: more than 1 PFTs
# STICS can run 2 PFTs max: main crop + intercrop
}
-
-
-
- ################################ Prepare Run ######################################
- # symlink climate files
- met_path <- settings$run$inputs$met$path
- for(usmi in seq_along(usmdirs)){
-
- usm_years <- c(sapply(strsplit(sub(".*_", "", basename(usmdirs)[usmi]), "-"), function(x) (as.numeric(x))))
- dseq_sub <- dseq[lubridate::year(dseq) %in% usm_years]
-
- clim_list <- list() # temporary solution
- for(clim in seq_along(usm_years)){
- # currently assuming only first year file has been passed to the settings, modify met2model if changing the logic
- met_file <- gsub(paste0(lubridate::year(settings$run$start.date), ".climate"), paste0(usm_years[clim], ".climate"), met_path)
- clim_list[[clim]] <- utils::read.table(met_file)
- }
- clim_run <- do.call("rbind", clim_list)
- utils::write.table(clim_run, file.path(usmdirs[usmi], "climat.txt"), col.names = FALSE, row.names = FALSE)
-
- }
+ ################################ Prepare Run ######################################
# symlink to binary
file.symlink(stics_path, bindir)
@@ -1689,7 +860,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
# cmd_generate <- paste("java -jar", jexe,"--generate-txt", rundir, usm_name)
# cmd_run <- paste("java -jar", jexe,"--run", rundir, usm_name)
-
+
#-----------------------------------------------------------------------
# create launch script (which will create symlink)
if (!is.null(settings$model$jobtemplate) && file.exists(settings$model$jobtemplate)) {
@@ -1742,5 +913,197 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) {
writeLines(jobsh, con = file.path(settings$rundir, run.id, "job.sh"))
Sys.chmod(file.path(settings$rundir, run.id, "job.sh"))
-
+
} # write.config.STICS
+
+
+# ==================================================================================================#
+#' Function to translate pecan param names and units to stics names and units.
+#' @export
+#' @param trait.values trait.values, list
+#' @return translated list
+#' @author Quentin Bell
+# Based on pecan2lpjguess function by Istem Fer https://github.com/PecanProject/pecan/blob/develop/models/lpjguess/R/write.config.LPJGUESS.R#L229
+pecan2stics <- function(trait.values){
+
+ # TODO :match all stics and pecan names
+ vartable <- tibble::tribble(
+ ~sticsname, ~pecanname, ~sticsunits, ~pecanunits,
+ # Plant and soil related parameters
+ "abscission", "fracLeafFall", NA, NA,
+ "adens", "adens", NA, NA,
+ "adil", "adil", NA, NA,
+ "ahres", "ahres", NA, NA,
+ "akres", "ORdecomp_par", NA, NA,
+ "ampfroid", "vernalization_TAmp", NA, NA,
+ "awb", "awb", NA, NA,
+ "bdens", "dens_comp", NA, NA,
+ "bdil", "bdil", NA, NA,
+ "belong", "belong", NA, NA,
+ "beta", "maxTPincrease_waterstress", NA, NA,
+ "bhres", "bhres", NA, NA,
+ "bkres", "ORdecomp_rate", NA, NA,
+ "bwb", "bwb", NA, NA,
+ "celong", "celong", NA, NA,
+ "CNresmax", "CNresmax", NA, NA,
+ "CNresmin", "CNresmin", NA, NA,
+ "coefamflax", "coefamflax", NA, NA,
+ "coefb", "rad_on_conversion_eff", NA, NA,
+ "coefdrpmat", "coefdrpmat", NA, NA,
+ "coefflodrp", "coefflodrp", NA, NA,
+ "coeflaxsen", "coeflaxsen", NA, NA,
+ "coeflevamf", "coeflevamf", NA, NA,
+ "coeflevdrp", "coeflevdrp", NA, NA,
+ "coefmshaut", "biomass2usefulheight", NA, NA,
+ "coefsenlan", "coefsenlan", NA, NA,
+ "contrdamax", "db_reduc_rgr_max", NA, NA,
+ "CroCo", "fOR_decomp", NA, NA,
+ "croirac", "croirac", NA, NA,
+ "cwb", "minC2N_microbialbiomass", NA, NA,
+ "dacohes", "bd_rootgrowth_reduced", NA, NA,
+ "daseuilbas", "bd_rootgrowth_maximal", NA, NA,
+ "daseuilhaut", "bd_rootgrowth_impossible", NA, NA,
+ "debsenrac", "root_sen_dday", "round", "0",
+ "difN", "difN_FC", NA, NA,
+ "diftherm", "soil_thermal_diffusivity", NA, NA,
+ "dlaimaxbrut", "lai_max_rate", NA, NA,
+ "dlaimin", "lai_growth_rate_accelerating", NA, NA,
+ "draclong", "rootlength_prod_max", NA, NA,
+ "durvieF", "leaf_lifespan_max", NA, NA,
+ "durviesupmax", "relative_addlifespan_DT_excessN", NA, NA,
+ "efcroijuv", "RUE_juv", NA, NA,
+ "efcroirepro", "RUE_rep", NA, NA,
+ "efcroiveg", "RUE_veg", NA, NA,
+ "elmax", "coleoptile_elong_dark_max", NA, NA,
+ "extin", "extinction_coefficient_diffuse", NA, NA,
+ "fhminsat", "fhminsat", NA, NA,
+ "FINERT", "FINERT", NA, NA,
+ "FMIN1", "FMIN1", NA, NA,
+ "FMIN2", "FMIN2", NA, NA,
+ "FMIN3", "FMIN3", NA, NA,
+ "fNCbiomin", "fNCbiomin", NA, NA,
+ "fredkN", "Nlim_reductionOMdecomp", NA, NA,
+ "fredlN", "Nlim_reductionMBdecomp", NA, NA,
+ "fredNsup", "fredNsup", NA, NA,
+ "FTEMh", "T_p1_Hdecomp_rate", NA, NA,
+ "FTEMha", "T_p2_Hdecomp_rate", NA, NA,
+ "FTEMr", "FTEMr", NA, NA,
+ "FTEMra", "FTEMra", NA, NA,
+ "h2ofeuilverte", "water_content_TLP_leaf", NA, NA,
+ "hautmax", "HTMAX", NA, NA,
+ "hautbase", "height", NA, NA,
+ "hminm", "hminm", NA, NA,
+ "hoptm", "hoptm", NA, NA,
+ "INNmin", "INNmin", NA, NA,
+ "innsen", "innsen", NA, NA,
+ "innturgmin", "innturgmin", NA, NA,
+ "julvernal", "vernalization_init", "round", "0",
+ "jvcmini", "vernalization_days_min", "round", "0",
+ "kbio", "microbialbiomass_decay", NA, NA,
+ "khaut", "LAI2height", NA, NA,
+ "Kmabs1", "Kmabs1", NA, NA,
+ "kmax", "crop_water_max", NA, NA,
+ "laicomp", "lai_comp", NA, NA,
+ "longsperac", "SRL", NA, NA,
+ "lvfront", "rootdens_at_apex", NA, NA,
+ "lvopt", "lvopt", NA, NA,
+ "masecNmax", "masecNmax", NA, NA,
+ "maxazorac", "maxazorac", NA, NA,
+ "minazorac", "minazorac", NA, NA,
+ "minefnra", "minefnra", NA, NA,
+ "nlevlim1", "days2reduced_emergence_postgerm", "round", "0",
+ "nlevlim2", "days2stopped_emergence_postgerm", "round", "0",
+ "Nmeta", "Nmeta", NA, NA,
+ "Nreserve", "Nreserve", NA, NA,
+ "parazofmorte", "parazofmorte", NA, NA,
+ "pentlaimax", "pentlaimax", NA, NA,
+ "pHmaxvol", "pHmaxvol", NA, NA,
+ "pHminvol", "pHminvol", NA, NA,
+ "phobase", "phobase", NA, NA,
+ "phosat", "phosat", NA, NA,
+ "phyllotherme", "phyllochron", NA, NA,
+ "plNmin", "plNmin", NA, NA,
+ "pminruis", "precmin4runoff", NA, NA,
+ "Primingmax", "Primingmax", NA, NA,
+ "prophumtassrec", "SMC_compaction_delay_harvest", NA, NA,
+ "prophumtasssem", "SMC_compaction_delay_sow", NA, NA,
+ "proprac", "root2aerial_harvest", NA, NA,
+ "psihucc", "SWP_FC", NA, NA,
+ "psihumin", "SWP_WP", NA, NA,
+ "psisto", "psi_stomata_closure", NA, NA, # psisto, potential of stomatal closing (absolute value) (bars). note: units in betyDB are m, but Istem's prior is for testing
+ "psiturg", "leaf_psi_tlp", NA, NA,
+ "QNpltminINN", "QNpltminINN", NA, NA,
+ "rapsenturg", "rapsenturg", NA, NA,
+ "ratiodurvieI", "early2last_leaflife", NA, NA,
+ "ratiosen", "senes2total_biomass", NA, NA,
+ "rayon", "rayon", NA, NA,
+ "rdrain", "rdrain", NA, NA,
+ "remobres", "remobres", NA, NA,
+ "sensrsec", "rootsens2drought", NA, NA,
+ "slamax", "SLAMAX", "cm2 g-1", "m2 kg-1",
+ "slamin", "SLAMIN", "cm2 g-1", "m2 kg-1",
+ "stamflax", "cum_thermal_growth", NA, NA,
+ "stlevamf", "cum_thermal_juvenile", NA, NA,
+ "stlevdrp", "cum_thermal_filling", NA, NA,
+ "stpltger", "cum_thermal_germin", NA, NA,
+ "stressdev", "phasic_delay_max", NA, NA,
+ "swfacmin", "swfacmin", NA, NA,
+ "tcmax", "tcmax_growth", NA, NA,
+ "tcmin", "tcmin_growth", NA, NA,
+ "tcxstop", "tcmax_foliar_growth", NA, NA,
+ "tdmax", "tdmax", NA, NA,
+ "tdmin", "tdmin", NA, NA,
+ "temax", "temax", NA, NA,
+ "temin", "temin", NA, NA,
+ "teopt", "teopt", NA, NA,
+ "teoptbis", "teoptbis", NA, NA,
+ "tfroid", "vernalization_TOpt", NA, NA,
+ "tgmin", "emergence_Tmin", NA, NA,
+ "tigefeuil", "stem2leaf", NA, NA,
+ "tmin_mineralisation", "tmin_mineralisation", NA, NA,
+ "TREFh", "T_r_HOMdecomp", NA, NA,
+ "TREFr", "T_r_ORdecomp", NA, NA,
+ "udlaimax", "udlaimax", NA, NA,
+ "Vabs2", "Nupt_fertloss_halve", NA, NA,
+ "vlaimax", "vlaimax", NA, NA,
+ "Wh", "Wh", NA, NA,
+ "GMIN1", "GMIN1", NA, NA,
+ "GMIN2", "GMIN2", NA, NA,
+ "GMIN3", "GMIN3", NA, NA,
+ "GMIN4", "GMIN4", NA, NA,
+ "GMIN5", "GMIN5", NA, NA,
+ "GMIN6", "GMIN6", NA, NA,
+ "GMIN7", "GMIN7", NA, NA,
+ "Xorgmax", "maxNimm_mineralfert", NA, NA,
+ "y0msrac", "rootmin_harvest", NA, NA,
+ "yres", "microbialbiomass_C_yield", NA, NA,
+ # Missing pecan parameters without corresponding STICS parameters
+ "SLA", "SLA", NA, NA, # This is necessary as any parameters in the prior that are missing from this tibble cause an error.
+ )
+
+ trait.values <- lapply(trait.values, function(x){
+ names(x) <- vartable$sticsname[match(names(x), vartable$pecanname)]
+ return(x)
+ })
+
+ # TODO : unit conversions?
+ toconvert <- vartable$sticsname[!is.na(vartable$sticsunits)]
+ trait.values <- lapply(trait.values, function(x){
+ canconvert <- toconvert[toconvert %in% names(x)]
+ if(length(canconvert) != 0){
+ for(noc in seq_along(canconvert)){
+ if(vartable$sticsunits[vartable$sticsname == canconvert[noc]] == "round"){
+ x[,names(x) == canconvert[noc]] <- round(x[,names(x) == canconvert[noc]])
+ }else{
+ x[,names(x) == canconvert[noc]] <- PEcAn.utils::ud_convert(x[,names(x) == canconvert[noc]],
+ vartable$pecanunits[vartable$sticsname == canconvert[noc]],
+ vartable$sticsunits[vartable$sticsname == canconvert[noc]])
+ }
+
+ }
+ }
+ return(x)
+ })
+
+ return(trait.values)
+}
From dd54681d4d1fc4c59251a553a6d912bb13e895db Mon Sep 17 00:00:00 2001
From: Quentin Bell
Date: Thu, 14 Nov 2024 14:38:45 +0200
Subject: [PATCH 0039/1193] Added Quentin Bell and write.config.STICS change
---
CHANGELOG.md | 1 +
CITATION.cff | 3 +++
2 files changed, 4 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0cbd0976ae5..07555797a0f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
* Modules `PEcAn.allometry`, `PEcAn.assim.batch`, `PEcAn.data.mining`, `PEcAn.emulator`, `PEcAn.MA`, `PEcAn.photosynthesis`, `PEcAn.priors`, and `PEcAn.RTM`.
- Renamed master branch to main
- `PEcAn.all::pecan_version()` now reports commit hashes as well as version numbers for each installed package.
+- `write.conmfig.STICS()` now modifies parameters with vectors rather than individually.
### Removed
diff --git a/CITATION.cff b/CITATION.cff
index 7af92146298..32f9620f195 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -122,6 +122,9 @@ authors:
orcid: 'https://orcid.org/0000-0002-7430-7879'
- given-names: Harunobu Ishii
affiliation: Boston University Software & Application Innovation Lab(SAIL)
+ - affiliation: Finnish Meteorological Institute
+ given-names: Quentin Bell
+ orcid: 'https://orcid.org/0009-0005-0253-8642'
preferred-citation:
type: article
From b0a3cbe55e5e0c9df3bb52d559e651505c70a4b2 Mon Sep 17 00:00:00 2001
From: QdBell <5080358+qdbell@users.noreply.github.com>
Date: Fri, 15 Nov 2024 09:09:43 +0200
Subject: [PATCH 0040/1193] Update write.config.STICS.R with STICS filenames
Added which STICS file the parameter is found in to the pecan2stics lookup table.
---
models/stics/R/write.config.STICS.R | 294 ++++++++++++++--------------
1 file changed, 145 insertions(+), 149 deletions(-)
diff --git a/models/stics/R/write.config.STICS.R b/models/stics/R/write.config.STICS.R
index dd8afc7d9ac..5184f1d808c 100644
--- a/models/stics/R/write.config.STICS.R
+++ b/models/stics/R/write.config.STICS.R
@@ -928,157 +928,153 @@ pecan2stics <- function(trait.values){
# TODO :match all stics and pecan names
vartable <- tibble::tribble(
- ~sticsname, ~pecanname, ~sticsunits, ~pecanunits,
+ ~sticsname, ~pecanname, ~sticsunits, ~pecanunits, ~sticsfile,
# Plant and soil related parameters
- "abscission", "fracLeafFall", NA, NA,
- "adens", "adens", NA, NA,
- "adil", "adil", NA, NA,
- "ahres", "ahres", NA, NA,
- "akres", "ORdecomp_par", NA, NA,
- "ampfroid", "vernalization_TAmp", NA, NA,
- "awb", "awb", NA, NA,
- "bdens", "dens_comp", NA, NA,
- "bdil", "bdil", NA, NA,
- "belong", "belong", NA, NA,
- "beta", "maxTPincrease_waterstress", NA, NA,
- "bhres", "bhres", NA, NA,
- "bkres", "ORdecomp_rate", NA, NA,
- "bwb", "bwb", NA, NA,
- "celong", "celong", NA, NA,
- "CNresmax", "CNresmax", NA, NA,
- "CNresmin", "CNresmin", NA, NA,
- "coefamflax", "coefamflax", NA, NA,
- "coefb", "rad_on_conversion_eff", NA, NA,
- "coefdrpmat", "coefdrpmat", NA, NA,
- "coefflodrp", "coefflodrp", NA, NA,
- "coeflaxsen", "coeflaxsen", NA, NA,
- "coeflevamf", "coeflevamf", NA, NA,
- "coeflevdrp", "coeflevdrp", NA, NA,
- "coefmshaut", "biomass2usefulheight", NA, NA,
- "coefsenlan", "coefsenlan", NA, NA,
- "contrdamax", "db_reduc_rgr_max", NA, NA,
- "CroCo", "fOR_decomp", NA, NA,
- "croirac", "croirac", NA, NA,
- "cwb", "minC2N_microbialbiomass", NA, NA,
- "dacohes", "bd_rootgrowth_reduced", NA, NA,
- "daseuilbas", "bd_rootgrowth_maximal", NA, NA,
- "daseuilhaut", "bd_rootgrowth_impossible", NA, NA,
- "debsenrac", "root_sen_dday", "round", "0",
- "difN", "difN_FC", NA, NA,
- "diftherm", "soil_thermal_diffusivity", NA, NA,
- "dlaimaxbrut", "lai_max_rate", NA, NA,
- "dlaimin", "lai_growth_rate_accelerating", NA, NA,
- "draclong", "rootlength_prod_max", NA, NA,
- "durvieF", "leaf_lifespan_max", NA, NA,
- "durviesupmax", "relative_addlifespan_DT_excessN", NA, NA,
- "efcroijuv", "RUE_juv", NA, NA,
- "efcroirepro", "RUE_rep", NA, NA,
- "efcroiveg", "RUE_veg", NA, NA,
- "elmax", "coleoptile_elong_dark_max", NA, NA,
- "extin", "extinction_coefficient_diffuse", NA, NA,
- "fhminsat", "fhminsat", NA, NA,
- "FINERT", "FINERT", NA, NA,
- "FMIN1", "FMIN1", NA, NA,
- "FMIN2", "FMIN2", NA, NA,
- "FMIN3", "FMIN3", NA, NA,
- "fNCbiomin", "fNCbiomin", NA, NA,
- "fredkN", "Nlim_reductionOMdecomp", NA, NA,
- "fredlN", "Nlim_reductionMBdecomp", NA, NA,
- "fredNsup", "fredNsup", NA, NA,
- "FTEMh", "T_p1_Hdecomp_rate", NA, NA,
- "FTEMha", "T_p2_Hdecomp_rate", NA, NA,
- "FTEMr", "FTEMr", NA, NA,
- "FTEMra", "FTEMra", NA, NA,
- "h2ofeuilverte", "water_content_TLP_leaf", NA, NA,
- "hautmax", "HTMAX", NA, NA,
- "hautbase", "height", NA, NA,
- "hminm", "hminm", NA, NA,
- "hoptm", "hoptm", NA, NA,
- "INNmin", "INNmin", NA, NA,
- "innsen", "innsen", NA, NA,
- "innturgmin", "innturgmin", NA, NA,
- "julvernal", "vernalization_init", "round", "0",
- "jvcmini", "vernalization_days_min", "round", "0",
- "kbio", "microbialbiomass_decay", NA, NA,
- "khaut", "LAI2height", NA, NA,
- "Kmabs1", "Kmabs1", NA, NA,
- "kmax", "crop_water_max", NA, NA,
- "laicomp", "lai_comp", NA, NA,
- "longsperac", "SRL", NA, NA,
- "lvfront", "rootdens_at_apex", NA, NA,
- "lvopt", "lvopt", NA, NA,
- "masecNmax", "masecNmax", NA, NA,
- "maxazorac", "maxazorac", NA, NA,
- "minazorac", "minazorac", NA, NA,
- "minefnra", "minefnra", NA, NA,
- "nlevlim1", "days2reduced_emergence_postgerm", "round", "0",
- "nlevlim2", "days2stopped_emergence_postgerm", "round", "0",
- "Nmeta", "Nmeta", NA, NA,
- "Nreserve", "Nreserve", NA, NA,
- "parazofmorte", "parazofmorte", NA, NA,
- "pentlaimax", "pentlaimax", NA, NA,
- "pHmaxvol", "pHmaxvol", NA, NA,
- "pHminvol", "pHminvol", NA, NA,
- "phobase", "phobase", NA, NA,
- "phosat", "phosat", NA, NA,
- "phyllotherme", "phyllochron", NA, NA,
- "plNmin", "plNmin", NA, NA,
- "pminruis", "precmin4runoff", NA, NA,
- "Primingmax", "Primingmax", NA, NA,
- "prophumtassrec", "SMC_compaction_delay_harvest", NA, NA,
- "prophumtasssem", "SMC_compaction_delay_sow", NA, NA,
- "proprac", "root2aerial_harvest", NA, NA,
- "psihucc", "SWP_FC", NA, NA,
- "psihumin", "SWP_WP", NA, NA,
- "psisto", "psi_stomata_closure", NA, NA, # psisto, potential of stomatal closing (absolute value) (bars). note: units in betyDB are m, but Istem's prior is for testing
- "psiturg", "leaf_psi_tlp", NA, NA,
- "QNpltminINN", "QNpltminINN", NA, NA,
- "rapsenturg", "rapsenturg", NA, NA,
- "ratiodurvieI", "early2last_leaflife", NA, NA,
- "ratiosen", "senes2total_biomass", NA, NA,
- "rayon", "rayon", NA, NA,
- "rdrain", "rdrain", NA, NA,
- "remobres", "remobres", NA, NA,
- "sensrsec", "rootsens2drought", NA, NA,
- "slamax", "SLAMAX", "cm2 g-1", "m2 kg-1",
- "slamin", "SLAMIN", "cm2 g-1", "m2 kg-1",
- "stamflax", "cum_thermal_growth", NA, NA,
- "stlevamf", "cum_thermal_juvenile", NA, NA,
- "stlevdrp", "cum_thermal_filling", NA, NA,
- "stpltger", "cum_thermal_germin", NA, NA,
- "stressdev", "phasic_delay_max", NA, NA,
- "swfacmin", "swfacmin", NA, NA,
- "tcmax", "tcmax_growth", NA, NA,
- "tcmin", "tcmin_growth", NA, NA,
- "tcxstop", "tcmax_foliar_growth", NA, NA,
- "tdmax", "tdmax", NA, NA,
- "tdmin", "tdmin", NA, NA,
- "temax", "temax", NA, NA,
- "temin", "temin", NA, NA,
- "teopt", "teopt", NA, NA,
- "teoptbis", "teoptbis", NA, NA,
- "tfroid", "vernalization_TOpt", NA, NA,
- "tgmin", "emergence_Tmin", NA, NA,
- "tigefeuil", "stem2leaf", NA, NA,
- "tmin_mineralisation", "tmin_mineralisation", NA, NA,
- "TREFh", "T_r_HOMdecomp", NA, NA,
- "TREFr", "T_r_ORdecomp", NA, NA,
- "udlaimax", "udlaimax", NA, NA,
- "Vabs2", "Nupt_fertloss_halve", NA, NA,
- "vlaimax", "vlaimax", NA, NA,
- "Wh", "Wh", NA, NA,
- "GMIN1", "GMIN1", NA, NA,
- "GMIN2", "GMIN2", NA, NA,
- "GMIN3", "GMIN3", NA, NA,
- "GMIN4", "GMIN4", NA, NA,
- "GMIN5", "GMIN5", NA, NA,
- "GMIN6", "GMIN6", NA, NA,
- "GMIN7", "GMIN7", NA, NA,
- "Xorgmax", "maxNimm_mineralfert", NA, NA,
- "y0msrac", "rootmin_harvest", NA, NA,
- "yres", "microbialbiomass_C_yield", NA, NA,
+ "abscission", "fracLeafFall", NA, NA, "plt.xml",
+ "adens", "adens", NA, NA, "plt.xml",
+ "adil", "adil", NA, NA, "plt.xml",
+ "ahres", "ahres", NA, NA, "param_gen.xml",
+ "akres", "ORdecomp_par", NA, NA, "param_gen.xml",
+ "ampfroid", "vernalization_TAmp", NA, NA, "plt.xml",
+ "awb", "awb", NA, NA, "param_gen.xml",
+ "bdens", "dens_comp", NA, NA, "plt.xml",
+ "bdil", "bdil", NA, NA, "plt.xml",
+ "belong", "belong", NA, NA, "plt.xml",
+ "beta", "maxTPincrease_waterstress", NA, NA, "param_gen.xml",
+ "bhres", "bhres", NA, NA, "param_gen.xml",
+ "bkres", "ORdecomp_rate", NA, NA, "param_gen.xml",
+ "bwb", "bwb", NA, NA, "param_gen.xml",
+ "celong", "celong", NA, NA, "plt.xml",
+ "CNresmax", "CNresmax", NA, NA, "param_gen.xml",
+ "CNresmin", "CNresmin", NA, NA, "param_gen.xml",
+ "coefamflax", "coefamflax", NA, NA, "plt.xml",
+ "coefb", "rad_on_conversion_eff", NA, NA, "param_gen.xml",
+ "coefdrpmat", "coefdrpmat", NA, NA, "plt.xml",
+ "coefflodrp", "coefflodrp", NA, NA, "plt.xml",
+ "coeflaxsen", "coeflaxsen", NA, NA, "plt.xml",
+ "coeflevamf", "coeflevamf", NA, NA, "plt.xml",
+ "coeflevdrp", "coeflevdrp", NA, NA, "plt.xml",
+ "coefmshaut", "biomass2usefulheight", NA, NA, "plt.xml",
+ "coefsenlan", "coefsenlan", NA, NA, "plt.xml",
+ "contrdamax", "db_reduc_rgr_max", NA, NA, "plt.xml",
+ "CroCo", "fOR_decomp", NA, NA, "param_gen.xml",
+ "croirac", "croirac", NA, NA, "plt.xml",
+ "cwb", "minC2N_microbialbiomass", NA, NA, "param_gen.xml",
+ "dacohes", "bd_rootgrowth_reduced", NA, NA, "param_gen.xml",
+ "daseuilbas", "bd_rootgrowth_maximal", NA, NA, "param_gen.xml",
+ "daseuilhaut", "bd_rootgrowth_impossible", NA, NA, "param_gen.xml",
+ "debsenrac", "root_sen_dday", "round", "0", "plt.xml",
+ "difN", "difN_FC", NA, NA, "param_gen.xml",
+ "diftherm", "soil_thermal_diffusivity", NA, NA, "param_gen.xml",
+ "dlaimaxbrut", "lai_max_rate", NA, NA, "plt.xml",
+ "dlaimin", "lai_growth_rate_accelerating", NA, NA, "plt.xml",
+ "draclong", "rootlength_prod_max", NA, NA, "plt.xml",
+ "durvieF", "leaf_lifespan_max", NA, NA, "plt.xml",
+ "durviesupmax", "relative_addlifespan_DT_excessN", NA, NA, "plt.xml",
+ "efcroijuv", "RUE_juv", NA, NA, "plt.xml",
+ "efcroirepro", "RUE_rep", NA, NA, "plt.xml",
+ "efcroiveg", "RUE_veg", NA, NA, "plt.xml",
+ "elmax", "coleoptile_elong_dark_max", NA, NA, "plt.xml",
+ "extin", "extinction_coefficient_diffuse", NA, NA, "plt.xml",
+ "fhminsat", "fhminsat", NA, NA, "param_gen.xml",
+ "FINERT", "FINERT", NA, NA, "sols.xml",
+ "fNCbiomin", "fNCbiomin", NA, NA, "param_gen.xml",
+ "fredkN", "Nlim_reductionOMdecomp", NA, NA, "param_gen.xml",
+ "fredlN", "Nlim_reductionMBdecomp", NA, NA, "param_gen.xml",
+ "fredNsup", "fredNsup", NA, NA, "param_gen.xml",
+ "FTEMh", "T_p1_Hdecomp_rate", NA, NA, "param_gen.xml",
+ "FTEMha", "T_p2_Hdecomp_rate", NA, NA, "param_gen.xml",
+ "FTEMr", "FTEMr", NA, NA, "param_gen.xml",
+ "FTEMra", "FTEMra", NA, NA, "param_gen.xml",
+ "h2ofeuilverte", "water_content_TLP_leaf", NA, NA, "plt.xml",
+ "hautmax", "HTMAX", NA, NA, "plt.xml",
+ "hautbase", "height", NA, NA, "plt.xml",
+ "hminm", "hminm", NA, NA, "param_gen.xml",
+ "hoptm", "hoptm", NA, NA, "param_gen.xml",
+ "INNmin", "INNmin", NA, NA, "plt.xml",
+ "innsen", "innsen", NA, NA, "plt.xml",
+ "innturgmin", "innturgmin", NA, NA, "plt.xml",
+ "julvernal", "vernalization_init", "round", "0", "plt.xml",
+ "jvcmini", "vernalization_days_min", "round", "0", "plt.xml",
+ "kbio", "microbialbiomass_decay", NA, NA, "param_gen.xml",
+ "khaut", "LAI2height", NA, NA, "plt.xml",
+ "Kmabs1", "Kmabs1", NA, NA, "plt.xml",
+ "kmax", "crop_water_max", NA, NA, "plt.xml",
+ "laicomp", "lai_comp", NA, NA, "plt.xml",
+ "longsperac", "SRL", NA, NA, "plt.xml",
+ "lvfront", "rootdens_at_apex", NA, NA, "plt.xml",
+ "lvopt", "lvopt", NA, NA, "param_gen.xml",
+ "masecNmax", "masecNmax", NA, NA, "plt.xml",
+ "maxazorac", "maxazorac", NA, NA, "plt.xml",
+ "minazorac", "minazorac", NA, NA, "plt.xml",
+ "minefnra", "minefnra", NA, NA, "plt.xml",
+ "nlevlim1", "days2reduced_emergence_postgerm", "round", "0", "plt.xml",
+ "nlevlim2", "days2stopped_emergence_postgerm", "round", "0", "plt.xml",
+ "Nmeta", "Nmeta", NA, NA, "plt.xml",
+ "Nreserve", "Nreserve", NA, NA, "plt.xml",
+ "parazofmorte", "parazofmorte", NA, NA, "plt.xml",
+ "pentlaimax", "pentlaimax", NA, NA, "plt.xml",
+ "pHmaxvol", "pHmaxvol", NA, NA, "param_gen.xml",
+ "pHminvol", "pHminvol", NA, NA, "param_gen.xml",
+ "phobase", "phobase", NA, NA, "plt.xml",
+ "phosat", "phosat", NA, NA, "plt.xml",
+ "phyllotherme", "phyllochron", NA, NA, "plt.xml",
+ "plNmin", "plNmin", NA, NA, "param_gen.xml",
+ "pminruis", "precmin4runoff", NA, NA, "param_gen.xml",
+ "Primingmax", "Primingmax", NA, NA, "param_gen.xml",
+ "prophumtassrec", "SMC_compaction_delay_harvest", NA, NA, "param_gen.xml",
+ "prophumtasssem", "SMC_compaction_delay_sow", NA, NA, "param_gen.xml",
+ "proprac", "root2aerial_harvest", NA, NA, "param_gen.xml",
+ "psihucc", "SWP_FC", NA, NA, "param_gen.xml",
+ "psihumin", "SWP_WP", NA, NA, "param_gen.xml",
+ "psisto", "psi_stomata_closure", NA, NA, "plt.xml", # psisto, potential of stomatal closing (absolute value) (bars). note: units in betyDB are m, but Istem's prior is for testing
+ "psiturg", "leaf_psi_tlp", NA, NA, "plt.xml",
+ "QNpltminINN", "QNpltminINN", NA, NA, "param_gen.xml",
+ "rapsenturg", "rapsenturg", NA, NA, "plt.xml",
+ "ratiodurvieI", "early2last_leaflife", NA, NA, "plt.xml",
+ "ratiosen", "senes2total_biomass", NA, NA, "plt.xml",
+ "rayon", "rayon", NA, NA, "plt.xml",
+ "rdrain", "rdrain", NA, NA, "param_gen.xml",
+ "remobres", "remobres", NA, NA, "plt.xml",
+ "sensrsec", "rootsens2drought", NA, NA, "plt.xml",
+ "slamax", "SLAMAX", "cm2 g-1", "m2 kg-1", "plt.xml",
+ "slamin", "SLAMIN", "cm2 g-1", "m2 kg-1", "plt.xml",
+ "stamflax", "cum_thermal_growth", NA, NA, "plt.xml",
+ "stlevamf", "cum_thermal_juvenile", NA, NA, "plt.xml",
+ "stlevdrp", "cum_thermal_filling", NA, NA, "plt.xml",
+ "stpltger", "cum_thermal_germin", NA, NA, "plt.xml",
+ "stressdev", "phasic_delay_max", NA, NA, "plt.xml",
+ "swfacmin", "swfacmin", NA, NA, "plt.xml",
+ "tcmax", "tcmax_growth", NA, NA, "plt.xml",
+ "tcmin", "tcmin_growth", NA, NA, "plt.xml",
+ "tcxstop", "tcmax_foliar_growth", NA, NA, "plt.xml",
+ "tdmax", "tdmax", NA, NA, "plt.xml",
+ "tdmin", "tdmin", NA, NA, "plt.xml",
+ "temax", "temax", NA, NA, "plt.xml",
+ "temin", "temin", NA, NA, "plt.xml",
+ "teopt", "teopt", NA, NA, "plt.xml",
+ "teoptbis", "teoptbis", NA, NA, "plt.xml",
+ "tfroid", "vernalization_TOpt", NA, NA, "plt.xml",
+ "tgmin", "emergence_Tmin", NA, NA, "plt.xml",
+ "tigefeuil", "stem2leaf", NA, NA, "plt.xml",
+ "tmin_mineralisation", "tmin_mineralisation", NA, NA, "param_gen.xml",
+ "TREFh", "T_r_HOMdecomp", NA, NA, "param_gen.xml",
+ "TREFr", "T_r_ORdecomp", NA, NA, "param_gen.xml",
+ "udlaimax", "udlaimax", NA, NA, "plt.xml",
+ "Vabs2", "Nupt_fertloss_halve", NA, NA, "param_gen.xml",
+ "vlaimax", "vlaimax", NA, NA, "plt.xml",
+ "Wh", "Wh", NA, NA, "param_gen.xml",
+ "GMIN1", "GMIN1", NA, NA, "param_gen.xml",
+ "GMIN2", "GMIN2", NA, NA, "param_gen.xml",
+ "GMIN3", "GMIN3", NA, NA, "param_gen.xml",
+ "GMIN4", "GMIN4", NA, NA, "param_gen.xml",
+ "GMIN5", "GMIN5", NA, NA, "param_gen.xml",
+ "GMIN6", "GMIN6", NA, NA, "param_gen.xml",
+ "GMIN7", "GMIN7", NA, NA, "param_gen.xml",
+ "Xorgmax", "maxNimm_mineralfert", NA, NA, "param_gen.xml",
+ "y0msrac", "rootmin_harvest", NA, NA, "param_gen.xml",
+ "yres", "microbialbiomass_C_yield", NA, NA, "param_gen.xml",
# Missing pecan parameters without corresponding STICS parameters
- "SLA", "SLA", NA, NA, # This is necessary as any parameters in the prior that are missing from this tibble cause an error.
)
trait.values <- lapply(trait.values, function(x){
From 938434326732df0e0752df1893c384d205d3b345 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 2 Dec 2024 19:47:17 +0530
Subject: [PATCH 0041/1193] Update machine host to remove duplicate code
Signed-off-by: Abhinav Pandey
---
...abase.entries.R => add_database_entries.R} | 2 +-
base/db/R/check.missing.files.R | 12 ++--
base/db/R/convert_input.R | 65 +++++--------------
...{get.machine.info.R => get_machine_info.R} | 23 ++++++-
...ase.entries.Rd => add_database_entries.Rd} | 13 ++--
base/db/man/check_missing_files.Rd | 9 +--
...et.machine.host.Rd => get_machine_host.Rd} | 8 +--
...et.machine.info.Rd => get_machine_info.Rd} | 8 +--
base/db/tests/testthat/test.convert_input.R | 4 +-
9 files changed, 64 insertions(+), 80 deletions(-)
rename base/db/R/{add.database.entries.R => add_database_entries.R} (99%)
rename base/db/R/{get.machine.info.R => get_machine_info.R} (66%)
rename base/db/man/{add.database.entries.Rd => add_database_entries.Rd} (90%)
rename base/db/man/{get.machine.host.Rd => get_machine_host.Rd} (73%)
rename base/db/man/{get.machine.info.Rd => get_machine_info.Rd} (73%)
diff --git a/base/db/R/add.database.entries.R b/base/db/R/add_database_entries.R
similarity index 99%
rename from base/db/R/add.database.entries.R
rename to base/db/R/add_database_entries.R
index 8b36e884398..87814e04db8 100644
--- a/base/db/R/add.database.entries.R
+++ b/base/db/R/add_database_entries.R
@@ -21,7 +21,7 @@
#'
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
-add.database.entries <- function(
+add_database_entries <- function(
result, con, start_date,
end_date, overwrite,
insert.new.file, input.args,
diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
index f3a496cf5de..9c119239988 100644
--- a/base/db/R/check.missing.files.R
+++ b/base/db/R/check.missing.files.R
@@ -19,12 +19,6 @@ check_missing_files <- function(result, existing.input = NULL, existing.dbfile =
)
if (any(result_sizes$missing) || any(result_sizes$empty)) {
- log_format_df <- function(df) {
- formatted_df <- rbind(colnames(df), format(df))
- formatted_text <- purrr::reduce(formatted_df, paste, sep = " ")
- paste(formatted_text, collapse = "\n")
- }
-
PEcAn.logger::logger.severe(
"Requested Processing produced empty files or Nonexistent files:\n",
log_format_df(result_sizes[, c(1, 8, 9, 10)]),
@@ -44,3 +38,9 @@ check_missing_files <- function(result, existing.input = NULL, existing.dbfile =
}
return(list(existing.input, existing.dbfile))
}
+
+log_format_df <- function(df) {
+ formatted_df <- rbind(colnames(df), format(df))
+ formatted_text <- purrr::reduce(formatted_df, paste, sep = " ")
+ paste(formatted_text, collapse = "\n")
+}
\ No newline at end of file
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 042c9da08db..94f11df502e 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -176,7 +176,7 @@ convert_input <-
existing.input[[i]]$end_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$end_date), "UTC")
## Obtain machine information
- machine.host.info <- get.machine.host(host, con = con)
+ machine.host.info <- get_machine_host(host, con = con)
machine.host <- machine.host.info$machine.host
machine <- machine.host.info$machine
#Grab machine info of file that exists
@@ -341,33 +341,16 @@ convert_input <-
add = TRUE
) # Close on.exit
}
-
-
-
- #Grab machine info of file that exists
- existing.machine <- db.query(paste0("SELECT * from machines where id = '",
- existing.dbfile$machine_id, "'"), con)
-
- #Grab machine info of host machine
- machine.host.info <- get.machine.host(host, con = con)
- machine.host <- machine.host.info$machine.host
- machine <- machine.host.info$machine
-
- if (existing.machine$id != machine$id) {
-
- PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.")
- PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency")
- insert.new.file <- TRUE
- start_date <- existing.input$start_date
- end_date <- existing.input$end_date
-
- } else {
- # There's an existing input that spans desired start/end dates with files on this machine
- PEcAn.logger::logger.info("Skipping this input conversion because files are already available.")
- return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id))
+
+ existing_files_result <- check_and_handle_existing_files(existing.dbfile, host, con, existing.input, start_date, end_date)
+ if (!is.null(existing_files_result$input.id)) {
+ return(existing_files_result)
+ } else {
+ insert.new.file <- existing_files_result$insert.new.file
+ start_date <- existing_files_result$start_date
+ end_date <- existing_files_result$end_date
}
-
-
+
} else {
# No existing record found. Should be good to go with regular conversion.
}
@@ -467,25 +450,13 @@ convert_input <-
} else if ((start_date >= existing.input$start_date) &&
(end_date <= existing.input$end_date)) {
- #Grab machine info of file that exists
- existing.machine <- db.query(paste0("SELECT * from machines where id = '",
- existing.dbfile$machine_id, "'"), con)
-
- #Grab machine info of host machine
- machine.host.info <- get.machine.host(host, con = con)
- machine.host <- machine.host.info$machine.host
- machine <- machine.host.info$machine
-
- if(existing.machine$id != machine$id){
- PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.")
- PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency")
- insert.new.file <- TRUE
- start_date <- existing.input$start_date
- end_date <- existing.input$end_date
+ existing_files_result <- check_and_handle_existing_files(existing.dbfile, host, con, existing.input, start_date, end_date)
+ if (!is.null(existing_files_result$input.id)) {
+ return(existing_files_result)
} else {
- # There's an existing input that spans desired start/end dates with files on this machine
- PEcAn.logger::logger.info("Skipping this input conversion because files are already available.")
- return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id))
+ insert.new.file <- existing_files_result$insert.new.file
+ start_date <- existing_files_result$start_date
+ end_date <- existing_files_result$end_date
}
} else {
@@ -516,7 +487,7 @@ convert_input <-
#---------------------------------------------------------------------------------------------------------------#
# Get machine information
- machine.info <- get.machine.info(host, input.args = input.args, input.id = input.id)
+ machine.info <- get_machine_info(host, input.args = input.args, input.id = input.id)
if (any(sapply(machine.info, is.null))) {
PEcAn.logger::logger.error("failed lookup of inputs or dbfiles")
@@ -596,7 +567,7 @@ convert_input <-
#---------------------------------------------------------------#
# New arrangement of database adding code to deal with ensembles.
if(write) {
- add_entries_result <- return (add.database.entries(result, con, start_date,
+ add_entries_result <- return (add_database_entries(result, con, start_date,
end_date, overwrite,
insert.new.file, input.args,
machine, mimetype, formatname,
diff --git a/base/db/R/get.machine.info.R b/base/db/R/get_machine_info.R
similarity index 66%
rename from base/db/R/get.machine.info.R
rename to base/db/R/get_machine_info.R
index 14123a586e9..2d35cfaca06 100644
--- a/base/db/R/get.machine.info.R
+++ b/base/db/R/get_machine_info.R
@@ -9,7 +9,7 @@
get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) {
- machine.host.info <- get.machine.host(host, con = con)
+ machine.host.info <- get_machine_host(host, con = con)
machine.host <- machine.host.info$machine.host
machine <- machine.host.info$machine
@@ -81,3 +81,24 @@ get_machine_host <- function(host, con) {
return(list(machine.host, machine))
}
+
+check_and_handle_existing_files <- function(existing.dbfile, host, con, existing.input, start_date, end_date) {
+ # Grab machine info of file that exists
+ existing.machine <- db.query(paste0("SELECT * from machines where id = '",
+ existing.dbfile$machine_id, "'"), con)
+
+ # Grab machine info of host machine
+ machine.host.info <- get_machine_host(host, con = con)
+ machine.host <- machine.host.info$machine.host
+ machine <- machine.host.info$machine
+
+ if (existing.machine$id != machine$id) {
+ PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.")
+ PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency")
+ return(list(insert.new.file = TRUE, start_date = existing.input$start_date, end_date = existing.input$end_date))
+ } else {
+ # There's an existing input that spans desired start/end dates with files on this machine
+ PEcAn.logger::logger.info("Skipping this input conversion because files are already available.")
+ return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id))
+ }
+}
\ No newline at end of file
diff --git a/base/db/man/add.database.entries.Rd b/base/db/man/add_database_entries.Rd
similarity index 90%
rename from base/db/man/add.database.entries.Rd
rename to base/db/man/add_database_entries.Rd
index 5de01cd1705..ad1f8acbc05 100644
--- a/base/db/man/add.database.entries.Rd
+++ b/base/db/man/add_database_entries.Rd
@@ -1,15 +1,14 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add.database.entries.R
-\name{add.database.entries}
-\alias{add.database.entries}
+% Please edit documentation in R/add_database_entries.R
+\name{add_database_entries}
+\alias{add_database_entries}
\title{Return new arrangement of database while adding code to deal with ensembles}
\usage{
-add.database.entries(
+add_database_entries(
result,
con,
start_date,
end_date,
- write,
overwrite,
insert.new.file,
input.args,
@@ -33,8 +32,6 @@ add.database.entries(
\item{end_date}{end date of the data}
-\item{write}{whether to write to the database}
-
\item{overwrite}{Logical: If a file already exists, create a fresh copy?}
\item{insert.new.file}{whether to insert a new file}
@@ -58,6 +55,8 @@ add.database.entries(
\item{existing.dbfile}{existing dbfile records}
\item{input}{input records}
+
+\item{write}{whether to write to the database}
}
\value{
list of input and dbfile ids
diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd
index 8dd541f9380..6f5f8c23ce9 100644
--- a/base/db/man/check_missing_files.Rd
+++ b/base/db/man/check_missing_files.Rd
@@ -4,18 +4,11 @@
\alias{check_missing_files}
\title{Function to check if result has empty or missing files}
\usage{
-check_missing_files(
- result,
- outname,
- existing.input = NULL,
- existing.dbfile = NULL
-)
+check_missing_files(result, existing.input = NULL, existing.dbfile = NULL)
}
\arguments{
\item{result}{A list of dataframes with file paths}
-\item{outname}{Name of the output file}
-
\item{existing.input}{Existing input records}
\item{existing.dbfile}{Existing dbfile records}
diff --git a/base/db/man/get.machine.host.Rd b/base/db/man/get_machine_host.Rd
similarity index 73%
rename from base/db/man/get.machine.host.Rd
rename to base/db/man/get_machine_host.Rd
index 926035dec0c..4dbc2258ab7 100644
--- a/base/db/man/get.machine.host.Rd
+++ b/base/db/man/get_machine_host.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get.machine.info.R
-\name{get.machine.host}
-\alias{get.machine.host}
+% Please edit documentation in R/get_machine_info.R
+\name{get_machine_host}
+\alias{get_machine_host}
\title{Helper Function to retrieve machine host and machine informations}
\usage{
-get.machine.host(host, con = NULL)
+get_machine_host(host, con)
}
\arguments{
\item{host}{host information}
diff --git a/base/db/man/get.machine.info.Rd b/base/db/man/get_machine_info.Rd
similarity index 73%
rename from base/db/man/get.machine.info.Rd
rename to base/db/man/get_machine_info.Rd
index 6e57013c4d7..68221a9c565 100644
--- a/base/db/man/get.machine.info.Rd
+++ b/base/db/man/get_machine_info.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get.machine.info.R
-\name{get.machine.info}
-\alias{get.machine.info}
+% Please edit documentation in R/get_machine_info.R
+\name{get_machine_info}
+\alias{get_machine_info}
\title{Get machine information from db}
\usage{
-get.machine.info(host, input.args, input.id = NULL, con = NULL)
+get_machine_info(host, input.args, input.id = NULL, con = NULL)
}
\arguments{
\item{host}{host information}
diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R
index e4f40e7bcb5..474ed4eaaeb 100644
--- a/base/db/tests/testthat/test.convert_input.R
+++ b/base/db/tests/testthat/test.convert_input.R
@@ -3,7 +3,7 @@ test_that("`convert_input()` able to call the respective download function for a
mockery::stub(convert_input, "dbfile.input.check", data.frame())
mockery::stub(convert_input, "db.query", data.frame(id = 1))
- mockery::stub(convert_input, "get.machine.info", list(
+ mockery::stub(convert_input, "get_machine_info", list(
machine = data.frame(id = 1),
input = data.frame(id = 1),
dbfile = data.frame(id = 1)
@@ -20,7 +20,7 @@ test_that("`convert_input()` able to call the respective download function for a
existing.input = list(data.frame(file = character(0))),
existing.dbfile = list(data.frame(file = character(0)))
))
- mockery::stub(convert_input, "add.database.entries", list(input.id = 1, dbfile.id = 1))
+ mockery::stub(convert_input, "add_database_entries", list(input.id = 1, dbfile.id = 1))
convert_input(
input.id = NA,
From 04d78354ed0798fc08b82efb4f74c8b90f70d39c Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 23 Dec 2024 05:39:07 +0530
Subject: [PATCH 0042/1193] Update naming
---
base/db/R/{check.missing.files.R => check_missing_files.R} | 0
base/db/man/check_missing_files.Rd | 2 +-
2 files changed, 1 insertion(+), 1 deletion(-)
rename base/db/R/{check.missing.files.R => check_missing_files.R} (100%)
diff --git a/base/db/R/check.missing.files.R b/base/db/R/check_missing_files.R
similarity index 100%
rename from base/db/R/check.missing.files.R
rename to base/db/R/check_missing_files.R
diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd
index 6f5f8c23ce9..fa63c7878d5 100644
--- a/base/db/man/check_missing_files.Rd
+++ b/base/db/man/check_missing_files.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/check.missing.files.R
+% Please edit documentation in R/check_missing_files.R
\name{check_missing_files}
\alias{check_missing_files}
\title{Function to check if result has empty or missing files}
From e9a95eef0704025afcdc02aa8c9703aa1117dddb Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 27 Jan 2025 17:42:35 +0530
Subject: [PATCH 0043/1193] Update documentations wrt comments by @mdietze
---
base/db/R/add_database_entries.R | 47 ++++++++++++++++-------------
base/db/R/convert_input.R | 12 ++++----
base/db/man/add_database_entries.Rd | 45 ++++++++++++++-------------
3 files changed, 56 insertions(+), 48 deletions(-)
diff --git a/base/db/R/add_database_entries.R b/base/db/R/add_database_entries.R
index 87814e04db8..e9dcbadfdce 100644
--- a/base/db/R/add_database_entries.R
+++ b/base/db/R/add_database_entries.R
@@ -1,24 +1,29 @@
-#' Return new arrangement of database while adding code to deal with ensembles
-#'
-#' @param result list of results from the download function
-#' @param con database connection
-#' @param start_date start date of the data
-#' @param end_date end date of the data
-#' @param write whether to write to the database
-#' @param overwrite Logical: If a file already exists, create a fresh copy?
-#' @param insert.new.file whether to insert a new file
-#' @param input.args input arguments obtained from the convert_input function
-#' @param machine machine information
-#' @param mimetype data product specific file format
-#' @param formatname format name of the data
-#' @param allow.conflicting.dates whether to allow conflicting dates
-#' @param ensemble ensemble id
-#' @param ensemble_name ensemble name
-#' @param existing.input existing input records
-#' @param existing.dbfile existing dbfile records
-#' @param input input records
-#' @return list of input and dbfile ids
-#'
+#' Insert or Update Database Records for New or Modified Input Data
+#'
+#' @title Insert or Update Database Records for New or Modified Input Data
+#' @description This function is called internally by [convert_input()] to insert or update **input** and **dbfile** records in the PEcAn BETY database after one or more data-conversion or download functions have produced local or remote files. It is specifically intended for use with the output from data-conversion functions called by [convert_input()] (e.g. various "download_X" or "met2model_X" functions), but can be adapted if the return structure matches the requirements below.
+#'
+#' @param result list of data frames, each data frame corresponding to one piece or "chunk" of newly-created data. Typically, these data frames are produced by the function specified in `convert_input(..., fcn=...)`. Each data frame must contain at least: \describe{ \item{file}{Absolute file path(s) to the newly created file(s).} \item{dbfile.name}{The base filename(s) (without leading path) for each corresponding file.} } Additional columns are allowed but unused by this function.
+#' @param con database connection object (as returned by, e.g., \code{\link[DBI]{dbConnect}}).
+#' @param start_date Date or character. The start date of the data (in UTC). Acceptable types include Date objects (`as.Date`) or character strings that can be parsed to a Date via standard R conversions.
+#' @param end_date Date or character. The end date of the data (in UTC). Acceptable types include Date objects (`as.Date`) or character strings that can be parsed to a Date via standard R conversions.
+#' @param overwrite logical. If `TRUE`, any existing database records and files for the same input and date range should be overwritten with the new files. If `FALSE`, existing files are preserved.
+#' @param insert.new.file logical. If `TRUE`, forces the creation of a new **dbfile** entry even if an existing entry is found. Typically used for forecast or ensemble data that may be partially present.
+#' @param input.args list. This is passed from [convert_input()] and contains auxiliary arguments or settings that were passed along internally. It may include items such as `newsite` (integer site ID), among others. Its exact contents are not strictly defined but typically include the arguments provided to `convert_input()`.
+#' @param machine data.frame. Single row describing the machine on which the new data resides. It typically has columns like `id` and `hostname`, indicating the corresponding row in BETY's `machines` table.
+#' @param mimetype character. String indicating the file's MIME type (e.g. `"text/csv"`, `"application/x-netcdf"`, etc.).
+#' @param formatname character. String describing the file format (as listed in BETYdb's `formats` table). For example `"CF Meteorology"`.
+#' @param allow.conflicting.dates logical. If `TRUE`, allows creation or insertion of new file records even if their date range overlaps with existing records. If `FALSE`, overlapping ranges may cause errors or be disallowed.
+#' @param ensemble integer or logical. If an integer > 1, indicates that multiple ensemble members were generated (often for forecast data) and that each member may need separate database entries. If `FALSE`, the data are not an ensemble.
+#' @param ensemble_name character. String providing a descriptive label or identifier for an ensemble member. Typically used if `convert_input()` is called iteratively for each member.
+#' @param existing.input data.frame. Possibly zero rows representing the current record(s) in the `inputs` table that match (or partially match) the data being added. If no matching record exists, an empty data frame is supplied.
+#' @param existing.dbfile data.frame. Possibly zero rows representing the current record(s) in the `dbfiles` table that match (or partially match) the data being added. If no matching record exists, an empty data frame is supplied.
+#' @param input data.frame. Single row with the parent input record from BETYdb, typically including columns like `id`, `start_date`, `end_date`, etc. If the new data are derived from an existing input, this links them in the `parent_id` column of the new entries.
+#'
+#' @return list with two elements: \describe{ \item{input.id}{A numeric vector of new (or updated) input record IDs.} \item{dbfile.id}{A numeric vector of new (or updated) dbfile record IDs.} }
+#'
+#' @details This function consolidates the final step of adding or updating records in the BETY database to reflect newly created data files. It either updates existing `input` and `dbfile` records or creates new records, depending on the provided arguments (`overwrite`, `insert.new.file`, etc.) and whether a matching record already exists. Typically, these records represent model-ready meteorological or other environmental data, after format conversion or downloading has taken place in [convert_input()].
+#'
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
add_database_entries <- function(
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 94f11df502e..3e55c83c48a 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -568,12 +568,12 @@ convert_input <-
# New arrangement of database adding code to deal with ensembles.
if(write) {
add_entries_result <- return (add_database_entries(result, con, start_date,
- end_date, overwrite,
- insert.new.file, input.args,
- machine, mimetype, formatname,
- allow.conflicting.dates, ensemble,
- ensemble_name, existing.input,
- existing.dbfile, input))
+ end_date, overwrite,
+ insert.new.file, input.args,
+ machine, mimetype, formatname,
+ allow.conflicting.dates, ensemble,
+ ensemble_name, existing.input,
+ existing.dbfile, input))
} else {
PEcAn.logger::logger.warn("Input was not added to the database")
successful <- TRUE
diff --git a/base/db/man/add_database_entries.Rd b/base/db/man/add_database_entries.Rd
index ad1f8acbc05..d103c985853 100644
--- a/base/db/man/add_database_entries.Rd
+++ b/base/db/man/add_database_entries.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/add_database_entries.R
\name{add_database_entries}
\alias{add_database_entries}
-\title{Return new arrangement of database while adding code to deal with ensembles}
+\title{Insert or Update Database Records for New or Modified Input Data}
\usage{
add_database_entries(
result,
@@ -24,45 +24,48 @@ add_database_entries(
)
}
\arguments{
-\item{result}{list of results from the download function}
+\item{result}{list of data frames, each data frame corresponding to one piece or "chunk" of newly-created data. Typically, these data frames are produced by the function specified in `convert_input(..., fcn=...)`. Each data frame must contain at least: \describe{ \item{file}{Absolute file path(s) to the newly created file(s).} \item{dbfile.name}{The base filename(s) (without leading path) for each corresponding file.} } Additional columns are allowed but unused by this function.}
-\item{con}{database connection}
+\item{con}{database connection object (as returned by, e.g., \code{\link[DBI]{dbConnect}}).}
-\item{start_date}{start date of the data}
+\item{start_date}{Date or character. The start date of the data (in UTC). Acceptable types include Date objects (`as.Date`) or character strings that can be parsed to a Date via standard R conversions.}
-\item{end_date}{end date of the data}
+\item{end_date}{Date or character. The end date of the data (in UTC). Acceptable types include Date objects (`as.Date`) or character strings that can be parsed to a Date via standard R conversions.}
-\item{overwrite}{Logical: If a file already exists, create a fresh copy?}
+\item{overwrite}{logical. If `TRUE`, any existing database records and files for the same input and date range should be overwritten with the new files. If `FALSE`, existing files are preserved.}
-\item{insert.new.file}{whether to insert a new file}
+\item{insert.new.file}{logical. If `TRUE`, forces the creation of a new **dbfile** entry even if an existing entry is found. Typically used for forecast or ensemble data that may be partially present.}
-\item{input.args}{input arguments obtained from the convert_input function}
+\item{input.args}{list. This is passed from [convert_input()] and contains auxiliary arguments or settings that were passed along internally. It may include items such as `newsite` (integer site ID), among others. Its exact contents are not strictly defined but typically include the arguments provided to `convert_input()`.}
-\item{machine}{machine information}
+\item{machine}{data.frame. Single row describing the machine on which the new data resides. It typically has columns like `id` and `hostname`, indicating the corresponding row in BETY's `machines` table.}
-\item{mimetype}{data product specific file format}
+\item{mimetype}{character. String indicating the file's MIME type (e.g. `"text/csv"`, `"application/x-netcdf"`, etc.).}
-\item{formatname}{format name of the data}
+\item{formatname}{character. String describing the file format (as listed in BETYdb's `formats` table). For example `"CF Meteorology"`.}
-\item{allow.conflicting.dates}{whether to allow conflicting dates}
+\item{allow.conflicting.dates}{logical. If `TRUE`, allows creation or insertion of new file records even if their date range overlaps with existing records. If `FALSE`, overlapping ranges may cause errors or be disallowed.}
-\item{ensemble}{ensemble id}
+\item{ensemble}{integer or logical. If an integer > 1, indicates that multiple ensemble members were generated (often for forecast data) and that each member may need separate database entries. If `FALSE`, the data are not an ensemble.}
-\item{ensemble_name}{ensemble name}
+\item{ensemble_name}{character. String providing a descriptive label or identifier for an ensemble member. Typically used if `convert_input()` is called iteratively for each member.}
-\item{existing.input}{existing input records}
+\item{existing.input}{data.frame. Possibly zero rows representing the current record(s) in the `inputs` table that match (or partially match) the data being added. If no matching record exists, an empty data frame is supplied.}
-\item{existing.dbfile}{existing dbfile records}
+\item{existing.dbfile}{data.frame. Possibly zero rows representing the current record(s) in the `dbfiles` table that match (or partially match) the data being added. If no matching record exists, an empty data frame is supplied.}
-\item{input}{input records}
-
-\item{write}{whether to write to the database}
+\item{input}{data.frame. Single row with the parent input record from BETYdb, typically including columns like `id`, `start_date`, `end_date`, etc. If the new data are derived from an existing input, this links them in the `parent_id` column of the new entries.}
}
\value{
-list of input and dbfile ids
+list with two elements: \describe{ \item{input.id}{A numeric vector of new (or updated) input record IDs.} \item{dbfile.id}{A numeric vector of new (or updated) dbfile record IDs.} }
}
\description{
-Return new arrangement of database while adding code to deal with ensembles
+This function is called internally by [convert_input()] to insert or update **input** and **dbfile** records in the PEcAn BETY database after one or more data-conversion or download functions have produced local or remote files. It is specifically intended for use with the output from data-conversion functions called by [convert_input()] (e.g. various "download_X" or "met2model_X" functions), but can be adapted if the return structure matches the requirements below.
+}
+\details{
+Insert or Update Database Records for New or Modified Input Data
+
+This function consolidates the final step of adding or updating records in the BETY database to reflect newly created data files. It either updates existing `input` and `dbfile` records or creates new records, depending on the provided arguments (`overwrite`, `insert.new.file`, etc.) and whether a matching record already exists. Typically, these records represent model-ready meteorological or other environmental data, after format conversion or downloading has taken place in [convert_input()].
}
\author{
Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
From 525e05fae57edce8daf6b569b0b1b2040e6acf6c Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 27 Jan 2025 17:52:45 +0530
Subject: [PATCH 0044/1193] Update check_missing_files.R
---
base/db/R/check_missing_files.R | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/base/db/R/check_missing_files.R b/base/db/R/check_missing_files.R
index 9c119239988..4471ee90404 100644
--- a/base/db/R/check_missing_files.R
+++ b/base/db/R/check_missing_files.R
@@ -1,9 +1,19 @@
-#' Function to check if result has empty or missing files
+#' Check for Missing or Empty Files in Conversion Results
#'
-#' @param result A list of dataframes with file paths
-#' @param existing.input Existing input records
-#' @param existing.dbfile Existing dbfile records
-#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
+#' This function inspects the file paths in a list of data frames (typically produced by a download or conversion routine) to ensure that each file is present and non-empty. Specifically, it checks whether any file path is missing or has a file size of zero, and logs an error if such files are detected. It also normalizes `existing.input` and `existing.dbfile` so that each is returned as a list of data frames.
+#'
+#' @param result A list of data frames containing file information. Each data frame is expected to have a column named `file` with absolute file paths created by a data-conversion or download function. For example, this might be the structure returned by a "download_X" or "met2model_X" function when invoked via [convert_input()].
+#' @param existing.input A data frame or list of data frames (possibly zero rows) representing input records in the BETY `inputs` table that match (or partially match) the data being added. This is converted to a list of data frames if it is not already.
+#' @param existing.dbfile A data frame or list of data frames (possibly zero rows) representing dbfile records in the BETY `dbfiles` table that match (or partially match) the data being added. This is also converted to a list of data frames if it is not already.
+#'
+#' @return A list containing:
+#' \itemize{
+#' \item A list of data frames for `existing.input`
+#' \item A list of data frames for `existing.dbfile`
+#' }
+#'
+#' @details
+#' The function calculates the file size for each file specified in the `result` data frames. If any file path is missing (`NA`) or any file size is zero, the function raises a fatal error (via [PEcAn.logger::logger.severe]) indicating that an expected file is either nonexistent or empty. If no such issues are found, it merely ensures that `existing.input` and `existing.dbfile` are each wrapped in a list for consistent downstream usage.
#'
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
From f82fc4b49de2afb058347af57b8de5fa1ce4d6ec Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 27 Jan 2025 17:53:41 +0530
Subject: [PATCH 0045/1193] Update add_database_entries.R
---
base/db/R/add_database_entries.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/base/db/R/add_database_entries.R b/base/db/R/add_database_entries.R
index e9dcbadfdce..7351e631423 100644
--- a/base/db/R/add_database_entries.R
+++ b/base/db/R/add_database_entries.R
@@ -38,7 +38,7 @@ add_database_entries <- function(
# This list will be returned.
newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.
- for (i in 1:length(result)) { # Master for loop
+ for (i in seq_along(result)) { # Master for loop
id_not_added <- TRUE
if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 &&
From 5ac641376bbfbbe323580a0603763f9719900842 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 27 Jan 2025 18:00:26 +0530
Subject: [PATCH 0046/1193] Renamed `add_database_entries` and Updated
documentations
---
base/db/R/convert_input.R | 2 +-
...ase_entries.R => update_ensemble_writes.R} | 2 +-
base/db/man/check_missing_files.Rd | 19 +++++++++++++------
...e_entries.Rd => update_ensemble_writes.Rd} | 8 ++++----
base/db/tests/testthat/test.convert_input.R | 2 +-
5 files changed, 20 insertions(+), 13 deletions(-)
rename base/db/R/{add_database_entries.R => update_ensemble_writes.R} (99%)
rename base/db/man/{add_database_entries.Rd => update_ensemble_writes.Rd} (97%)
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 3e55c83c48a..0fc625915f6 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -567,7 +567,7 @@ convert_input <-
#---------------------------------------------------------------#
# New arrangement of database adding code to deal with ensembles.
if(write) {
- add_entries_result <- return (add_database_entries(result, con, start_date,
+ add_entries_result <- return (update_ensemble_writes(result, con, start_date,
end_date, overwrite,
insert.new.file, input.args,
machine, mimetype, formatname,
diff --git a/base/db/R/add_database_entries.R b/base/db/R/update_ensemble_writes.R
similarity index 99%
rename from base/db/R/add_database_entries.R
rename to base/db/R/update_ensemble_writes.R
index 7351e631423..8076ccb209c 100644
--- a/base/db/R/add_database_entries.R
+++ b/base/db/R/update_ensemble_writes.R
@@ -26,7 +26,7 @@
#'
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
-add_database_entries <- function(
+update_ensemble_writes <- function(
result, con, start_date,
end_date, overwrite,
insert.new.file, input.args,
diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd
index fa63c7878d5..35ad0f24331 100644
--- a/base/db/man/check_missing_files.Rd
+++ b/base/db/man/check_missing_files.Rd
@@ -2,22 +2,29 @@
% Please edit documentation in R/check_missing_files.R
\name{check_missing_files}
\alias{check_missing_files}
-\title{Function to check if result has empty or missing files}
+\title{Check for Missing or Empty Files in Conversion Results}
\usage{
check_missing_files(result, existing.input = NULL, existing.dbfile = NULL)
}
\arguments{
-\item{result}{A list of dataframes with file paths}
+\item{result}{A list of data frames containing file information. Each data frame is expected to have a column named `file` with absolute file paths created by a data-conversion or download function. For example, this might be the structure returned by a "download_X" or "met2model_X" function when invoked via [convert_input()].}
-\item{existing.input}{Existing input records}
+\item{existing.input}{A data frame or list of data frames (possibly zero rows) representing input records in the BETY `inputs` table that match (or partially match) the data being added. This is converted to a list of data frames if it is not already.}
-\item{existing.dbfile}{Existing dbfile records}
+\item{existing.dbfile}{A data frame or list of data frames (possibly zero rows) representing dbfile records in the BETY `dbfiles` table that match (or partially match) the data being added. This is also converted to a list of data frames if it is not already.}
}
\value{
-A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records
+A list containing:
+\itemize{
+ \item A list of data frames for `existing.input`
+ \item A list of data frames for `existing.dbfile`
+}
}
\description{
-Function to check if result has empty or missing files
+This function inspects the file paths in a list of data frames (typically produced by a download or conversion routine) to ensure that each file is present and non-empty. Specifically, it checks whether any file path is missing or has a file size of zero, and logs an error if such files are detected. It also normalizes `existing.input` and `existing.dbfile` so that each is returned as a list of data frames.
+}
+\details{
+The function calculates the file size for each file specified in the `result` data frames. If any file path is missing (`NA`) or any file size is zero, the function raises a fatal error (via [PEcAn.logger::logger.severe]) indicating that an expected file is either nonexistent or empty. If no such issues are found, it merely ensures that `existing.input` and `existing.dbfile` are each wrapped in a list for consistent downstream usage.
}
\author{
Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
diff --git a/base/db/man/add_database_entries.Rd b/base/db/man/update_ensemble_writes.Rd
similarity index 97%
rename from base/db/man/add_database_entries.Rd
rename to base/db/man/update_ensemble_writes.Rd
index d103c985853..587de12b1b3 100644
--- a/base/db/man/add_database_entries.Rd
+++ b/base/db/man/update_ensemble_writes.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_database_entries.R
-\name{add_database_entries}
-\alias{add_database_entries}
+% Please edit documentation in R/update_ensemble_writes.R
+\name{update_ensemble_writes}
+\alias{update_ensemble_writes}
\title{Insert or Update Database Records for New or Modified Input Data}
\usage{
-add_database_entries(
+update_ensemble_writes(
result,
con,
start_date,
diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R
index 474ed4eaaeb..c6c27bde320 100644
--- a/base/db/tests/testthat/test.convert_input.R
+++ b/base/db/tests/testthat/test.convert_input.R
@@ -20,7 +20,7 @@ test_that("`convert_input()` able to call the respective download function for a
existing.input = list(data.frame(file = character(0))),
existing.dbfile = list(data.frame(file = character(0)))
))
- mockery::stub(convert_input, "add_database_entries", list(input.id = 1, dbfile.id = 1))
+ mockery::stub(convert_input, "update_ensemble_writes", list(input.id = 1, dbfile.id = 1))
convert_input(
input.id = NA,
From bd8f9691ef0a58b0a061ca33ca10646454da3656 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 19 Feb 2025 20:46:37 -0500
Subject: [PATCH 0047/1193] Update namespace for added functions.
---
modules/assim.sequential/NAMESPACE | 2 ++
1 file changed, 2 insertions(+)
diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index db21f07876e..3d034fd126f 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand
export(Analysis.sda)
+export(Average.ERA5.2.GeoTIFF)
export(Construct.H.multisite)
export(Construct.R)
export(Construct_H)
@@ -29,6 +30,7 @@ export(assessParams)
export(block_matrix)
export(conj_wt_wishart_sampler)
export(construct_nimble_H)
+export(downscale.qsub.main)
export(dwtmnorm)
export(get_ensemble_weights)
export(hop_test)
From d9ad8f450bea052bb9424912a26977be7c3a3c78 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 19 Feb 2025 20:47:01 -0500
Subject: [PATCH 0048/1193] Added the script for running the north america
downscale functions.
---
.../inst/anchor/NA_downscale_script.R | 80 +++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 modules/assim.sequential/inst/anchor/NA_downscale_script.R
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
new file mode 100644
index 00000000000..d670bcfdff4
--- /dev/null
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -0,0 +1,80 @@
+library(purrr)
+library(foreach)
+setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/downscale_maps/")
+# average ERA5 to climatic covariates.
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET"
+in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/"
+dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year"))
+start.dates <- dates[1:10]
+end.dates <- dates[2:11]
+paths <- c()
+for (i in 1:10) {
+ paths <- c(paths, PEcAnAssimSequential:::Average.ERA5.2.GeoTIFF(start.dates[i], end.dates[i], in.path, outdir))
+ print(i)
+}
+# setup.
+base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
+load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/sda.all.forecast.analysis.Rdata")
+variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
+settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/pecanIC.xml"
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/downscale_maps/"
+cores <- 28
+date <- seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year")
+# loop over years.
+for (i in seq_along(date)) {
+ # setup covariates paths and variable names.
+ cov.tif.file.list <- list(LC = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif",
+ var.name = "LC"),
+ year_since_disturb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_LC/outputs/age.tif",
+ var.name = "year_since_disturb"),
+ agb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/AGB/agb.tif",
+ var.name = "agb"),
+ twi = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/TWI/TWI_resample.tiff",
+ var.name = "twi"),
+ met = list(dir = paths[i],
+ var.name = c("temp", "prec", "srad", "vapr")),
+ soil = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/SoilGrids.tif",
+ var.name = c("PH", "N", "SOC", "Sand")))
+ # Assemble covariates.
+ if (file.exists(paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff"))) {
+ covariates.dir <- paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff")
+ } else {
+ covariates.dir <- create.covariates.geotiff(outdir = outdir,
+ year = lubridate::year(date[i]),
+ base.map.dir = base.map.dir,
+ cov.tif.file.list = cov.tif.file.list,
+ normalize = T,
+ cores = cores)
+ }
+ # grab analysis.
+ analysis.yr <- analysis.all[[i]]
+ time <- date[i]
+ # loop over carbon types.
+ for (j in seq_along(variables)) {
+ # setup folder.
+ variable <- variables[j]
+ folder.path <- file.path(outdir, paste0(variables[j], "_", date[i]))
+ dir.create(folder.path)
+ save(list = c("settings", "analysis.yr", "covariates.dir", "time", "variable", "folder.path", "base.map.dir", "cores", "outdir"),
+ file = file.path(folder.path, "dat.Rdata"))
+ # prepare for qsub.
+ jobsh <- c("#!/bin/bash -l",
+ "module load R/4.1.2",
+ "echo \"require (PEcAnAssimSequential)",
+ " require (foreach)",
+ " require (purrr)",
+ " downscale.qsub.main('@FOLDER_PATH@')",
+ " \" | R --no-save")
+ jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh)
+ writeLines(jobsh, con = file.path(folder.path, "job.sh"))
+ # qsub command.
+ qsub <- "qsub -l h_rt=6:00:00 -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
+ qsub <- gsub("@CORES@", cores, qsub)
+ qsub <- gsub("@NAME@", paste0("ds_", i, "_", j), qsub)
+ qsub <- gsub("@STDOUT@", file.path(folder.path, "stdout.log"), qsub)
+ qsub <- gsub("@STDERR@", file.path(folder.path, "stderr.log"), qsub)
+ qsub <- strsplit(qsub, " (?=([^\"']*\"[^\"']*\")*[^\"']*$)", perl = TRUE)
+ cmd <- qsub[[1]]
+ out <- system2(cmd, file.path(folder.path, "job.sh"), stdout = TRUE, stderr = TRUE)
+ }
+}
From 14c431238004b1465428012b72c1878e3d6da1ae Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 19 Feb 2025 20:47:19 -0500
Subject: [PATCH 0049/1193] Add the script for the downscale functions.
---
modules/assim.sequential/R/SDA_NA_downscale.R | 489 ++++++++++++++++++
1 file changed, 489 insertions(+)
create mode 100644 modules/assim.sequential/R/SDA_NA_downscale.R
diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R
new file mode 100644
index 00000000000..599ea382b5b
--- /dev/null
+++ b/modules/assim.sequential/R/SDA_NA_downscale.R
@@ -0,0 +1,489 @@
+#' @description
+#' This function helps to average the ERA5 data based on the start and end dates, and convert it to the GeoTIFF file.
+#' @title Average.ERA5.2.GeoTIFF
+#'
+#' @param start.date character: start point of when to average the data (e.g., 2012-01-01).
+#' @param end.date character: end point of when to average the data (e.g., 2021-12-31).
+#' @param in.path character: the directory where your ERA5 data stored (they should named as ERA5_YEAR.nc).
+#' @param outdir character: the output directory where the averaged GeoTIFF file will be generated.
+#'
+#' @return character: path to the exported GeoTIFF file.
+#'
+#' @examples
+#' @export
+#' @author Dongchen Zhang
+Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
+ # open ERA5 nc file as geotiff format for referencing crs and ext.
+ ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", lubridate::year(start.date), ".nc")))
+ dates <- seq(start.date, end.date, "1 year")
+ if (length(dates) < 2) {
+ PEcAn.logger::logger.info("There is no time range to be calculated!")
+ return(NA)
+ }
+ # initialize final outcomes.
+ temp.all <- precip.all <- srd.all <- dewpoint.all <- c()
+ for (i in 2:length(dates)) {
+ # initialize start and end dates for the current period
+ if (i == 1) {
+ start <- start.date
+ } else {
+ start <- as.Date(paste0(lubridate::year(dates[i]), "-01-01"))
+ }
+ if (i == length(dates)) {
+ end <- end.date
+ } else {
+ end <- as.Date(paste0(lubridate::year(dates[i]), "-12-31"))
+ }
+ # loop over years.
+ for (j in seq_along(dates)) {
+ # open ERA5 nc file.
+ met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", lubridate::year(dates[i]), ".nc")))
+ # find index for the date.
+ times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC")
+ time.inds <- which(lubridate::date(times) >= start & lubridate::date(times) <= end)
+ # extract temperature.
+ PEcAn.logger::logger.info("entering temperature.")
+ temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract precipitation.
+ PEcAn.logger::logger.info("entering precipitation.")
+ precip.all <- abind::abind(precip.all, apply(ncdf4::ncvar_get(met.nc, "tp")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract shortwave solar radiation.
+ PEcAn.logger::logger.info("entering solar radiation.")
+ srd.all <- abind::abind(srd.all, apply(ncdf4::ncvar_get(met.nc, "ssrd")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract dewpoint.
+ PEcAn.logger::logger.info("entering dewpoint.")
+ dewpoint.all <- abind::abind(dewpoint.all, apply(ncdf4::ncvar_get(met.nc, "d2m")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # close the NC connection.
+ ncdf4::nc_close(met.nc)
+ }
+ }
+ # aggregate across time.
+ # temperature.
+ temp <- apply(temp.all, c(1, 2), mean)
+ temp <- PEcAn.utils::ud_convert(temp, "K", "degC")
+ # precipitation.
+ precip <- apply(precip.all, c(1, 2), mean)
+ # solar radiation.
+ srd <- apply(srd.all, c(1, 2), mean)
+ # dewpoint.
+ dewpoint <- apply(dewpoint.all, c(1, 2), mean)
+ dewpoint <- PEcAn.utils::ud_convert(dewpoint, "K", "degC")
+ # convert dew point to relative humidity.
+ beta <- (112 - (0.1 * temp) + dewpoint) / (112 + (0.9 * temp))
+ relative.humidity <- beta ^ 8
+ VPD <- PEcAn.data.atmosphere::get.vpd(100*relative.humidity, temp)
+ # combine together.
+ PEcAn.logger::logger.info("Aggregate maps.")
+ met.rast <- c(terra::rast(matrix(temp, nrow = dim(temp)[2], ncol = dim(temp)[1], byrow = T)),
+ terra::rast(matrix(precip, nrow = dim(precip)[2], ncol = dim(precip)[1], byrow = T)),
+ terra::rast(matrix(srd, nrow = dim(srd)[2], ncol = dim(srd)[1], byrow = T)),
+ terra::rast(matrix(VPD, nrow = dim(VPD)[2], ncol = dim(VPD)[1], byrow = T)))
+ # adjust crs and extents.
+ terra::crs(met.rast) <- terra::crs(ERA5.tiff)
+ terra::ext(met.rast) <- terra::ext(ERA5.tiff)
+ names(met.rast) <- c("temp", "prec", "srad", "vapr")
+ # write into geotiff file.
+ terra::writeRaster(met.rast, file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff")))
+ # end.
+ gc()
+ return(file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff")))
+}
+
+
+
+# assemble covariates from different spatial scales/resolutions and crs.
+# Here is an example of the `cov.tif.file.list` object:
+# cov.tif.file.list <- list(LC = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif",
+# var.name = "LC"),
+# year_since_disturb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_LC/outputs/age.tif",
+# var.name = "year_since_disturb"),
+# agb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/AGB/agb.tif",
+# var.name = "agb"),
+# twi = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/TWI/TWI_resample.tiff",
+# var.name = "twi"),
+# met = list(dir = paths[i],
+# var.name = c("temp", "prec", "srad", "vapr")),
+# soil = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/SoilGrids.tif",
+# var.name = c("PH", "N", "SOC", "Sand")))
+# This function helps to stack target data layers from various GeoTIFF maps to a single map
+# cropped and projected to the `base.map`. It also enables the normalization feature to facilitate the ML process.
+
+
+#' @description
+#' This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map.
+#' @title stack.covariates.2.geotiff
+#'
+#' @param outdir character: the output directory where the stacked GeoTIFF file will be generated.
+#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the final map.
+#' @param cov.tif.file.list list: a list contains sub-lists with each including path to the corresponding map and the variables to be extracted (e.g., list(LC = list(dir = "path/to/landcover.tiff", var.name = "LC")).
+#' @param normalize boolean: decide if we want to normalize each data layer, the default is TRUE.
+#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
+#'
+#' @return path to the exported GeoTIFF file.
+#'
+#' @examples
+#' @author Dongchen Zhang
+stack.covariates.2.geotiff <- function(outdir, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) {
+ # create the folder if it doesn't exist.
+ if (!file.exists(outdir)) {
+ dir.create(outdir)
+ }
+ # parallel loop.
+ # register parallel nodes.
+ if (cores > length(cov.tif.file.list)) {
+ cores <- length(cov.tif.file.list)
+ }
+ cl <- parallel::makeCluster(as.numeric(cores))
+ doSNOW::registerDoSNOW(cl)
+ #progress bar.
+ pb <- utils::txtProgressBar(min=1, max=length(cov.tif.file.list), style=3)
+ progress <- function(n) utils::setTxtProgressBar(pb, n)
+ opts <- list(progress=progress)
+ # foreach loop.
+ paths <- foreach::foreach(f = cov.tif.file.list,
+ .packages=c("Kendall", "terra"),
+ .options.snow=opts) %dopar% {
+ # load the base map.
+ base.map <- terra::rast(base.map.dir)
+ # read geotif file.
+ temp.rast <- terra::rast(f$dir)
+ # normalize.
+ if (normalize & !"LC" %in% f$var.name) {
+ nx <- terra::minmax(temp.rast)
+ temp.rast <- (temp.rast - nx[1,]) / (nx[2,] - nx[1,])
+ }
+ # set name to layers if we set it up in advance.
+ # otherwise the original layer name will be used.
+ if (!is.null(f$var.name)) {
+ names(temp.rast) <- f$var.name
+ }
+ # raster operations.
+ terra::crs(temp.rast) <- terra::crs(base.map)
+ temp.rast <- terra::crop(temp.rast, base.map)
+ temp.rast <- terra::resample(temp.rast, base.map)
+ # write the raster into disk.
+ file.name <- paste0(f$var.name, collapse = "_")
+ path <- file.path(outdir, paste0(file.name, ".tiff"))
+ terra::writeRaster(temp.rast, path)
+ return(path)
+ } %>% unlist
+ # stop parallel.
+ parallel::stopCluster(cl)
+ foreach::registerDoSEQ()
+ gc()
+ # combine rasters.
+ all.rast <- terra::rast(paths)
+ # write all covariates into disk.
+ terra::writeRaster(all.rast, file.path(outdir, "covariates.tiff"), overwrite = T)
+ # remove previous tiff files.
+ unlink(paths)
+ # return results.
+ return(file.path(outdir, "covariates.tiff"))
+}
+
+#' @description
+#' convert settings to geospatial points in terra.
+#' @title pecan.settings.2.pts
+#'
+#' @param settings PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.
+#'
+#' @return terra spatial points object.
+#'
+#' @examples
+#' @author Dongchen Zhang
+pecan.settings.2.pts <- function(settings) {
+ if (is.character(settings)) {
+ # read settings.
+ settings <- PEcAn.settings::read.settings(settings)
+ }
+ # grab lat/lon.
+ site.locs <- settings$run %>% purrr::map('site') %>%
+ purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>%
+ t %>% `colnames<-`(c("Lon","Lat")) %>% as.data.frame()
+ # convert lat/lon to terra::vect.
+ pts <- terra::vect(site.locs, geom = c("Lon", "Lat"), crs = "EPSG:4326")
+ return(pts)
+}
+
+#' @description
+#' This function helps to build the data frame (pixels by data columns) for only vegetated pixels to improve the efficiency.
+#' Note that the `LC` field using the `MODIS land cover` observations (MCD12Q1.061) must be supplied in the covariates to make this function work.
+#' @title stack.covariates.2.df
+#'
+#' @param rast.dir character: a character that points to the covariates raster file generated by the `stack.covariates.2.geotiff` function.
+#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
+#'
+#' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels.
+#'
+#' @examples
+#' @author Dongchen Zhang
+stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) {
+ # load maps.
+ all.rast <- terra::rast(rast.dir)
+ # parallel loop.
+ layer.names <- names(all.rast)
+ # register parallel nodes.
+ if (cores > length(layer.names)) {
+ cores <- length(layer.names)
+ }
+ cl <- parallel::makeCluster(as.numeric(cores))
+ doSNOW::registerDoSNOW(cl)
+ #progress bar.
+ pb <- utils::txtProgressBar(min=1, max=length(layer.names), style=3)
+ progress <- function(n) utils::setTxtProgressBar(pb, n)
+ opts <- list(progress=progress)
+ # foreach loop.
+ vecs <- foreach::foreach(r = seq_along(layer.names),
+ .packages=c("Kendall", "terra"),
+ .options.snow=opts) %dopar% {
+ all.rast <- terra::rast(rast.dir)
+ temp.vec <- matrix(all.rast[[r]], byrow = T)
+ na.inds <- which(is.na(temp.vec))
+ # if it's LC layer.
+ if ("LC" == names(all.rast)[r]) {
+ non.veg.inds <- which(! temp.vec %in% 1:8)
+ na.inds <- unique(c(na.inds, non.veg.inds))
+ }
+ return(list(vec = temp.vec,
+ na.inds = na.inds))
+ }
+ # stop parallel.
+ parallel::stopCluster(cl)
+ foreach::registerDoSEQ()
+ gc()
+ # grab uniqued NA index.
+ na.inds <- vecs %>% purrr::map("na.inds") %>% unlist %>% unique
+ # remove NA from each covariate.
+ cov.vecs <- vecs %>% purrr::map(function(v){
+ return(v$vec[-na.inds])
+ }) %>% dplyr::bind_cols() %>% `colnames<-`(layer.names) %>% as.data.frame()
+ non.na.inds <- seq_along(matrix(all.rast[[1]]))[-na.inds]
+ return(list(df = cov.vecs, non.na.inds = non.na.inds))
+}
+
+#' @description
+#' This function helps to create the training dataset of specific variable type and locations for downscaling.
+#' TODO: There will be a ratio argument between training and testing samples to testify the ML regression accuracy.
+#' @title prepare.train.dat
+#'
+#' @param settings character: physical path that points to the pecan settings XML file.
+#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
+#' @param covariates.dir character: path to the exported covariates GeoTIFF file.
+#' @param variable character: name of state variable. It should match up with the column names of the analysis data frame.
+#'
+#' @return matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates.
+#'
+#' @examples
+#' @author Dongchen Zhang
+prepare.train.dat <- function(settings, analysis, covariates.dir, variable) {
+ # convert settings into geospatial points.
+ pts <- pecan.settings.2.pts(settings)
+ # read covariates.
+ cov.rast <- terra::rast(covariates.dir)
+ # extract covariates by locations.
+ predictors <- as.data.frame(terra::extract(cov.rast, pts, ID = FALSE))
+ covariate_names <- names(predictors)
+ if ("ID" %in% covariate_names) {
+ rm.ind <- which("ID" %in% covariate_names)
+ covariate_names <- covariate_names[-rm.ind]
+ predictors <- predictors[,-rm.ind]
+ }
+ # grab carbon data.
+ var.dat <- analysis[,which(colnames(analysis) == variable)] %>% t %>%
+ as.data.frame() %>% `colnames<-`(paste0("ensemble", seq(nrow(analysis))))
+ # combine carbon and predictor.
+ full_data <- cbind(var.dat, predictors)
+ return(full_data)
+}
+
+#' @description
+#' This function helps to train the ML model across ensemble members in parallel.
+#' @title parallel.rf.train
+#'
+#' @param full_data numeric: the matrix generated using the `prepare.train.dat` function.
+#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
+#'
+#' @return list of trained models across ensemble members.
+#'
+#' @examples
+#' @author Dongchen Zhang
+parallel.rf.train <- function(full_data, cores = parallel::detectCores()) {
+ # grab ensemble and predictor index.
+ col.names <- colnames(full_data)
+ ensemble.inds <- which(grepl("ensemble", col.names, fixed = TRUE))
+ predictor.inds <- seq_along(col.names)[-ensemble.inds]
+ # parallel train.
+ # register parallel nodes.
+ if (cores > length(ensemble.inds)) {
+ cores <- length(ensemble.inds)
+ }
+ cl <- parallel::makeCluster(as.numeric(cores))
+ doSNOW::registerDoSNOW(cl)
+ #progress bar.
+ pb <- utils::txtProgressBar(min=1, max=length(ensemble.inds), style=3)
+ progress <- function(n) utils::setTxtProgressBar(pb, n)
+ opts <- list(progress=progress)
+ # foreach loop.
+ models <- foreach::foreach(i = ensemble.inds,
+ .packages=c("Kendall", "stats", "randomForest"),
+ .options.snow=opts) %dopar% {
+ ensemble_col <- col.names[ensemble.inds[i]]
+ formula <- stats::as.formula(paste(ensemble_col, "~", paste(col.names[predictor.inds], collapse = " + ")))
+ randomForest::randomForest(formula,
+ data = full_data,
+ ntree = 1000,
+ na.action = stats::na.omit,
+ keep.forest = TRUE,
+ importance = TRUE)
+ }
+ # stop parallel.
+ parallel::stopCluster(cl)
+ foreach::registerDoSEQ()
+ gc()
+ return(models)
+}
+
+#' @description
+#' This function helps to predict the target variable observations based on the covariates.
+#' The prediction is working in parallel across vegetated pixels.
+#' @title parallel.prediction
+#'
+#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
+#' @param models list: trained models across ensemble members generated by the `parallel.rf.train` function.
+#' @param cov.vecs: numeric: data frame containing covaraites across vegetated pixels generated from the `stack.covariates.2.df` function.
+#' @param non.na.inds numeric: the corresponding index of vegetated pixels generated from the `stack.covariates.2.df` function.
+#' @param outdir character: the output directory where the downscaled maps will be stored.
+#' @param name list: containing the time and variable name to create the final GeoTIFF file name.
+#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
+#'
+#' @return paths to the ensemble downscaled maps.
+#'
+#' @examples
+#' @author Dongchen Zhang
+parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) {
+ # load base map.
+ base.map <- terra::rast(base.map.dir)
+ dims <- dim(base.map)
+ # setup progress bar for ensemble members.
+ pb <- utils::txtProgressBar(min = 0, max = length(models), style = 3)
+ paths <- c()
+ # loop over ensemble members.
+ for (i in seq_along(models)) {
+ # update progress bar.
+ utils::setTxtProgressBar(pb, i)
+ # go to the next if the current file has already been generated.
+ file.name <- paste0(c("ensemble", i, name$time, name$variable), collapse = "_")
+ if (file.exists(file.path(outdir, paste0(file.name, ".tiff")))) {
+ next
+ }
+ # register parallel nodes.
+ cl <- parallel::makeCluster(cores)
+ doSNOW::registerDoSNOW(cl)
+ # foreach parallel.
+ model <- models[[i]]
+ output <- foreach::foreach(d=itertools::isplitRows(cov.vecs, chunks=cores),
+ .packages=c("stats", "randomForest")) %dopar% {
+ stats::predict(model, d)
+ } %>% unlist
+ # export to geotiff map.
+ vec <- rep(NA, dims[1]*dims[2])
+ vec[non.na.inds] <- output
+ map <- terra::rast(matrix(vec, dims[1], dims[2], byrow = T))
+ terra::ext(map) <- terra::ext(base.map)
+ terra::crs(map) <- terra::crs(base.map)
+ terra::writeRaster(map, file.path(outdir, paste0(file.name, ".tiff")))
+ paths <- c(paths, file.path(outdir, paste0(file.name, ".tiff")))
+ # stop parallel.
+ parallel::stopCluster(cl)
+ foreach::registerDoSEQ()
+ gc()
+ }
+ return(paths)
+}
+
+#' @description
+#' This is the main function to execute the RF training and prediction.
+#' Note it will be deployed by each node you requested if the qsub feature is enabled below.
+#' @title downscale.rf.main
+#'
+#' @param settings character: physical path that points to the pecan settings XML file.
+#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
+#' @param covariates.dir character: path to the exported covariates GeoTIFF file.
+#' @param time: character: the time tag used to differentiate the outputs from others.
+#' @param variable: character: name of state variable. It should match up with the column names of the analysis data frame.
+#' @param outdir character: the output directory where the downscaled maps will be stored.
+#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
+#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
+#'
+#' @return paths to the ensemble downscaled maps.
+#'
+#' @examples
+#' @author Dongchen Zhang
+downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) {
+ # create folder specific for the time and carbon type.
+ folder.name <- file.path(outdir, paste0(c(variable, time), collapse = "_"))
+ if (!file.exists(folder.name)) {
+ dir.create(folder.name)
+ }
+ # prepare training data.
+ PEcAn.logger::logger.info("Preparing training data.")
+ full_data <- prepare.train.dat(settings = settings,
+ analysis = analysis,
+ covariates.dir = covariates.dir,
+ variable = variable)
+ # convert LC into factor.
+ if ("LC" %in% colnames(full_data)) {
+ full_data[,"LC"] <- factor(full_data[,"LC"])
+ }
+ # parallel train.
+ PEcAn.logger::logger.info("Parallel training.")
+ models <- parallel.rf.train(full_data = full_data, cores = cores)
+ # save trained models for future analysis.
+ # saveRDS(models, file.path(folder.name, "rf_models.rds"))
+ save(models, file = file.path(folder.name, "rf_models.Rdata"))
+ # convert stacked covariates geotiff file into data frame.
+ PEcAn.logger::logger.info("Converting geotiff to df.")
+ cov.df <- stack.covariates.2.df(rast.dir = covariates.dir, cores = cores)
+ # reconstruct LC because of the computation accuracy.
+ cov.df$df$LC[which(cov.df$df$LC < 1)] <- 0
+ # convert LC into factor.
+ if ("LC" %in% colnames(cov.df$df)) {
+ cov.df$df[,"LC"] <- factor(cov.df$df[,"LC"])
+ }
+ # parallel prediction.
+ PEcAn.logger::logger.info("Parallel prediction.")
+ paths <- parallel.prediction(base.map.dir = base.map.dir,
+ models = models,
+ cov.vecs = cov.df$df,
+ non.na.inds = cov.df$non.na.inds,
+ outdir = folder.name,
+ name = list(time = time, variable = variable),
+ cores = cores)
+ # calculate mean and std.
+ PEcAn.logger::logger.info("Calculate mean and std.")
+ ras.all <- terra::rast(paths)
+ mean <- terra::app(ras.all, "mean")
+ std <- terra::app(ras.all, "std")
+ # write into geotiff files.
+ image.base.name <- paste0(time, "_", variable, ".tiff")
+ terra::writeRaster(mean, filename = file.path(folder.name, paste0("mean_", image.base.name)))
+ terra::writeRaster(std, filename = file.path(folder.name, paste0("std_", image.base.name)))
+ return(list(ensemble.prediction.files = paths,
+ mean.prediction.file = file.path(folder.name, paste0("mean_", image.base.name)),
+ std.prediction.file = file.path(folder.name, paste0("std_", image.base.name))))
+}
+
+#' @description
+#' This qsub function helps to run the submitted qsub jobs for running the downscale.rf.main function.
+#' @title downscale.qsub.main
+#'
+#' @param folder.path Character: physical path to which the job file is located.
+#'
+#' @examples
+#' @export
+#' @author Dongchen Zhang
+downscale.qsub.main <- function(folder.path) {
+ dat <- readRDS(file.path(folder.path, "dat.rds"))
+ out <- downscale.rf.main(dat$settings, dat$analysis.yr, dat$covariates.dir, lubridate::year(dat$time), dat$variable, dat$outdir, dat$base.map.dir, dat$cores)
+ saveRDS(out, file.path(folder.path, "res.rds"))
+}
\ No newline at end of file
From 12b41e35fe0370a9339baeafe51471158ea2edf8 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 19 Feb 2025 20:47:35 -0500
Subject: [PATCH 0050/1193] Update the Rd files.
---
.../man/Average.ERA5.2.GeoTIFF.Rd | 26 +++++++++++
.../man/downscale.qsub.main.Rd | 17 +++++++
.../assim.sequential/man/downscale.rf.main.Rd | 44 +++++++++++++++++++
.../man/parallel.prediction.Rd | 41 +++++++++++++++++
.../assim.sequential/man/parallel.rf.train.Rd | 22 ++++++++++
.../man/pecan.settings.2.pts.Rd | 20 +++++++++
.../assim.sequential/man/prepare.train.dat.Rd | 27 ++++++++++++
.../man/stack.covariates.2.df.Rd | 23 ++++++++++
.../man/stack.covariates.2.geotiff.Rd | 34 ++++++++++++++
9 files changed, 254 insertions(+)
create mode 100644 modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd
create mode 100644 modules/assim.sequential/man/downscale.qsub.main.Rd
create mode 100644 modules/assim.sequential/man/downscale.rf.main.Rd
create mode 100644 modules/assim.sequential/man/parallel.prediction.Rd
create mode 100644 modules/assim.sequential/man/parallel.rf.train.Rd
create mode 100644 modules/assim.sequential/man/pecan.settings.2.pts.Rd
create mode 100644 modules/assim.sequential/man/prepare.train.dat.Rd
create mode 100644 modules/assim.sequential/man/stack.covariates.2.df.Rd
create mode 100644 modules/assim.sequential/man/stack.covariates.2.geotiff.Rd
diff --git a/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd b/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd
new file mode 100644
index 00000000000..0c4a561bde9
--- /dev/null
+++ b/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{Average.ERA5.2.GeoTIFF}
+\alias{Average.ERA5.2.GeoTIFF}
+\title{Average.ERA5.2.GeoTIFF}
+\usage{
+Average.ERA5.2.GeoTIFF(start.date, end.date, in.path, outdir)
+}
+\arguments{
+\item{start.date}{character: start point of when to average the data (e.g., 2012-01-01).}
+
+\item{end.date}{character: end point of when to average the data (e.g., 2021-12-31).}
+
+\item{in.path}{character: the directory where your ERA5 data stored (they should named as ERA5_YEAR.nc).}
+
+\item{outdir}{character: the output directory where the averaged GeoTIFF file will be generated.}
+}
+\value{
+character: path to the exported GeoTIFF file.
+}
+\description{
+This function helps to average the ERA5 data based on the start and end dates, and convert it to the GeoTIFF file.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/downscale.qsub.main.Rd b/modules/assim.sequential/man/downscale.qsub.main.Rd
new file mode 100644
index 00000000000..50de6e2d945
--- /dev/null
+++ b/modules/assim.sequential/man/downscale.qsub.main.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{downscale.qsub.main}
+\alias{downscale.qsub.main}
+\title{downscale.qsub.main}
+\usage{
+downscale.qsub.main(folder.path)
+}
+\arguments{
+\item{folder.path}{Character: physical path to which the job file is located.}
+}
+\description{
+This qsub function helps to run the submitted qsub jobs for running the downscale.rf.main function.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/downscale.rf.main.Rd b/modules/assim.sequential/man/downscale.rf.main.Rd
new file mode 100644
index 00000000000..75ec6612760
--- /dev/null
+++ b/modules/assim.sequential/man/downscale.rf.main.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{downscale.rf.main}
+\alias{downscale.rf.main}
+\title{downscale.rf.main}
+\usage{
+downscale.rf.main(
+ settings,
+ analysis,
+ covariates.dir,
+ time,
+ variable,
+ outdir,
+ base.map.dir,
+ cores = parallel::detectCores()
+)
+}
+\arguments{
+\item{settings}{character: physical path that points to the pecan settings XML file.}
+
+\item{analysis}{numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.}
+
+\item{covariates.dir}{character: path to the exported covariates GeoTIFF file.}
+
+\item{outdir}{character: the output directory where the downscaled maps will be stored.}
+
+\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.}
+
+\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
+
+\item{time:}{character: the time tag used to differentiate the outputs from others.}
+
+\item{variable:}{character: name of state variable. It should match up with the column names of the analysis data frame.}
+}
+\value{
+paths to the ensemble downscaled maps.
+}
+\description{
+This is the main function to execute the RF training and prediction.
+Note it will be deployed by each node you requested if the qsub feature is enabled below.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/parallel.prediction.Rd b/modules/assim.sequential/man/parallel.prediction.Rd
new file mode 100644
index 00000000000..fdb04edb8b4
--- /dev/null
+++ b/modules/assim.sequential/man/parallel.prediction.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{parallel.prediction}
+\alias{parallel.prediction}
+\title{parallel.prediction}
+\usage{
+parallel.prediction(
+ base.map.dir,
+ models,
+ cov.vecs,
+ non.na.inds,
+ outdir,
+ name,
+ cores = parallel::detectCores()
+)
+}
+\arguments{
+\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.}
+
+\item{models}{list: trained models across ensemble members generated by the `parallel.rf.train` function.}
+
+\item{non.na.inds}{numeric: the corresponding index of vegetated pixels generated from the `stack.covariates.2.df` function.}
+
+\item{outdir}{character: the output directory where the downscaled maps will be stored.}
+
+\item{name}{list: containing the time and variable name to create the final GeoTIFF file name.}
+
+\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
+
+\item{cov.vecs:}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack.covariates.2.df` function.}
+}
+\value{
+paths to the ensemble downscaled maps.
+}
+\description{
+This function helps to predict the target variable observations based on the covariates.
+The prediction is working in parallel across vegetated pixels.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/parallel.rf.train.Rd b/modules/assim.sequential/man/parallel.rf.train.Rd
new file mode 100644
index 00000000000..4b61a544126
--- /dev/null
+++ b/modules/assim.sequential/man/parallel.rf.train.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{parallel.rf.train}
+\alias{parallel.rf.train}
+\title{parallel.rf.train}
+\usage{
+parallel.rf.train(full_data, cores = parallel::detectCores())
+}
+\arguments{
+\item{full_data}{numeric: the matrix generated using the `prepare.train.dat` function.}
+
+\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
+}
+\value{
+list of trained models across ensemble members.
+}
+\description{
+This function helps to train the ML model across ensemble members in parallel.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/pecan.settings.2.pts.Rd b/modules/assim.sequential/man/pecan.settings.2.pts.Rd
new file mode 100644
index 00000000000..91828b1077e
--- /dev/null
+++ b/modules/assim.sequential/man/pecan.settings.2.pts.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{pecan.settings.2.pts}
+\alias{pecan.settings.2.pts}
+\title{pecan.settings.2.pts}
+\usage{
+pecan.settings.2.pts(settings)
+}
+\arguments{
+\item{settings}{PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.}
+}
+\value{
+terra spatial points object.
+}
+\description{
+convert settings to geospatial points in terra.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/prepare.train.dat.Rd b/modules/assim.sequential/man/prepare.train.dat.Rd
new file mode 100644
index 00000000000..ec9122dc56d
--- /dev/null
+++ b/modules/assim.sequential/man/prepare.train.dat.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{prepare.train.dat}
+\alias{prepare.train.dat}
+\title{prepare.train.dat}
+\usage{
+prepare.train.dat(settings, analysis, covariates.dir, variable)
+}
+\arguments{
+\item{settings}{character: physical path that points to the pecan settings XML file.}
+
+\item{analysis}{numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.}
+
+\item{covariates.dir}{character: path to the exported covariates GeoTIFF file.}
+
+\item{variable}{character: name of state variable. It should match up with the column names of the analysis data frame.}
+}
+\value{
+matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates.
+}
+\description{
+This function helps to create the training dataset of specific variable type and locations for downscaling.
+TODO: There will be a ratio argument between training and testing samples to testify the ML regression accuracy.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/stack.covariates.2.df.Rd b/modules/assim.sequential/man/stack.covariates.2.df.Rd
new file mode 100644
index 00000000000..64b9b2f7ffa
--- /dev/null
+++ b/modules/assim.sequential/man/stack.covariates.2.df.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{stack.covariates.2.df}
+\alias{stack.covariates.2.df}
+\title{stack.covariates.2.df}
+\usage{
+\method{stack}{covariates.2.df}(rast.dir, cores = parallel::detectCores())
+}
+\arguments{
+\item{rast.dir}{character: a character that points to the covariates raster file generated by the `stack.covariates.2.geotiff` function.}
+
+\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
+}
+\value{
+list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels.
+}
+\description{
+This function helps to build the data frame (pixels by data columns) for only vegetated pixels to improve the efficiency.
+Note that the `LC` field using the `MODIS land cover` observations (MCD12Q1.061) must be supplied in the covariates to make this function work.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd b/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd
new file mode 100644
index 00000000000..6e2a1abb220
--- /dev/null
+++ b/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{stack.covariates.2.geotiff}
+\alias{stack.covariates.2.geotiff}
+\title{stack.covariates.2.geotiff}
+\usage{
+\method{stack}{covariates.2.geotiff}(
+ outdir,
+ base.map.dir,
+ cov.tif.file.list,
+ normalize = T,
+ cores = parallel::detectCores()
+)
+}
+\arguments{
+\item{outdir}{character: the output directory where the stacked GeoTIFF file will be generated.}
+
+\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the final map.}
+
+\item{cov.tif.file.list}{list: a list contains sub-lists with each including path to the corresponding map and the variables to be extracted (e.g., list(LC = list(dir = "path/to/landcover.tiff", var.name = "LC")).}
+
+\item{normalize}{boolean: decide if we want to normalize each data layer, the default is TRUE.}
+
+\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
+}
+\value{
+path to the exported GeoTIFF file.
+}
+\description{
+This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map.
+}
+\author{
+Dongchen Zhang
+}
From d027f62af2d5520b75f7835cd9b12ef979b40137 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 21 Feb 2025 10:00:48 -0500
Subject: [PATCH 0051/1193] Add function to namespace.
---
modules/assim.sequential/NAMESPACE | 1 +
1 file changed, 1 insertion(+)
diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 3d034fd126f..700f2cc6c16 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand
+S3method(stack,covariates.2.geotiff)
export(Analysis.sda)
export(Average.ERA5.2.GeoTIFF)
export(Construct.H.multisite)
From d06dc17453807ce68d9c61dcd316710419cfbf2f Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 21 Feb 2025 10:01:06 -0500
Subject: [PATCH 0052/1193] Update function.
---
modules/assim.sequential/R/SDA_NA_downscale.R | 24 ++++++-------------
1 file changed, 7 insertions(+), 17 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R
index 599ea382b5b..7233c0bf8f5 100644
--- a/modules/assim.sequential/R/SDA_NA_downscale.R
+++ b/modules/assim.sequential/R/SDA_NA_downscale.R
@@ -9,20 +9,15 @@
#'
#' @return character: path to the exported GeoTIFF file.
#'
-#' @examples
#' @export
#' @author Dongchen Zhang
Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
# open ERA5 nc file as geotiff format for referencing crs and ext.
ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", lubridate::year(start.date), ".nc")))
dates <- seq(start.date, end.date, "1 year")
- if (length(dates) < 2) {
- PEcAn.logger::logger.info("There is no time range to be calculated!")
- return(NA)
- }
# initialize final outcomes.
temp.all <- precip.all <- srd.all <- dewpoint.all <- c()
- for (i in 2:length(dates)) {
+ for (i in seq_along(dates)) {
# initialize start and end dates for the current period
if (i == 1) {
start <- start.date
@@ -114,6 +109,7 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
#' @title stack.covariates.2.geotiff
#'
#' @param outdir character: the output directory where the stacked GeoTIFF file will be generated.
+#' @param year numeric: the year of when the covariates are stacked.
#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the final map.
#' @param cov.tif.file.list list: a list contains sub-lists with each including path to the corresponding map and the variables to be extracted (e.g., list(LC = list(dir = "path/to/landcover.tiff", var.name = "LC")).
#' @param normalize boolean: decide if we want to normalize each data layer, the default is TRUE.
@@ -121,9 +117,10 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
#'
#' @return path to the exported GeoTIFF file.
#'
-#' @examples
+#' @export
+#'
#' @author Dongchen Zhang
-stack.covariates.2.geotiff <- function(outdir, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) {
+stack.covariates.2.geotiff <- function(outdir, year, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) {
# create the folder if it doesn't exist.
if (!file.exists(outdir)) {
dir.create(outdir)
@@ -174,11 +171,11 @@ stack.covariates.2.geotiff <- function(outdir, base.map.dir, cov.tif.file.list,
# combine rasters.
all.rast <- terra::rast(paths)
# write all covariates into disk.
- terra::writeRaster(all.rast, file.path(outdir, "covariates.tiff"), overwrite = T)
+ terra::writeRaster(all.rast, file.path(outdir, paste0("covariates_", year, ".tiff")), overwrite = T)
# remove previous tiff files.
unlink(paths)
# return results.
- return(file.path(outdir, "covariates.tiff"))
+ return(file.path(outdir, paste0("covariates_", year, ".tiff")))
}
#' @description
@@ -189,7 +186,6 @@ stack.covariates.2.geotiff <- function(outdir, base.map.dir, cov.tif.file.list,
#'
#' @return terra spatial points object.
#'
-#' @examples
#' @author Dongchen Zhang
pecan.settings.2.pts <- function(settings) {
if (is.character(settings)) {
@@ -215,7 +211,6 @@ pecan.settings.2.pts <- function(settings) {
#'
#' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels.
#'
-#' @examples
#' @author Dongchen Zhang
stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) {
# load maps.
@@ -273,7 +268,6 @@ stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) {
#'
#' @return matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates.
#'
-#' @examples
#' @author Dongchen Zhang
prepare.train.dat <- function(settings, analysis, covariates.dir, variable) {
# convert settings into geospatial points.
@@ -305,7 +299,6 @@ prepare.train.dat <- function(settings, analysis, covariates.dir, variable) {
#'
#' @return list of trained models across ensemble members.
#'
-#' @examples
#' @author Dongchen Zhang
parallel.rf.train <- function(full_data, cores = parallel::detectCores()) {
# grab ensemble and predictor index.
@@ -358,7 +351,6 @@ parallel.rf.train <- function(full_data, cores = parallel::detectCores()) {
#'
#' @return paths to the ensemble downscaled maps.
#'
-#' @examples
#' @author Dongchen Zhang
parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) {
# load base map.
@@ -417,7 +409,6 @@ parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
#'
#' @return paths to the ensemble downscaled maps.
#'
-#' @examples
#' @author Dongchen Zhang
downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) {
# create folder specific for the time and carbon type.
@@ -479,7 +470,6 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable
#'
#' @param folder.path Character: physical path to which the job file is located.
#'
-#' @examples
#' @export
#' @author Dongchen Zhang
downscale.qsub.main <- function(folder.path) {
From d90fcb85be2af3c8db28d71bfbefb3c5193d82f3 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 21 Feb 2025 10:01:21 -0500
Subject: [PATCH 0053/1193] Parallel average ERA5.
---
.../inst/anchor/NA_downscale_script.R | 41 +++++++++++--------
1 file changed, 25 insertions(+), 16 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index d670bcfdff4..83613b1ff1b 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -1,23 +1,24 @@
library(purrr)
library(foreach)
-setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/downscale_maps/")
+library(PEcAnAssimSequential)
+setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/")
# average ERA5 to climatic covariates.
outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET"
in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/"
dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year"))
start.dates <- dates[1:10]
end.dates <- dates[2:11]
-paths <- c()
-for (i in 1:10) {
- paths <- c(paths, PEcAnAssimSequential:::Average.ERA5.2.GeoTIFF(start.dates[i], end.dates[i], in.path, outdir))
- print(i)
-}
+# parallel average ERA5 into covariates.
+future::plan(future::multisession, workers = 5)
+paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){
+ Average.ERA5.2.GeoTIFF(d1, d2, in.path, outdir)
+}, .progress = T) %>% unlist
# setup.
base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/sda.all.forecast.analysis.Rdata")
variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
-settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/pecanIC.xml"
-outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/downscale_maps/"
+settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/pecanIC.xml"
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/downscale_maps/"
cores <- 28
date <- seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year")
# loop over years.
@@ -39,12 +40,12 @@ for (i in seq_along(date)) {
if (file.exists(paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff"))) {
covariates.dir <- paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff")
} else {
- covariates.dir <- create.covariates.geotiff(outdir = outdir,
- year = lubridate::year(date[i]),
- base.map.dir = base.map.dir,
- cov.tif.file.list = cov.tif.file.list,
- normalize = T,
- cores = cores)
+ covariates.dir <- stack.covariates.2.geotiff(outdir = outdir,
+ year = lubridate::year(date[i]),
+ base.map.dir = base.map.dir,
+ cov.tif.file.list = cov.tif.file.list,
+ normalize = T,
+ cores = cores)
}
# grab analysis.
analysis.yr <- analysis.all[[i]]
@@ -55,8 +56,16 @@ for (i in seq_along(date)) {
variable <- variables[j]
folder.path <- file.path(outdir, paste0(variables[j], "_", date[i]))
dir.create(folder.path)
- save(list = c("settings", "analysis.yr", "covariates.dir", "time", "variable", "folder.path", "base.map.dir", "cores", "outdir"),
- file = file.path(folder.path, "dat.Rdata"))
+ saveRDS(list(settings = settings,
+ analysis.yr = analysis.yr,
+ covariates.dir = covariates.dir,
+ time = time,
+ variable = variable,
+ folder.path = folder.path,
+ base.map.dir = base.map.dir,
+ cores = cores,
+ outdir = outdir),
+ file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
"module load R/4.1.2",
From 7733d6a9ea5d6b814f4da6a08264ea13180f3935 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 21 Feb 2025 10:01:29 -0500
Subject: [PATCH 0054/1193] Update documentation.
---
modules/assim.sequential/man/stack.covariates.2.geotiff.Rd | 3 +++
1 file changed, 3 insertions(+)
diff --git a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd b/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd
index 6e2a1abb220..02f3da9dc97 100644
--- a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd
+++ b/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd
@@ -6,6 +6,7 @@
\usage{
\method{stack}{covariates.2.geotiff}(
outdir,
+ year,
base.map.dir,
cov.tif.file.list,
normalize = T,
@@ -15,6 +16,8 @@
\arguments{
\item{outdir}{character: the output directory where the stacked GeoTIFF file will be generated.}
+\item{year}{numeric: the year of when the covariates are stacked.}
+
\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the final map.}
\item{cov.tif.file.list}{list: a list contains sub-lists with each including path to the corresponding map and the variables to be extracted (e.g., list(LC = list(dir = "path/to/landcover.tiff", var.name = "LC")).}
From 0a030c84543f938c4512e8c0b3da6a14ad7da582 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 21 Feb 2025 10:06:21 -0500
Subject: [PATCH 0055/1193] Update change log.
---
CHANGELOG.md | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6547b2df78c..e55afe04100 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -59,7 +59,8 @@ For more information about this file see also [Keep a Changelog](http://keepacha
- Added GEDI AGB preparation workflow.
- Added new feature of downloading datasets from the NASA DAAC ORNL database.
- Extended downscale function and created 'downscale_hrly' so that it handles more frequent data
-- Added 'aggregate' as a new feature for downscaled data
+- Added 'aggregate' as a new feature for downscaled data.
+- Added downscale functions and scripts that apply to the North America SDA run.
### Fixed
From 623b4c149aca37e7fc88085119f69c3c0e4c566e Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 21 Feb 2025 10:07:44 -0500
Subject: [PATCH 0056/1193] Remove commented lines.
---
modules/assim.sequential/R/SDA_NA_downscale.R | 20 -------------------
1 file changed, 20 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R
index 7233c0bf8f5..446d970fc5b 100644
--- a/modules/assim.sequential/R/SDA_NA_downscale.R
+++ b/modules/assim.sequential/R/SDA_NA_downscale.R
@@ -84,26 +84,6 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
return(file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff")))
}
-
-
-# assemble covariates from different spatial scales/resolutions and crs.
-# Here is an example of the `cov.tif.file.list` object:
-# cov.tif.file.list <- list(LC = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif",
-# var.name = "LC"),
-# year_since_disturb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_LC/outputs/age.tif",
-# var.name = "year_since_disturb"),
-# agb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/AGB/agb.tif",
-# var.name = "agb"),
-# twi = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/TWI/TWI_resample.tiff",
-# var.name = "twi"),
-# met = list(dir = paths[i],
-# var.name = c("temp", "prec", "srad", "vapr")),
-# soil = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/SoilGrids.tif",
-# var.name = c("PH", "N", "SOC", "Sand")))
-# This function helps to stack target data layers from various GeoTIFF maps to a single map
-# cropped and projected to the `base.map`. It also enables the normalization feature to facilitate the ML process.
-
-
#' @description
#' This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map.
#' @title stack.covariates.2.geotiff
From 2b7a0b6079fd058ee60adc021de08b089917e352 Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Fri, 7 Mar 2025 14:13:28 -0800
Subject: [PATCH 0057/1193] Update CHANGELOG.md
Co-authored-by: Istem Fer
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 07555797a0f..e2456536ba8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,7 +22,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
* Modules `PEcAn.allometry`, `PEcAn.assim.batch`, `PEcAn.data.mining`, `PEcAn.emulator`, `PEcAn.MA`, `PEcAn.photosynthesis`, `PEcAn.priors`, and `PEcAn.RTM`.
- Renamed master branch to main
- `PEcAn.all::pecan_version()` now reports commit hashes as well as version numbers for each installed package.
-- `write.conmfig.STICS()` now modifies parameters with vectors rather than individually.
+- `write.config.STICS()` now modifies parameters with vectors rather than individually.
### Removed
From c2a29f9519ae0168b29891d222a738cf41a2b2b2 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Tue, 18 Mar 2025 23:07:17 +0530
Subject: [PATCH 0058/1193] Update test.check.missing.files.R
---
base/db/tests/testthat/test.check.missing.files.R | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R
index 75a531283dd..2dd4d2a9156 100644
--- a/base/db/tests/testthat/test.check.missing.files.R
+++ b/base/db/tests/testthat/test.check.missing.files.R
@@ -1,7 +1,7 @@
test_that("`check_missing_files()` able to return correct missing files", {
- # Mock `purrr::map_dfr`
- mocked_size <- mockery::mock(100,200)
- mockery::stub(check_missing_files, "file.size", mocked_res)
+ # Mock `file.size`
+ mocked_size <- mockery::mock(100, 200)
+ mockery::stub(check_missing_files, "file.size", mocked_size)
res <- check_missing_files(
result = list(data.frame(file = c("A", "B"))),
@@ -9,8 +9,13 @@ test_that("`check_missing_files()` able to return correct missing files", {
existing.dbfile = data.frame()
)
-
+ # Check that result has expected structure
expect_equal(length(res), 2)
expect_true(is.list(res[[1]]))
expect_true(is.list(res[[2]]))
+
+ # Verify mock was called correctly
+ mockery::expect_called(mocked_size, 2)
+ expect_equal(mockery::mock_args(mocked_size)[[1]], list("A"))
+ expect_equal(mockery::mock_args(mocked_size)[[2]], list("B"))
})
From 62e8a59a1fde391689c914763ea74970824ef313 Mon Sep 17 00:00:00 2001
From: kutumia
Date: Sun, 23 Mar 2025 03:56:17 +0000
Subject: [PATCH 0059/1193] Fix 404 documentation links and update references
in README and Rmd (#3269)
---
modules/data.atmosphere/README.md | 6 ++++--
modules/data.atmosphere/man/download.FluxnetLaThuile.Rd | 4 +++-
modules/data.atmosphere/vignettes/ameriflux_demo.Rmd | 6 ++++--
3 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/modules/data.atmosphere/README.md b/modules/data.atmosphere/README.md
index cb59cf9df5c..fa4e4d14b7e 100644
--- a/modules/data.atmosphere/README.md
+++ b/modules/data.atmosphere/README.md
@@ -26,13 +26,15 @@ The PEcAn.data.atmosphere package is 'standalone'.
## Documentation
-* [Package Documentation](https://pecanproject.github.io/pecan//modules/data.atmosphere/inst/web/index.html)
+## * [Package Documentation](https://pecanproject.github.io/pecan//modules/data.atmosphere/inst/web/index.html)
* Vignettes
+
## PEcAn variable names
-For the most updated list, see https://pecanproject.github.io/pecan-documentation/latest/time-standard.html#input-standards
+## For the most updated list, see https://pecanproject.github.io/pecan-documentation/latest/time-standard.html#input-standards
+
General Note: dates in the database should be datatime (preferably with timezone), and datetime passed around in PEcAn should be of type POSIXlt.
diff --git a/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd b/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd
index 218365ee2e7..0165e565f14 100644
--- a/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd
+++ b/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd
@@ -17,7 +17,9 @@ download.FluxnetLaThuile(
}
\arguments{
\item{sitename}{the FLUXNET ID of the site to be downloaded, used as file name prefix.
-The 'SITE_ID' field in \href{http://www.fluxdata.org/DataInfo/Dataset\%20Doc\%20Lib/SynthDataSummary.aspx}{list of Fluxnet LaThuile sites}}
+% The 'SITE_ID' field in \href{http://www.fluxdata.org/DataInfo/Dataset\%20Doc\%20Lib/SynthDataSummary.aspx}{list of Fluxnet LaThuile sites}}
+% Link deprecated – was pointing to Fluxnet LaThuile dataset
+
\item{outfolder}{location on disk where outputs will be stored}
diff --git a/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd b/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd
index 344e1c27cfb..1f57a880f34 100644
--- a/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd
+++ b/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd
@@ -11,9 +11,11 @@ vignette: >
# Overview
-This is a demonstration of the PEcAn utilities for downloading met data, converting it to the PEcAn-CF format (which is based on the Climate Forecasting conventions and similar to MsTMIP). These variables are defined in the [PEcAn documentation](https://pecanproject.github.io/pecan-documentation/latest/met-data.html).
+This is a demonstration of the PEcAn utilities for downloading met data, converting it to the PEcAn-CF format (which is based on the Climate Forecasting conventions and similar to MsTMIP). These variables are described in the [PEcAn met data documentation](https://pecanproject.github.io/pecan-documentation/) (link previously pointed to a broken page).
+
+We’ll download 12 years of met data from the [Bondville Ameriflux site](https://ameriflux.lbl.gov/sites/siteinfo/US-Bo1), which has a `SITE_ID` of `US-Bo1`.
+
-In this example we will download 12 years of met data from the [Bondville Ameriflux site](http://ameriflux.ornl.gov/fullsiteinfo.php?sid=44). It has an Ameriflux `SITE_ID` of `US-Bo1`
The PEcAn.data.atmosphere source code is in [`modules/data.atmosphere`](https://github.com/PecanProject/pecan/tree/main/modules/data.atmosphere) and the documentation can be found with either `package?PEcAn.data.atmosphere` or in the [data.atmosphere package documentation](https://pecanproject.github.io/pecan//modules/data.atmosphere/inst/web/index.html).
From fa245526d27fac812d3b2dd2af1a51c4fa30ad29 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Mon, 24 Mar 2025 14:38:58 -0400
Subject: [PATCH 0060/1193] Rename the function.
---
modules/assim.sequential/NAMESPACE | 2 +-
modules/assim.sequential/R/SDA_NA_downscale.R | 6 +++---
modules/assim.sequential/man/stack.covariates.2.df.Rd | 2 +-
...ariates.2.geotiff.Rd => stack_covariates_2_geotiff.Rd} | 8 ++++----
4 files changed, 9 insertions(+), 9 deletions(-)
rename modules/assim.sequential/man/{stack.covariates.2.geotiff.Rd => stack_covariates_2_geotiff.Rd} (89%)
diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 700f2cc6c16..0920b2aacb6 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand
-S3method(stack,covariates.2.geotiff)
export(Analysis.sda)
export(Average.ERA5.2.GeoTIFF)
export(Construct.H.multisite)
@@ -58,6 +57,7 @@ export(sda.enkf.multisite)
export(sda.enkf.original)
export(sda_weights_site)
export(simple.local)
+export(stack_covariates_2_geotiff)
export(tobit.model)
export(tobit2space.model)
export(tobit_model_censored)
diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R
index 446d970fc5b..e0f25975565 100644
--- a/modules/assim.sequential/R/SDA_NA_downscale.R
+++ b/modules/assim.sequential/R/SDA_NA_downscale.R
@@ -86,7 +86,7 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
#' @description
#' This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map.
-#' @title stack.covariates.2.geotiff
+#' @title stack_covariates_2_geotiff
#'
#' @param outdir character: the output directory where the stacked GeoTIFF file will be generated.
#' @param year numeric: the year of when the covariates are stacked.
@@ -100,7 +100,7 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
#' @export
#'
#' @author Dongchen Zhang
-stack.covariates.2.geotiff <- function(outdir, year, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) {
+stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) {
# create the folder if it doesn't exist.
if (!file.exists(outdir)) {
dir.create(outdir)
@@ -186,7 +186,7 @@ pecan.settings.2.pts <- function(settings) {
#' Note that the `LC` field using the `MODIS land cover` observations (MCD12Q1.061) must be supplied in the covariates to make this function work.
#' @title stack.covariates.2.df
#'
-#' @param rast.dir character: a character that points to the covariates raster file generated by the `stack.covariates.2.geotiff` function.
+#' @param rast.dir character: a character that points to the covariates raster file generated by the `stack_covariates_2_geotiff` function.
#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
#'
#' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels.
diff --git a/modules/assim.sequential/man/stack.covariates.2.df.Rd b/modules/assim.sequential/man/stack.covariates.2.df.Rd
index 64b9b2f7ffa..b91a11cbb56 100644
--- a/modules/assim.sequential/man/stack.covariates.2.df.Rd
+++ b/modules/assim.sequential/man/stack.covariates.2.df.Rd
@@ -7,7 +7,7 @@
\method{stack}{covariates.2.df}(rast.dir, cores = parallel::detectCores())
}
\arguments{
-\item{rast.dir}{character: a character that points to the covariates raster file generated by the `stack.covariates.2.geotiff` function.}
+\item{rast.dir}{character: a character that points to the covariates raster file generated by the `stack_covariates_2_geotiff` function.}
\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
}
diff --git a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd b/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd
similarity index 89%
rename from modules/assim.sequential/man/stack.covariates.2.geotiff.Rd
rename to modules/assim.sequential/man/stack_covariates_2_geotiff.Rd
index 02f3da9dc97..d83411ec708 100644
--- a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd
+++ b/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{stack.covariates.2.geotiff}
-\alias{stack.covariates.2.geotiff}
-\title{stack.covariates.2.geotiff}
+\name{stack_covariates_2_geotiff}
+\alias{stack_covariates_2_geotiff}
+\title{stack_covariates_2_geotiff}
\usage{
-\method{stack}{covariates.2.geotiff}(
+stack_covariates_2_geotiff(
outdir,
year,
base.map.dir,
From 0b3bbbafd11518bd5d49b09772f9350f1d4e8ab7 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Mon, 24 Mar 2025 14:40:03 -0400
Subject: [PATCH 0061/1193] Remove the redundant for loop.
---
modules/assim.sequential/R/SDA_NA_downscale.R | 41 +++++++++----------
1 file changed, 19 insertions(+), 22 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R
index e0f25975565..ebfddd4bb36 100644
--- a/modules/assim.sequential/R/SDA_NA_downscale.R
+++ b/modules/assim.sequential/R/SDA_NA_downscale.R
@@ -29,28 +29,25 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
} else {
end <- as.Date(paste0(lubridate::year(dates[i]), "-12-31"))
}
- # loop over years.
- for (j in seq_along(dates)) {
- # open ERA5 nc file.
- met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", lubridate::year(dates[i]), ".nc")))
- # find index for the date.
- times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC")
- time.inds <- which(lubridate::date(times) >= start & lubridate::date(times) <= end)
- # extract temperature.
- PEcAn.logger::logger.info("entering temperature.")
- temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3)
- # extract precipitation.
- PEcAn.logger::logger.info("entering precipitation.")
- precip.all <- abind::abind(precip.all, apply(ncdf4::ncvar_get(met.nc, "tp")[,,,time.inds], c(1,2,4), mean), along = 3)
- # extract shortwave solar radiation.
- PEcAn.logger::logger.info("entering solar radiation.")
- srd.all <- abind::abind(srd.all, apply(ncdf4::ncvar_get(met.nc, "ssrd")[,,,time.inds], c(1,2,4), mean), along = 3)
- # extract dewpoint.
- PEcAn.logger::logger.info("entering dewpoint.")
- dewpoint.all <- abind::abind(dewpoint.all, apply(ncdf4::ncvar_get(met.nc, "d2m")[,,,time.inds], c(1,2,4), mean), along = 3)
- # close the NC connection.
- ncdf4::nc_close(met.nc)
- }
+ # open ERA5 nc file.
+ met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", lubridate::year(dates[i]), ".nc")))
+ # find index for the date.
+ times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC")
+ time.inds <- which(lubridate::date(times) >= start & lubridate::date(times) <= end)
+ # extract temperature.
+ PEcAn.logger::logger.info("entering temperature.")
+ temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract precipitation.
+ PEcAn.logger::logger.info("entering precipitation.")
+ precip.all <- abind::abind(precip.all, apply(ncdf4::ncvar_get(met.nc, "tp")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract shortwave solar radiation.
+ PEcAn.logger::logger.info("entering solar radiation.")
+ srd.all <- abind::abind(srd.all, apply(ncdf4::ncvar_get(met.nc, "ssrd")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract dewpoint.
+ PEcAn.logger::logger.info("entering dewpoint.")
+ dewpoint.all <- abind::abind(dewpoint.all, apply(ncdf4::ncvar_get(met.nc, "d2m")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # close the NC connection.
+ ncdf4::nc_close(met.nc)
}
# aggregate across time.
# temperature.
From 3831129365b012e5a96e474939df95c75bd36895 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Mon, 24 Mar 2025 16:36:09 -0400
Subject: [PATCH 0062/1193] Rename functions.
---
modules/assim.sequential/NAMESPACE | 4 +-
modules/assim.sequential/R/SDA_NA_downscale.R | 81 +++++++++----------
.../inst/anchor/NA_downscale_script.R | 7 +-
...2.GeoTIFF.Rd => Average_ERA5_2_GeoTIFF.Rd} | 8 +-
...le.qsub.main.Rd => downscale_qsub_main.Rd} | 10 +--
...nscale.rf.main.Rd => downscale_rf_main.Rd} | 8 +-
...l.prediction.Rd => parallel_prediction.Rd} | 14 ++--
...allel.rf.train.Rd => parallel_rf_train.Rd} | 10 +--
...tings.2.pts.Rd => pecan_settings_2_pts.Rd} | 8 +-
...pare.train.dat.Rd => prepare_train_dat.Rd} | 10 +--
...iates.2.df.Rd => stack_covariates_2_df.Rd} | 8 +-
11 files changed, 80 insertions(+), 88 deletions(-)
rename modules/assim.sequential/man/{Average.ERA5.2.GeoTIFF.Rd => Average_ERA5_2_GeoTIFF.Rd} (82%)
rename modules/assim.sequential/man/{downscale.qsub.main.Rd => downscale_qsub_main.Rd} (66%)
rename modules/assim.sequential/man/{downscale.rf.main.Rd => downscale_rf_main.Rd} (93%)
rename modules/assim.sequential/man/{parallel.prediction.Rd => parallel_prediction.Rd} (80%)
rename modules/assim.sequential/man/{parallel.rf.train.Rd => parallel_rf_train.Rd} (74%)
rename modules/assim.sequential/man/{pecan.settings.2.pts.Rd => pecan_settings_2_pts.Rd} (76%)
rename modules/assim.sequential/man/{prepare.train.dat.Rd => prepare_train_dat.Rd} (77%)
rename modules/assim.sequential/man/{stack.covariates.2.df.Rd => stack_covariates_2_df.Rd} (83%)
diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 0920b2aacb6..90512107500 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,7 +1,7 @@
# Generated by roxygen2: do not edit by hand
export(Analysis.sda)
-export(Average.ERA5.2.GeoTIFF)
+export(Average_ERA5_2_GeoTIFF)
export(Construct.H.multisite)
export(Construct.R)
export(Construct_H)
@@ -30,7 +30,7 @@ export(assessParams)
export(block_matrix)
export(conj_wt_wishart_sampler)
export(construct_nimble_H)
-export(downscale.qsub.main)
+export(downscale_qsub_main)
export(dwtmnorm)
export(get_ensemble_weights)
export(hop_test)
diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R
index ebfddd4bb36..c1db7489258 100644
--- a/modules/assim.sequential/R/SDA_NA_downscale.R
+++ b/modules/assim.sequential/R/SDA_NA_downscale.R
@@ -1,6 +1,6 @@
#' @description
#' This function helps to average the ERA5 data based on the start and end dates, and convert it to the GeoTIFF file.
-#' @title Average.ERA5.2.GeoTIFF
+#' @title Average_ERA5_2_GeoTIFF
#'
#' @param start.date character: start point of when to average the data (e.g., 2012-01-01).
#' @param end.date character: end point of when to average the data (e.g., 2021-12-31).
@@ -11,29 +11,20 @@
#'
#' @export
#' @author Dongchen Zhang
-Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
- # open ERA5 nc file as geotiff format for referencing crs and ext.
- ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", lubridate::year(start.date), ".nc")))
- dates <- seq(start.date, end.date, "1 year")
+Average_ERA5_2_GeoTIFF <- function (start.date, end.date, in.path, outdir) {
+ # create dates.
+ years <- sort(unique(lubridate::year(start.date):lubridate::year(end.date)))
# initialize final outcomes.
temp.all <- precip.all <- srd.all <- dewpoint.all <- c()
- for (i in seq_along(dates)) {
- # initialize start and end dates for the current period
- if (i == 1) {
- start <- start.date
- } else {
- start <- as.Date(paste0(lubridate::year(dates[i]), "-01-01"))
- }
- if (i == length(dates)) {
- end <- end.date
- } else {
- end <- as.Date(paste0(lubridate::year(dates[i]), "-12-31"))
- }
+ # loop over years.
+ for (i in seq_along(years)) {
+ # open ERA5 nc file as geotiff format for referencing crs and ext.
+ ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", years[i], ".nc")))
# open ERA5 nc file.
- met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", lubridate::year(dates[i]), ".nc")))
+ met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", years[i], ".nc")))
# find index for the date.
times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC")
- time.inds <- which(lubridate::date(times) >= start & lubridate::date(times) <= end)
+ time.inds <- which(lubridate::date(times) >= start.date & lubridate::date(times) <= end.date)
# extract temperature.
PEcAn.logger::logger.info("entering temperature.")
temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3)
@@ -157,14 +148,14 @@ stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file.
#' @description
#' convert settings to geospatial points in terra.
-#' @title pecan.settings.2.pts
+#' @title pecan_settings_2_pts
#'
#' @param settings PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.
#'
#' @return terra spatial points object.
#'
#' @author Dongchen Zhang
-pecan.settings.2.pts <- function(settings) {
+pecan_settings_2_pts <- function(settings) {
if (is.character(settings)) {
# read settings.
settings <- PEcAn.settings::read.settings(settings)
@@ -181,7 +172,7 @@ pecan.settings.2.pts <- function(settings) {
#' @description
#' This function helps to build the data frame (pixels by data columns) for only vegetated pixels to improve the efficiency.
#' Note that the `LC` field using the `MODIS land cover` observations (MCD12Q1.061) must be supplied in the covariates to make this function work.
-#' @title stack.covariates.2.df
+#' @title stack_covariates_2_df
#'
#' @param rast.dir character: a character that points to the covariates raster file generated by the `stack_covariates_2_geotiff` function.
#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
@@ -189,7 +180,7 @@ pecan.settings.2.pts <- function(settings) {
#' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels.
#'
#' @author Dongchen Zhang
-stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) {
+stack_covariates_2_df <- function(rast.dir, cores = parallel::detectCores()) {
# load maps.
all.rast <- terra::rast(rast.dir)
# parallel loop.
@@ -235,8 +226,8 @@ stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) {
#' @description
#' This function helps to create the training dataset of specific variable type and locations for downscaling.
-#' TODO: There will be a ratio argument between training and testing samples to testify the ML regression accuracy.
-#' @title prepare.train.dat
+#' TODO: Add a ratio argument (training sample size/total sample size) so that we could calculate the out-of-sample accuracy.
+#' @title prepare_train_dat
#'
#' @param settings character: physical path that points to the pecan settings XML file.
#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
@@ -246,9 +237,9 @@ stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) {
#' @return matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates.
#'
#' @author Dongchen Zhang
-prepare.train.dat <- function(settings, analysis, covariates.dir, variable) {
+prepare_train_dat <- function(settings, analysis, covariates.dir, variable) {
# convert settings into geospatial points.
- pts <- pecan.settings.2.pts(settings)
+ pts <- pecan_settings_2_pts(settings)
# read covariates.
cov.rast <- terra::rast(covariates.dir)
# extract covariates by locations.
@@ -269,15 +260,15 @@ prepare.train.dat <- function(settings, analysis, covariates.dir, variable) {
#' @description
#' This function helps to train the ML model across ensemble members in parallel.
-#' @title parallel.rf.train
+#' @title parallel_rf_train
#'
-#' @param full_data numeric: the matrix generated using the `prepare.train.dat` function.
+#' @param full_data numeric: the matrix generated using the `prepare_train_dat` function.
#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
#'
#' @return list of trained models across ensemble members.
#'
#' @author Dongchen Zhang
-parallel.rf.train <- function(full_data, cores = parallel::detectCores()) {
+parallel_rf_train <- function(full_data, cores = parallel::detectCores()) {
# grab ensemble and predictor index.
col.names <- colnames(full_data)
ensemble.inds <- which(grepl("ensemble", col.names, fixed = TRUE))
@@ -316,12 +307,12 @@ parallel.rf.train <- function(full_data, cores = parallel::detectCores()) {
#' @description
#' This function helps to predict the target variable observations based on the covariates.
#' The prediction is working in parallel across vegetated pixels.
-#' @title parallel.prediction
+#' @title parallel_prediction
#'
#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
-#' @param models list: trained models across ensemble members generated by the `parallel.rf.train` function.
-#' @param cov.vecs: numeric: data frame containing covaraites across vegetated pixels generated from the `stack.covariates.2.df` function.
-#' @param non.na.inds numeric: the corresponding index of vegetated pixels generated from the `stack.covariates.2.df` function.
+#' @param models list: trained models across ensemble members generated by the `parallel_rf_train` function.
+#' @param cov.vecs: numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.
+#' @param non.na.inds numeric: the corresponding index of vegetated pixels generated from the `stack_covariates_2_df` function.
#' @param outdir character: the output directory where the downscaled maps will be stored.
#' @param name list: containing the time and variable name to create the final GeoTIFF file name.
#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
@@ -329,7 +320,7 @@ parallel.rf.train <- function(full_data, cores = parallel::detectCores()) {
#' @return paths to the ensemble downscaled maps.
#'
#' @author Dongchen Zhang
-parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) {
+parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) {
# load base map.
base.map <- terra::rast(base.map.dir)
dims <- dim(base.map)
@@ -373,7 +364,7 @@ parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
#' @description
#' This is the main function to execute the RF training and prediction.
#' Note it will be deployed by each node you requested if the qsub feature is enabled below.
-#' @title downscale.rf.main
+#' @title downscale_rf_main
#'
#' @param settings character: physical path that points to the pecan settings XML file.
#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
@@ -387,7 +378,7 @@ parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
#' @return paths to the ensemble downscaled maps.
#'
#' @author Dongchen Zhang
-downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) {
+downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) {
# create folder specific for the time and carbon type.
folder.name <- file.path(outdir, paste0(c(variable, time), collapse = "_"))
if (!file.exists(folder.name)) {
@@ -395,7 +386,7 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable
}
# prepare training data.
PEcAn.logger::logger.info("Preparing training data.")
- full_data <- prepare.train.dat(settings = settings,
+ full_data <- prepare_train_dat(settings = settings,
analysis = analysis,
covariates.dir = covariates.dir,
variable = variable)
@@ -405,13 +396,13 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable
}
# parallel train.
PEcAn.logger::logger.info("Parallel training.")
- models <- parallel.rf.train(full_data = full_data, cores = cores)
+ models <- parallel_rf_train(full_data = full_data, cores = cores)
# save trained models for future analysis.
# saveRDS(models, file.path(folder.name, "rf_models.rds"))
save(models, file = file.path(folder.name, "rf_models.Rdata"))
# convert stacked covariates geotiff file into data frame.
PEcAn.logger::logger.info("Converting geotiff to df.")
- cov.df <- stack.covariates.2.df(rast.dir = covariates.dir, cores = cores)
+ cov.df <- stack_covariates_2_df(rast.dir = covariates.dir, cores = cores)
# reconstruct LC because of the computation accuracy.
cov.df$df$LC[which(cov.df$df$LC < 1)] <- 0
# convert LC into factor.
@@ -420,7 +411,7 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable
}
# parallel prediction.
PEcAn.logger::logger.info("Parallel prediction.")
- paths <- parallel.prediction(base.map.dir = base.map.dir,
+ paths <- parallel_prediction(base.map.dir = base.map.dir,
models = models,
cov.vecs = cov.df$df,
non.na.inds = cov.df$non.na.inds,
@@ -442,15 +433,15 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable
}
#' @description
-#' This qsub function helps to run the submitted qsub jobs for running the downscale.rf.main function.
-#' @title downscale.qsub.main
+#' This qsub function helps to run the submitted qsub jobs for running the downscale_rf_main function.
+#' @title downscale_qsub_main
#'
#' @param folder.path Character: physical path to which the job file is located.
#'
#' @export
#' @author Dongchen Zhang
-downscale.qsub.main <- function(folder.path) {
+downscale_qsub_main <- function(folder.path) {
dat <- readRDS(file.path(folder.path, "dat.rds"))
- out <- downscale.rf.main(dat$settings, dat$analysis.yr, dat$covariates.dir, lubridate::year(dat$time), dat$variable, dat$outdir, dat$base.map.dir, dat$cores)
+ out <- downscale_rf_main(dat$settings, dat$analysis.yr, dat$covariates.dir, lubridate::year(dat$time), dat$variable, dat$outdir, dat$base.map.dir, dat$cores)
saveRDS(out, file.path(folder.path, "res.rds"))
}
\ No newline at end of file
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 83613b1ff1b..2530e068ecc 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -5,9 +5,10 @@ setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/")
# average ERA5 to climatic covariates.
outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET"
in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/"
-dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year"))
-start.dates <- dates[1:10]
-end.dates <- dates[2:11]
+# dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year"))
+# dates <- seq(as.Date("2012-01-01"), as.Date("2024-12-31"), "1 year")
+start.dates <- seq(as.Date("2012-01-01"), as.Date("2024-01-01"), "1 year")
+end.dates <- seq(as.Date("2012-12-31"), as.Date("2024-12-31"), "1 year")
# parallel average ERA5 into covariates.
future::plan(future::multisession, workers = 5)
paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){
diff --git a/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd b/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd
similarity index 82%
rename from modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd
rename to modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd
index 0c4a561bde9..5443add6937 100644
--- a/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd
+++ b/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{Average.ERA5.2.GeoTIFF}
-\alias{Average.ERA5.2.GeoTIFF}
-\title{Average.ERA5.2.GeoTIFF}
+\name{Average_ERA5_2_GeoTIFF}
+\alias{Average_ERA5_2_GeoTIFF}
+\title{Average_ERA5_2_GeoTIFF}
\usage{
-Average.ERA5.2.GeoTIFF(start.date, end.date, in.path, outdir)
+Average_ERA5_2_GeoTIFF(start.date, end.date, in.path, outdir)
}
\arguments{
\item{start.date}{character: start point of when to average the data (e.g., 2012-01-01).}
diff --git a/modules/assim.sequential/man/downscale.qsub.main.Rd b/modules/assim.sequential/man/downscale_qsub_main.Rd
similarity index 66%
rename from modules/assim.sequential/man/downscale.qsub.main.Rd
rename to modules/assim.sequential/man/downscale_qsub_main.Rd
index 50de6e2d945..dbf35a30c41 100644
--- a/modules/assim.sequential/man/downscale.qsub.main.Rd
+++ b/modules/assim.sequential/man/downscale_qsub_main.Rd
@@ -1,16 +1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{downscale.qsub.main}
-\alias{downscale.qsub.main}
-\title{downscale.qsub.main}
+\name{downscale_qsub_main}
+\alias{downscale_qsub_main}
+\title{downscale_qsub_main}
\usage{
-downscale.qsub.main(folder.path)
+downscale_qsub_main(folder.path)
}
\arguments{
\item{folder.path}{Character: physical path to which the job file is located.}
}
\description{
-This qsub function helps to run the submitted qsub jobs for running the downscale.rf.main function.
+This qsub function helps to run the submitted qsub jobs for running the downscale_rf_main function.
}
\author{
Dongchen Zhang
diff --git a/modules/assim.sequential/man/downscale.rf.main.Rd b/modules/assim.sequential/man/downscale_rf_main.Rd
similarity index 93%
rename from modules/assim.sequential/man/downscale.rf.main.Rd
rename to modules/assim.sequential/man/downscale_rf_main.Rd
index 75ec6612760..05b7568a934 100644
--- a/modules/assim.sequential/man/downscale.rf.main.Rd
+++ b/modules/assim.sequential/man/downscale_rf_main.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{downscale.rf.main}
-\alias{downscale.rf.main}
-\title{downscale.rf.main}
+\name{downscale_rf_main}
+\alias{downscale_rf_main}
+\title{downscale_rf_main}
\usage{
-downscale.rf.main(
+downscale_rf_main(
settings,
analysis,
covariates.dir,
diff --git a/modules/assim.sequential/man/parallel.prediction.Rd b/modules/assim.sequential/man/parallel_prediction.Rd
similarity index 80%
rename from modules/assim.sequential/man/parallel.prediction.Rd
rename to modules/assim.sequential/man/parallel_prediction.Rd
index fdb04edb8b4..3fceff412a2 100644
--- a/modules/assim.sequential/man/parallel.prediction.Rd
+++ b/modules/assim.sequential/man/parallel_prediction.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{parallel.prediction}
-\alias{parallel.prediction}
-\title{parallel.prediction}
+\name{parallel_prediction}
+\alias{parallel_prediction}
+\title{parallel_prediction}
\usage{
-parallel.prediction(
+parallel_prediction(
base.map.dir,
models,
cov.vecs,
@@ -17,9 +17,9 @@ parallel.prediction(
\arguments{
\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.}
-\item{models}{list: trained models across ensemble members generated by the `parallel.rf.train` function.}
+\item{models}{list: trained models across ensemble members generated by the `parallel_rf_train` function.}
-\item{non.na.inds}{numeric: the corresponding index of vegetated pixels generated from the `stack.covariates.2.df` function.}
+\item{non.na.inds}{numeric: the corresponding index of vegetated pixels generated from the `stack_covariates_2_df` function.}
\item{outdir}{character: the output directory where the downscaled maps will be stored.}
@@ -27,7 +27,7 @@ parallel.prediction(
\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
-\item{cov.vecs:}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack.covariates.2.df` function.}
+\item{cov.vecs:}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.}
}
\value{
paths to the ensemble downscaled maps.
diff --git a/modules/assim.sequential/man/parallel.rf.train.Rd b/modules/assim.sequential/man/parallel_rf_train.Rd
similarity index 74%
rename from modules/assim.sequential/man/parallel.rf.train.Rd
rename to modules/assim.sequential/man/parallel_rf_train.Rd
index 4b61a544126..1d8863f582a 100644
--- a/modules/assim.sequential/man/parallel.rf.train.Rd
+++ b/modules/assim.sequential/man/parallel_rf_train.Rd
@@ -1,13 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{parallel.rf.train}
-\alias{parallel.rf.train}
-\title{parallel.rf.train}
+\name{parallel_rf_train}
+\alias{parallel_rf_train}
+\title{parallel_rf_train}
\usage{
-parallel.rf.train(full_data, cores = parallel::detectCores())
+parallel_rf_train(full_data, cores = parallel::detectCores())
}
\arguments{
-\item{full_data}{numeric: the matrix generated using the `prepare.train.dat` function.}
+\item{full_data}{numeric: the matrix generated using the `prepare_train_dat` function.}
\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
}
diff --git a/modules/assim.sequential/man/pecan.settings.2.pts.Rd b/modules/assim.sequential/man/pecan_settings_2_pts.Rd
similarity index 76%
rename from modules/assim.sequential/man/pecan.settings.2.pts.Rd
rename to modules/assim.sequential/man/pecan_settings_2_pts.Rd
index 91828b1077e..e676382d67f 100644
--- a/modules/assim.sequential/man/pecan.settings.2.pts.Rd
+++ b/modules/assim.sequential/man/pecan_settings_2_pts.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{pecan.settings.2.pts}
-\alias{pecan.settings.2.pts}
-\title{pecan.settings.2.pts}
+\name{pecan_settings_2_pts}
+\alias{pecan_settings_2_pts}
+\title{pecan_settings_2_pts}
\usage{
-pecan.settings.2.pts(settings)
+pecan_settings_2_pts(settings)
}
\arguments{
\item{settings}{PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.}
diff --git a/modules/assim.sequential/man/prepare.train.dat.Rd b/modules/assim.sequential/man/prepare_train_dat.Rd
similarity index 77%
rename from modules/assim.sequential/man/prepare.train.dat.Rd
rename to modules/assim.sequential/man/prepare_train_dat.Rd
index ec9122dc56d..381a9eacd6f 100644
--- a/modules/assim.sequential/man/prepare.train.dat.Rd
+++ b/modules/assim.sequential/man/prepare_train_dat.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{prepare.train.dat}
-\alias{prepare.train.dat}
-\title{prepare.train.dat}
+\name{prepare_train_dat}
+\alias{prepare_train_dat}
+\title{prepare_train_dat}
\usage{
-prepare.train.dat(settings, analysis, covariates.dir, variable)
+prepare_train_dat(settings, analysis, covariates.dir, variable)
}
\arguments{
\item{settings}{character: physical path that points to the pecan settings XML file.}
@@ -20,7 +20,7 @@ matrix within which the first sets of columns contain values of state variables
}
\description{
This function helps to create the training dataset of specific variable type and locations for downscaling.
-TODO: There will be a ratio argument between training and testing samples to testify the ML regression accuracy.
+TODO: Add a ratio argument (training sample size/total sample size) so that we could calculate the out-of-sample accuracy.
}
\author{
Dongchen Zhang
diff --git a/modules/assim.sequential/man/stack.covariates.2.df.Rd b/modules/assim.sequential/man/stack_covariates_2_df.Rd
similarity index 83%
rename from modules/assim.sequential/man/stack.covariates.2.df.Rd
rename to modules/assim.sequential/man/stack_covariates_2_df.Rd
index b91a11cbb56..ed69f768faf 100644
--- a/modules/assim.sequential/man/stack.covariates.2.df.Rd
+++ b/modules/assim.sequential/man/stack_covariates_2_df.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_NA_downscale.R
-\name{stack.covariates.2.df}
-\alias{stack.covariates.2.df}
-\title{stack.covariates.2.df}
+\name{stack_covariates_2_df}
+\alias{stack_covariates_2_df}
+\title{stack_covariates_2_df}
\usage{
-\method{stack}{covariates.2.df}(rast.dir, cores = parallel::detectCores())
+stack_covariates_2_df(rast.dir, cores = parallel::detectCores())
}
\arguments{
\item{rast.dir}{character: a character that points to the covariates raster file generated by the `stack_covariates_2_geotiff` function.}
From 94c030bf58aadea0cb9dff6485cbdfa5223488b6 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Mon, 24 Mar 2025 22:47:38 -0400
Subject: [PATCH 0063/1193] Update documentation.
---
modules/assim.sequential/R/SDA_NA_downscale.R | 12 ++++++------
modules/assim.sequential/man/prepare_train_dat.Rd | 6 +++---
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R
index c1db7489258..8e75a562779 100644
--- a/modules/assim.sequential/R/SDA_NA_downscale.R
+++ b/modules/assim.sequential/R/SDA_NA_downscale.R
@@ -229,17 +229,15 @@ stack_covariates_2_df <- function(rast.dir, cores = parallel::detectCores()) {
#' TODO: Add a ratio argument (training sample size/total sample size) so that we could calculate the out-of-sample accuracy.
#' @title prepare_train_dat
#'
-#' @param settings character: physical path that points to the pecan settings XML file.
+#' @param pts spatialpoints: spatial points returned by `terra::vectors` function.
#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
#' @param covariates.dir character: path to the exported covariates GeoTIFF file.
#' @param variable character: name of state variable. It should match up with the column names of the analysis data frame.
#'
-#' @return matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates.
+#' @return matrix (num.sites, num.variables * num.ensemble + num.covariates) within which the first sets of columns contain values of state variables for each ensemble member of every site, and the rest columns contain the corresponding covariates.
#'
#' @author Dongchen Zhang
-prepare_train_dat <- function(settings, analysis, covariates.dir, variable) {
- # convert settings into geospatial points.
- pts <- pecan_settings_2_pts(settings)
+prepare_train_dat <- function(pts, analysis, covariates.dir, variable) {
# read covariates.
cov.rast <- terra::rast(covariates.dir)
# extract covariates by locations.
@@ -386,7 +384,9 @@ downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable
}
# prepare training data.
PEcAn.logger::logger.info("Preparing training data.")
- full_data <- prepare_train_dat(settings = settings,
+ # convert settings into geospatial points.
+ pts <- pecan_settings_2_pts(settings)
+ full_data <- prepare_train_dat(pts = pts,
analysis = analysis,
covariates.dir = covariates.dir,
variable = variable)
diff --git a/modules/assim.sequential/man/prepare_train_dat.Rd b/modules/assim.sequential/man/prepare_train_dat.Rd
index 381a9eacd6f..1925a5342d1 100644
--- a/modules/assim.sequential/man/prepare_train_dat.Rd
+++ b/modules/assim.sequential/man/prepare_train_dat.Rd
@@ -4,10 +4,10 @@
\alias{prepare_train_dat}
\title{prepare_train_dat}
\usage{
-prepare_train_dat(settings, analysis, covariates.dir, variable)
+prepare_train_dat(pts, analysis, covariates.dir, variable)
}
\arguments{
-\item{settings}{character: physical path that points to the pecan settings XML file.}
+\item{pts}{spatialpoints: spatial points returned by `terra::vectors` function.}
\item{analysis}{numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.}
@@ -16,7 +16,7 @@ prepare_train_dat(settings, analysis, covariates.dir, variable)
\item{variable}{character: name of state variable. It should match up with the column names of the analysis data frame.}
}
\value{
-matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates.
+matrix (num.sites, num.variables * num.ensemble + num.covariates) within which the first sets of columns contain values of state variables for each ensemble member of every site, and the rest columns contain the corresponding covariates.
}
\description{
This function helps to create the training dataset of specific variable type and locations for downscaling.
From 7c288d95808c06c3a2e47561703339cfced41d43 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Mon, 24 Mar 2025 22:48:29 -0400
Subject: [PATCH 0064/1193] Update the script for downscaling.
---
.../inst/anchor/NA_downscale_script.R | 24 ++++++++++++-------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 2530e068ecc..9e43a865eb6 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -5,14 +5,20 @@ setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/")
# average ERA5 to climatic covariates.
outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET"
in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/"
-# dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year"))
-# dates <- seq(as.Date("2012-01-01"), as.Date("2024-12-31"), "1 year")
-start.dates <- seq(as.Date("2012-01-01"), as.Date("2024-01-01"), "1 year")
-end.dates <- seq(as.Date("2012-12-31"), as.Date("2024-12-31"), "1 year")
+start.dates <- c("2012-01-01", "2012-07-16", "2013-07-16",
+ "2014-07-16", "2015-07-16", "2016-07-16",
+ "2017-07-16", "2018-07-16", "2019-07-16",
+ "2020-07-16", "2021-07-16", "2022-07-16",
+ "2023-07-16")
+end.dates <- c("2012-07-15", "2013-07-15", "2014-07-15",
+ "2015-07-15", "2016-07-15", "2017-07-15",
+ "2018-07-15", "2019-07-15", "2020-07-15",
+ "2021-07-15", "2022-07-15", "2023-07-15",
+ "2024-07-15")
# parallel average ERA5 into covariates.
-future::plan(future::multisession, workers = 5)
+future::plan(future::multisession, workers = 5, gc = T)
paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){
- Average.ERA5.2.GeoTIFF(d1, d2, in.path, outdir)
+ Average_ERA5_2_GeoTIFF(d1, d2, in.path, outdir)
}, .progress = T) %>% unlist
# setup.
base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
@@ -21,7 +27,7 @@ variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/pecanIC.xml"
outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/downscale_maps/"
cores <- 28
-date <- seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year")
+date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
# loop over years.
for (i in seq_along(date)) {
# setup covariates paths and variable names.
@@ -41,7 +47,7 @@ for (i in seq_along(date)) {
if (file.exists(paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff"))) {
covariates.dir <- paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff")
} else {
- covariates.dir <- stack.covariates.2.geotiff(outdir = outdir,
+ covariates.dir <- stack_covariates_2_geotiff(outdir = outdir,
year = lubridate::year(date[i]),
base.map.dir = base.map.dir,
cov.tif.file.list = cov.tif.file.list,
@@ -73,7 +79,7 @@ for (i in seq_along(date)) {
"echo \"require (PEcAnAssimSequential)",
" require (foreach)",
" require (purrr)",
- " downscale.qsub.main('@FOLDER_PATH@')",
+ " downscale_qsub_main('@FOLDER_PATH@')",
" \" | R --no-save")
jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh)
writeLines(jobsh, con = file.path(folder.path, "job.sh"))
From 5dac92f2108cc39311e9c6944e3e887ac5d9b3d3 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 26 Mar 2025 15:09:18 -0400
Subject: [PATCH 0065/1193] Rename the file.
---
.../R/{SDA_NA_downscale.R => SDA_parallel_downscale.R} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename modules/assim.sequential/R/{SDA_NA_downscale.R => SDA_parallel_downscale.R} (100%)
diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
similarity index 100%
rename from modules/assim.sequential/R/SDA_NA_downscale.R
rename to modules/assim.sequential/R/SDA_parallel_downscale.R
From 51931a6f89070a3e4d637026be300713853b9b35 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 3 Apr 2025 08:34:25 +0000
Subject: [PATCH 0066/1193] Enhance robustness and error handling in
soilgrids_soilC_extract()
---
.../data.land/R/soilgrids_soc_extraction.R | 88 ++++++++++++++-----
1 file changed, 68 insertions(+), 20 deletions(-)
diff --git a/modules/data.land/R/soilgrids_soc_extraction.R b/modules/data.land/R/soilgrids_soc_extraction.R
index 81a5995fc75..ceab4486a7c 100644
--- a/modules/data.land/R/soilgrids_soc_extraction.R
+++ b/modules/data.land/R/soilgrids_soc_extraction.R
@@ -66,6 +66,18 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) {
# prepare site info for extraction
internal_site_info <- site_info[, c("site_id", "site_name", "lat", "lon")]
+
+ # Early return if no valid sites (after processing internal_site_info)
+ if (nrow(internal_site_info) == 0) {
+ if (verbose) {
+ PEcAn.logger::logger.severe(
+ "No valid sites remaining after NA check. ",
+ "All sites had missing SoilGrids data for the first depth layer."
+ )
+ }
+ return(NULL)
+ }
+
#create a variable to store mean and quantile of organic carbon density (ocd) for each soil depth
ocdquant <- matrix(NA, nrow = 6, ncol = length(internal_site_info$lon) * 4) #row represents soil depth, col represents mean, 5%, 50% and 95%-quantile of ocd for all sites
lonlat <- cbind(internal_site_info$lon, internal_site_info$lat)
@@ -78,17 +90,27 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) {
p <- terra::vect(lonlat, crs = "+proj=longlat +datum=WGS84") # Users need to provide lon/lat
newcrs <- "+proj=igh +datum=WGS84 +no_defs +towgs84=0,0,0"
p_reproj <- terra::project(p, newcrs) # Transform the point vector to data with Homolosine projection
+
+ # Extract coordinates for safe parallel transfer
+ p_coords <- terra::crds(p_reproj)
+
data_tag <- c("_mean.vrt", "_Q0.05.vrt", "_Q0.5.vrt", "_Q0.95.vrt")
name_tag <- expand.grid(depths, data_tag, stringsAsFactors = F)#find the combinations between data and depth tags.
L <- split(as.data.frame(name_tag), seq(nrow(as.data.frame(name_tag))))#convert tags into lists.
get_layer <- function(l) {
ocd_url <- paste0(base_data_url, l[[1]], l[[2]])
- ocd_map <- terra::extract(terra::rast(ocd_url), p_reproj)
- unlist(ocd_map[, -1]) / 10
+ tryCatch({
+ # Create temporary vector inside worker
+ p_temp <- terra::vect(p_coords, crs = newcrs)
+ vals <- terra::extract(terra::rast(ocd_url), p_temp)
+ unlist(vals[, -1]) / 10
+ }, error = function(e) {
+ rep(NA, nrow(p_coords))
+ })
}
- ocd_real <- try(furrr::future_map(L, get_layer, .progress = TRUE))
+ ocd_real <- try(furrr::future_map(L, get_layer, .options = furrr::furrr_options(seed = TRUE), .progress = TRUE))
if ("try-error" %in% class(ocd_real)) {
ocd_real <- vector("list", length = length(L))
pb <- utils::txtProgressBar(min = 0, max = length(L), style = 3)
@@ -116,6 +138,19 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) {
ocd_df$Value<-as.numeric(ocd_df$Value)
f1<-factor(ocd_df$Siteid,levels=unique(ocd_df$Siteid))
f2<-factor(ocd_df$Depth,levels=unique(ocd_df$Depth))
+
+ # Skip if not enough quantiles (before gamma fitting)
+ if (length(unique(ocd_df$Quantile)) < 2) {
+ if (verbose) {
+ PEcAn.logger::logger.warn(
+ "Insufficient quantiles (", length(unique(ocd_df$Quantile)), ") ",
+ "available for gamma distribution fitting at some sites. ",
+ "Require at least 2 different quantiles to fit parameters."
+ )
+ }
+ return(NULL)
+ }
+
#split data by groups of sites and soil depth, while keeping the original order of each group
dat <- split(ocd_df, list(f1, f2))
@@ -132,22 +167,29 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) {
}
fitQ <- function(x) {
- val = x$Value
- stat = as.character(x$Quantile)
- theta = c(10, 10)
- fit <-
- list(Gamma = stats::optim(theta, cgamma, val = val, stat = stat))
- SS <- sapply(fit, function(f) {
- f$value
- })
- par <- sapply(fit, function(f) {
- f$par
- })
- return(list(par = par, SS = SS))
+ val <- x$Value
+ stat <- as.character(x$Quantile)
+ # Skip fitting if all values are NA or not numeric
+ if (all(is.na(val)) || length(val) == 0) {
+ return(list(par = c(NA, NA), SS = NA))
+ }
+ theta <- c(10, 10)
+ fit <- tryCatch(
+ stats::optim(theta, cgamma, val = val, stat = stat),
+ error = function(e) NULL
+ )
+ if (is.null(fit)) {
+ return(list(par = c(NA, NA), SS = NA))
+ }
+ return(list(par = fit$par, SS = fit$value))
}
score <- suppressWarnings(lapply(dat, fitQ))
bestPar <- sapply(score, function(f) { f$par })
+ # Ensure bestPar is a 2-row matrix even when invalid sites are present
+ if (is.null(dim(bestPar)) || nrow(bestPar) != 2) {
+ bestPar <- matrix(bestPar, nrow = 2, byrow = TRUE)
+ }
mean <- bestPar[1,] / bestPar[2,]
std <- sqrt(bestPar[1,] / bestPar[2,] ^ 2)
mean_site <- matrix(mean, length(internal_site_info$lon), 6)
@@ -184,11 +226,17 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) {
rownames(soilgrids_soilC_data) <- NULL
if (!is.null(outdir)) {
- PEcAn.logger::logger.info(paste0("Storing results in: ",file.path(outdir,"soilgrids_soilC_data.csv")))
- utils::write.csv(soilgrids_soilC_data,file=file.path(outdir,"soilgrids_soilC_data.csv"),row.names = FALSE)
- }
- else {
- PEcAn.logger::logger.error("No output directory found.")
+ # Ensure the directory exists; create if not
+ if (!dir.exists(outdir)) {
+ dir.create(outdir, recursive = TRUE)
+ PEcAn.logger::logger.info(paste0("Created output directory: ", outdir))
+ }
+ PEcAn.logger::logger.info(paste0("Storing results in: ", file.path(outdir, "soilgrids_soilC_data.csv")))
+ utils::write.csv(soilgrids_soilC_data,
+ file = file.path(outdir, "soilgrids_soilC_data.csv"),
+ row.names = FALSE)
+ } else {
+ PEcAn.logger::logger.warn("No output directory found. Results are only returned to R environment.")
}
# return the results to the terminal as well
return(soilgrids_soilC_data)
From f4e26b6d0671e338ef729862e9a65ef9dfdb95bd Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 3 Apr 2025 12:43:05 -0400
Subject: [PATCH 0067/1193] Update documentation.
---
modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd | 2 +-
modules/assim.sequential/man/downscale_qsub_main.Rd | 2 +-
modules/assim.sequential/man/downscale_rf_main.Rd | 2 +-
modules/assim.sequential/man/parallel_prediction.Rd | 2 +-
modules/assim.sequential/man/parallel_rf_train.Rd | 2 +-
modules/assim.sequential/man/pecan_settings_2_pts.Rd | 2 +-
modules/assim.sequential/man/prepare_train_dat.Rd | 2 +-
modules/assim.sequential/man/stack_covariates_2_df.Rd | 2 +-
modules/assim.sequential/man/stack_covariates_2_geotiff.Rd | 2 +-
9 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd b/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd
index 5443add6937..0f2dca9761c 100644
--- a/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd
+++ b/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{Average_ERA5_2_GeoTIFF}
\alias{Average_ERA5_2_GeoTIFF}
\title{Average_ERA5_2_GeoTIFF}
diff --git a/modules/assim.sequential/man/downscale_qsub_main.Rd b/modules/assim.sequential/man/downscale_qsub_main.Rd
index dbf35a30c41..0aeb957983f 100644
--- a/modules/assim.sequential/man/downscale_qsub_main.Rd
+++ b/modules/assim.sequential/man/downscale_qsub_main.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{downscale_qsub_main}
\alias{downscale_qsub_main}
\title{downscale_qsub_main}
diff --git a/modules/assim.sequential/man/downscale_rf_main.Rd b/modules/assim.sequential/man/downscale_rf_main.Rd
index 05b7568a934..99962b4cc4e 100644
--- a/modules/assim.sequential/man/downscale_rf_main.Rd
+++ b/modules/assim.sequential/man/downscale_rf_main.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{downscale_rf_main}
\alias{downscale_rf_main}
\title{downscale_rf_main}
diff --git a/modules/assim.sequential/man/parallel_prediction.Rd b/modules/assim.sequential/man/parallel_prediction.Rd
index 3fceff412a2..11edbf93cb5 100644
--- a/modules/assim.sequential/man/parallel_prediction.Rd
+++ b/modules/assim.sequential/man/parallel_prediction.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{parallel_prediction}
\alias{parallel_prediction}
\title{parallel_prediction}
diff --git a/modules/assim.sequential/man/parallel_rf_train.Rd b/modules/assim.sequential/man/parallel_rf_train.Rd
index 1d8863f582a..a001a78d365 100644
--- a/modules/assim.sequential/man/parallel_rf_train.Rd
+++ b/modules/assim.sequential/man/parallel_rf_train.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{parallel_rf_train}
\alias{parallel_rf_train}
\title{parallel_rf_train}
diff --git a/modules/assim.sequential/man/pecan_settings_2_pts.Rd b/modules/assim.sequential/man/pecan_settings_2_pts.Rd
index e676382d67f..0a027356987 100644
--- a/modules/assim.sequential/man/pecan_settings_2_pts.Rd
+++ b/modules/assim.sequential/man/pecan_settings_2_pts.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{pecan_settings_2_pts}
\alias{pecan_settings_2_pts}
\title{pecan_settings_2_pts}
diff --git a/modules/assim.sequential/man/prepare_train_dat.Rd b/modules/assim.sequential/man/prepare_train_dat.Rd
index 1925a5342d1..4dcde31dd5e 100644
--- a/modules/assim.sequential/man/prepare_train_dat.Rd
+++ b/modules/assim.sequential/man/prepare_train_dat.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{prepare_train_dat}
\alias{prepare_train_dat}
\title{prepare_train_dat}
diff --git a/modules/assim.sequential/man/stack_covariates_2_df.Rd b/modules/assim.sequential/man/stack_covariates_2_df.Rd
index ed69f768faf..edb3011aa83 100644
--- a/modules/assim.sequential/man/stack_covariates_2_df.Rd
+++ b/modules/assim.sequential/man/stack_covariates_2_df.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{stack_covariates_2_df}
\alias{stack_covariates_2_df}
\title{stack_covariates_2_df}
diff --git a/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd b/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd
index d83411ec708..ad74d3c6fdb 100644
--- a/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd
+++ b/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_NA_downscale.R
+% Please edit documentation in R/SDA_parallel_downscale.R
\name{stack_covariates_2_geotiff}
\alias{stack_covariates_2_geotiff}
\title{stack_covariates_2_geotiff}
From c94077dda05bfce6762d0417828782dbbd918987 Mon Sep 17 00:00:00 2001
From: Blesson
Date: Sat, 5 Apr 2025 09:57:14 +0530
Subject: [PATCH 0068/1193] Add files via upload
changed the file location
---
.../tests/testthat/ensemble-test.R | 88 +++++++++++++++++++
1 file changed, 88 insertions(+)
create mode 100644 modules/uncertainty/tests/testthat/ensemble-test.R
diff --git a/modules/uncertainty/tests/testthat/ensemble-test.R b/modules/uncertainty/tests/testthat/ensemble-test.R
new file mode 100644
index 00000000000..07b3d548dc9
--- /dev/null
+++ b/modules/uncertainty/tests/testthat/ensemble-test.R
@@ -0,0 +1,88 @@
+library(testthat)
+library(PEcAn.logger)
+library(PEcAn.DB)
+
+
+source("modules/uncertainty/R/ensemble.R")
+dummy_binary_path <- file.path(tempdir(), "sipnet")
+file.create(dummy_binary_path)
+# Mock SIPNET writer
+if (!exists("write.config.SIPNET")) {
+ write.config.SIPNET <- function(...) {
+ PEcAn.logger::logger.info("Mock SIPNET writer called")
+ return(invisible(TRUE))
+ }
+}
+
+context("Ensemble Input Validation Tests")
+
+create_base_settings <- function() {
+ list(
+ workflow = list(id = 1),
+ model = list(
+ id = 1000,
+ type = "SIPNET",
+ binary = dummy_binary_path
+ ),
+ run = list(
+ site = list(id = 1, name = "Test Site", lat = 40.0, lon = -80.0),
+ start.date = "2004-01-01",
+ end.date = "2004-12-31"
+ ),
+ host = list(
+ outdir = tempdir(),
+ rundir = tempdir(),
+ name = "localhost"
+ ),
+ database = list(bety = list(write = FALSE))
+ )
+}
+
+test_that("Single input with no samples works", {
+ withr::local_tempdir()
+
+ def <- list(
+ inputs = list(soil = list(path = "soil1.nc")),
+ pfts = list(list(
+ name = "temperate.pft",
+ constants = list(param1 = 0.5)
+ )),
+ model = list(id = 1000),
+ database = list(bety = list(write = FALSE))
+ )
+
+ settings <- create_base_settings()
+ settings$run$inputs <- list(soil = list(path = "soil1.nc"))
+ settings$ensemble <- list(size = 1)
+
+ writeLines("", "soil1.nc")
+
+ result <- write.ensemble.configs(def, NULL, settings, "SIPNET")
+ expect_true(!is.null(result$runs))
+ expect_true(!is.null(result$ensemble.id))
+})
+
+test_that("Multiple inputs without samples throws error", {
+ def <- list(
+ inputs = list(soil = list(path = c("soil1.nc", "soil2.nc"))),
+ pfts = list(list(
+ name = "temperate.pft",
+ constants = list(param1 = 0.5)
+ )),
+ model = list(id = 1000),
+ database = list(bety = list(write = FALSE))
+ )
+
+ settings <- create_base_settings()
+ settings$ensemble <- list(size = 1)
+
+ purrr::walk(c("soil1.nc", "soil2.nc"), ~ writeLines("", .x))
+
+ expect_error(
+ write.ensemble.configs(def, NULL, settings, "SIPNET"),
+ "Multiple soil inputs found but no sampling method specified"
+ )
+})
+
+# ... rest of tests with similar corrections ...
+
From d8d58d9d478b4695f4d6e4f485b15305d4c77d6b Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Thu, 10 Apr 2025 00:56:02 +0530
Subject: [PATCH 0069/1193] docs: enhance X-schema.org-keywords in DESCRIPTION
files for better package discoverability
---
base/all/DESCRIPTION | 1 +
base/db/DESCRIPTION | 1 +
base/logger/DESCRIPTION | 1 +
base/qaqc/DESCRIPTION | 1 +
base/remote/DESCRIPTION | 1 +
base/settings/DESCRIPTION | 1 +
base/utils/DESCRIPTION | 1 +
base/visualization/DESCRIPTION | 1 +
base/workflow/DESCRIPTION | 1 +
9 files changed, 9 insertions(+)
diff --git a/base/all/DESCRIPTION b/base/all/DESCRIPTION
index 2cad68114e2..6e7f9b5631e 100644
--- a/base/all/DESCRIPTION
+++ b/base/all/DESCRIPTION
@@ -78,3 +78,4 @@ LazyData: true
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
+X-schema.org-keywords: PEcAn, QAQC, integration, model, skill, testing, quality assurance, quality control, model validation, model evaluation, performance metrics, statistical analysis, data visualization, model diagnostics, benchmarking, model comparison, scientific validation, reproducibility
\ No newline at end of file
diff --git a/base/db/DESCRIPTION b/base/db/DESCRIPTION
index f88a16982e4..861910aac54 100644
--- a/base/db/DESCRIPTION
+++ b/base/db/DESCRIPTION
@@ -79,3 +79,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: PEcAn, database, management, tool, model, parameterization, execution, analysis
diff --git a/base/logger/DESCRIPTION b/base/logger/DESCRIPTION
index 50bb54d1eee..b8eff6dffbc 100644
--- a/base/logger/DESCRIPTION
+++ b/base/logger/DESCRIPTION
@@ -31,3 +31,4 @@ License: BSD_3_clause + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
+X-schema.org-keywords: PEcAn, logger, functions, outputs, workflow, management, tool, debugging, error handling, logging levels, console output, file logging, error control, log4j, message filtering, execution control, scientific workflow, diagnostic tools
diff --git a/base/qaqc/DESCRIPTION b/base/qaqc/DESCRIPTION
index 041f957d25b..26441f6a332 100644
--- a/base/qaqc/DESCRIPTION
+++ b/base/qaqc/DESCRIPTION
@@ -38,3 +38,4 @@ Encoding: UTF-8
VignetteBuilder: knitr, rmarkdown
Config/testthat/edition: 3
RoxygenNote: 7.3.2
+X-schema.org-keywords: PEcAn, QAQC, integration, model, skill, testing, quality assurance, quality control, model validation, model evaluation, performance metrics, statistical analysis, data visualization, model diagnostics, benchmarking, model comparison, scientific validation, reproducibility
diff --git a/base/remote/DESCRIPTION b/base/remote/DESCRIPTION
index 196d97967d2..a4e7be620fa 100644
--- a/base/remote/DESCRIPTION
+++ b/base/remote/DESCRIPTION
@@ -35,3 +35,4 @@ Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
+X-schema.org-keywords: PEcAn, remote, model, execution, utilities, communication, code, distributed computing, remote hosts, parallel processing, ecosystem modeling, workflow automation, SSH, HTTP, JSON, API integration, cluster computing, scientific computing, model deployment
diff --git a/base/settings/DESCRIPTION b/base/settings/DESCRIPTION
index f7b3e0409b0..d410a3a3ef3 100644
--- a/base/settings/DESCRIPTION
+++ b/base/settings/DESCRIPTION
@@ -31,3 +31,4 @@ Suggests:
Encoding: UTF-8
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
+X-schema.org-keywords: PEcAn, settings, functions, files, configuration management, XML parsing, parameter settings, workflow configuration, model parameters, environment variables, system settings, data settings, simulation settings, scientific workflow, configuration files, settings validation, parameter management
diff --git a/base/utils/DESCRIPTION b/base/utils/DESCRIPTION
index 72948b3d713..5b8cbb3f767 100644
--- a/base/utils/DESCRIPTION
+++ b/base/utils/DESCRIPTION
@@ -59,3 +59,4 @@ LazyData: true
Encoding: UTF-8
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
+X-schema.org-keywords: PEcAn, utilities, functions, tools, data manipulation, scientific workflow, model parameterization, data analysis, netCDF handling, time series analysis, unit conversion, data processing, scientific computing, ecological forecasting, model execution, data integration, workflow management, scientific investigation
diff --git a/base/visualization/DESCRIPTION b/base/visualization/DESCRIPTION
index 8b38be4c664..c6ee5058f38 100644
--- a/base/visualization/DESCRIPTION
+++ b/base/visualization/DESCRIPTION
@@ -54,3 +54,4 @@ Encoding: UTF-8
VignetteBuilder: knitr, rmarkdown
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
+X-schema.org-keywords: PEcAn, visualization, functions, data, models, data visualization, scientific plotting, model output visualization, time series plots, spatial visualization, statistical graphics, ggplot2, data analysis, scientific communication, model diagnostics, ecological data visualization, climate data visualization, scientific figures, data exploration
diff --git a/base/workflow/DESCRIPTION b/base/workflow/DESCRIPTION
index ae07761cb71..d5a376b8fd7 100644
--- a/base/workflow/DESCRIPTION
+++ b/base/workflow/DESCRIPTION
@@ -46,3 +46,4 @@ Suggests:
Copyright: Authors
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: PEcAn, workflow, functions, ecological, forecasts, reanalysis, scientific workflow, model execution, data assimilation, parameter estimation, uncertainty analysis, ecosystem modeling, scientific analysis, workflow automation, model-data fusion, ecological forecasting, scientific investigation, data processing, model integration, workflow management
From abb70f249cee5fe1f11efb540fbc4c51eaec2297 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 10 Apr 2025 01:31:19 -0400
Subject: [PATCH 0070/1193] Improve efficiency.
---
.../R/SDA_parallel_downscale.R | 24 ++++++++++++++-----
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index 8e75a562779..a11b209f099 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -123,9 +123,15 @@ stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file.
names(temp.rast) <- f$var.name
}
# raster operations.
- terra::crs(temp.rast) <- terra::crs(base.map)
- temp.rast <- terra::crop(temp.rast, base.map)
- temp.rast <- terra::resample(temp.rast, base.map)
+ if (! terra::crs(base.map) == terra::crs(temp.rast)) {
+ terra::crs(temp.rast) <- terra::crs(base.map)
+ }
+ if (! terra::ext(base.map) == terra::ext(temp.rast)) {
+ temp.rast <- terra::crop(temp.rast, base.map)
+ }
+ if (! all(c(nrow(base.map) == nrow(temp.rast), ncol(base.map) == ncol(temp.rast)))) {
+ temp.rast <- terra::resample(temp.rast, base.map)
+ }
# write the raster into disk.
file.name <- paste0(f$var.name, collapse = "_")
path <- file.path(outdir, paste0(file.name, ".tiff"))
@@ -150,13 +156,17 @@ stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file.
#' convert settings to geospatial points in terra.
#' @title pecan_settings_2_pts
#'
-#' @param settings PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.
+#' @param settings PEcAn settings: either a character that points to the settings or shape file or the actual pecan settings object will be accepted.
#'
#' @return terra spatial points object.
#'
#' @author Dongchen Zhang
pecan_settings_2_pts <- function(settings) {
if (is.character(settings)) {
+ # if it's shapefile.
+ if (grepl(".shp", settings)) {
+ return(terra::vect(settings))
+ }
# read settings.
settings <- PEcAn.settings::read.settings(settings)
}
@@ -205,6 +215,7 @@ stack_covariates_2_df <- function(rast.dir, cores = parallel::detectCores()) {
# if it's LC layer.
if ("LC" == names(all.rast)[r]) {
non.veg.inds <- which(! temp.vec %in% 1:8)
+ # non.veg.inds <- which(! temp.vec %in% 0:11)
na.inds <- unique(c(na.inds, non.veg.inds))
}
return(list(vec = temp.vec,
@@ -253,6 +264,7 @@ prepare_train_dat <- function(pts, analysis, covariates.dir, variable) {
as.data.frame() %>% `colnames<-`(paste0("ensemble", seq(nrow(analysis))))
# combine carbon and predictor.
full_data <- cbind(var.dat, predictors)
+ full_data <- full_data[which(full_data$LC %in% 1:8),]
return(full_data)
}
@@ -404,7 +416,7 @@ downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable
PEcAn.logger::logger.info("Converting geotiff to df.")
cov.df <- stack_covariates_2_df(rast.dir = covariates.dir, cores = cores)
# reconstruct LC because of the computation accuracy.
- cov.df$df$LC[which(cov.df$df$LC < 1)] <- 0
+ # cov.df$df$LC[which(cov.df$df$LC < 1)] <- 0
# convert LC into factor.
if ("LC" %in% colnames(cov.df$df)) {
cov.df$df[,"LC"] <- factor(cov.df$df[,"LC"])
@@ -416,7 +428,7 @@ downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable
cov.vecs = cov.df$df,
non.na.inds = cov.df$non.na.inds,
outdir = folder.name,
- name = list(time = time, variable = variable),
+ name = list(time = as.character(time), variable = variable),
cores = cores)
# calculate mean and std.
PEcAn.logger::logger.info("Calculate mean and std.")
From f677d1822fe43ae5379cfbaa96246eb8706e8acc Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 10 Apr 2025 01:31:33 -0400
Subject: [PATCH 0071/1193] Add varied land cover and stand age.
---
.../inst/anchor/NA_downscale_script.R | 236 ++++++++++++++++--
1 file changed, 209 insertions(+), 27 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 9e43a865eb6..05d30bdbc80 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -2,6 +2,175 @@ library(purrr)
library(foreach)
library(PEcAnAssimSequential)
setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/")
+# prepare stand age time-series.
+modis.lc.folder <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/MODIS_LC/LC"
+stand.age.out.folder <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/MODIS_LC/stand_age"
+# filter land cover time-series.
+# loop over years.
+# read tiff file.
+forest_type <- c(1:4)
+grass_type <- c(5:8)
+non_veg_type <- c(0, 9, 10, 11)
+base.map <- terra::rast("/projectnb/dietzelab/dongchen/anchorSites/downscale/base_map.tiff")
+base_crs <- terra::crs(base.map)
+base_ext <- terra::ext(base.map)
+# load forest age data.
+forest_age <- matrix(terra::rast("/projectnb/dietzelab/dongchen/anchorSites/downscale/forest_age/forest_age_2010_TC000_crop.tiff"), byrow = T)
+# calculate mean age for different LC types.
+LC <- matrix(terra::rast(file.path(modis.lc.folder, paste0(2010, ".tif")))[[5]], byrow = T)
+mean_age <- c()
+for (i in 1:8) {
+ mean_age <- c(mean_age, mean(forest_age[which(LC == i)], na.rm = T))
+}
+# function for filtering time series.
+filter.lc.ts <- function(vec, window.L = 3) {
+ L <- length(vec)
+ window <- c()
+ edge.case <- FALSE
+ #
+ if(length(unique(vec)) == 1) {
+ return(c(unique(vec), unique(vec), length(vec)))
+ }
+ for (i in L:1) {
+ # push item into the window.
+ window <- c(window, vec[i])
+ # print(window)
+ # if window has not reached its size.
+ if (length(window) < window.L) {
+ next
+ }
+ # window operation.
+ uni.val <- unique(window)
+ # if there is no change.
+ if (length(uni.val) == 1) {
+
+ } else if (length(uni.val) > 1) {
+ # check if head == tail.
+ if (head(window, 1) == tail(window, 1)) {
+
+ } else {
+ window.ind <- window.L - tail(which(window == head(uni.val, 1)), 1) + 1
+ return(c(tail(rev(uni.val), 2), L - i + 1 - window.ind + 1))
+ }
+ }
+ # remove the last item from the window.
+ window <- tail(window, -1)
+ }
+ # if there is no disturbance afterall.
+ return(c(vec[length(vec)], vec[length(vec)], length(vec)))
+}
+
+# store MODIS land cover time-series into matrix.
+ts_lc <- c()
+for (end.year in 2012:2023) {
+ print(end.year)
+ if (end.year == 2012) {
+ start.year <- 2001
+ } else {
+ start.year <- end.year
+ }
+ # load last year MODIS LC map.
+ LC <- matrix(terra::rast(file.path(modis.lc.folder, paste0(end.year, ".tif")))[[5]], byrow = T)
+ # store MODIS land cover time-series into matrix.
+ # ts_lc <- c()
+ for (y in start.year:end.year) {
+ # load image.
+ lc_tif <- terra::rast(file.path(modis.lc.folder, paste0(y, ".tif")))
+ lc_matrix <- matrix(lc_tif[[5]], byrow = T)
+ # reclassify.
+ lc_matrix[which(lc_matrix %in% forest_type)] <- 1
+ lc_matrix[which(lc_matrix %in% grass_type)] <- 2
+ lc_matrix[which(lc_matrix %in% non_veg_type)] <- 3
+ # combine image.
+ ts_lc <- cbind(ts_lc, lc_matrix)
+ # print(y)
+ }
+ # loop over NA.
+ split_data.matrix <- function(matrix, chunk.size=100) {
+ ncols <- dim(matrix)[2]
+ nchunks <- (ncols-1) %/% chunk.size + 1
+ split.data <- list()
+ min <- 1
+ for (i in seq_len(nchunks)) {
+ if (i == nchunks-1) { #make last two chunks of equal size
+ left <- ncols-(i-1)*chunk.size
+ max <- min+round(left/2)-1
+ } else {
+ max <- min(i*chunk.size, ncols)
+ }
+ split.data[[i]] <- t(matrix[,min:max,drop=FALSE])
+ min <- max+1 #for next chunk
+ }
+ return(split.data)
+ }
+ mat.lists <- split_data.matrix(t(ts_lc), floor(dim(ts_lc)[1]/parallel::detectCores()))
+ # register parallel nodes.
+ cl <- parallel::makeCluster(parallel::detectCores())
+ doSNOW::registerDoSNOW(cl)
+ res <- foreach::foreach(d = mat.lists, .packages=c("purrr")) %dopar% {
+ temp.res <- matrix(NA, dim(d)[1], 4) %>% `colnames<-`(c("from", "to", "years", "type"))
+ pb <- utils::txtProgressBar(min=1, max=dim(d)[1], style=3)
+ for (i in 1:dim(d)[1]) {
+ if (any(is.na(d[i,]))) next
+ temp.res[i, 1:3] <- filter.lc.ts(d[i,])
+ # grab change patterns.
+ if (all(temp.res[i, 1:2] == c(1, 2))) {
+ temp.res[i, 4] <- 1
+ } else if (all(temp.res[i, 1:2] == c(1, 3))) {
+ temp.res[i, 4] <- 2
+ } else if (all(temp.res[i, 1:2] == c(2, 3))) {
+ temp.res[i, 4] <- 3
+ } else if (all(temp.res[i, 1:2] == c(2, 1))) {
+ temp.res[i, 4] <- 4
+ } else if (all(temp.res[i, 1:2] == c(3, 1))) {
+ temp.res[i, 4] <- 5
+ } else if (all(temp.res[i, 1:2] == c(3, 2))) {
+ temp.res[i, 4] <- 6
+ }
+ utils::setTxtProgressBar(pb, i)
+ }
+ return(temp.res)
+ }
+ res <- do.call(rbind, res)
+ # any pixel in forest that are tagged as grassland should be replaced with the
+ # load forest age data.
+ forest_age <- matrix(terra::rast("/projectnb/dietzelab/dongchen/anchorSites/downscale/forest_age/forest_age_2010_TC000_crop.tiff"), byrow = T)
+ forest_age <- cbind(forest_age, res, LC) %>% `colnames<-`(c("forest_age", "from", "to", "years", "type", "LC"))
+ forest_age <- split_data.matrix(t(forest_age), floor(dim(forest_age)[1]/parallel::detectCores()))
+ forest_age <- foreach::foreach(d = forest_age, .packages=c("purrr")) %dopar% {
+ for (i in 1:dim(d)[1]) {
+ # if it's diturbed vegetation.
+ if (is.na(d[i, "years"])) next
+ if (d[i, "years"] < (end.year - 2000)) {
+ d[i, "forest_age"] <- d[i, "years"]
+ next
+ }
+ # no record for the forest age.
+ if (is.na(d[i, "forest_age"])) {
+ # if it is non vegetation.
+ if (d[i, "to"] == 3) {
+ # forest_age[i] <- 0
+ next
+ } else {
+ # if it's non-disturbed vegetation.
+ d[i, "forest_age"] <- mean_age[d[i, "LC"]]
+ }
+ }
+ }
+ return(d)
+ }
+ forest_age <- do.call(rbind, forest_age)
+ # stop parallel.
+ parallel::stopCluster(cl)
+ foreach::registerDoSEQ()
+ # write to raster.
+ forest_age <- terra::rast(matrix(forest_age[,"forest_age"], 9360, 19080, byrow = T))
+ terra::ext(forest_age) <- base_ext
+ terra::crs(forest_age) <- base_crs
+ names(forest_age) <- "year_since_disturb"
+ terra::writeRaster(forest_age, file=file.path(stand.age.out.folder, paste0(end.year, "_stand_age.tif")))
+ gc()
+}
# average ERA5 to climatic covariates.
outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET"
in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/"
@@ -20,48 +189,61 @@ future::plan(future::multisession, workers = 5, gc = T)
paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){
Average_ERA5_2_GeoTIFF(d1, d2, in.path, outdir)
}, .progress = T) %>% unlist
-# setup.
-base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
-load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/sda.all.forecast.analysis.Rdata")
-variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
-settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/pecanIC.xml"
-outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/downscale_maps/"
-cores <- 28
-date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
-# loop over years.
-for (i in seq_along(date)) {
+# create covariates time series.
+for (y in 2012:2024) {
+ print(y)
+ if (y == 2024) {
+ y.lc <- 2023
+ } else {
+ y.lc <- y
+ }
+ # LC <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/MODIS_LC/LC", paste0(y.lc, ".tif"))
+ LC <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
+ stand.age <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/MODIS_LC/stand_age", paste0(y.lc, "_stand_age.tif"))
+ met <- list.files("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET", full.names = T)
+ met <- met[which(grepl(y, met))]
# setup covariates paths and variable names.
- cov.tif.file.list <- list(LC = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif",
+ cov.tif.file.list <- list(LC = list(dir = LC,
var.name = "LC"),
- year_since_disturb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_LC/outputs/age.tif",
+ year_since_disturb = list(dir = stand.age,
var.name = "year_since_disturb"),
agb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/AGB/agb.tif",
var.name = "agb"),
twi = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/TWI/TWI_resample.tiff",
var.name = "twi"),
- met = list(dir = paths[i],
+ met = list(dir = met,
var.name = c("temp", "prec", "srad", "vapr")),
soil = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/SoilGrids.tif",
var.name = c("PH", "N", "SOC", "Sand")))
+ covariates.dir <- stack_covariates_2_geotiff(outdir = "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/covariates",
+ year = y,
+ base.map.dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/base_map.tiff",
+ cov.tif.file.list = cov.tif.file.list,
+ normalize = T,
+ cores = parallel::detectCores())
+}
+
+# setup.
+base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
+load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/sda.all.forecast.analysis.Rdata")
+variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
+# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/pecanIC.xml"
+settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/ShapeFile/pts.shp"
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/"
+cores <- 28
+date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
+# loop over years.
+for (i in seq_along(date)) {
# Assemble covariates.
- if (file.exists(paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff"))) {
- covariates.dir <- paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff")
- } else {
- covariates.dir <- stack_covariates_2_geotiff(outdir = outdir,
- year = lubridate::year(date[i]),
- base.map.dir = base.map.dir,
- cov.tif.file.list = cov.tif.file.list,
- normalize = T,
- cores = cores)
- }
+ covariates.dir <- file.path(outdir, "covariates_lc_ts", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
- analysis.yr <- analysis.all[[i]]
+ analysis.yr <- forecast.all[[i]]
time <- date[i]
# loop over carbon types.
for (j in seq_along(variables)) {
# setup folder.
variable <- variables[j]
- folder.path <- file.path(outdir, paste0(variables[j], "_", date[i]))
+ folder.path <- file.path(file.path(outdir, "downscale_maps_forecast_lc_ts"), paste0(variables[j], "_", date[i]))
dir.create(folder.path)
saveRDS(list(settings = settings,
analysis.yr = analysis.yr,
@@ -71,7 +253,7 @@ for (i in seq_along(date)) {
folder.path = folder.path,
base.map.dir = base.map.dir,
cores = cores,
- outdir = outdir),
+ outdir = file.path(outdir, "downscale_maps_forecast_lc_ts")),
file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
@@ -84,7 +266,7 @@ for (i in seq_along(date)) {
jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh)
writeLines(jobsh, con = file.path(folder.path, "job.sh"))
# qsub command.
- qsub <- "qsub -l h_rt=6:00:00 -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
+ qsub <- "qsub -l h_rt=10:00:00 -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
qsub <- gsub("@CORES@", cores, qsub)
qsub <- gsub("@NAME@", paste0("ds_", i, "_", j), qsub)
qsub <- gsub("@STDOUT@", file.path(folder.path, "stdout.log"), qsub)
From ea80cb209891b88ca4ba093e3126471a585b12a4 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 10 Apr 2025 01:31:41 -0400
Subject: [PATCH 0072/1193] Update documentation.
---
modules/assim.sequential/man/pecan_settings_2_pts.Rd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/assim.sequential/man/pecan_settings_2_pts.Rd b/modules/assim.sequential/man/pecan_settings_2_pts.Rd
index 0a027356987..1acbac65946 100644
--- a/modules/assim.sequential/man/pecan_settings_2_pts.Rd
+++ b/modules/assim.sequential/man/pecan_settings_2_pts.Rd
@@ -7,7 +7,7 @@
pecan_settings_2_pts(settings)
}
\arguments{
-\item{settings}{PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.}
+\item{settings}{PEcAn settings: either a character that points to the settings or shape file or the actual pecan settings object will be accepted.}
}
\value{
terra spatial points object.
From bb09c4bdc9135ccfc23282b5967bbe696359de81 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 10 Apr 2025 23:27:27 +0000
Subject: [PATCH 0073/1193] Added SoilGrid IC Utilities
---
base/settings/NAMESPACE | 1 +
base/settings/NEWS.md | 1 +
base/settings/R/get.site.info.R | 205 ++++++++++
base/settings/man/get.site.info.Rd | 40 ++
.../tests/testthat/test.get.site.info.R | 182 +++++++++
modules/data.land/NAMESPACE | 3 +
modules/data.land/NEWS.md | 6 +-
modules/data.land/R/IC_SOILGRID_Utilities.R | 373 ++++++++++++++++++
.../man/generate_soilgrids_ensemble.Rd | 34 ++
.../man/preprocess_soilgrids_data.Rd | 28 ++
modules/data.land/man/soilgrids_ic_process.Rd | 52 +++
11 files changed, 924 insertions(+), 1 deletion(-)
create mode 100644 base/settings/R/get.site.info.R
create mode 100644 base/settings/man/get.site.info.Rd
create mode 100644 base/settings/tests/testthat/test.get.site.info.R
create mode 100644 modules/data.land/R/IC_SOILGRID_Utilities.R
create mode 100644 modules/data.land/man/generate_soilgrids_ensemble.Rd
create mode 100644 modules/data.land/man/preprocess_soilgrids_data.Rd
create mode 100644 modules/data.land/man/soilgrids_ic_process.Rd
diff --git a/base/settings/NAMESPACE b/base/settings/NAMESPACE
index ec2a23e9ea5..609d3c38fb3 100644
--- a/base/settings/NAMESPACE
+++ b/base/settings/NAMESPACE
@@ -35,6 +35,7 @@ export(createMultiSiteSettings)
export(createSitegroupMultiSettings)
export(expandMultiSettings)
export(fix.deprecated.settings)
+export(get.site.info)
export(get_args)
export(is.MultiSettings)
export(is.SafeList)
diff --git a/base/settings/NEWS.md b/base/settings/NEWS.md
index 034822a763f..2a9591f2ed1 100644
--- a/base/settings/NEWS.md
+++ b/base/settings/NEWS.md
@@ -10,6 +10,7 @@
* `createMultiSiteSettings` argument `siteIds` now accepts data frames as well as the previously accepted numeric or character vectors. The data frame should have one site per row, uniquely identified by a mandatory `id` column. All columns of each row will become fields of the resulting `settings$run$site` block.
* New function `setEnsemblePaths` inserts paths to your ensemble inputs (met, poolinitcond, etc) into every site's `inputs` block according to the filename pattern specified in a template string.
+* New function `get.site.info` extracts and validates site information from either a PEcAn settings object or a CSV file, providing a standardized data frame with site_id, site_name, lat, lon, and str_id.
## Fixed
diff --git a/base/settings/R/get.site.info.R b/base/settings/R/get.site.info.R
new file mode 100644
index 00000000000..d252c54dace
--- /dev/null
+++ b/base/settings/R/get.site.info.R
@@ -0,0 +1,205 @@
+#' Extract and validate site information from settings or CSV file
+#'
+#' @param settings PEcAn settings list containing site information (optional)
+#' @param csv_path Path to a CSV file containing site information (optional)
+#' @param strict_checking Logical. If TRUE, will validate coordinates more strictly
+#'
+#' @return A data frame with site_id, site_name, lat, lon, and str_id
+#' @export get.site.info
+#'
+#' @details This function extracts and validates site information from either a PEcAn settings
+#' object or a CSV file. At least one input must be provided. If both are provided,
+#' the settings object takes precedence.
+#'
+#' If using a CSV file, it must contain at minimum the columns: site_id, lat, and lon.
+#' The column site_name is optional and will default to site_id if not provided.
+#'
+#' @examples
+#' \dontrun{
+#' # From settings object
+#' settings <- PEcAn.settings::read.settings("pecan.xml")
+#' site_info <- PEcAn.settings::get.site.info(settings)
+#'
+#' # From CSV file
+#' site_info <- PEcAn.settings::get.site.info(csv_path = "sites.csv")
+#' }
+get.site.info <- function(settings = NULL, csv_path = NULL, strict_checking = TRUE) {
+
+ # Check if at least one input is provided
+ if (is.null(settings) && is.null(csv_path)) {
+ PEcAn.logger::logger.severe("No site information provided. Please provide either settings or csv_path.")
+ }
+
+ # Process settings object (highest precedence when both are provided)
+ if (!is.null(settings)) {
+ PEcAn.logger::logger.debug("Extracting site information from settings object")
+
+ # Check if this is a MultiSettings object
+ if (inherits(settings, "MultiSettings")) {
+ PEcAn.logger::logger.info("Detected MultiSettings object")
+
+ # Process sites from MultiSettings
+ site_list <- lapply(settings, function(s) {
+ if (is.null(s$run) || is.null(s$run$site)) {
+ PEcAn.logger::logger.severe("Site information missing from one of the settings in MultiSettings")
+ }
+ return(s$run$site)
+ })
+ } else {
+ # Process single settings object
+ if (is.null(settings$run) || is.null(settings$run$site)) {
+ PEcAn.logger::logger.severe("Site information missing from settings (settings$run$site)")
+ }
+
+ # Check if we have vectorized site information
+ site_fields <- c("id", "name", "lat", "lon")
+ field_lengths <- sapply(site_fields, function(f) {
+ if (is.null(settings$run$site[[f]])) 0 else length(settings$run$site[[f]])
+ })
+
+ max_length <- max(field_lengths)
+ is_vectorized <- max_length > 1
+
+ if (is_vectorized) {
+ PEcAn.logger::logger.info("Detected vectorized site information in settings")
+
+ # Create a list of site information from vectorized input
+ site_list <- list()
+ for (i in 1:max_length) {
+ site <- list()
+ for (field in site_fields) {
+ if (!is.null(settings$run$site[[field]]) && i <= length(settings$run$site[[field]])) {
+ site[[field]] <- settings$run$site[[field]][i]
+ }
+ }
+ site_list[[i]] <- site
+ }
+ } else {
+ # Just a single non-vectorized site
+ site_list <- list(settings$run$site)
+ }
+ }
+ } else {
+ # Process CSV file input
+ PEcAn.logger::logger.debug("Reading site information from CSV file:", csv_path)
+
+ # Check if file exists
+ if (!file.exists(csv_path)) {
+ PEcAn.logger::logger.severe("CSV file not found:", csv_path)
+ }
+
+ # Read CSV file
+ csv_data <- utils::read.csv(csv_path, stringsAsFactors = FALSE)
+
+ # Check for required columns
+ required_cols <- c("site_id", "lat", "lon")
+ missing_cols <- setdiff(required_cols, colnames(csv_data))
+ if (length(missing_cols) > 0) {
+ PEcAn.logger::logger.severe("Missing required columns in CSV file: ",
+ paste(missing_cols, collapse = ", "))
+ }
+
+ # Add site_name if missing (use site_id as default)
+ if (!"site_name" %in% colnames(csv_data)) {
+ csv_data$site_name <- as.character(csv_data$site_id)
+ PEcAn.logger::logger.debug("Added site_name column using site_id values")
+ }
+
+ # Convert CSV data to the site_list format for consistent processing
+ site_list <- lapply(1:nrow(csv_data), function(i) {
+ row <- csv_data[i, ]
+ list(
+ id = row$site_id,
+ name = row$site_name,
+ lat = row$lat,
+ lon = row$lon
+ )
+ })
+ }
+
+ # Process each site from the site_list
+ result <- lapply(seq_along(site_list), function(i) {
+ site <- site_list[[i]]
+
+ # Check for required site ID
+ if (is.null(site$id)) {
+ PEcAn.logger::logger.severe(sprintf("Site ID is required but missing for site %d", i))
+ }
+
+ # Extract and validate site ID
+ site_id <- as.numeric(site$id)
+ if (is.na(site_id)) {
+ PEcAn.logger::logger.severe(sprintf("Site ID must be numeric for site %d", i))
+ }
+
+ # Check if the site name exists, use ID as name if missing
+ site_name <- ifelse(!is.null(site$name), site$name, as.character(site_id))
+
+ # Check for required coordinates
+ if (is.null(site$lat) || is.null(site$lon)) {
+ PEcAn.logger::logger.severe(sprintf("Site coordinates are required but missing for site %d", i))
+ }
+
+ # Extract and validate coordinates
+ lat <- as.numeric(site$lat)
+ lon <- as.numeric(site$lon)
+
+ if (is.na(lat) || is.na(lon)) {
+ PEcAn.logger::logger.severe(sprintf("Site coordinates must be numeric for site %d", i))
+ }
+
+ # site ID for display and file naming
+ str_id <- as.character(site$id)
+
+ # Return a standardized site info list
+ return(list(
+ site_id = site_id,
+ site_name = site_name,
+ lat = lat,
+ lon = lon,
+ str_id = str_id
+ ))
+ })
+
+ # Create the data frame using vapply to maintain types
+ site_df <- data.frame(
+ site_id = vapply(result, function(x) x$site_id, numeric(1)),
+ site_name = vapply(result, function(x) x$site_name, character(1)),
+ lat = vapply(result, function(x) x$lat, numeric(1)),
+ lon = vapply(result, function(x) x$lon, numeric(1)),
+ str_id = vapply(result, function(x) x$str_id, character(1)),
+ stringsAsFactors = FALSE
+ )
+
+ # Validate coordinates based on strictness settings
+ if (strict_checking) {
+ # Check for valid latitude range
+ invalid_lats <- site_df$lat < -90 | site_df$lat > 90
+ if (any(invalid_lats)) {
+ invalid_sites <- paste(site_df$site_id[invalid_lats], collapse = ", ")
+ PEcAn.logger::logger.severe(sprintf("Invalid latitude values (outside -90 to 90) found for sites: %s", invalid_sites))
+ }
+
+ # Check for valid longitude range
+ invalid_lons <- site_df$lon < -180 | site_df$lon > 180
+ if (any(invalid_lons)) {
+ invalid_sites <- paste(site_df$site_id[invalid_lons], collapse = ", ")
+ PEcAn.logger::logger.severe(sprintf("Invalid longitude values (outside -180 to 180) found for sites: %s", invalid_sites))
+ }
+ } else {
+ # Just warn if coordinates are suspicious
+ suspicious_lats <- site_df$lat < -90 | site_df$lat > 90
+ if (any(suspicious_lats)) {
+ suspicious_sites <- paste(site_df$site_id[suspicious_lats], collapse = ", ")
+ PEcAn.logger::logger.warn(sprintf("Suspicious latitude values (outside -90 to 90) found for sites: %s", suspicious_sites))
+ }
+
+ suspicious_lons <- site_df$lon < -180 | site_df$lon > 180
+ if (any(suspicious_lons)) {
+ suspicious_sites <- paste(site_df$site_id[suspicious_lons], collapse = ", ")
+ PEcAn.logger::logger.warn(sprintf("Suspicious longitude values (outside -180 to 180) found for sites: %s", suspicious_sites))
+ }
+ }
+
+ return(site_df)
+}
\ No newline at end of file
diff --git a/base/settings/man/get.site.info.Rd b/base/settings/man/get.site.info.Rd
new file mode 100644
index 00000000000..fa8bc90ca22
--- /dev/null
+++ b/base/settings/man/get.site.info.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get.site.info.R
+\name{get.site.info}
+\alias{get.site.info}
+\title{Extract and validate site information from settings or CSV file}
+\usage{
+get.site.info(settings = NULL, csv_path = NULL, strict_checking = TRUE)
+}
+\arguments{
+\item{settings}{PEcAn settings list containing site information (optional)}
+
+\item{csv_path}{Path to a CSV file containing site information (optional)}
+
+\item{strict_checking}{Logical. If TRUE, will validate coordinates more strictly}
+}
+\value{
+A data frame with site_id, site_name, lat, lon, and str_id
+}
+\description{
+Extract and validate site information from settings or CSV file
+}
+\details{
+This function extracts and validates site information from either a PEcAn settings
+object or a CSV file. At least one input must be provided. If both are provided,
+the settings object takes precedence.
+
+\if{html}{\out{
}}\preformatted{ If using a CSV file, it must contain at minimum the columns: site_id, lat, and lon.
+ The column site_name is optional and will default to site_id if not provided.
+}\if{html}{\out{
}}
+}
+\examples{
+\dontrun{
+# From settings object
+settings <- PEcAn.settings::read.settings("pecan.xml")
+site_info <- PEcAn.settings::get.site.info(settings)
+
+# From CSV file
+site_info <- PEcAn.settings::get.site.info(csv_path = "sites.csv")
+}
+}
diff --git a/base/settings/tests/testthat/test.get.site.info.R b/base/settings/tests/testthat/test.get.site.info.R
new file mode 100644
index 00000000000..d2ec7e4b821
--- /dev/null
+++ b/base/settings/tests/testthat/test.get.site.info.R
@@ -0,0 +1,182 @@
+context("get.site.info")
+
+test_that("get.site.info works with settings object", {
+ # Create a simple settings object
+ settings <- list(
+ run = list(
+ site = list(
+ id = 1000000001,
+ name = "Test Site",
+ lat = 45.0,
+ lon = -90.0
+ )
+ )
+ )
+
+ # Call get.site.info
+ site_info <- get.site.info(settings)
+
+ # Check the result
+ expect_is(site_info, "data.frame")
+ expect_equal(nrow(site_info), 1)
+ expect_equal(site_info$site_id, 1000000001)
+ expect_equal(site_info$site_name, "Test Site")
+ expect_equal(site_info$lat, 45.0)
+ expect_equal(site_info$lon, -90.0)
+ expect_equal(site_info$str_id, as.character(settings$run$site$id))
+})
+
+test_that("get.site.info works with CSV file", {
+ # Create a temporary CSV file
+ csv_file <- tempfile(fileext = ".csv")
+ csv_data <- data.frame(
+ site_id = c(1000000002, 1000000003),
+ site_name = c("Site 1", "Site 2"),
+ lat = c(40.0, 50.0),
+ lon = c(-80.0, -100.0)
+ )
+ write.csv(csv_data, csv_file, row.names = FALSE)
+
+ # Call get.site.info
+ site_info <- get.site.info(csv_path = csv_file)
+
+ # Check the result
+ expect_is(site_info, "data.frame")
+ expect_equal(nrow(site_info), 2)
+ expect_equal(site_info$site_id, c(1000000002, 1000000003))
+ expect_equal(site_info$site_name, c("Site 1", "Site 2"))
+ expect_equal(site_info$lat, c(40.0, 50.0))
+ expect_equal(site_info$lon, c(-80.0, -100.0))
+ expect_equal(site_info$str_id, as.character(csv_data$site_id))
+
+ # Clean up
+ unlink(csv_file)
+})
+
+test_that("get.site.info works with MultiSettings object", {
+ # Create a MultiSettings object
+ settings1 <- list(
+ run = list(
+ site = list(
+ id = 1000000004,
+ name = "Multi Site 1",
+ lat = 35.0,
+ lon = -85.0
+ )
+ )
+ )
+
+ settings2 <- list(
+ run = list(
+ site = list(
+ id = 1000000005,
+ name = "Multi Site 2",
+ lat = 55.0,
+ lon = -95.0
+ )
+ )
+ )
+
+ multi_settings <- structure(
+ list(settings1, settings2),
+ class = "MultiSettings"
+ )
+
+ # Call get.site.info
+ site_info <- get.site.info(multi_settings)
+
+ # Check the result
+ expect_is(site_info, "data.frame")
+ expect_equal(nrow(site_info), 2)
+ expect_equal(site_info$site_id, c(1000000004, 1000000005))
+ expect_equal(site_info$site_name, c("Multi Site 1", "Multi Site 2"))
+ expect_equal(site_info$lat, c(35.0, 55.0))
+ expect_equal(site_info$lon, c(-85.0, -95.0))
+ expect_equal(site_info$str_id, as.character(c(1000000004, 1000000005)))
+})
+
+test_that("get.site.info works with vectorized site information", {
+ # Create a settings object with vectorized site information
+ settings <- list(
+ run = list(
+ site = list(
+ id = c(1000000006, 1000000007),
+ name = c("Vector Site 1", "Vector Site 2"),
+ lat = c(30.0, 60.0),
+ lon = c(-75.0, -105.0)
+ )
+ )
+ )
+
+ # Call get.site.info
+ site_info <- get.site.info(settings)
+
+ # Check the result
+ expect_is(site_info, "data.frame")
+ expect_equal(nrow(site_info), 2)
+ expect_equal(site_info$site_id, c(1000000006, 1000000007))
+ expect_equal(site_info$site_name, c("Vector Site 1", "Vector Site 2"))
+ expect_equal(site_info$lat, c(30.0, 60.0))
+ expect_equal(site_info$lon, c(-75.0, -105.0))
+ expect_equal(site_info$str_id, as.character(c(1000000006, 1000000007)))
+})
+
+test_that("get.site.info validates coordinates with strict_checking", {
+ # Create a settings object with invalid coordinates
+ settings <- list(
+ run = list(
+ site = list(
+ id = 1000000008,
+ name = "Invalid Site",
+ lat = 100.0, # Invalid latitude
+ lon = -180.0
+ )
+ )
+ )
+
+ # Call get.site.info with strict_checking = TRUE
+ expect_error(get.site.info(settings, strict_checking = TRUE),
+ "Invalid latitude values")
+
+ # Call get.site.info with strict_checking = FALSE
+ site_info <- get.site.info(settings, strict_checking = FALSE)
+
+ # Check the result
+ expect_is(site_info, "data.frame")
+ expect_equal(nrow(site_info), 1)
+ expect_equal(site_info$site_id, 1000000008)
+ expect_equal(site_info$site_name, "Invalid Site")
+ expect_equal(site_info$lat, 100.0)
+ expect_equal(site_info$lon, -180.0)
+ expect_equal(site_info$str_id, as.character(settings$run$site$id))
+})
+
+test_that("str_id is correctly generated as a character string", {
+ settings <- list(
+ run = list(
+ site = list(
+ id = 1000000001,
+ name = "Test Site",
+ lat = 45.0,
+ lon = -90.0
+ )
+ )
+ )
+ site_info <- get.site.info(settings)
+ expect_type(site_info$str_id, "character")
+ expect_equal(site_info$str_id, as.character(settings$run$site$id))
+
+ # Test with CSV input
+ csv_file <- tempfile(fileext = ".csv")
+ csv_data <- data.frame(
+ site_id = c(1000000002, 1000000003),
+ site_name = c("Site 1", "Site 2"),
+ lat = c(40.0, 50.0),
+ lon = c(-80.0, -100.0)
+ )
+ write.csv(csv_data, csv_file, row.names = FALSE)
+ site_info_csv <- get.site.info(csv_path = csv_file)
+ expect_type(site_info_csv$str_id, "character")
+ expect_equal(site_info_csv$str_id, as.character(csv_data$site_id))
+ unlink(csv_file)
+})
\ No newline at end of file
diff --git a/modules/data.land/NAMESPACE b/modules/data.land/NAMESPACE
index 3c28da884b7..de495841c93 100644
--- a/modules/data.land/NAMESPACE
+++ b/modules/data.land/NAMESPACE
@@ -27,6 +27,7 @@ export(format_identifier)
export(from.Tag)
export(from.TreeCode)
export(gSSURGO.Query)
+export(generate_soilgrids_ensemble)
export(get.attributes)
export(get.soil)
export(get_resource_map)
@@ -45,6 +46,7 @@ export(plot2AGB)
export(pool_ic_list2netcdf)
export(pool_ic_netcdf2list)
export(prepare_pools)
+export(preprocess_soilgrids_data)
export(put_veg_module)
export(sample_ic)
export(sclass)
@@ -53,6 +55,7 @@ export(soil.units)
export(soil2netcdf)
export(soil_params)
export(soil_process)
+export(soilgrids_ic_process)
export(soilgrids_soilC_extract)
export(subset_layer)
export(to.Tag)
diff --git a/modules/data.land/NEWS.md b/modules/data.land/NEWS.md
index f77c74281e7..b135f192c65 100644
--- a/modules/data.land/NEWS.md
+++ b/modules/data.land/NEWS.md
@@ -9,7 +9,11 @@
## Added
* New function `soilgrids_soilC_extract` retrieves soil C estimates with uncertainty from the ISRIC SoilGrids 250m data. (#3040, @Qianyuxuan)
-
+* New utility script `IC_SOILGRID_Utilities.R` for processing SoilGrids data to generate soil carbon initial condition (IC) files. This includes:
+ - **`soilgrids_ic_process`**: A function to extract, process, and generate ensemble members from SoilGrids250m data, supporting input from PEcAn settings and optional CSV files.
+ - **`preprocess_soilgrids_data`**: A helper function to handle missing values and ensure data integrity during preprocessing.
+ - **`generate_soilgrids_ensemble`**: A function to create ensemble members for a site based on processed soil carbon data.
+
## Fixed
* `gSSURGO.Query()` now always returns all the columns requested, even ones that are all NA. It also now always requires `mukeys` to be specified.
diff --git a/modules/data.land/R/IC_SOILGRID_Utilities.R b/modules/data.land/R/IC_SOILGRID_Utilities.R
new file mode 100644
index 00000000000..feab19b9cc9
--- /dev/null
+++ b/modules/data.land/R/IC_SOILGRID_Utilities.R
@@ -0,0 +1,373 @@
+#' SoilGrids Initial Conditions (IC) Utilities
+#'
+#' @author Akash
+#' @description Functions for generating soil carbon IC files from SoilGrids250m data
+#' @details This module provides functions for extracting, processing, and generating
+#' ensemble members for soil carbon initial conditions using SoilGrids data.
+#' All soil carbon values are in kg/m².
+
+# Required package
+library(truncnorm)
+
+#' Process SoilGrids data for initial conditions
+#'
+#' @param settings PEcAn settings list containing site information. Should include:
+#' \itemize{
+#' \item settings$run$site - Site information with id, lat, lon
+#' \item settings$ensemble$size - (Optional) Number of ensemble members to create
+#' \item settings$soil$default_soilC - (Optional) Default soil carbon value in kg/m²
+#' \item settings$soil$default_uncertainty - (Optional) Default uncertainty as fraction
+#' }
+#' @param csv_path Path to a CSV file containing site information (optional)
+#' @param dir Output directory for IC files
+#' @param overwrite Overwrite existing files? (Default: FALSE)
+#' @param verbose Print detailed progress information to the terminal? TRUE/FALSE
+#'
+#' @return List of paths to generated IC files
+#' @export
+#'
+#' @details This function processes SoilGrids data to create carbon initial condition
+#' files. It extracts soil carbon data for all sites, handles missing values,
+#' generates ensemble members, and writes NetCDF files.
+#'
+#' @examples
+#' \dontrun{
+#' # From settings object
+#' settings <- PEcAn.settings::read.settings("pecan.xml")
+#' ic_files <- soilgrids_ic_process(settings, dir = "output/IC/")
+#'
+#' # From CSV file
+#' ic_files <- soilgrids_ic_process(csv_path = "sites.csv", dir = "output/IC/")
+#' }
+soilgrids_ic_process <- function(settings, csv_path=NULL, dir, overwrite = FALSE, verbose = FALSE) {
+ # Start timing
+ start_time <- proc.time()
+
+ # Extract site information using PEcAn.settings::get.site.info
+ site_info <- PEcAn.settings::get.site.info(settings = settings, csv_path = csv_path)
+
+ # Get optional parameters from settings if available
+ ensemble_size <- ifelse(is.null(settings$ensemble$size), 1, settings$ensemble$size)
+ default_soilC <- ifelse(is.null(settings$soil$default_soilC), 5.0, settings$soil$default_soilC)
+ default_uncertainty <- ifelse(is.null(settings$soil$default_uncertainty), 0.2, settings$soil$default_uncertainty)
+
+ # Create output directory if it doesn't exist
+ if (!dir.exists(dir)) {
+ PEcAn.logger::logger.info(sprintf("Creating output directory: %s", dir))
+ dir.create(dir, recursive = TRUE)
+ }
+
+ # Create a data folder for intermediate outputs
+ data_dir <- file.path(dir, "SoilGrids_data")
+ if (!dir.exists(data_dir)) {
+ dir.create(data_dir, recursive = TRUE)
+ }
+
+ # Log the number of sites being processed
+ n_sites <- nrow(site_info)
+ PEcAn.logger::logger.info(sprintf("Processing %d site(s)", n_sites))
+
+ if (verbose) {
+ for (i in 1:nrow(site_info)) {
+ PEcAn.logger::logger.info(sprintf("Site %d: %s (lat=%f, lon=%f)",
+ i, site_info$site_name[i],
+ site_info$lat[i], site_info$lon[i]))
+ }
+ }
+
+ # Check for cached data
+ soilc_csv_path <- file.path(data_dir, "soilgrids_soilC_data.csv")
+ if (file.exists(soilc_csv_path) && !overwrite) {
+ PEcAn.logger::logger.info("Using existing SoilGrids data:", soilc_csv_path)
+ soil_data <- utils::read.csv(soilc_csv_path, check.names = FALSE)
+ } else {
+ # Extract data for all sites at once
+ PEcAn.logger::logger.info("Extracting SoilGrids data for", nrow(site_info), "sites")
+ soil_data <- PEcAn.data.land::soilgrids_soilC_extract(
+ site_info = site_info,
+ outdir = data_dir,
+ verbose = verbose
+ )
+
+ # Save the extracted data for future use
+ utils::write.csv(soil_data, soilc_csv_path, row.names = FALSE)
+ }
+
+ # Validate soil carbon data units through range check
+ if (any(soil_data$`Total_soilC_0-30cm` > 150, na.rm = TRUE)) {
+ PEcAn.logger::logger.warn("Some soil carbon values exceed 150 kg/m², values may be in wrong units")
+ }
+
+ # Preprocess data
+ PEcAn.logger::logger.info("Preprocessing soil carbon data")
+ processed_data <- preprocess_soilgrids_data(
+ soil_data = soil_data,
+ default_soilC = default_soilC,
+ default_uncertainty = default_uncertainty,
+ verbose = verbose
+ )
+
+ # Create a list to hold the ensemble files for each site
+ all_ensemble_files <- list()
+
+ # Process each site
+ for (s in 1:nrow(site_info)) {
+ current_site <- site_info[s, ]
+
+ # Create output directory for this site
+ site_outfolder <- file.path(dir, paste0("SoilGrids_site_", current_site$str_id))
+ if (!dir.exists(site_outfolder)) {
+ dir.create(site_outfolder, recursive = TRUE)
+ }
+
+ # Check for existing files
+ existing_files <- list.files(site_outfolder, "*.nc$", full.names = TRUE)
+ if (length(existing_files) > 0 && !overwrite) {
+ PEcAn.logger::logger.info(sprintf("Using existing SoilGrids IC files for site %s", current_site$site_name))
+ all_ensemble_files[[current_site$str_id]] <- existing_files
+ next
+ }
+
+ if (verbose) {
+ PEcAn.logger::logger.info(sprintf("Generating ensemble members for site %s", current_site$site_name))
+ }
+
+ # Generate ensemble members for this site
+ ensemble_data <- generate_soilgrids_ensemble(
+ processed_data = processed_data,
+ site_id = current_site$site_id,
+ lat = current_site$lat,
+ lon = current_site$lon,
+ ensemble_size = ensemble_size,
+ verbose = verbose
+ )
+
+ # Write ensemble members to NetCDF files
+ site_ensemble_files <- list()
+
+ for (ens in seq_len(ensemble_size)) {
+ # Write to NetCDF
+ result <- PEcAn.data.land::pool_ic_list2netcdf(
+ input = ensemble_data[[ens]],
+ outdir = site_outfolder,
+ siteid = current_site$site_id,
+ ens = ens
+ )
+
+ site_ensemble_files[[ens]] <- result$file
+
+ if (verbose) {
+ PEcAn.logger::logger.info(sprintf("Generated IC file: %s for site %s",
+ basename(result$file),
+ current_site$site_name))
+ }
+ }
+
+ # Add this site's files to the overall list
+ all_ensemble_files[[current_site$str_id]] <- site_ensemble_files
+ }
+
+ # Log performance metrics
+ end_time <- proc.time()
+ elapsed_time <- end_time - start_time
+ PEcAn.logger::logger.info(sprintf("IC generation completed for %d site(s) in %.2f seconds",
+ n_sites, elapsed_time[3]))
+
+ return(all_ensemble_files)
+}
+
+#' Preprocess SoilGrids data
+#'
+#' @param soil_data Raw soil carbon data from soilgrids_soilC_extract
+#' @param default_soilC Default soil carbon value in kg/m² to use when data is missing
+#' @param default_uncertainty Default uncertainty as fraction to use when data is missing
+#' @param verbose Print detailed progress information to the terminal? TRUE/FALSE
+#'
+#' @return Processed soil carbon data
+#' @export
+preprocess_soilgrids_data <- function(soil_data, default_soilC = 5.0,
+ default_uncertainty = 0.2, verbose = FALSE) {
+ if (verbose) {
+ PEcAn.logger::logger.info("Preprocessing soil carbon data")
+ }
+
+ # Create a copy to avoid modifying the original
+ processed <- soil_data
+
+ # Handle missing values in Total_soilC_0-30cm
+ na_count <- sum(is.na(processed$`Total_soilC_0-30cm`))
+ if (na_count > 0) {
+ PEcAn.logger::logger.warn(sprintf("Found %d missing values in soil carbon data", na_count))
+
+ # Sites with missing 0-30cm but available 0-200cm data
+ has_200cm_data <- is.na(processed$`Total_soilC_0-30cm`) & !is.na(processed$`Total_soilC_0-200cm`)
+ if (any(has_200cm_data)) {
+ processed$`Total_soilC_0-30cm`[has_200cm_data] <- processed$`Total_soilC_0-200cm`[has_200cm_data] * 0.15
+ PEcAn.logger::logger.warn(sprintf(
+ "Using scaled 0-200cm soil carbon values for %d site(s)", sum(has_200cm_data)
+ ))
+
+ if (verbose) {
+ for (i in which(has_200cm_data)) {
+ PEcAn.logger::logger.debug(sprintf(
+ "Using scaled 0-200cm soil carbon value (%.2f) for site %s",
+ processed$`Total_soilC_0-30cm`[i], processed$Site_ID[i]
+ ))
+ }
+ }
+ }
+
+ # Sites still with missing data - use default value
+ still_missing <- is.na(processed$`Total_soilC_0-30cm`)
+ if (any(still_missing)) {
+ processed$`Total_soilC_0-30cm`[still_missing] <- default_soilC
+ PEcAn.logger::logger.warn(sprintf(
+ "Using default soil carbon value (%.2f kg/m²) for %d site(s)",
+ default_soilC, sum(still_missing)
+ ))
+
+ if (verbose) {
+ for (i in which(still_missing)) {
+ PEcAn.logger::logger.debug(sprintf(
+ "Using default soil carbon value (%.2f kg/m²) for site %s",
+ default_soilC, processed$Site_ID[i]
+ ))
+ }
+ }
+ }
+ }
+
+ # Handle missing values in Std_soilC_0-30cm
+ na_count <- sum(is.na(processed$`Std_soilC_0-30cm`))
+ if (na_count > 0) {
+ PEcAn.logger::logger.warn(sprintf("Found %d missing values in soil carbon uncertainty", na_count))
+
+ # Sites with missing 0-30cm but available 0-200cm uncertainty data
+ has_200cm_data <- is.na(processed$`Std_soilC_0-30cm`) & !is.na(processed$`Std_soilC_0-200cm`)
+ if (any(has_200cm_data)) {
+ processed$`Std_soilC_0-30cm`[has_200cm_data] <- processed$`Std_soilC_0-200cm`[has_200cm_data] * 0.15
+ PEcAn.logger::logger.warn(sprintf(
+ "Using scaled 0-200cm soil carbon uncertainty for %d site(s)", sum(has_200cm_data)
+ ))
+
+ if (verbose) {
+ for (i in which(has_200cm_data)) {
+ PEcAn.logger::logger.debug(sprintf(
+ "Using scaled 0-200cm soil carbon uncertainty (%.2f) for site %s",
+ processed$`Std_soilC_0-30cm`[i], processed$Site_ID[i]
+ ))
+ }
+ }
+ }
+
+ # Sites still with missing uncertainty - use default percentage of mean
+ still_missing <- is.na(processed$`Std_soilC_0-30cm`)
+ if (any(still_missing)) {
+ processed$`Std_soilC_0-30cm`[still_missing] <-
+ processed$`Total_soilC_0-30cm`[still_missing] * default_uncertainty
+ PEcAn.logger::logger.warn(sprintf(
+ "Using default uncertainty (%.1f%% of mean) for %d site(s)",
+ default_uncertainty * 100, sum(still_missing)
+ ))
+
+ if (verbose) {
+ for (i in which(still_missing)) {
+ PEcAn.logger::logger.debug(sprintf(
+ "Using default uncertainty (%.1f%% of mean) for site %s",
+ default_uncertainty * 100, processed$Site_ID[i]
+ ))
+ }
+ }
+ }
+ }
+
+ # Ensure standard deviation is non-negative
+ neg_sd_count <- sum(processed$`Std_soilC_0-30cm` < 0, na.rm = TRUE)
+ if (neg_sd_count > 0) {
+ PEcAn.logger::logger.warn(sprintf("Found %d negative standard deviations", neg_sd_count))
+ processed$`Std_soilC_0-30cm` <- pmax(processed$`Std_soilC_0-30cm`, 0, na.rm = TRUE)
+ }
+
+ # Ensure mean is non-negative
+ neg_mean_count <- sum(processed$`Total_soilC_0-30cm` < 0, na.rm = TRUE)
+ if (neg_mean_count > 0) {
+ PEcAn.logger::logger.warn(sprintf("Found %d negative mean values", neg_mean_count))
+ processed$`Total_soilC_0-30cm` <- pmax(processed$`Total_soilC_0-30cm`, 0, na.rm = TRUE)
+ }
+
+ # Add minimum standard deviation to avoid zero uncertainty
+ min_sd <- 0.1 * processed$`Total_soilC_0-30cm` # 10% of mean as minimum SD
+ is_zero_sd <- processed$`Std_soilC_0-30cm` == 0 | is.na(processed$`Std_soilC_0-30cm`)
+ zero_sd_count <- sum(is_zero_sd)
+
+ if (zero_sd_count > 0) {
+ PEcAn.logger::logger.info(sprintf("Setting minimum uncertainty for %d zero/NA standard deviations",
+ zero_sd_count))
+ processed$`Std_soilC_0-30cm` <- pmax(processed$`Std_soilC_0-30cm`, min_sd, na.rm = TRUE)
+ }
+
+ return(processed)
+}
+
+#' Generate ensemble members for a site
+#'
+#' @param processed_data Processed soil carbon data
+#' @param site_id Site ID
+#' @param lat Site latitude
+#' @param lon Site longitude
+#' @param ensemble_size Number of ensemble members to create
+#' @param verbose Print detailed progress information to the terminal? TRUE/FALSE
+#'
+#' @return List of ensemble data for the site
+#' @export
+generate_soilgrids_ensemble <- function(processed_data, site_id, lat, lon, ensemble_size, verbose = FALSE) {
+ if (verbose) {
+ PEcAn.logger::logger.info(sprintf("Generating %d ensemble members for site %s", ensemble_size, site_id))
+ }
+
+ # Get site row from processed data
+ site_row <- which(processed_data$Site_ID == site_id)
+ if (length(site_row) == 0) {
+ PEcAn.logger::logger.severe(sprintf("Site %s not found in processed data", site_id))
+ }
+
+ # Set random seed for reproducibility
+ set.seed(as.numeric(site_id))
+
+ # Generate all ensemble members at once
+ soil_c_values <- truncnorm::rtruncnorm(
+ n = ensemble_size,
+ a = 0, # Lower bound (no negative values)
+ b = Inf, # Upper bound
+ mean = processed_data$`Total_soilC_0-30cm`[site_row],
+ sd = processed_data$`Std_soilC_0-30cm`[site_row]
+ )
+
+ if (verbose) {
+ PEcAn.logger::logger.debug(sprintf(
+ "Generated %d soil carbon values for site %s (mean: %.2f, sd: %.2f)",
+ ensemble_size,
+ site_id,
+ processed_data$`Total_soilC_0-30cm`[site_row],
+ processed_data$`Std_soilC_0-30cm`[site_row]
+ ))
+ }
+
+ # Create input lists for pool_ic_list2netcdf
+ ensemble_data <- lapply(seq_len(ensemble_size), function(ens) {
+ list(
+ dims = list(
+ lat = lat,
+ lon = lon,
+ time = 1
+ ),
+ vals = list(
+ soil_organic_carbon_content = soil_c_values[ens],
+ wood_carbon_content = 0, # Not provided by SoilGrids
+ litter_carbon_content = 0 # Not provided by SoilGrids
+ )
+ )
+ })
+
+ return(ensemble_data)
+}
diff --git a/modules/data.land/man/generate_soilgrids_ensemble.Rd b/modules/data.land/man/generate_soilgrids_ensemble.Rd
new file mode 100644
index 00000000000..7604bfc8b71
--- /dev/null
+++ b/modules/data.land/man/generate_soilgrids_ensemble.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/IC_SOILGRID_Utilities.R
+\name{generate_soilgrids_ensemble}
+\alias{generate_soilgrids_ensemble}
+\title{Generate ensemble members for a site}
+\usage{
+generate_soilgrids_ensemble(
+ processed_data,
+ site_id,
+ lat,
+ lon,
+ ensemble_size,
+ verbose = FALSE
+)
+}
+\arguments{
+\item{processed_data}{Processed soil carbon data}
+
+\item{site_id}{Site ID}
+
+\item{lat}{Site latitude}
+
+\item{lon}{Site longitude}
+
+\item{ensemble_size}{Number of ensemble members to create}
+
+\item{verbose}{Print detailed progress information to the terminal? TRUE/FALSE}
+}
+\value{
+List of ensemble data for the site
+}
+\description{
+Generate ensemble members for a site
+}
diff --git a/modules/data.land/man/preprocess_soilgrids_data.Rd b/modules/data.land/man/preprocess_soilgrids_data.Rd
new file mode 100644
index 00000000000..b1e3cbd524e
--- /dev/null
+++ b/modules/data.land/man/preprocess_soilgrids_data.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/IC_SOILGRID_Utilities.R
+\name{preprocess_soilgrids_data}
+\alias{preprocess_soilgrids_data}
+\title{Preprocess SoilGrids data}
+\usage{
+preprocess_soilgrids_data(
+ soil_data,
+ default_soilC = 5,
+ default_uncertainty = 0.2,
+ verbose = FALSE
+)
+}
+\arguments{
+\item{soil_data}{Raw soil carbon data from soilgrids_soilC_extract}
+
+\item{default_soilC}{Default soil carbon value in kg/m² to use when data is missing}
+
+\item{default_uncertainty}{Default uncertainty as fraction to use when data is missing}
+
+\item{verbose}{Print detailed progress information to the terminal? TRUE/FALSE}
+}
+\value{
+Processed soil carbon data
+}
+\description{
+Preprocess SoilGrids data
+}
diff --git a/modules/data.land/man/soilgrids_ic_process.Rd b/modules/data.land/man/soilgrids_ic_process.Rd
new file mode 100644
index 00000000000..5a8cd74ff03
--- /dev/null
+++ b/modules/data.land/man/soilgrids_ic_process.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/IC_SOILGRID_Utilities.R
+\name{soilgrids_ic_process}
+\alias{soilgrids_ic_process}
+\title{Process SoilGrids data for initial conditions}
+\usage{
+soilgrids_ic_process(
+ settings,
+ csv_path = NULL,
+ dir,
+ overwrite = FALSE,
+ verbose = FALSE
+)
+}
+\arguments{
+\item{settings}{PEcAn settings list containing site information. Should include:
+\itemize{
+ \item settings$run$site - Site information with id, lat, lon
+ \item settings$ensemble$size - (Optional) Number of ensemble members to create
+ \item settings$soil$default_soilC - (Optional) Default soil carbon value in kg/m²
+ \item settings$soil$default_uncertainty - (Optional) Default uncertainty as fraction
+}}
+
+\item{csv_path}{Path to a CSV file containing site information (optional)}
+
+\item{dir}{Output directory for IC files}
+
+\item{overwrite}{Overwrite existing files? (Default: FALSE)}
+
+\item{verbose}{Print detailed progress information to the terminal? TRUE/FALSE}
+}
+\value{
+List of paths to generated IC files
+}
+\description{
+Process SoilGrids data for initial conditions
+}
+\details{
+This function processes SoilGrids data to create carbon initial condition
+ files. It extracts soil carbon data for all sites, handles missing values,
+ generates ensemble members, and writes NetCDF files.
+}
+\examples{
+\dontrun{
+# From settings object
+settings <- PEcAn.settings::read.settings("pecan.xml")
+ic_files <- soilgrids_ic_process(settings, dir = "output/IC/")
+
+# From CSV file
+ic_files <- soilgrids_ic_process(csv_path = "sites.csv", dir = "output/IC/")
+}
+}
From 7f2f1e8601daf6e6ecdb3dc85be5aa403fa01f37 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Mon, 14 Apr 2025 00:38:56 +0000
Subject: [PATCH 0074/1193] fix: "/pkgdoc/package_documentation/pkgdocs": not
found
---
docker/docs/Dockerfile | 1 +
1 file changed, 1 insertion(+)
diff --git a/docker/docs/Dockerfile b/docker/docs/Dockerfile
index 9d764e540b3..ee9974ce9c1 100644
--- a/docker/docs/Dockerfile
+++ b/docker/docs/Dockerfile
@@ -37,6 +37,7 @@ COPY scripts/build_pkgdown.R /pkgdoc/scripts/build_pkgdown.R
COPY base /pkgdoc/base/
COPY modules /pkgdoc/modules/
COPY models /pkgdoc/models/
+COPY package_documentation /pkgdoc/package_documentation
RUN make clean && make pkgdocs
# ----------------------------------------------------------------------
From 1872d7f54745b606fc04db09e43251345320e1d8 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Mon, 14 Apr 2025 07:45:08 +0000
Subject: [PATCH 0075/1193] Fix rsync path in gh action to correctly deploy
pkgdocs from main repo to package-documentation
---
.github/workflows/pkgdown.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml
index 1a73549f402..9a4da14b6d0 100644
--- a/.github/workflows/pkgdown.yml
+++ b/.github/workflows/pkgdown.yml
@@ -63,7 +63,7 @@ jobs:
fi
cd package-documentation
mkdir -p $VERSION
- rsync -a --delete pkgdocs/ ${VERSION}/
+ rsync -a --delete ../package_documentation/pkgdocs/ ${VERSION}/
git add --all *
git commit -m "Build pkgdown docs from pecan revision ${GITHUB_SHA}" || true
git push -q origin main
From c185578af4c45afb2f682e669cf3a785d564af76 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 15 Apr 2025 14:09:33 -0400
Subject: [PATCH 0076/1193] Change the file path.
---
.../inst/anchor/NA_downscale_script.R | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 05d30bdbc80..746ed0886a4 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -225,11 +225,11 @@ for (y in 2012:2024) {
# setup.
base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
-load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/sda.all.forecast.analysis.Rdata")
+load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/sda.all.forecast.analysis.Rdata")
variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
-# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/pecanIC.xml"
+# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/pecanIC.xml"
settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/ShapeFile/pts.shp"
-outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/"
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/"
cores <- 28
date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
# loop over years.
@@ -237,13 +237,13 @@ for (i in seq_along(date)) {
# Assemble covariates.
covariates.dir <- file.path(outdir, "covariates_lc_ts", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
- analysis.yr <- forecast.all[[i]]
+ analysis.yr <- analysis.all[[i]]
time <- date[i]
# loop over carbon types.
for (j in seq_along(variables)) {
# setup folder.
variable <- variables[j]
- folder.path <- file.path(file.path(outdir, "downscale_maps_forecast_lc_ts"), paste0(variables[j], "_", date[i]))
+ folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts"), paste0(variables[j], "_", date[i]))
dir.create(folder.path)
saveRDS(list(settings = settings,
analysis.yr = analysis.yr,
@@ -253,7 +253,7 @@ for (i in seq_along(date)) {
folder.path = folder.path,
base.map.dir = base.map.dir,
cores = cores,
- outdir = file.path(outdir, "downscale_maps_forecast_lc_ts")),
+ outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")),
file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
From 1706aada4dc738bab8fe799cd65d33428d69b933 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Wed, 16 Apr 2025 19:28:08 +0000
Subject: [PATCH 0077/1193] feat: restructure pkgdown docs, added custom
_pkgdown.yml, and enhanced index page
---
scripts/build_pkgdown.R | 152 +++++++++++++++++++++++++++++++++++++---
1 file changed, 144 insertions(+), 8 deletions(-)
diff --git a/scripts/build_pkgdown.R b/scripts/build_pkgdown.R
index a6c0fee4f82..4e92e1322db 100644
--- a/scripts/build_pkgdown.R
+++ b/scripts/build_pkgdown.R
@@ -1,15 +1,16 @@
#!/usr/bin/env Rscript
-
# Build pkgdown documentation for PEcAn packages
library(pkgdown)
-
+library(yaml)
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
stop("No package names provided. Please pass package names as arguments.")
}
-
packages <- args
-
+output_dir <- "_pkgdown_docs"
+if (!dir.exists(output_dir)) {
+ dir.create(output_dir, recursive = TRUE)
+}
if (requireNamespace("PEcAn.logger", quietly = TRUE)) {
logger <- PEcAn.logger::logger.info
} else {
@@ -18,8 +19,63 @@ if (requireNamespace("PEcAn.logger", quietly = TRUE)) {
}
}
-logger("Building pkgdown docs for:", paste(packages, collapse = ", "))
+pkg_config <- function(pkg) {
+ pkgname <- desc::desc_get("Package", pkg)
+
+ list(
+ url = "https://pecanproject.github.io/",
+ home = list(
+ title = sprintf("%s Functions for PEcAn", pkgname),
+ ),
+ template = list(
+ bootstrap = 5,
+ bslib = list(
+ primary = "#0054AD",
+ `border-radius` = "0.5rem",
+ `btn-border-radius` = "0.25rem"
+ ),
+ `light-switch` = TRUE,
+ ),
+ navbar = list(
+ structure = list(
+ left = c("pecan_home", "reference", "news"),
+ right = c("search", "github", "light-switch")
+ ),
+ components = list(
+ pecan_home = list(
+ text = "PEcAn Home",
+ href = "../../../index.html",
+ `aria-label` = "PEcAn Project Home"
+ ),
+ reference = list(
+ text = "Reference",
+ href = "reference/index.html"
+ ),
+ github = list(
+ icon = "fab fa-github",
+ href = "https://github.com/PecanProject/pecan",
+ `aria-label` = "GitHub"
+ )
+ )
+ ),
+ reference = list(
+ list(
+ title = "All Functions",
+ desc = "All functions exported by this package",
+ contents = list("matches('.*')")
+ )
+ ),
+ news = list(
+ text = "News",
+ href = "news/index.html"
+ ),
+ development = list(
+ mode = "auto"
+ )
+ )
+}
+logger("Building pkgdown docs for:", paste(packages, collapse = ", "))
for (pkg in packages) {
logger("Building pkgdown site for:", pkg)
current_wd <- getwd()
@@ -27,6 +83,17 @@ for (pkg in packages) {
if (!dir.exists(pkg)) {
stop(paste("Package directory does not exist:", pkg))
}
+ pkg_config_path <- file.path(pkg, "_pkgdown.yml")
+ pkg_config <- pkg_config(pkg)
+ # If _pkgdown.yml exists, merge with our config, otherwise create new
+ if (file.exists(pkg_config_path)) {
+ exist_config <- yaml::read_yaml(pkg_config_path)
+ # Merge configurations, preserving existing settings
+ merged_config <- modifyList(exist_config, pkg_config)
+ yaml::write_yaml(merged_config, pkg_config_path)
+ } else {
+ yaml::write_yaml(pkg_config, pkg_config_path)
+ }
setwd(pkg)
pkgdown::build_site()
setwd(current_wd)
@@ -35,11 +102,17 @@ for (pkg in packages) {
warning(paste("No docs folder created for:", pkg))
next
}
- dest <- file.path("package_documentation/pkgdocs", pkg)
- if (!dir.exists(dest)) {
+ pkgname <- desc::desc_get("Package", pkg)
+ dest <- file.path(output_dir, strsplit(pkg, "/")[[1]][1], pkgname)
+ if (!dir.exists(dest)) {
dir.create(dest, recursive = TRUE, showWarnings = FALSE)
}
- file.copy(from = source_docs, to = dest, recursive = TRUE, overwrite = TRUE)
+ file.copy(
+ from = list.files(source_docs, full.names = TRUE),
+ to = dest,
+ recursive = TRUE,
+ overwrite = TRUE
+ )
logger("✅ Successfully copied docs from", pkg, "to", dest)
}, error = function(e) {
warning(paste("❌ Error building pkgdown site for", pkg, ":", e$message))
@@ -50,4 +123,67 @@ for (pkg in packages) {
})
}
+logger("Creating index page")
+
+built_pkg_dirs <- list.dirs(output_dir, recursive=FALSE, full.names = FALSE)
+html_header <- c(
+ '',
+ '',
+ '',
+ ' Package-specific documentation for the PEcAn R packages',
+ ' ',
+ ' ',
+ '',
+ '',
+ '
PEcAn package documentation
',
+ '
Function documentation and articles for each PEcAn package,',
+ ' generated from the package source using pkgdown package.
'
+)
+after_text <- c(
+ ' ',
+ '',
'',
''
)
writeLines(
- text = c(html_header, content, html_footer),
+ text = c(before_text, listing_text, after_text),
con = file.path(output_dir, "index.html")
)
From 76146bb2d5a7f84bf0614a5c7dc7adfbdc2fa373 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 19 Apr 2025 00:34:13 +0000
Subject: [PATCH 0079/1193] fix docker and GHA path
---
.github/workflows/pkgdown.yml | 2 +-
docker/docs/Dockerfile | 7 +++----
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml
index 9a4da14b6d0..d068b000cba 100644
--- a/.github/workflows/pkgdown.yml
+++ b/.github/workflows/pkgdown.yml
@@ -63,7 +63,7 @@ jobs:
fi
cd package-documentation
mkdir -p $VERSION
- rsync -a --delete ../package_documentation/pkgdocs/ ${VERSION}/
+ rsync -a --delete ../_pkgdown_docs/ ${VERSION}/
git add --all *
git commit -m "Build pkgdown docs from pecan revision ${GITHUB_SHA}" || true
git push -q origin main
diff --git a/docker/docs/Dockerfile b/docker/docs/Dockerfile
index ee9974ce9c1..d0af39672f2 100644
--- a/docker/docs/Dockerfile
+++ b/docker/docs/Dockerfile
@@ -37,8 +37,7 @@ COPY scripts/build_pkgdown.R /pkgdoc/scripts/build_pkgdown.R
COPY base /pkgdoc/base/
COPY modules /pkgdoc/modules/
COPY models /pkgdoc/models/
-COPY package_documentation /pkgdoc/package_documentation
-RUN make clean && make pkgdocs
+RUN make clean && make pkgdocs
# ----------------------------------------------------------------------
# copy html pages to container
@@ -52,7 +51,7 @@ RUN apt-get update \
COPY docker/docs/index.html /usr/local/apache2/htdocs/
COPY --from=pecandocs /src/book_source/_book/ /usr/local/apache2/htdocs/docs/pecan/
-COPY --from=pecandocs /pkgdoc/package_documentation/pkgdocs/ /usr/local/apache2/htdocs/pkgdocs/
+COPY --from=pecandocs /pkgdoc/_pkgdown_docs/ /usr/local/apache2/htdocs/pkgdocs/
# ----------------------------------------------------------------------
# PEcAn version information
@@ -66,4 +65,4 @@ ARG PECAN_GIT_DATE="unknown"
ENV PECAN_VERSION=${PECAN_VERSION} \
PECAN_GIT_BRANCH=${PECAN_GIT_BRANCH} \
PECAN_GIT_CHECKSUM=${PECAN_GIT_CHECKSUM} \
- PECAN_GIT_DATE=${PECAN_GIT_DATE}
+ PECAN_GIT_DATE=${PECAN_GIT_DATE}
\ No newline at end of file
From c6b3b152a31ec646f312a6d71604dc45e4d2eee8 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 19 Apr 2025 00:37:14 +0000
Subject: [PATCH 0080/1193] Add URL and BugReports fields to DESCRIPTION files
---
base/all/DESCRIPTION | 2 ++
base/db/DESCRIPTION | 2 ++
base/logger/DESCRIPTION | 2 +-
base/qaqc/DESCRIPTION | 2 ++
base/remote/DESCRIPTION | 2 ++
base/settings/DESCRIPTION | 2 ++
base/utils/DESCRIPTION | 2 ++
base/visualization/DESCRIPTION | 2 ++
base/workflow/DESCRIPTION | 2 ++
models/basgra/DESCRIPTION | 2 ++
models/biocro/DESCRIPTION | 2 ++
models/cable/DESCRIPTION | 2 ++
models/clm45/DESCRIPTION | 2 ++
models/dalec/DESCRIPTION | 2 ++
models/dvmdostem/DESCRIPTION | 2 ++
models/ed/DESCRIPTION | 2 ++
models/fates/DESCRIPTION | 2 ++
models/gday/DESCRIPTION | 2 ++
models/jules/DESCRIPTION | 2 ++
models/ldndc/DESCRIPTION | 2 ++
models/linkages/DESCRIPTION | 2 ++
models/lpjguess/DESCRIPTION | 2 ++
models/maat/DESCRIPTION | 2 ++
models/maespa/DESCRIPTION | 2 ++
models/preles/DESCRIPTION | 2 ++
models/sibcasa/DESCRIPTION | 2 ++
models/sipnet/DESCRIPTION | 2 ++
models/stics/DESCRIPTION | 2 ++
models/template/DESCRIPTION | 2 ++
modules/allometry/DESCRIPTION | 2 ++
modules/assim.batch/DESCRIPTION | 2 ++
modules/assim.sequential/DESCRIPTION | 2 ++
modules/benchmark/DESCRIPTION | 2 ++
modules/data.atmosphere/DESCRIPTION | 2 ++
modules/data.land/DESCRIPTION | 2 ++
modules/data.mining/DESCRIPTION | 2 ++
modules/data.remote/DESCRIPTION | 2 ++
modules/emulator/DESCRIPTION | 2 ++
modules/meta.analysis/DESCRIPTION | 2 ++
modules/photosynthesis/DESCRIPTION | 2 ++
modules/priors/DESCRIPTION | 2 ++
modules/rtm/DESCRIPTION | 2 ++
modules/uncertainty/DESCRIPTION | 2 ++
43 files changed, 85 insertions(+), 1 deletion(-)
diff --git a/base/all/DESCRIPTION b/base/all/DESCRIPTION
index 2cad68114e2..68ec2401c6d 100644
--- a/base/all/DESCRIPTION
+++ b/base/all/DESCRIPTION
@@ -43,6 +43,8 @@ Description: The Predictive Ecosystem Carbon Analyzer
PEcAn is to streamline the interaction between data and
models, and to improve the efficacy of scientific
investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends:
PEcAn.DB,
PEcAn.settings,
diff --git a/base/db/DESCRIPTION b/base/db/DESCRIPTION
index f88a16982e4..c1ec1b53328 100644
--- a/base/db/DESCRIPTION
+++ b/base/db/DESCRIPTION
@@ -39,6 +39,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
DBI,
dbplyr (>= 2.4.0),
diff --git a/base/logger/DESCRIPTION b/base/logger/DESCRIPTION
index 50bb54d1eee..5eea045dd1a 100644
--- a/base/logger/DESCRIPTION
+++ b/base/logger/DESCRIPTION
@@ -19,8 +19,8 @@ Description: Convenience functions for logging outputs from 'PEcAn',
and lenience when running large batches of simulations that should not be
terminated by errors in individual models. It is loosely based on
the 'log4j' package.
+URL: https://pecanproject.github.io/, https://github.com/PecanProject/pecan
BugReports: https://github.com/PecanProject/pecan/issues
-URL: https://pecanproject.github.io/
Imports:
utils,
stringi
diff --git a/base/qaqc/DESCRIPTION b/base/qaqc/DESCRIPTION
index 041f957d25b..9c5db8a5d2b 100644
--- a/base/qaqc/DESCRIPTION
+++ b/base/qaqc/DESCRIPTION
@@ -10,6 +10,8 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"),
Author: David LeBauer, Tess McCabe
Maintainer: David LeBauer
Description: PEcAn integration and model skill testing
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
graphics,
diff --git a/base/remote/DESCRIPTION b/base/remote/DESCRIPTION
index 196d97967d2..85bf7d92fc2 100644
--- a/base/remote/DESCRIPTION
+++ b/base/remote/DESCRIPTION
@@ -15,6 +15,8 @@ Authors@R: c(person("David", "LeBauer", role = c("aut"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This package contains utilities for communicating with and executing code on local and remote hosts.
In particular, it has PEcAn-specific utilities for starting ecosystem model runs.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
foreach,
diff --git a/base/settings/DESCRIPTION b/base/settings/DESCRIPTION
index f7b3e0409b0..5983d386a91 100644
--- a/base/settings/DESCRIPTION
+++ b/base/settings/DESCRIPTION
@@ -12,6 +12,8 @@ LazyLoad: yes
LazyData: FALSE
Require: hdf5
Description: Contains functions to read PEcAn settings files.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends:
methods
Imports:
diff --git a/base/utils/DESCRIPTION b/base/utils/DESCRIPTION
index 72948b3d713..a248c1dddc4 100644
--- a/base/utils/DESCRIPTION
+++ b/base/utils/DESCRIPTION
@@ -30,6 +30,8 @@ Description: The Predictive Ecosystem Carbon Analyzer
PEcAn is to streamline the interaction between data and
models, and to improve the efficacy of scientific
investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
abind (>= 1.4.5),
curl,
diff --git a/base/visualization/DESCRIPTION b/base/visualization/DESCRIPTION
index 8b38be4c664..9d1638c18db 100644
--- a/base/visualization/DESCRIPTION
+++ b/base/visualization/DESCRIPTION
@@ -27,6 +27,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
efficacy of scientific investigation.
This module is used to create more complex visualizations from the data
generated by PEcAn code, specifically the models.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
data.table,
ggplot2,
diff --git a/base/workflow/DESCRIPTION b/base/workflow/DESCRIPTION
index ae07761cb71..e05d6d9e67b 100644
--- a/base/workflow/DESCRIPTION
+++ b/base/workflow/DESCRIPTION
@@ -25,6 +25,8 @@ Description: The Predictive Ecosystem Carbon Analyzer
models, and to improve the efficacy of scientific
investigation. This package provides workhorse functions
that can be used to run the major steps of a PEcAn analysis.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
License: BSD_3_clause + file LICENSE
Imports:
dplyr,
diff --git a/models/basgra/DESCRIPTION b/models/basgra/DESCRIPTION
index f34e3f69db1..4f103380387 100644
--- a/models/basgra/DESCRIPTION
+++ b/models/basgra/DESCRIPTION
@@ -6,6 +6,8 @@ Authors@R: c(person("Istem", "Fer", role = c("aut", "cre"),
email = "istem.fer@fmi.fi"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link the BASGRA model to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends: R (>= 4.0.0)
Imports:
PEcAn.logger,
diff --git a/models/biocro/DESCRIPTION b/models/biocro/DESCRIPTION
index 51a2475e4dd..5a8832b0a99 100644
--- a/models/biocro/DESCRIPTION
+++ b/models/biocro/DESCRIPTION
@@ -12,6 +12,8 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"),
Author: David LeBauer, Deepak Jaiswal, Christopher Black
Maintainer: David LeBauer
Description: This module provides functions to link BioCro to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
PEcAn.remote,
diff --git a/models/cable/DESCRIPTION b/models/cable/DESCRIPTION
index d7de137afa5..cb9a189878c 100644
--- a/models/cable/DESCRIPTION
+++ b/models/cable/DESCRIPTION
@@ -9,6 +9,8 @@ Authors@R: c(person("Kaitlin", "Ragosta", role = c("aut")),
Author: Kaitlin Ragosta
Maintainer: Tony Gardella
Description: This module provides functions to link the (CABLE) to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
PEcAn.utils (>= 1.4.8)
diff --git a/models/clm45/DESCRIPTION b/models/clm45/DESCRIPTION
index 363b065e63c..40c55274444 100644
--- a/models/clm45/DESCRIPTION
+++ b/models/clm45/DESCRIPTION
@@ -11,6 +11,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation. This package provides functions to
link the Community Land Model, version 4.5, to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends:
PEcAn.logger,
PEcAn.utils
diff --git a/models/dalec/DESCRIPTION b/models/dalec/DESCRIPTION
index a73bd635c4d..f8119f40452 100644
--- a/models/dalec/DESCRIPTION
+++ b/models/dalec/DESCRIPTION
@@ -10,6 +10,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
Author: Mike Dietze, Tristain Quaife
Maintainer: Mike Dietze
Description: This module provides functions to link DALEC to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
PEcAn.remote,
diff --git a/models/dvmdostem/DESCRIPTION b/models/dvmdostem/DESCRIPTION
index a22b0daa62e..fe94de55fc2 100644
--- a/models/dvmdostem/DESCRIPTION
+++ b/models/dvmdostem/DESCRIPTION
@@ -11,6 +11,8 @@ Author: Tobey Carman, Shawn Serbin
Maintainer: Tobey Carman , Shawn Serbin
Description: This module provides functions to link the dvmdostem model to
PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
lubridate,
ncdf4,
diff --git a/models/ed/DESCRIPTION b/models/ed/DESCRIPTION
index 3e87cfc89f4..26625d9a71c 100644
--- a/models/ed/DESCRIPTION
+++ b/models/ed/DESCRIPTION
@@ -30,6 +30,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation. This package provides functions to
link the Ecosystem Demography Model, version 2, to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends:
R (>= 3.5)
Imports:
diff --git a/models/fates/DESCRIPTION b/models/fates/DESCRIPTION
index a44c6011931..93bce032e32 100644
--- a/models/fates/DESCRIPTION
+++ b/models/fates/DESCRIPTION
@@ -15,6 +15,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation. This package provides functions to
link the FATES model to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
stringr,
PEcAn.logger,
diff --git a/models/gday/DESCRIPTION b/models/gday/DESCRIPTION
index 3cda7b49439..3edb2b191a2 100644
--- a/models/gday/DESCRIPTION
+++ b/models/gday/DESCRIPTION
@@ -10,6 +10,8 @@ Authors@R: c(person("Martin", "De Kauwe", role = c("aut", "cre"),
Author: Martin De Kauwe
Maintainer: Martin De Kauwe
Description: This module provides functions to link the GDAY model to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends:
PEcAn.utils
Imports:
diff --git a/models/jules/DESCRIPTION b/models/jules/DESCRIPTION
index 3e6210fee3e..6ba95818b5d 100644
--- a/models/jules/DESCRIPTION
+++ b/models/jules/DESCRIPTION
@@ -6,6 +6,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
email = "dietze@bu.edu"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link the (JULES) to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.data.atmosphere,
PEcAn.logger,
diff --git a/models/ldndc/DESCRIPTION b/models/ldndc/DESCRIPTION
index 4cb800e9a37..5a3eefededd 100644
--- a/models/ldndc/DESCRIPTION
+++ b/models/ldndc/DESCRIPTION
@@ -5,6 +5,8 @@ Version: 1.0.0.9000
Authors@R: c(person("Henri", "Kajasilta", role = c("aut", "cre"),
email = "henri.kajasilta@fmi.fi"))
Description: This module provides functions to link the (LDNDC) to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
lubridate,
diff --git a/models/linkages/DESCRIPTION b/models/linkages/DESCRIPTION
index 79e0b26d0fb..7696b136e56 100644
--- a/models/linkages/DESCRIPTION
+++ b/models/linkages/DESCRIPTION
@@ -8,6 +8,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut"),
email = "araiho@nd.edu"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link the (LINKAGES) to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.data.land,
PEcAn.DB,
diff --git a/models/lpjguess/DESCRIPTION b/models/lpjguess/DESCRIPTION
index da914cf7d71..5c0ce84ddae 100644
--- a/models/lpjguess/DESCRIPTION
+++ b/models/lpjguess/DESCRIPTION
@@ -8,6 +8,8 @@ Authors@R: c(person("Istem", "Fer", role = c("aut", "cre"),
email = "tonygard@bu.edu"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link LPJ-GUESS to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
PEcAn.remote,
diff --git a/models/maat/DESCRIPTION b/models/maat/DESCRIPTION
index ce1684852b2..8a9dda597f3 100644
--- a/models/maat/DESCRIPTION
+++ b/models/maat/DESCRIPTION
@@ -6,6 +6,8 @@ Authors@R: c(
person("Shawn", "Serbin", role = c("aut", "cre"), email="sserbin@bnl.gov"),
person("Anthony", "Walker", role = "aut", email="walkerap@ornl.gov"))
Description: This module provides functions to wrap the MAAT model into the PEcAn workflows.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.data.atmosphere,
PEcAn.logger,
diff --git a/models/maespa/DESCRIPTION b/models/maespa/DESCRIPTION
index 40d1f78eaa3..1c3c2724564 100644
--- a/models/maespa/DESCRIPTION
+++ b/models/maespa/DESCRIPTION
@@ -11,6 +11,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.This package allows for MAESPA to be
run through the PEcAN workflow.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.data.atmosphere,
PEcAn.logger,
diff --git a/models/preles/DESCRIPTION b/models/preles/DESCRIPTION
index 125a9ffddbe..c83ce2863cd 100644
--- a/models/preles/DESCRIPTION
+++ b/models/preles/DESCRIPTION
@@ -14,6 +14,8 @@ Description: This module provides functions to run the PREdict Light use
parameterization,execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
lubridate (>= 1.6.0),
diff --git a/models/sibcasa/DESCRIPTION b/models/sibcasa/DESCRIPTION
index 0c3bae6faed..aaaeae34345 100644
--- a/models/sibcasa/DESCRIPTION
+++ b/models/sibcasa/DESCRIPTION
@@ -12,6 +12,8 @@ Authors@R: c(person("Rob", "Kooper", role = "cre",
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link (SiBCASA) to PEcAn.
It is a work in progress and is not yet fully functional.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
ncdf4,
PEcAn.logger
diff --git a/models/sipnet/DESCRIPTION b/models/sipnet/DESCRIPTION
index e5f14edc35d..40ee9884497 100644
--- a/models/sipnet/DESCRIPTION
+++ b/models/sipnet/DESCRIPTION
@@ -10,6 +10,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
lubridate (>= 1.6.0),
diff --git a/models/stics/DESCRIPTION b/models/stics/DESCRIPTION
index 2fcce87bfa7..acf8c5b4764 100644
--- a/models/stics/DESCRIPTION
+++ b/models/stics/DESCRIPTION
@@ -7,6 +7,8 @@ Authors@R: c(
email = "istem.fer@fmi.fi",
role = c("aut", "cre")))
Description: This module provides functions to link the STICS to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.settings,
PEcAn.logger,
diff --git a/models/template/DESCRIPTION b/models/template/DESCRIPTION
index 7e0aea20f21..52797ff2d8b 100644
--- a/models/template/DESCRIPTION
+++ b/models/template/DESCRIPTION
@@ -6,6 +6,8 @@ Authors@R: c(person("Jane", "Doe", role = c("aut", "cre"),
email = "jdoe@illinois.edu"),
person("John", "Doe", role = c("aut")))
Description: This module provides functions to link the (ModelName) to PEcAn.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.DB,
PEcAn.logger,
diff --git a/modules/allometry/DESCRIPTION b/modules/allometry/DESCRIPTION
index c031dc02766..d0ab5cb0f1f 100644
--- a/modules/allometry/DESCRIPTION
+++ b/modules/allometry/DESCRIPTION
@@ -7,6 +7,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
person("Shashank", "Singh", role = c("ctb")),
person("University of Illinois, NCSA", role = c("cph")))
Description: Synthesize allometric equations or fit allometries to data.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
coda (>= 0.18),
grDevices,
diff --git a/modules/assim.batch/DESCRIPTION b/modules/assim.batch/DESCRIPTION
index 6283791ace5..03d0fe4cf6e 100644
--- a/modules/assim.batch/DESCRIPTION
+++ b/modules/assim.batch/DESCRIPTION
@@ -14,6 +14,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
VignetteBuilder: knitr, rmarkdown
Imports:
abind,
diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index dfa454384cd..5b65d48d31a 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -9,6 +9,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
coda,
dplyr,
diff --git a/modules/benchmark/DESCRIPTION b/modules/benchmark/DESCRIPTION
index e5a312f0c1d..a867a88537a 100644
--- a/modules/benchmark/DESCRIPTION
+++ b/modules/benchmark/DESCRIPTION
@@ -21,6 +21,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
efficacy of scientific investigation. The PEcAn.benchmark package provides
utilities for comparing models and data, including a suite of statistical
metrics and plots.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
ggplot2,
diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION
index ebfc9bbc639..abce32df49a 100644
--- a/modules/data.atmosphere/DESCRIPTION
+++ b/modules/data.atmosphere/DESCRIPTION
@@ -19,6 +19,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
package converts climate driver data into a standard format for models
integrated into PEcAn. As a standalone package, it provides an interface to
access diverse climate data sets.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
abind (>= 1.4.5),
amerifluxr,
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index 0a86834e441..554305857c7 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -21,6 +21,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends: R (>= 3.5.0)
Imports:
coda,
diff --git a/modules/data.mining/DESCRIPTION b/modules/data.mining/DESCRIPTION
index 60229a54ef7..8ab63bf1907 100644
--- a/modules/data.mining/DESCRIPTION
+++ b/modules/data.mining/DESCRIPTION
@@ -2,6 +2,8 @@ Package: PEcAn.data.mining
Type: Package
Title: PEcAn Functions Used for Exploring Model Residuals and Structures
Description: (Temporary description) PEcAn functions used for exploring model residuals and structures.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Version: 1.7.3.9000
Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
email = "dietze@bu.edu"),
diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION
index abb339e187c..dbd38de0abe 100644
--- a/modules/data.remote/DESCRIPTION
+++ b/modules/data.remote/DESCRIPTION
@@ -10,6 +10,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut"),
Author: Mike Dietze, Bailey Morrison
Maintainer: Bailey Morrison
Description: PEcAn module for processing remote data. Python module requirements: requests, json, re, ast, panads, sys. If any of these modules are missing, install using pip install .
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
curl,
DBI,
diff --git a/modules/emulator/DESCRIPTION b/modules/emulator/DESCRIPTION
index ab62ac706ad..0d3fe2698b8 100644
--- a/modules/emulator/DESCRIPTION
+++ b/modules/emulator/DESCRIPTION
@@ -13,6 +13,8 @@ Imports:
Description: Implementation of a Gaussian Process model (both likelihood and
bayesian approaches) for kriging and model emulation. Includes functions
for sampling design and prediction.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
License: BSD_3_clause + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.3.2
diff --git a/modules/meta.analysis/DESCRIPTION b/modules/meta.analysis/DESCRIPTION
index 60ba579a6e4..18270488f79 100644
--- a/modules/meta.analysis/DESCRIPTION
+++ b/modules/meta.analysis/DESCRIPTION
@@ -24,6 +24,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation. The PEcAn.MA package contains
the functions used in the Bayesian meta-analysis of trait data.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
coda (>= 0.18),
lattice,
diff --git a/modules/photosynthesis/DESCRIPTION b/modules/photosynthesis/DESCRIPTION
index b101522ae90..40d79efe89c 100644
--- a/modules/photosynthesis/DESCRIPTION
+++ b/modules/photosynthesis/DESCRIPTION
@@ -18,6 +18,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
efficacy of scientific investigation. The PEcAn.photosynthesis package
contains functions used in the Hierarchical Bayesian calibration of the
Farquhar et al 1980 model.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends:
rjags
Imports:
diff --git a/modules/priors/DESCRIPTION b/modules/priors/DESCRIPTION
index abfc6f2bb56..a3ef7212975 100644
--- a/modules/priors/DESCRIPTION
+++ b/modules/priors/DESCRIPTION
@@ -6,6 +6,8 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"),
email = "dlebauer@email.arizona.edu"),
person("University of Illinois, NCSA", role = c("cph")))
Description: Functions to estimate priors from data.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
License: BSD_3_clause + file LICENSE
Copyright: Authors
LazyLoad: yes
diff --git a/modules/rtm/DESCRIPTION b/modules/rtm/DESCRIPTION
index 2be3ff4f0a8..1d903ece731 100644
--- a/modules/rtm/DESCRIPTION
+++ b/modules/rtm/DESCRIPTION
@@ -13,6 +13,8 @@ Description: Functions for performing forward runs and inversions of radiative
transfer models (RTMs). Inversions can be performed using maximum
likelihood, or more complex hierarchical Bayesian methods.
Underlying numerical analyses are optimized for speed using Fortran code.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Depends: R (>= 2.10)
Imports:
PEcAn.logger,
diff --git a/modules/uncertainty/DESCRIPTION b/modules/uncertainty/DESCRIPTION
index 6074bd2cc0d..895c561cbdb 100644
--- a/modules/uncertainty/DESCRIPTION
+++ b/modules/uncertainty/DESCRIPTION
@@ -26,6 +26,8 @@ Description: The Predictive Ecosystem Carbon Analyzer
PECAn is to streamline the interaction between data and
models, and to improve the efficacy of scientific
investigation.
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
ggplot2,
From 69526b019d2ea0765737adcf0ec0a329a74ab6e5 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 19 Apr 2025 04:28:24 +0000
Subject: [PATCH 0081/1193] added 'yaml' and 'desc' dependencies for pkgdown
docs build
---
.github/workflows/pkgdown.yml | 4 ++--
docker/docs/Dockerfile | 2 +-
scripts/build_pkgdown.R | 1 +
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml
index d068b000cba..9297072675f 100644
--- a/.github/workflows/pkgdown.yml
+++ b/.github/workflows/pkgdown.yml
@@ -24,9 +24,9 @@ jobs:
# Checkout source code
- uses: actions/checkout@v4
- # Install pkgdown
+ # Install dependencies
- name: Install dependencies
- run: Rscript -e 'install.packages("pkgdown")'
+ run: Rscript -e 'install.packages(c("pkgdown", "yaml", "desc"))'
# Generate documentation using Makefile
- name: Generate Package Documentation
diff --git a/docker/docs/Dockerfile b/docker/docs/Dockerfile
index d0af39672f2..19e9b10edac 100644
--- a/docker/docs/Dockerfile
+++ b/docker/docs/Dockerfile
@@ -15,7 +15,7 @@ RUN apt-get update \
-e 'remotes::install_version("rmarkdown", ">= 2.19", dependencies = TRUE, upgrade = FALSE, repos = repos)' \
-e 'remotes::install_version("knitr", ">= 1.42", dependencies = TRUE, upgrade = FALSE, repos = repos)' \
-e 'remotes::install_version("bookdown", ">= 0.31", dependencies = TRUE, upgrade = FALSE, repos = repos)' \
- -e 'install.packages("pkgdown", repos = repos)' \
+ -e 'install.packages(c("pkgdown", "yaml", "desc"), repos = repos)' \
&& rm -rf /var/lib/apt/lists/*
# ----------------------------------------------------------------------
diff --git a/scripts/build_pkgdown.R b/scripts/build_pkgdown.R
index 1bb3f5db9ce..d0cb44bae14 100644
--- a/scripts/build_pkgdown.R
+++ b/scripts/build_pkgdown.R
@@ -2,6 +2,7 @@
# Build pkgdown documentation for PEcAn packages
library(pkgdown)
library(yaml)
+library(desc)
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
stop("No package names provided. Please pass package names as arguments.")
From 7b2ee28c55ca0ecb787a94f08f59d77aa15bc447 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 19 Apr 2025 15:24:52 +0000
Subject: [PATCH 0082/1193] added updated .Rd files after adding URL to
DESCRIPTION
---
base/db/man/PEcAn.DB-package.Rd | 9 +++++++++
base/utils/man/PEcAn.Rd | 9 +++++++++
modules/emulator/man/PEcAn.emulator-package.Rd | 9 +++++++++
3 files changed, 27 insertions(+)
diff --git a/base/db/man/PEcAn.DB-package.Rd b/base/db/man/PEcAn.DB-package.Rd
index cdd80bdc1d9..5a63f5dd82e 100644
--- a/base/db/man/PEcAn.DB-package.Rd
+++ b/base/db/man/PEcAn.DB-package.Rd
@@ -8,6 +8,15 @@
\description{
This package provides an interface between PEcAn and the BETY database.
For usage examples, please see \code{vignette("betydb_access")}
+}
+\seealso{
+Useful links:
+\itemize{
+ \item \url{https://pecanproject.github.io}
+ \item \url{https://github.com/PecanProject/pecan}
+ \item Report bugs at \url{https://github.com/PecanProject/pecan/issues}
+}
+
}
\author{
\strong{Maintainer}: David LeBauer \email{dlebauer@email.arizona.edu}
diff --git a/base/utils/man/PEcAn.Rd b/base/utils/man/PEcAn.Rd
index 4a6752d93c7..773527bedba 100644
--- a/base/utils/man/PEcAn.Rd
+++ b/base/utils/man/PEcAn.Rd
@@ -49,6 +49,15 @@ Current development is focused on developing PEcAn into a real-time data
assimilation and forecasting system. This system will provide a detailed
analysis of the past and present ecosystem functioning that seamlessly
transitions into forecasts.
+}
+\seealso{
+Useful links:
+\itemize{
+ \item \url{https://pecanproject.github.io}
+ \item \url{https://github.com/PecanProject/pecan}
+ \item Report bugs at \url{https://github.com/PecanProject/pecan/issues}
+}
+
}
\author{
\strong{Maintainer}: Rob Kooper \email{kooper@illinois.edu}
diff --git a/modules/emulator/man/PEcAn.emulator-package.Rd b/modules/emulator/man/PEcAn.emulator-package.Rd
index ba35157964c..14f5d38e38b 100644
--- a/modules/emulator/man/PEcAn.emulator-package.Rd
+++ b/modules/emulator/man/PEcAn.emulator-package.Rd
@@ -9,6 +9,15 @@ Supports both likelihood and bayesian approaches for kriging and model
emulation. Includes functions for sampling design and prediction.}
\description{
Implementation of a Gaussian Process model (both likelihood and bayesian approaches) for kriging and model emulation. Includes functions for sampling design and prediction.
+}
+\seealso{
+Useful links:
+\itemize{
+ \item \url{https://pecanproject.github.io}
+ \item \url{https://github.com/PecanProject/pecan}
+ \item Report bugs at \url{https://github.com/PecanProject/pecan/issues}
+}
+
}
\author{
\strong{Maintainer}: Mike Dietze \email{dietze@bu.edu}
From 775f820e285647930db5db0b0d0a0d866625bc0c Mon Sep 17 00:00:00 2001
From: divne7022
Date: Wed, 23 Apr 2025 23:36:28 +0000
Subject: [PATCH 0083/1193] updated build_pkgdown.R
---
scripts/build_pkgdown.R | 49 ++++++++++++++++++++---------------------
1 file changed, 24 insertions(+), 25 deletions(-)
diff --git a/scripts/build_pkgdown.R b/scripts/build_pkgdown.R
index d0cb44bae14..6327b064503 100644
--- a/scripts/build_pkgdown.R
+++ b/scripts/build_pkgdown.R
@@ -20,26 +20,6 @@ if (requireNamespace("PEcAn.logger", quietly = TRUE)) {
}
}
-pkg_config <- function() {
-
- list(
- url = "https://pecanproject.github.io/",
- template = list(
- bootstrap = 5,
- includes = list(
- before_navbar = paste0(
- "\n",
- ""
- )
- )
- )
- )
-}
-
logger("Building pkgdown docs for:", paste(packages, collapse = ", "))
for (pkg in packages) {
logger("Building pkgdown site for:", pkg)
@@ -48,11 +28,30 @@ for (pkg in packages) {
if (!dir.exists(pkg)) {
stop(paste("Package directory does not exist:", pkg))
}
- pkg_config_path <- file.path(pkg, "_pkgdown.yml")
- pkg_config_data <- pkg_config()
- yaml::write_yaml(pkg_config_data, pkg_config_path)
setwd(pkg)
- pkgdown::build_site()
+ pkgdown::build_site(
+ pkg = ".",
+ override = list(
+ repo = list(
+ url = list(
+ source = paste0("https://github.com/PecanProject/pecan/blob/develop/", pkg)
+ )
+ ),
+ template = list(
+ bootstrap = 5,
+ includes = list(
+ before_navbar = paste0(
+ "\n",
+ ""
+ )
+ )
+ )
+ )
+ )
setwd(current_wd)
source_docs <- file.path(pkg, "docs")
if (!dir.exists(source_docs)) {
@@ -92,7 +91,7 @@ before_text <- c(
'',
'
PEcAn package documentation
',
'
Function documentation and articles for each PEcAn package,',
- ' generated from the package source using pkgdown package.
',
+ ' generated from the package source using pkgdown.
',
'',
'
'
)
From 088129daf7db3e0d46f5b0785c1d6be3c75898f8 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Wed, 23 Apr 2025 23:38:53 +0000
Subject: [PATCH 0084/1193] updated DESCRIPTON URL
---
base/all/DESCRIPTION | 2 +-
base/db/DESCRIPTION | 2 +-
base/logger/DESCRIPTION | 2 +-
base/qaqc/DESCRIPTION | 2 +-
base/remote/DESCRIPTION | 2 +-
base/settings/DESCRIPTION | 2 +-
base/utils/DESCRIPTION | 2 +-
base/visualization/DESCRIPTION | 2 +-
base/workflow/DESCRIPTION | 2 +-
models/basgra/DESCRIPTION | 2 +-
models/biocro/DESCRIPTION | 2 +-
models/cable/DESCRIPTION | 2 +-
models/clm45/DESCRIPTION | 2 +-
models/dalec/DESCRIPTION | 2 +-
models/dvmdostem/DESCRIPTION | 2 +-
models/ed/DESCRIPTION | 2 +-
models/fates/DESCRIPTION | 2 +-
models/gday/DESCRIPTION | 2 +-
models/jules/DESCRIPTION | 2 +-
models/ldndc/DESCRIPTION | 2 +-
models/linkages/DESCRIPTION | 2 +-
models/lpjguess/DESCRIPTION | 2 +-
models/maat/DESCRIPTION | 2 +-
models/maespa/DESCRIPTION | 2 +-
models/preles/DESCRIPTION | 2 +-
models/sibcasa/DESCRIPTION | 2 +-
models/sipnet/DESCRIPTION | 2 +-
models/stics/DESCRIPTION | 2 +-
models/template/DESCRIPTION | 2 +-
modules/allometry/DESCRIPTION | 2 +-
modules/assim.batch/DESCRIPTION | 2 +-
modules/assim.sequential/DESCRIPTION | 2 +-
modules/benchmark/DESCRIPTION | 2 +-
modules/data.atmosphere/DESCRIPTION | 2 +-
modules/data.land/DESCRIPTION | 2 +-
modules/data.mining/DESCRIPTION | 2 +-
modules/data.remote/DESCRIPTION | 2 +-
modules/emulator/DESCRIPTION | 2 +-
modules/meta.analysis/DESCRIPTION | 2 +-
modules/photosynthesis/DESCRIPTION | 2 +-
modules/priors/DESCRIPTION | 2 +-
modules/rtm/DESCRIPTION | 2 +-
modules/uncertainty/DESCRIPTION | 2 +-
43 files changed, 43 insertions(+), 43 deletions(-)
diff --git a/base/all/DESCRIPTION b/base/all/DESCRIPTION
index 68ec2401c6d..a357d6cfb93 100644
--- a/base/all/DESCRIPTION
+++ b/base/all/DESCRIPTION
@@ -43,7 +43,7 @@ Description: The Predictive Ecosystem Carbon Analyzer
PEcAn is to streamline the interaction between data and
models, and to improve the efficacy of scientific
investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends:
PEcAn.DB,
diff --git a/base/db/DESCRIPTION b/base/db/DESCRIPTION
index c1ec1b53328..5072539cf28 100644
--- a/base/db/DESCRIPTION
+++ b/base/db/DESCRIPTION
@@ -39,7 +39,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
DBI,
diff --git a/base/logger/DESCRIPTION b/base/logger/DESCRIPTION
index 5eea045dd1a..c7a2b6465ee 100644
--- a/base/logger/DESCRIPTION
+++ b/base/logger/DESCRIPTION
@@ -19,7 +19,7 @@ Description: Convenience functions for logging outputs from 'PEcAn',
and lenience when running large batches of simulations that should not be
terminated by errors in individual models. It is loosely based on
the 'log4j' package.
-URL: https://pecanproject.github.io/, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io/
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
utils,
diff --git a/base/qaqc/DESCRIPTION b/base/qaqc/DESCRIPTION
index 9c5db8a5d2b..fc76b179a7d 100644
--- a/base/qaqc/DESCRIPTION
+++ b/base/qaqc/DESCRIPTION
@@ -10,7 +10,7 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"),
Author: David LeBauer, Tess McCabe
Maintainer: David LeBauer
Description: PEcAn integration and model skill testing
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
diff --git a/base/remote/DESCRIPTION b/base/remote/DESCRIPTION
index 85bf7d92fc2..53cca1542d9 100644
--- a/base/remote/DESCRIPTION
+++ b/base/remote/DESCRIPTION
@@ -15,7 +15,7 @@ Authors@R: c(person("David", "LeBauer", role = c("aut"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This package contains utilities for communicating with and executing code on local and remote hosts.
In particular, it has PEcAn-specific utilities for starting ecosystem model runs.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
diff --git a/base/settings/DESCRIPTION b/base/settings/DESCRIPTION
index 5983d386a91..da595fedf13 100644
--- a/base/settings/DESCRIPTION
+++ b/base/settings/DESCRIPTION
@@ -12,7 +12,7 @@ LazyLoad: yes
LazyData: FALSE
Require: hdf5
Description: Contains functions to read PEcAn settings files.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends:
methods
diff --git a/base/utils/DESCRIPTION b/base/utils/DESCRIPTION
index a248c1dddc4..df65b3ae5bc 100644
--- a/base/utils/DESCRIPTION
+++ b/base/utils/DESCRIPTION
@@ -30,7 +30,7 @@ Description: The Predictive Ecosystem Carbon Analyzer
PEcAn is to streamline the interaction between data and
models, and to improve the efficacy of scientific
investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
abind (>= 1.4.5),
diff --git a/base/visualization/DESCRIPTION b/base/visualization/DESCRIPTION
index 9d1638c18db..81ef1eafc1e 100644
--- a/base/visualization/DESCRIPTION
+++ b/base/visualization/DESCRIPTION
@@ -27,7 +27,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
efficacy of scientific investigation.
This module is used to create more complex visualizations from the data
generated by PEcAn code, specifically the models.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
data.table,
diff --git a/base/workflow/DESCRIPTION b/base/workflow/DESCRIPTION
index e05d6d9e67b..1026b5921f5 100644
--- a/base/workflow/DESCRIPTION
+++ b/base/workflow/DESCRIPTION
@@ -25,7 +25,7 @@ Description: The Predictive Ecosystem Carbon Analyzer
models, and to improve the efficacy of scientific
investigation. This package provides workhorse functions
that can be used to run the major steps of a PEcAn analysis.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
License: BSD_3_clause + file LICENSE
Imports:
diff --git a/models/basgra/DESCRIPTION b/models/basgra/DESCRIPTION
index 4f103380387..48eeda84f0a 100644
--- a/models/basgra/DESCRIPTION
+++ b/models/basgra/DESCRIPTION
@@ -6,7 +6,7 @@ Authors@R: c(person("Istem", "Fer", role = c("aut", "cre"),
email = "istem.fer@fmi.fi"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link the BASGRA model to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends: R (>= 4.0.0)
Imports:
diff --git a/models/biocro/DESCRIPTION b/models/biocro/DESCRIPTION
index 5a8832b0a99..697343b4519 100644
--- a/models/biocro/DESCRIPTION
+++ b/models/biocro/DESCRIPTION
@@ -12,7 +12,7 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"),
Author: David LeBauer, Deepak Jaiswal, Christopher Black
Maintainer: David LeBauer
Description: This module provides functions to link BioCro to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
diff --git a/models/cable/DESCRIPTION b/models/cable/DESCRIPTION
index cb9a189878c..5a54f62697a 100644
--- a/models/cable/DESCRIPTION
+++ b/models/cable/DESCRIPTION
@@ -9,7 +9,7 @@ Authors@R: c(person("Kaitlin", "Ragosta", role = c("aut")),
Author: Kaitlin Ragosta
Maintainer: Tony Gardella
Description: This module provides functions to link the (CABLE) to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
diff --git a/models/clm45/DESCRIPTION b/models/clm45/DESCRIPTION
index 40c55274444..01736954346 100644
--- a/models/clm45/DESCRIPTION
+++ b/models/clm45/DESCRIPTION
@@ -11,7 +11,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation. This package provides functions to
link the Community Land Model, version 4.5, to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends:
PEcAn.logger,
diff --git a/models/dalec/DESCRIPTION b/models/dalec/DESCRIPTION
index f8119f40452..23ac2688b86 100644
--- a/models/dalec/DESCRIPTION
+++ b/models/dalec/DESCRIPTION
@@ -10,7 +10,7 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
Author: Mike Dietze, Tristain Quaife
Maintainer: Mike Dietze
Description: This module provides functions to link DALEC to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
diff --git a/models/dvmdostem/DESCRIPTION b/models/dvmdostem/DESCRIPTION
index fe94de55fc2..d3a07be220a 100644
--- a/models/dvmdostem/DESCRIPTION
+++ b/models/dvmdostem/DESCRIPTION
@@ -11,7 +11,7 @@ Author: Tobey Carman, Shawn Serbin
Maintainer: Tobey Carman , Shawn Serbin
Description: This module provides functions to link the dvmdostem model to
PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
lubridate,
diff --git a/models/ed/DESCRIPTION b/models/ed/DESCRIPTION
index 26625d9a71c..9fafef44172 100644
--- a/models/ed/DESCRIPTION
+++ b/models/ed/DESCRIPTION
@@ -30,7 +30,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation. This package provides functions to
link the Ecosystem Demography Model, version 2, to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends:
R (>= 3.5)
diff --git a/models/fates/DESCRIPTION b/models/fates/DESCRIPTION
index 93bce032e32..3f24279686b 100644
--- a/models/fates/DESCRIPTION
+++ b/models/fates/DESCRIPTION
@@ -15,7 +15,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation. This package provides functions to
link the FATES model to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
stringr,
diff --git a/models/gday/DESCRIPTION b/models/gday/DESCRIPTION
index 3edb2b191a2..b49275c529c 100644
--- a/models/gday/DESCRIPTION
+++ b/models/gday/DESCRIPTION
@@ -10,7 +10,7 @@ Authors@R: c(person("Martin", "De Kauwe", role = c("aut", "cre"),
Author: Martin De Kauwe
Maintainer: Martin De Kauwe
Description: This module provides functions to link the GDAY model to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends:
PEcAn.utils
diff --git a/models/jules/DESCRIPTION b/models/jules/DESCRIPTION
index 6ba95818b5d..7c8941fd8ca 100644
--- a/models/jules/DESCRIPTION
+++ b/models/jules/DESCRIPTION
@@ -6,7 +6,7 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
email = "dietze@bu.edu"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link the (JULES) to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.data.atmosphere,
diff --git a/models/ldndc/DESCRIPTION b/models/ldndc/DESCRIPTION
index 5a3eefededd..f619c756945 100644
--- a/models/ldndc/DESCRIPTION
+++ b/models/ldndc/DESCRIPTION
@@ -5,7 +5,7 @@ Version: 1.0.0.9000
Authors@R: c(person("Henri", "Kajasilta", role = c("aut", "cre"),
email = "henri.kajasilta@fmi.fi"))
Description: This module provides functions to link the (LDNDC) to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
diff --git a/models/linkages/DESCRIPTION b/models/linkages/DESCRIPTION
index 7696b136e56..62d7c23d2f9 100644
--- a/models/linkages/DESCRIPTION
+++ b/models/linkages/DESCRIPTION
@@ -8,7 +8,7 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut"),
email = "araiho@nd.edu"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link the (LINKAGES) to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.data.land,
diff --git a/models/lpjguess/DESCRIPTION b/models/lpjguess/DESCRIPTION
index 5c0ce84ddae..bf3aaf11693 100644
--- a/models/lpjguess/DESCRIPTION
+++ b/models/lpjguess/DESCRIPTION
@@ -8,7 +8,7 @@ Authors@R: c(person("Istem", "Fer", role = c("aut", "cre"),
email = "tonygard@bu.edu"),
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link LPJ-GUESS to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
diff --git a/models/maat/DESCRIPTION b/models/maat/DESCRIPTION
index 8a9dda597f3..c99f9108500 100644
--- a/models/maat/DESCRIPTION
+++ b/models/maat/DESCRIPTION
@@ -6,7 +6,7 @@ Authors@R: c(
person("Shawn", "Serbin", role = c("aut", "cre"), email="sserbin@bnl.gov"),
person("Anthony", "Walker", role = "aut", email="walkerap@ornl.gov"))
Description: This module provides functions to wrap the MAAT model into the PEcAn workflows.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.data.atmosphere,
diff --git a/models/maespa/DESCRIPTION b/models/maespa/DESCRIPTION
index 1c3c2724564..0e287c88387 100644
--- a/models/maespa/DESCRIPTION
+++ b/models/maespa/DESCRIPTION
@@ -11,7 +11,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.This package allows for MAESPA to be
run through the PEcAN workflow.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.data.atmosphere,
diff --git a/models/preles/DESCRIPTION b/models/preles/DESCRIPTION
index c83ce2863cd..2020a341dd5 100644
--- a/models/preles/DESCRIPTION
+++ b/models/preles/DESCRIPTION
@@ -14,7 +14,7 @@ Description: This module provides functions to run the PREdict Light use
parameterization,execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.logger,
diff --git a/models/sibcasa/DESCRIPTION b/models/sibcasa/DESCRIPTION
index aaaeae34345..55cafaa02c9 100644
--- a/models/sibcasa/DESCRIPTION
+++ b/models/sibcasa/DESCRIPTION
@@ -12,7 +12,7 @@ Authors@R: c(person("Rob", "Kooper", role = "cre",
person("University of Illinois, NCSA", role = c("cph")))
Description: This module provides functions to link (SiBCASA) to PEcAn.
It is a work in progress and is not yet fully functional.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
ncdf4,
diff --git a/models/sipnet/DESCRIPTION b/models/sipnet/DESCRIPTION
index 40ee9884497..779d23205f0 100644
--- a/models/sipnet/DESCRIPTION
+++ b/models/sipnet/DESCRIPTION
@@ -10,7 +10,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
diff --git a/models/stics/DESCRIPTION b/models/stics/DESCRIPTION
index acf8c5b4764..fff8a8e1362 100644
--- a/models/stics/DESCRIPTION
+++ b/models/stics/DESCRIPTION
@@ -7,7 +7,7 @@ Authors@R: c(
email = "istem.fer@fmi.fi",
role = c("aut", "cre")))
Description: This module provides functions to link the STICS to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.settings,
diff --git a/models/template/DESCRIPTION b/models/template/DESCRIPTION
index 52797ff2d8b..265df447507 100644
--- a/models/template/DESCRIPTION
+++ b/models/template/DESCRIPTION
@@ -6,7 +6,7 @@ Authors@R: c(person("Jane", "Doe", role = c("aut", "cre"),
email = "jdoe@illinois.edu"),
person("John", "Doe", role = c("aut")))
Description: This module provides functions to link the (ModelName) to PEcAn.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
PEcAn.DB,
diff --git a/modules/allometry/DESCRIPTION b/modules/allometry/DESCRIPTION
index d0ab5cb0f1f..a0c2e2678c4 100644
--- a/modules/allometry/DESCRIPTION
+++ b/modules/allometry/DESCRIPTION
@@ -7,7 +7,7 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
person("Shashank", "Singh", role = c("ctb")),
person("University of Illinois, NCSA", role = c("cph")))
Description: Synthesize allometric equations or fit allometries to data.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
coda (>= 0.18),
diff --git a/modules/assim.batch/DESCRIPTION b/modules/assim.batch/DESCRIPTION
index 03d0fe4cf6e..eea4f86e51f 100644
--- a/modules/assim.batch/DESCRIPTION
+++ b/modules/assim.batch/DESCRIPTION
@@ -14,7 +14,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
VignetteBuilder: knitr, rmarkdown
Imports:
diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 5b65d48d31a..fec57eb1efc 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -9,7 +9,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
coda,
diff --git a/modules/benchmark/DESCRIPTION b/modules/benchmark/DESCRIPTION
index a867a88537a..426565b3a55 100644
--- a/modules/benchmark/DESCRIPTION
+++ b/modules/benchmark/DESCRIPTION
@@ -21,7 +21,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
efficacy of scientific investigation. The PEcAn.benchmark package provides
utilities for comparing models and data, including a suite of statistical
metrics and plots.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION
index abce32df49a..f86ef982567 100644
--- a/modules/data.atmosphere/DESCRIPTION
+++ b/modules/data.atmosphere/DESCRIPTION
@@ -19,7 +19,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
package converts climate driver data into a standard format for models
integrated into PEcAn. As a standalone package, it provides an interface to
access diverse climate data sets.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
abind (>= 1.4.5),
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index 554305857c7..eac2b3ec05b 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -21,7 +21,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
model parameterization, execution, and analysis. The goal of PECAn is to
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends: R (>= 3.5.0)
Imports:
diff --git a/modules/data.mining/DESCRIPTION b/modules/data.mining/DESCRIPTION
index 8ab63bf1907..79b3f0727d9 100644
--- a/modules/data.mining/DESCRIPTION
+++ b/modules/data.mining/DESCRIPTION
@@ -2,7 +2,7 @@ Package: PEcAn.data.mining
Type: Package
Title: PEcAn Functions Used for Exploring Model Residuals and Structures
Description: (Temporary description) PEcAn functions used for exploring model residuals and structures.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Version: 1.7.3.9000
Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION
index dbd38de0abe..652b889d8a3 100644
--- a/modules/data.remote/DESCRIPTION
+++ b/modules/data.remote/DESCRIPTION
@@ -10,7 +10,7 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut"),
Author: Mike Dietze, Bailey Morrison
Maintainer: Bailey Morrison
Description: PEcAn module for processing remote data. Python module requirements: requests, json, re, ast, panads, sys. If any of these modules are missing, install using pip install .
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
curl,
diff --git a/modules/emulator/DESCRIPTION b/modules/emulator/DESCRIPTION
index 0d3fe2698b8..a8ee339e631 100644
--- a/modules/emulator/DESCRIPTION
+++ b/modules/emulator/DESCRIPTION
@@ -13,7 +13,7 @@ Imports:
Description: Implementation of a Gaussian Process model (both likelihood and
bayesian approaches) for kriging and model emulation. Includes functions
for sampling design and prediction.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
License: BSD_3_clause + file LICENSE
Encoding: UTF-8
diff --git a/modules/meta.analysis/DESCRIPTION b/modules/meta.analysis/DESCRIPTION
index 18270488f79..236b6e6ed97 100644
--- a/modules/meta.analysis/DESCRIPTION
+++ b/modules/meta.analysis/DESCRIPTION
@@ -24,7 +24,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
streamline the interaction between data and models, and to improve the
efficacy of scientific investigation. The PEcAn.MA package contains
the functions used in the Bayesian meta-analysis of trait data.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
coda (>= 0.18),
diff --git a/modules/photosynthesis/DESCRIPTION b/modules/photosynthesis/DESCRIPTION
index 40d79efe89c..8046a282b13 100644
--- a/modules/photosynthesis/DESCRIPTION
+++ b/modules/photosynthesis/DESCRIPTION
@@ -18,7 +18,7 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
efficacy of scientific investigation. The PEcAn.photosynthesis package
contains functions used in the Hierarchical Bayesian calibration of the
Farquhar et al 1980 model.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends:
rjags
diff --git a/modules/priors/DESCRIPTION b/modules/priors/DESCRIPTION
index a3ef7212975..37479e0cf0a 100644
--- a/modules/priors/DESCRIPTION
+++ b/modules/priors/DESCRIPTION
@@ -6,7 +6,7 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"),
email = "dlebauer@email.arizona.edu"),
person("University of Illinois, NCSA", role = c("cph")))
Description: Functions to estimate priors from data.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
License: BSD_3_clause + file LICENSE
Copyright: Authors
diff --git a/modules/rtm/DESCRIPTION b/modules/rtm/DESCRIPTION
index 1d903ece731..4d5d9b442ca 100644
--- a/modules/rtm/DESCRIPTION
+++ b/modules/rtm/DESCRIPTION
@@ -13,7 +13,7 @@ Description: Functions for performing forward runs and inversions of radiative
transfer models (RTMs). Inversions can be performed using maximum
likelihood, or more complex hierarchical Bayesian methods.
Underlying numerical analyses are optimized for speed using Fortran code.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Depends: R (>= 2.10)
Imports:
diff --git a/modules/uncertainty/DESCRIPTION b/modules/uncertainty/DESCRIPTION
index 895c561cbdb..f19873eb29b 100644
--- a/modules/uncertainty/DESCRIPTION
+++ b/modules/uncertainty/DESCRIPTION
@@ -26,7 +26,7 @@ Description: The Predictive Ecosystem Carbon Analyzer
PECAn is to streamline the interaction between data and
models, and to improve the efficacy of scientific
investigation.
-URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan
+URL: https://pecanproject.github.io
BugReports: https://github.com/PecanProject/pecan/issues
Imports:
dplyr,
From 479add0e88c08dac91c6989221b1a707e6ba3f51 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Wed, 23 Apr 2025 23:41:36 +0000
Subject: [PATCH 0085/1193] updated .Rd files
---
base/db/man/PEcAn.DB-package.Rd | 1 -
base/utils/man/PEcAn.Rd | 1 -
modules/emulator/man/PEcAn.emulator-package.Rd | 1 -
3 files changed, 3 deletions(-)
diff --git a/base/db/man/PEcAn.DB-package.Rd b/base/db/man/PEcAn.DB-package.Rd
index 5a63f5dd82e..db7c298cafa 100644
--- a/base/db/man/PEcAn.DB-package.Rd
+++ b/base/db/man/PEcAn.DB-package.Rd
@@ -13,7 +13,6 @@ For usage examples, please see \code{vignette("betydb_access")}
Useful links:
\itemize{
\item \url{https://pecanproject.github.io}
- \item \url{https://github.com/PecanProject/pecan}
\item Report bugs at \url{https://github.com/PecanProject/pecan/issues}
}
diff --git a/base/utils/man/PEcAn.Rd b/base/utils/man/PEcAn.Rd
index 773527bedba..0bf5ab7e4f5 100644
--- a/base/utils/man/PEcAn.Rd
+++ b/base/utils/man/PEcAn.Rd
@@ -54,7 +54,6 @@ transitions into forecasts.
Useful links:
\itemize{
\item \url{https://pecanproject.github.io}
- \item \url{https://github.com/PecanProject/pecan}
\item Report bugs at \url{https://github.com/PecanProject/pecan/issues}
}
diff --git a/modules/emulator/man/PEcAn.emulator-package.Rd b/modules/emulator/man/PEcAn.emulator-package.Rd
index 14f5d38e38b..7e30243d008 100644
--- a/modules/emulator/man/PEcAn.emulator-package.Rd
+++ b/modules/emulator/man/PEcAn.emulator-package.Rd
@@ -14,7 +14,6 @@ Implementation of a Gaussian Process model (both likelihood and bayesian approac
Useful links:
\itemize{
\item \url{https://pecanproject.github.io}
- \item \url{https://github.com/PecanProject/pecan}
\item Report bugs at \url{https://github.com/PecanProject/pecan/issues}
}
From a496a2a8131574e8b3ab32f8fa036b808457c111 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 24 Apr 2025 00:05:19 +0000
Subject: [PATCH 0086/1193] removed yaml and desc dependencies from pkgdown.yml
---
.github/workflows/pkgdown.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml
index 9297072675f..e505dd906d4 100644
--- a/.github/workflows/pkgdown.yml
+++ b/.github/workflows/pkgdown.yml
@@ -26,7 +26,7 @@ jobs:
# Install dependencies
- name: Install dependencies
- run: Rscript -e 'install.packages(c("pkgdown", "yaml", "desc"))'
+ run: Rscript -e 'install.packages("pkgdown")'
# Generate documentation using Makefile
- name: Generate Package Documentation
From 2037541e703310540b94909d76507235f1bac9ff Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 25 Apr 2025 10:43:14 -0400
Subject: [PATCH 0087/1193] Update path.
---
.../inst/anchor/NA_downscale_script.R | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 746ed0886a4..c82e6382213 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -225,25 +225,26 @@ for (y in 2012:2024) {
# setup.
base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
-load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/sda.all.forecast.analysis.Rdata")
+load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_100ens_2025_4_22/sda.all.forecast.analysis.Rdata")
variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/pecanIC.xml"
settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/ShapeFile/pts.shp"
-outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/"
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_100ens_2025_4_22/"
cores <- 28
date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
# loop over years.
for (i in seq_along(date)) {
+ print(i)
# Assemble covariates.
- covariates.dir <- file.path(outdir, "covariates_lc_ts", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
+ covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates_lc_ts", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
- analysis.yr <- analysis.all[[i]]
+ analysis.yr <- forecast.all[[i]]
time <- date[i]
# loop over carbon types.
for (j in seq_along(variables)) {
# setup folder.
variable <- variables[j]
- folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts"), paste0(variables[j], "_", date[i]))
+ folder.path <- file.path(file.path(outdir, "downscale_maps_forecast_lc_ts"), paste0(variables[j], "_", date[i]))
dir.create(folder.path)
saveRDS(list(settings = settings,
analysis.yr = analysis.yr,
@@ -253,7 +254,7 @@ for (i in seq_along(date)) {
folder.path = folder.path,
base.map.dir = base.map.dir,
cores = cores,
- outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")),
+ outdir = file.path(outdir, "downscale_maps_forecast_lc_ts")),
file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
@@ -266,7 +267,7 @@ for (i in seq_along(date)) {
jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh)
writeLines(jobsh, con = file.path(folder.path, "job.sh"))
# qsub command.
- qsub <- "qsub -l h_rt=10:00:00 -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
+ qsub <- "qsub -l h_rt=24:00:00 -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
qsub <- gsub("@CORES@", cores, qsub)
qsub <- gsub("@NAME@", paste0("ds_", i, "_", j), qsub)
qsub <- gsub("@STDOUT@", file.path(folder.path, "stdout.log"), qsub)
From 39cc0cd7a257aba1ee67df946f685960a5a44bfb Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 30 Apr 2025 09:05:59 -0400
Subject: [PATCH 0088/1193] change variable and path.
---
.../assim.sequential/inst/anchor/NA_downscale_script.R | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index c82e6382213..057229d3214 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -225,11 +225,11 @@ for (y in 2012:2024) {
# setup.
base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
-load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_100ens_2025_4_22/sda.all.forecast.analysis.Rdata")
+load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/sda.all.forecast.analysis.Rdata")
variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/pecanIC.xml"
settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/ShapeFile/pts.shp"
-outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_100ens_2025_4_22/"
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/"
cores <- 28
date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
# loop over years.
@@ -238,13 +238,13 @@ for (i in seq_along(date)) {
# Assemble covariates.
covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates_lc_ts", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
- analysis.yr <- forecast.all[[i]]
+ analysis.yr <- analysis.all[[i]]
time <- date[i]
# loop over carbon types.
for (j in seq_along(variables)) {
# setup folder.
variable <- variables[j]
- folder.path <- file.path(file.path(outdir, "downscale_maps_forecast_lc_ts"), paste0(variables[j], "_", date[i]))
+ folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts"), paste0(variables[j], "_", date[i]))
dir.create(folder.path)
saveRDS(list(settings = settings,
analysis.yr = analysis.yr,
@@ -254,7 +254,7 @@ for (i in seq_along(date)) {
folder.path = folder.path,
base.map.dir = base.map.dir,
cores = cores,
- outdir = file.path(outdir, "downscale_maps_forecast_lc_ts")),
+ outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")),
file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
From 7ea9ac8ee0ca66be67de1ec0f0be56b08504c531 Mon Sep 17 00:00:00 2001
From: Katherine Rein
Date: Thu, 1 May 2025 14:55:48 -0400
Subject: [PATCH 0089/1193] Python Irrigation files
---
.../inst/Python/CCMMF_Irrigation_API.py | 156 +++++++++++++++++
.../inst/Python/CCMMF_Irrigation_CalcVis.py | 104 ++++++++++++
.../Python/CCMMF_Irrigation_DataDownload.py | 160 ++++++++++++++++++
.../inst/Python/CCMMF_Irrigation_Events.py | 58 +++++++
modules/data.remote/inst/Python/README.txt | 143 ++++++++++++++++
5 files changed, 621 insertions(+)
create mode 100644 modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
create mode 100644 modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
create mode 100644 modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
create mode 100644 modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
create mode 100644 modules/data.remote/inst/Python/README.txt
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
new file mode 100644
index 00000000000..553c939281b
--- /dev/null
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Mar 6 13:59:07 2025
+
+@author: katherineanne
+"""
+# %% Import modules
+
+import requests
+import numpy as np
+from netCDF4 import Dataset, num2date
+import matplotlib.pyplot as plt
+import pandas as pd
+import os
+from datetime import datetime, date, timedelta
+import pyarrow as pa
+import pyarrow.parquet as pq
+import pyarrow.dataset as ds
+import ee
+import CCMMF_Irrigation_DataDownload
+import CCMMF_Irrigation_CalcVis
+import CCMMF_Irrigation_Events
+
+ee.Initialize()
+
+# %% Define multi use variables
+
+# Define years to look at
+years = list(range(2016, 2026))
+
+# Define folder name for csv files
+csv_folder = 'WaterBalanceCSV/'
+
+# Define the name of the parquet filename
+pq_filename = 'CCMMF_Irrigation_Parquet'
+
+# %% Loading data
+
+# Read in parquet file
+# Load the full dataset
+dataset = ds.dataset(pq_filename, format="parquet", partitioning = 'hive')
+table = dataset.to_table()
+parquet_df = table.to_pandas()
+days_to_download = 0
+
+# Group by the location column and convert to dictionary
+data_dict = {location: location_df for location, location_df in parquet_df.groupby("location")}
+
+# %% Check current date with most current downloaded data
+
+# Delete the current CHIRPS file for this year
+# This will ensure we read in the new data for the current date
+# We only do this if the data is not up to date
+cur_year = datetime.now().year
+today = datetime.now().date()
+chirps_filename = f'chirps-v2.0.{cur_year}.days_p05.nc'
+
+if os.path.exists(chirps_filename):
+ with Dataset(chirps_filename, 'r') as nc:
+
+ time_var = nc.variables['time']
+ dates = num2date(time_var[:], units=time_var.units)
+ most_recent = max(dates)
+ most_recent_date = date(most_recent.year, most_recent.month, most_recent.day)
+
+ if most_recent_date != today:
+ print('Deleted')
+ days_to_download = (today - most_recent_date).days
+ os.remove(chirps_filename)
+
+# %% Define locations
+
+# Read in all lat lons
+df_lat_lon = pd.read_csv('design_points.csv')
+
+# Handle duplicates
+df_lat_lon = df_lat_lon.drop_duplicates()
+
+# %% Iterate through locations and download data for each
+
+for row_number in range(29):
+
+ # Load location data
+ latitude = df_lat_lon['lat'].iloc[row_number]
+ longitude = df_lat_lon['lon'].iloc[row_number]
+ location = df_lat_lon['id'].iloc[row_number]
+
+ if location in data_dict:
+
+ df = data_dict[location]
+
+ # If we have not downloaded data for today yet...
+ if days_to_download != 0:
+ # Download new data
+ start_date = today - timedelta(days=days_to_download)
+ new_df = CCMMF_Irrigation_DataDownload.new_data_entry_API(latitude, longitude,
+ [start_date.year, cur_year],
+ csv_folder, start_date, today)
+
+ # Concatenate with already saved data
+ old_df = data_dict[location]
+ df = pd.concat([new_df, old_df], ignore_index=True)
+ df = df.sort_values(by='time')
+ data_dict[location] = df
+
+ # Save data
+ filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'
+ df.to_csv(filename, index=False)
+
+ # Check that all years have been read in
+ df['time'] = pd.to_datetime(df['time'])
+ df_years = df['time'].dt.year.unique().tolist()
+
+ if set(df_years) != set(years):
+
+ # Years in what years we want but not in saved data
+ # Does not care if there are values in saved data that are not in wanted years
+ not_saved_years = set(years) - set(df_years)
+ not_saved_years = list(not_saved_years)
+
+ # Download data and calculate for new years
+ new_df = CCMMF_Irrigation_DataDownload.new_data_entry_API(latitude, longitude,
+ not_saved_years, csv_folder)
+
+ # Concatenate with already saved data
+ old_df = data_dict[location]
+ df = pd.concat([new_df, old_df], ignore_index=True)
+ df = df.sort_values(by='time')
+ data_dict[location] = df
+
+ # Save data
+ filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'
+ df.to_csv(filename, index=False)
+
+ # The location is not in the saved dictionary
+ else:
+ # Download and calculate if it doesn't exist
+ df = CCMMF_Irrigation_DataDownload.new_data_entry_API(latitude, longitude,
+ years, csv_folder)
+ data_dict[location] = df
+
+ # Save data
+ filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'
+ df.to_csv(filename, index=False)
+
+# %% Create Event Files
+
+CCMMF_Irrigation_Events.file_creation(data_dict)
+
+# %% Write to parquet
+
+for location, loc_df in data_dict.items():
+ loc_df['location'] = location
+ table = pa.Table.from_pandas(loc_df)
+ pq.write_to_dataset(table, root_path = pq_filename, partition_cols = ['location', 'year'])
\ No newline at end of file
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
new file mode 100644
index 00000000000..46312c85fca
--- /dev/null
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Apr 23 14:46:51 2025
+
+@author: krein21
+"""
+# %% Import modules
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# %% Turn raw data into usable data
+
+def water_balance(df_open_et, precip_data, LAT, LON):
+ print(f'{LAT} {LON}')
+
+ # Create dataframe
+ df_water_balance = df_open_et
+ df_water_balance['precip'] = precip_data
+
+ # Handle NAs
+ df_water_balance['et'] = df_water_balance['et'].fillna(0)
+ df_water_balance['precip'] = df_water_balance['precip'].fillna(0)
+
+ # Constants
+ WHC = 500 # units: mm
+ W_min = 0.15 * WHC
+ field_capacity = WHC/2
+
+ # Water Balance Equation
+ df_water_balance['W_t'] = field_capacity
+
+ for row_number in range(1,len(df_water_balance)):
+
+ # Pull all data
+ W_tminusone = df_water_balance['W_t'].iloc[row_number - 1]
+ precip = df_water_balance['precip'].iloc[row_number]
+ et = df_water_balance['et'].iloc[row_number]
+
+ # Calculate initial W_t
+ # W_t = W_t-1 + P_t - ET_t
+ W_t_initial = W_tminusone + precip - et
+
+ # Calculate irrigation
+ # Irr_t = max(Wmin - W_t, 0)
+ irr = max(W_min - W_t_initial, 0)
+
+ # Calculate runoff
+ # Qt = max(Wt - WHC, 0)
+ runoff = max(W_t_initial - WHC, 0)
+
+ # Calculate final W_t
+ # W_t = W_t-1 + P_t + Irr_t - ET_t - Q_t
+ W_t = W_tminusone + precip + irr - et - runoff
+
+ # Add values to dataframe
+ df_water_balance.loc[row_number, 'W_t'] = W_t
+ df_water_balance.loc[row_number, 'irr'] = irr
+ df_water_balance.loc[row_number, 'runoff'] = runoff
+
+ # Add year, day and week values
+ df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])
+ df_water_balance['year'] = df_water_balance['time'].dt.year
+ df_water_balance['week'] = df_water_balance['time'].dt.isocalendar().week
+ df_water_balance['day_of_year'] = df_water_balance['time'].dt.dayofyear
+
+ return df_water_balance
+
+# %% Time Series
+
+def timeseries_graphs(df_water_balance, LAT, LON, YEAR):
+
+ # Slicing warning if not copied
+ df_water_balance = df_water_balance.copy()
+
+ # Create cumulative sum columns
+ df_water_balance['et_cumsum'] = df_water_balance['et'].cumsum()
+ df_water_balance['precip_cumsum'] = df_water_balance['precip'].cumsum()
+ df_water_balance['irr_cumsum'] = df_water_balance['irr'].cumsum()
+
+ # Ensure time is dates
+ df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])
+
+ # Plot time series
+ plt.figure(figsize=(10, 5))
+ plt.plot(df_water_balance['time'], df_water_balance['et_cumsum'], linestyle = 'dotted', lw = 2.5, label = 'Evapotranspiration')
+ plt.plot(df_water_balance['time'], df_water_balance['precip_cumsum'], linestyle = 'dashed', lw = 2.5, label = 'Precipitation')
+ plt.plot(df_water_balance['time'], df_water_balance['irr_cumsum'], linestyle = 'dashdot', lw = 2.5, label = 'Irrigation')
+ plt.plot(df_water_balance['time'], df_water_balance['runoff'], linestyle = 'solid', lw = 2.5, label = 'Runoff')
+
+ plt.xlabel('Date')
+ plt.ylabel('Cumulative Sum of Evapotransipiration, \nPrecipitation, and Irrigation (mm)')
+ plt.suptitle('Evapotransipiration and Precipitation Time Series in Central Valley CA')
+ plt.title(f'(Lat: {LAT}, Lon: {LON})')
+ plt.legend()
+ plt.grid()
+
+ # Save plot
+ filename = f'TimeseriesPNG/CCMMR_et_precip_irr_cumsum_{YEAR}_{LAT}_{LON}.png'
+ plt.savefig(filename)
+
+ plt.show()
\ No newline at end of file
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
new file mode 100644
index 00000000000..742a01f479e
--- /dev/null
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Apr 23 14:42:41 2025
+
+@author: krein21
+"""
+# %% Import modules
+
+import requests
+import numpy as np
+import pandas as pd
+from netCDF4 import Dataset
+import CCMMF_Irrigation_CalcVis
+import os
+import ee
+
+ee.Initialize()
+
+
+# %% Download GEE OPEN ET Data
+
+def GEEOpenET(START_DATE, END_DATE, LAT, LON):
+ '''THIS DOES NOT RUN AT THE MOMENT'''
+
+ # Access OpenET dataset
+ collection = ee.ImageCollection("OpenET/ENSEMBLE/CONUS/GRIDMET/MONTHLY/v2_0") \
+ .filterDate(START_DATE, END_DATE) \
+ .filterBounds(ee.Geometry.Point([LON, LAT]))
+
+ # Extract et time series
+ def extract_et(img):
+ date = img.date().format()
+ et = img.reduceRegion(ee.Reducer.first(), ee.Geometry.Point([LON, LAT]), 1000).get('et_ensemble_mad')
+ return ee.Feature(None, {'date': date, 'et': et})
+
+ et_series = collection.map(extract_et)
+
+ # Convert data to df
+ et_series = et_series.getInfo() # Convert from ee.List to Python list
+ print(et_series)
+ print(type(et_series))
+ open_et_df = pd.DataFrame(et_series)
+ open_et_df['date'] = pd.to_datetime(open_et_df['date'])
+
+ print(open_et_df)
+
+ return open_et_df
+
+# %% Request OPEN ET Data (from website)
+
+def OpenETData(START_DATE, END_DATE, LAT, LON):
+
+ # Read in API Key
+ with open('OpenETAPIKey.txt', 'r') as file:
+ api_key = file.readline()
+
+ header = {"Authorization": api_key}
+
+ # endpoint arguments
+ args = {
+ "date_range": [START_DATE, END_DATE],
+ "interval": "daily",
+ "geometry": [LON,LAT],
+ "model": "Ensemble",
+ "variable": "ET",
+ "reference_et": "gridMET",
+ "units": "mm",
+ "file_format": "JSON"
+ }
+
+ # query the api
+ resp = requests.post(
+ headers=header,
+ json=args,
+ url="https://openet-api.org/raster/timeseries/point"
+ )
+
+ # Parse the JSON response
+ et_data = resp.json()
+
+ open_et_df = pd.DataFrame(et_data)
+ open_et_df['time'] = pd.to_datetime(open_et_df['time'])
+
+ return open_et_df
+
+# %% Download CHIRPS Data
+
+def CHIRPSData(YEAR, LAT, LON):
+
+ # Set URL and file name
+ url = f'https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/netcdf/p05/chirps-v2.0.{YEAR}.days_p05.nc'
+ destfile = f'chirps-v2.0.{YEAR}.days_p05.nc'
+
+ # Check if the file already exists before downloading
+ if not os.path.exists(destfile):
+ print(f"{destfile} not found. Downloading now...")
+ response = requests.get(url, timeout=600)
+
+ with open(destfile, 'wb') as f:
+ f.write(response.content)
+
+ # Open the NetCDF file
+ nc_data = Dataset(destfile, 'r')
+
+ # Print metadata for precipitation
+ #precip_variable = nc_data.variables['precip']
+ #print(precip_variable)
+
+ # Extract coordinate variables
+ lon = nc_data.variables['longitude'][:]
+ lat = nc_data.variables['latitude'][:]
+
+ # Find the nearest lat/lon index
+ lon_idx = np.abs(lon - LON).argmin()
+ lat_idx = np.abs(lat - LAT).argmin()
+
+ # Extract the data just for that lat lon
+ precip_data = nc_data.variables['precip'][:, lat_idx, lon_idx]
+
+ # Close the NetCDF file when done
+ nc_data.close()
+
+ # Clean data
+ precip_data = precip_data.filled(np.nan)
+ precip_data_df = pd.DataFrame(precip_data)
+
+ return precip_data_df
+
+# %% Calculate and visualize new data
+
+def new_data_entry_API(LAT, LON, years, csv_folder, START_DATE = None, END_DATE = None):
+ print(f'{LAT} {LON} {years}')
+
+ # Define start and end date
+ if START_DATE == None or END_DATE == None:
+ START_DATE = f'{years[0]}-01-01'
+ END_DATE = f'{years[-1]}-12-31'
+
+ # Download open et data
+ et_df = OpenETData(START_DATE, END_DATE, LAT, LON)
+
+ # Download CHIRPS data year by year and concatenate
+ precip_data = pd.DataFrame()
+ for year in years:
+ precip_data_year = CHIRPSData(year, LAT, LON)
+ precip_data = pd.concat([precip_data, precip_data_year], ignore_index=True)
+
+ # Organize and water balance
+ df_water_balance = CCMMF_Irrigation_CalcVis.water_balance(et_df, precip_data, LAT, LON)
+
+ # Graph
+ df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])
+ for year in years:
+ CCMMF_Irrigation_CalcVis.timeseries_graphs(df_water_balance[df_water_balance['time'].dt.year == year], LAT, LON, year)
+
+ # Save to csv to ensure data is stored
+ filename = f'{csv_folder}CCMMR_Water_Balance_{LAT}_{LON}.csv'
+ df_water_balance.to_csv(filename, index=False)
+ return df_water_balance
\ No newline at end of file
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
new file mode 100644
index 00000000000..e11ab0c6f45
--- /dev/null
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Apr 24 12:15:39 2025
+
+@author: krein21
+
+Columns:
+ - loc: spatial location index (starts at 0)
+ - year: year of start of this timestep
+ - day: day of start of this timestep (1 - 366)
+ - event_type: type of event
+ - amount_added (cm/day)
+ - type: (0 = canopy, 1 = soil, 2 = flood)
+
+"""
+
+# %% Import modules
+
+import pandas as pd
+
+
+# %% Create event file
+
+def file_creation(data_dict):
+
+ # Create an event file for each location
+ for key, df in data_dict.items():
+
+ # Add columns
+ df['event_type'] = 'irrig'
+ df['loc'] = 0
+ df['type'] = 1
+
+ # Calculate new units for irrigation
+ df['irr'] = df['irr'] * 0.1
+
+ # Aggregate by week
+ # Sum irrigation
+ eventfile_df = df.groupby(['year', 'week'], as_index = False).agg({
+ 'loc': 'first',
+ 'year': 'first',
+ 'day_of_year': 'first',
+ 'event_type': 'first',
+ 'irr': 'sum',
+ 'type': 'first'
+ })
+
+ # Remove week column
+ eventfile_df = eventfile_df.drop('week', axis = 1)
+
+ # Write to file(s)
+ folder_name = 'CCMMF_Irrigation_EventFiles/'
+ filename = f'{folder_name}irrigation_eventfile_{key}.txt'
+ eventfile_df.to_csv(filename, sep = ' ', index = False, header = False)
+
+
+
diff --git a/modules/data.remote/inst/Python/README.txt b/modules/data.remote/inst/Python/README.txt
new file mode 100644
index 00000000000..759d65aa280
--- /dev/null
+++ b/modules/data.remote/inst/Python/README.txt
@@ -0,0 +1,143 @@
+This document walks through the code written by Katherine Rein during the
+Spring 2025 semester for the CCMMF project. This code works on downloading and
+manipulating evapotranspiration data and precipitation data for different sites
+in California.
+
+Data Sources:
+- Evapotranspiration: OpenET
+ - https://openet.gitbook.io/docs
+- Precipitation: CHIRPS
+ - https://data.chc.ucsb.edu/products/CHIRPS-2.0/
+
+Main Storage Folder: /projectnb/dietzelab/ccmmf/management/irrigation
+
+How to use SCC:
+- When creating desktop ensure -> Extra qsub options: -l buyin
+- Once desktop loads:
+ - Open Terminal
+ - Type: module load miniconda
+ - Create or Load environment
+ - Load: conda activate ccmmf_env
+ - Create (all on one line): conda create -n ccmmf_env python jupyter
+ spyder xarray requests numpy netcdf4 matplotlib pandas pyarrow earthengine-api
+ - To open spyder: spyder &
+ - This may take a second to run. Be patient it will open eventually.
+
+Google Earth Engine Account:
+- Contact __ for a new Google Earth Enginge project
+- Go to https://code.earthengine.google.com/
+- Click on your profile picture in the top right corner
+- Select Project Info
+- Under Cloud Project you will find the Cloud Project ID (ex. ee-krein21-s25)
+ - Save this value for later
+- Find the manage cloud project link and click on it
+- Under Project Info on the left hand side select Go to Project Settings
+- Select IAM on the left hand side
+- Select Grant Access
+- Add openet@googlegroups.com as a viewer (under basic)
+- Now open up a terminal window and navigate to the irrigation folder
+- Run the following command in terminal: earthengine authenticate --auth_mode=notebook
+- Paste the link it gives you into a browser and log into your Google account
+that is linked to the Google Earth Engine project
+- Paste the token back into the terminal window
+
+OpenET Account:
+- Click log in/sign up for an account at https://etdata.org/
+- Use the same account as you used for your Google Earth Engine project
+- Once account has been created, add in the saved Cloud Project ID into the
+Cloud Project ID field at the bottom of profile settings
+
+Organization:
+- Python Files
+ - CCMMF_Irrigation_API: This file is the main file that runs the data downloading
+ and other data manipulation for using the OpenET API. It loads in the previously
+ downloaded data and decides which data it needs to download.
+ - CCMMF_Irrigation_DataDownload: This file contains the different download
+ fucntions for each data type. It also contains the fuction that downloads
+ compiles all of the functions to download new data for a new location/years.
+ - CCMMF_Irrigation_CalcVis: This file contains the functions used to clean
+ and visualize the raw data.
+ - CCMMF_Irrigation_Events: This file contains the function that turns a
+ dictionary of dataframes into txt files for each location in the dictionary.
+ It both selects columns and sets constants for other columns. It also aggregates
+ the data by week.
+- Folders
+ - WaterBalanceCSV: This is where all of the csv files for each location get
+ saved. This is a back up way to save all of the data and also makes it easier
+ to quickly view data per location. Each file is labeled with the corresponding
+ lat and long coordinate. The folder name is defined in "Define multi use
+ variables" section of CCMMF_Irrigation_API.
+ - TimeseriesPNG: This is where the timeseries graphs for each location and
+ each year are saved. There is no variable name for this folder it is simply
+ included in this string f'TimeseriesPNG/CCMMR_et_precip_irr_cumsum_{YEAR}_{LAT}_{LON}.png'
+ in the timeseries_graphs function in CCMMF_Irrigation_CalcVis.
+ - CCMMF_Irrigation_Parquet: This folder is a directory for all of the parquet
+ files. It is written in a way that Python and R can then tile the data by
+ both location and year. This folder name is also defined in the "Define multi
+ use variables" section of CCMMF_Irrigation_API.
+ - CCMMF_Irrigation_EventFiles: This holds all of the event txt files for each
+ location. The column names are in the header of CCMMF_Irrigation_Events. The
+ naming format for the files is irrigation_eventfile_{location_id}.txt.
+- Other
+ - chirps-v2.0.{year}.days_p05.nc: These are the files that contain the downloaded
+ CHIRPS data on a daily scale for the whole world. They are downloaded from the
+ web and then read in for each location and year. They are quite large and
+ take a while to download so if your code isn't running quickly that may be
+ why (if given new years).
+ - design_points.csv: This is the inital locations dataframe that we started
+ with. To scale this program up, simply change the csv that is being read in.
+ Currently the column headers are id, lat, and lon. Keeping these the same
+ will be easiest.
+
+Workflow:
+- Data is read in from parquet file
+- Calculate how old the data is (and how much new data needs to be read in)
+ - If data is old, then delete the most recent CHIRPS file because we want
+ to read in new data
+- Read in location data (lat, lon, location_id)
+- Iterate through the location data and download new data
+- Check if the location id is in the parquet file we downloaded
+ - If yes: check that our data is currently up to date (download/organize
+ new dates if needed)
+ - Also check that the years sequence is the same from what has been
+ downloaded to what we defined as the years we want to look at (This
+ really only catches any years that are new at the front)
+ - If no: download/organize for predefined year span
+- Write the data that has been downloaded and organzied to the parquet file
+
+Functions (by files):
+- CCMMF_Irrigation_DataDownload
+ - GEEOpenET: This function downloads data from Google Earth Engine and turns
+ it into a dataframe with evapotranspiration data and the date.
+ - OpenETData: This function downloads data using the OpenET API and turns
+ it into a dataframe with evapotranspiration data and the date.
+ - CHIRPSData: This function downloads the .nc file from the CHIRPS website
+ and then reads in the values for the closest latitude longitude values. It
+ then returns the data as a dataframe.
+ - new_data_entry_API:
+- CCMMF_Irrigation_CalcVis
+ - water_balance: This function takes the raw data for each location and calculates
+ the water balance equation for each time step. It also calculates the different
+ time columns (week, year, day of year).
+ - timeseries_graphs: This takes a dataframe and saves/prints a cumulative
+ sum graph for evapotranspiration, irrigation, and precipitation. There is
+ also a runoff curve that is not a cumulative sum.
+- CCMMF_Irrigation_Events
+ - file_creation: This function takes in a dictionary of dataframes. It then
+ itterates over each location in the dictionary and selects/calculates the
+ expected columns for the txt file. It also aggregates this data by week.
+
+Next Steps:
+- Get the Google Earth Engine download working
+- Create a CCMMF_Irrigation_GEE file
+ - Does the same thing as CCMMF_Irrigation_API except uses GEEOpenET and the
+ monthly et values are then assumed to be the same for each day of the month.
+- Compare monthly and daily et values
+- Site specific water holding capacity and crop specific rooting depth
+
+
+
+
+
+
+
\ No newline at end of file
From 2144ca75272932522ea5273256701edef82fd639 Mon Sep 17 00:00:00 2001
From: ChaneyFinkeldei
Date: Fri, 2 May 2025 10:04:27 -0400
Subject: [PATCH 0090/1193] initial commit of NEON soil carbon validation code
---
.../inst/NEON_soils/soilcarbon_validation.Rmd | 663 ++++++++++++++++++
1 file changed, 663 insertions(+)
create mode 100644 modules/data.mining/inst/NEON_soils/soilcarbon_validation.Rmd
diff --git a/modules/data.mining/inst/NEON_soils/soilcarbon_validation.Rmd b/modules/data.mining/inst/NEON_soils/soilcarbon_validation.Rmd
new file mode 100644
index 00000000000..4e35ab36c22
--- /dev/null
+++ b/modules/data.mining/inst/NEON_soils/soilcarbon_validation.Rmd
@@ -0,0 +1,663 @@
+---
+title: "Soil Carbon data validation"
+date: "2025-01-27"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+# Load libraries, WD, cran repository
+library(DBI)
+library(RSQLite)
+library(dplyr)
+library(geosphere)
+library(sp)
+library(ggplot2)
+library(rnaturalearth)
+library(sf)
+library(maps)
+library(scoringRules)
+library(ncf)
+library(terra)
+library(knitr)
+library(gstat)
+library(tidyr)
+library(neonstore)
+library(neonUtilities)
+library(swfscMisc)
+library(PEcAn.utils)
+library(maps)
+
+
+# load functions from other script
+source("extract_neon_sc_func.R")
+
+setwd("/Users/chane/OneDrive/Documents/ef lab/R files")
+options(repos = c(CRAN = "https://cran.rstudio.com/"))
+```
+
+Getting soil carbon data from SDA forecast (copying Cami's code)
+
+Naming data frame with site number
+```{r, echo = FALSE}
+
+# Load data
+SIPNET <- load("sda.all.forecast.analysis.Rdata")
+
+# site coordinates
+Site_Info <- readRDS("site.locs.rds")
+
+# Rename lists to years
+names(analysis.all) <- 2012:2021
+year <- 2012:2021
+
+variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
+
+# Loop through each year list in analysis.all
+for (year in names(analysis.all)) {
+
+ # Get the current data frame for the year
+ year_df <- analysis.all[[year]]
+
+ # Initialize a vector for new column names
+ new_colnames <- character(ncol(year_df))
+
+ # Loop over each site and assign variable names
+ for (site in 1:6400) {
+ # Calculate the starting column index for each site's variables
+ start_col <- (site - 1) * 4 + 1
+
+ # Assign the names for the 4 variable columns for this site
+ new_colnames[start_col:(start_col + 3)] <- paste0(variables, "_Site", site)
+ }
+
+ # Apply the new column names to the data frame
+ colnames(year_df) <- new_colnames
+ analysis.all[[year]] <- year_df # Update the list with modified data frame
+}
+
+```
+
+
+Assign the data numbers their coordinates and find which one is closest to each NEON site
+```{r, echo = FALSE}
+## Add column with corresponding site number for each plot in Neonplots
+
+Neonplots <- read.csv("Neon_sites_terrestrial.csv")
+
+ # Add a column for site numbers in Site_Info (site_info has coordinates of each location in df)
+Site_Info$Site_Number <- seq(1, nrow(Site_Info))
+
+ # Extract latitude and longitude from Neonplots and Site_Info tables
+neon_coords <- Neonplots[ ,2:3] %>%
+ select(Latitude = field_latitude, Longitude = field_longitude)
+Site_Info_coords <- Site_Info %>%
+ select(Site_Number, lat, lon)
+
+ # Put Site_Info latitude/longitude in a matrix
+Site_Coordinates <- as.matrix(Site_Info[, c("lat", "lon")])
+
+ # Same for Neonplots
+Neonplots_Coordinates <- as.matrix(Neonplots[, 2:3])
+
+ # Create a new column in Neonplots to store the closest Site_Number
+Neonplots$Closest_Site <- NA
+
+# Loop through each row in Neonplots and calculate each plot's closest site number
+for (i in 1:nrow(Neonplots)) {
+ # Extract the coordinates for the current row in Neonplots
+ plot_coords <- Neonplots_Coordinates[i, ]
+
+ # Compute distances from this plot to all site coordinates (longlat means its in km)
+ distances <- spDistsN1(Site_Coordinates, plot_coords, longlat = TRUE)
+
+ # Find the index of the minimum distance for closest site
+ closest_site_index <- which.min(distances)
+
+ # Assign the corresponding Site_Number from Site_Info to the current row in Neonplots
+ Neonplots$Closest_Site[i] <- Site_Info$Site_Number[closest_site_index]}
+
+```
+A lot of the same domain NEON sights have the same cloests site index - is this right?
+
+
+Filtering analysis data with relevant sites (Cami's also does years - should I do that?)
+```{r, echo=FALSE}
+## Filter analysis.all to only include relevant sites ##
+# Get the list of sites to keep from Neonplots
+sites_to_keep <- Neonplots$Closest_Site
+
+# Loop through each data frame in the analysis.all list
+analysis.fia = list()
+for (year in names(analysis.all)) {
+ # Get the current data frame
+ df <- analysis.all[[year]]
+
+ # Create a pattern to match columns that include sites in sites_to_keep
+ pattern <- paste0("_Site(", paste(sites_to_keep, collapse = "|"), ")$")
+
+ # Keep only columns that match the pattern
+ df_filtered <- df[, grep(pattern, colnames(df))]
+
+ # Replace the original data frame in the list with the filtered one
+ analysis.fia[[year]] <- df_filtered}
+
+```
+two have same closest site, GUAN and LAJA - they should each be different right??
+
+Now extract the sites from Neonplots
+```{r, echo = FALSE}
+## Extract the unique site-year combinations from Neonplots ##
+valid_sites <- unique(Neonplots[, "Closest_Site"])
+
+# Loop over each year list in analysis.all
+for (year in names(analysis.fia)) {
+
+ # Extract the current data frame for the year
+ year_df <- analysis.fia[[year]]
+
+ # Identify the year as a numeric value
+ current_year <- as.numeric(substr(year, 1, 4))
+
+ # Identify columns with site numbers that match valid_sites
+ # Extract the site number from column names using regex
+ matching_columns <- grep(paste0("_Site(", paste(valid_sites, collapse = "|"), ")$"),
+ colnames(year_df), value = TRUE)
+
+ # Subset the data frame to only include matching columns
+ analysis.fia[[year]] <- year_df[, matching_columns, drop = FALSE]
+}
+```
+
+Extract soil carbon data - mean and standard deviation of each year
+```{r, echo = FALSE}
+# Initialize a list to store summary tables for each year
+summary_list <- list()
+
+# Loop through each year's data frame in analysis.all
+names(analysis.fia) = names(analysis.all)
+for (year in names(analysis.all)) {
+ # Extract the year from the full date format
+ year_only <- sub("^(\\d{4}).*", "\\1", year)
+
+ # Get the current data frame
+ df <- analysis.fia[[year]]
+
+ # Create a pattern to match columns that include sites
+ site_numbers <- unique(gsub(".*_Site(\\d+)", "\\1", colnames(df)))
+
+ # Initialize a data frame to hold summary statistics for this year
+ summary_table <- data.frame(Site_Number = numeric(0),
+ Year = character(0),
+ SC_Mean_Mod = numeric(0),
+ SC_SD_Mod = numeric(0),
+ stringsAsFactors = FALSE)
+
+ # Loop through each site number
+ for (site in site_numbers) {
+ # Create a vector for soil carbon data
+ tsc_col <- paste0("TotSoilCarb_Site", site)
+ # Check if columns exist in the data frame
+ if (tsc_col %in% colnames(df)) {
+ soilC = df[[tsc_col]]
+
+ # Calculate mean and SD for each variable
+ mean_soil = mean(soilC, na.rm = TRUE) # Average of soil C
+ sd_soil = sd(soilC, na.rm = TRUE) # Standard deviation of soil C
+
+ # Add to summary table
+ summary_table <- rbind(summary_table,
+ data.frame(Site_Number = as.numeric(site),
+ Year = year_only,
+ SC_Mean_Mod = mean_soil,
+ SC_SD_Mod = sd_soil))
+ }
+ }
+ # Store the summary table for this year in the list
+ summary_list[[year]] <- summary_table
+}
+
+# Combine all the summary tables into one data frame
+final_summary_table <- do.call(rbind, summary_list)
+```
+
+Graph the soil carbon year means in a histogram
+```{r}
+hist(final_summary_table[["SC_Mean_Mod"]], main = "Histogram of Soil Carbon Means across NEON Sites and Year", col = "lightblue", breaks = 10, border = "black", freq = FALSE)
+```
+
+
+Loading soil carbon data from NEON using code from github (extract_NEON_veg_R)
+```{r, echo = FALSE}
+
+# load copy of Alexi's code
+source("extract_neon_sc_func.R")
+
+# I want all of the data over time
+start_date = NA
+end_date = NA
+store_dir <- "/Users/chane/OneDrive/Documents/ef lab/R files" # storing to folder I am in
+
+# create summary table for loading data
+summary_table_NEON_data <- data.frame(sitename = character(240),
+ year = character(240),
+ mean_organicC = numeric(240),
+ sd_organicC = numeric(240),
+ mean_bulkD = numeric(240),
+ sd_bulkD = numeric(240),
+ mean_frac30 = numeric(240),
+ sd_frac30 = numeric(240),
+ mean_soilC= numeric(240),
+ sd_soilC = numeric(240))
+
+# create temp data
+temp_table <- data.frame(sitename = character(240),
+ year = character(240),
+ mean_organicC = numeric(240),
+ sd_organicC = numeric(240),
+ mean_bulkD = numeric(240),
+ sd_bulkD = numeric(240),
+ mean_frac30 = numeric(240),
+ sd_frac30 = numeric(240),
+ mean_soilC= numeric(240),
+ sd_soilC = numeric(240))
+
+# assign first data to the table
+sitename <- Neonplots["field_site_id"][1,1]
+summary_table_NEON_data <-extract_NEON_veg(sitename, start_date, end_date, store_dir, neonsites = NULL)
+
+# loop through and mean and sd of each data point from each year at all of the neonplot sites
+for (i in 2:47){
+ sitename <- Neonplots["field_site_id"][i,1]
+ temp_table <- extract_NEON_veg(sitename, start_date, end_date, store_dir, neonsites = NULL)
+ summary_table_NEON_data <- rbind(summary_table_NEON_data, temp_table)
+}
+
+temp_summary_NEON <- summary_table_NEON_data
+
+```
+
+Add forecast data to summary table
+```{r}
+# create summary_table as copy of neon table
+summary_table <- temp_summary_NEON
+
+#removed NAs
+summary_table <- drop_na(summary_table)
+
+# add row for sitename
+summary_table$Site_Number <- numeric(length(summary_table$sitename))
+
+
+# loop through summary table and Neon plots site and when equal and add closest site
+for (i in 1:length(summary_table$year)){
+ for (j in 1:length(Neonplots$field_site_id)){
+ if (Neonplots$field_site_id[j] == summary_table$sitename[i]){
+ summary_table$Site_Number[i] <- Neonplots$Closest_Site[j]
+ }
+ }
+}
+
+# sort by site number
+summary_table <- summary_table[order(summary_table$Site_Number),]
+
+# sort based on year (like final_summary_table)
+summary_table <- summary_table[order(summary_table$year),]
+
+# add rows for forecast to the summary_table_NEON_data
+summary_table$SC_mean_forecast = numeric(nrow(summary_table));
+summary_table$SC_sd_forecast = numeric(nrow(summary_table))
+
+c<- 1 # counter
+
+# add forecast data, leave zeros where no forecast data
+for (i in 1:length(final_summary_table$Year)){
+ for (j in 1:nrow(summary_table)){
+ if (summary_table$year[j] < 2022){
+ if (final_summary_table$Year[i] == summary_table$year[j] & final_summary_table$Site_Number[i] == summary_table$Site_Number[j]){
+ summary_table$SC_mean_forecast[c] = final_summary_table$SC_Mean_Mod[i];
+ summary_table$SC_sd_forecast[c] = final_summary_table$SC_SD_Mod[i];
+ c <- c +1
+ }
+ }
+ }
+}
+
+```
+
+Analysis!
+```{r, echo = FALSE}
+# Bivariate of Soil carbon averages from forecast and NEON data
+# make table through 2021
+# summary_table$SC_mean_forecast = summary_table$SC_mean_forecast* 10 is it wrong units??
+summary_table_2021 = summary_table[1:77, 1:12]
+ggplot(summary_table_2021, aes(x=SC_mean_forecast, y=mean_soilC)) + geom_point(size = 2)
+
+# all 3 really high ones are from GUAN (forest in Puerto Rico)
+```
+
+Map of RMS error
+```{r}
+# find the error
+summary_table$error_soilC = numeric(nrow(summary_table))
+
+for (i in 1:nrow(summary_table)){
+ # forecast - actual
+ summary_table$error_soilC[i] = (summary_table$SC_mean_forecast[i] - summary_table$mean_soilC[i])
+}
+
+# find rms error for each site
+
+# make dataframe to store name and rms error of each
+error_df <- data.frame(sitename = character(46),
+ rms_error = numeric(46))
+
+error_df$sitename <- unique(summary_table$sitename) # make list of names
+
+# create temp vector for before averages
+vec <- numeric(1)
+c <- 1 # counter
+
+for (i in 1:nrow(error_df)){
+ for (j in 1:nrow(summary_table)){
+ if(error_df$sitename[i] == summary_table$sitename[j]){
+ vec[c] = (summary_table$error_soilC[j])^2
+ c <- c + 1
+ }
+ }
+ error_df$rms_error[i] = sqrt(sum(vec)/(c-1))
+ c <- 1
+ rm(vec)
+ vec <- numeric(1)
+}
+
+# sort alphabetically
+error_df <- error_df[order(error_df$sitename),]
+
+# add lat and long columns
+error_df$site_lat <- numeric(nrow(error_df))
+error_df$site_long <- numeric(nrow(error_df))
+
+# add lat and long
+for (i in 1:nrow(error_df)){
+ for (j in 1:nrow(Neonplots)){
+ if (error_df$sitename[i] == Neonplots$field_site_id[j]){
+ error_df$site_lat[i] <- Neonplots$field_latitude[j]
+ error_df$site_long[i] <- Neonplots$field_longitude[j]
+ }
+ }
+}
+
+# get map of usa
+USA = map_data("world") %>% filter(region == "USA")
+
+# make bubble map with size and color depening on RMS error
+ggplot()+
+ geom_polygon(data = USA, aes(x= long, y = lat, group = group), fill="forestgreen", alpha = 0.3, main = "RMS error of each site") + geom_point(data = error_df, aes(x= site_long, y = site_lat, size = rms_error, color = rms_error)) + geom_point(alpha = 0.7) + xlim(-170, -50) + scale_size_continuous(range = c(1,10)) + theme_void() + labs(title = "RMS error of forecast for NEON sites") + theme(plot.title = element_text(hjust = 0.5),)
+
+# find max errors
+error_df <- error_df[order(error_df$rms_error),]
+
+```
+Most error
+1. GUAN -- Guanica Forest -- D04 (PR) Tropical Forest
+2. SRER -- Santa Rita -- D14 (AZ) Desert
+3. UNDE -- Notre Dame -- D05 (MI)
+4. MLBS -- Mountain Lake -- D07 (VA)
+5. TEAK -- Lower Teakettle -- D17 (CA)
+6. BARR -- Utqiagvik -- D18 (AK)
+7. BART -- Bartlett -- D01 (NH) Forest
+8. PUUM -- Puu Makaala -- D20 (HI)
+9. DEJU -- Delta Juntion -- D19 (AK) Tundra
+10.WREF -- Wind River forest -- D16 (WA) Forest
+
+find rate of change of each
+```{r, echo = FALSE}
+
+# find rate of change of NEON data
+# sort by location
+summary_table <- summary_table[order(summary_table$sitename),]
+
+error_df <- error_df[order(error_df$sitename),]
+
+# create places for rate of change
+error_df$roc_soilC <- numeric(46)
+error_df$roc_forecast <- numeric(46)
+error_df$roc_percent_error <- numeric(46)
+error_df$roc_error <- numeric(46)
+
+c <- 1
+for (i in 1:nrow(summary_table)){
+ if (i == 1){
+ if( summary_table$sitename[1] != summary_table$sitename[2]){
+ error_df$roc_soilC[1] <- NaN
+ error_df$roc_forecast[1] <- NaN
+ error_df$roc_percent_error[1] <- NaN
+ error_df$roc_error <- NaN
+ }
+ }
+ if (i>1){
+ if (summary_table$sitename[i] != summary_table$sitename[i - 1]){
+ temp_name = summary_table$sitename[i]
+ j <- i
+ while (summary_table$sitename[j] == temp_name){
+ j <- j+1
+ if (j == 108){
+ break
+ }
+ }
+ c <- c+1
+ error_df$roc_soilC[c] <- ((summary_table$mean_soilC[j-1] - summary_table$mean_soilC[i])/(as.numeric(summary_table$year[j-1]) - as.numeric(summary_table$year[i])))
+ error_df$roc_forecast[c] <- ((summary_table$SC_mean_forecast[j-1] - summary_table$SC_mean_forecast[i])/(as.numeric(summary_table$year[j-1]) - as.numeric(summary_table$year[i])))
+ error_df$roc_error[c] <- (error_df$roc_soilC[c] - error_df$roc_forecast[c])
+ error_df$roc_percent_error[c] <- abs((error_df$roc_soilC[c] - error_df$roc_forecast[c])/error_df$roc_forecast[c])*100
+ }
+ }
+}
+
+# make a map of rate of change error
+
+# make bubble map with size and color depening on RMS error
+ggplot()+
+ geom_polygon(data = USA, aes(x= long, y = lat, group = group), fill="forestgreen", alpha = 0.3) + geom_point(data = error_df, aes(x= site_long, y = site_lat, size = roc_error, color = roc_error)) + geom_point(alpha = 0.7) + xlim(-170, -50) + scale_size_continuous(range = c(1,10)) + theme_void() + labs(title = "Rate of Change Residual") + theme(plot.title = element_text(hjust = 0.5),)
+
+ggplot()+
+ geom_polygon(data = USA, aes(x= long, y = lat, group = group), fill="forestgreen", alpha = 0.3) + geom_point(data = error_df, aes(x= site_long, y = site_lat, size = roc_percent_error, color = roc_percent_error)) + geom_point(alpha = 0.7) + xlim(-170, -50) + scale_size_continuous(range = c(1,10)) + theme_void() + labs(title="Rate of Change Percent Error") + theme(plot.title = element_text(hjust = 0.5),)
+
+error_df <- error_df[order(error_df$roc_percent_error),]
+
+error_df_roc <- drop_na(error_df)
+```
+Rate of change error over 1000
+1. ONAQ -- Onaqui -- D15 Utah
+2. SRER -- Santa Rita -- D14 (AZ) Desert
+3. TREE -- Treehaven -- D05 Wisconsin
+4. UNDE -- Notre Dame -- D05 (MI)
+
+almost every forecast rate of change in negative and if not negative very small
+neon data has many positive rates of change
+
+```{r, echo = FALSE}
+# Residual map of ROC (copied from cami's)
+
+# define colors based on sign
+error_df$Color <- with(error_df,
+ ifelse(roc_error > 0, "lightsalmon",
+ ifelse(roc_error < 0, "orangered2", "grey")))
+
+
+USA = map_data("world") %>% filter(region == "USA")
+us_map <- st_as_sf(USA,
+ coords = c("long", "lat"),
+ crs = 4326) # CRS 4326 is WGS84 (latitude/longitude)
+
+
+# Make the plot
+ggplot(data = us_map) +
+ geom_sf(fill = "lightgrey") +
+ geom_point(data = error_df,
+ aes(x = site_long, y = site_lat, color = Color, size = abs(roc_error)),
+ alpha = 0.7) +
+ scale_color_manual(
+ name = "Residuals",
+ values = c(
+ "lightsalmon" = "lightsalmon",
+ "orangered2" = "orangered2"
+ ),
+ labels = c(
+ "lightsalmon" = "Rate of change (+)",
+ "orangered2" = "Rate of change (-)"
+ )
+ ) +
+ scale_size(range = c(1, 10), name = "Residual Size") +
+ coord_sf(xlim = c(-160, -60), ylim = c(25, 70)) +
+ labs(title = "Soil Carbon rate of change1
+ Residuals", x = "Longitude", y = "Latitude") +
+ theme_minimal() +
+ theme(
+ plot.title = element_text(hjust = 0.5),
+ )
+
+
+```
+
+```{r, echo = FALSE}
+# spatial variograms (from cami's code)
+
+# coordinates(error_df) <- ~ site_long + site_lat
+proj4string(error_df) <- CRS("+proj=longlat +datum=WGS84")
+
+variogram_model_rmse <- variogram(rms_error ~ 1, data = error_df)
+
+ggplot(data = variogram_model_rmse, aes(x = dist, y = gamma)) +
+ geom_point() +
+ geom_line() +
+ labs(title = "Variogram of RMS Error",
+ x = "Distance",
+ y = "Semivariance (γ)") +
+ theme_minimal() +
+ theme(
+ plot.title = element_text(hjust = 0.5, margin = margin(b = 20)),
+ axis.title.x = element_text(margin = margin(t = 10)),
+ axis.title.y = element_text(margin = margin(r = 10)))
+
+
+```
+
+```{r, echo = FALSE}
+# coordinates(error_df_roc) <- ~ site_long + site_lat
+variogram_model_roc <- variogram(roc_error ~ 1, data = error_df_roc)
+
+ggplot(data = variogram_model_roc, aes(x = dist, y = gamma)) +
+ geom_point() +
+ geom_line() +
+ labs(title = "Variogram of Rate of Change residual",
+ x = "Distance",
+ y = "Semivariance (γ)") +
+ theme_minimal() +
+ theme(
+ plot.title = element_text(hjust = 0.5, margin = margin(b = 20)),
+ axis.title.x = element_text(margin = margin(t = 10)),
+ axis.title.y = element_text(margin = margin(r = 10)))
+
+ggplot(data = us_map) +
+ ggplot
+```
+Compare error to 13 other factors (from Dongchen's folder on github)
+```{r, echo = FALSE}
+
+# get data on US 13 factors
+dat = terra::rast("all_data_layers.tif")
+
+# add columns to error_df
+error_df$ysd <- numeric(46)
+error_df$abg<- numeric(46)
+error_df$fia <- numeric(46)
+error_df$gedi<- numeric(46)
+error_df$twi<- numeric(46)
+error_df$tavg <- numeric(46)
+error_df$srad <- numeric(46)
+error_df$prec <- numeric(46)
+error_df$vapr <- numeric(46)
+error_df$ph <- numeric(46)
+error_df$n <- numeric(46)
+error_df$soc <- numeric(46)
+error_df$sand <- numeric(46)
+
+pt = numeric(1)
+
+# extract data for each from each Neon site coordinate
+for (i in 1:nrow(error_df)){
+ pt = terra::extract(dat,data.frame(lon=error_df$site_long[i], lat = error_df$site_lat[i]))
+ error_df$ysd[i] = pt$year_since_disturb
+ error_df$abg[i] <- pt$agb
+ error_df$fia[i] <- pt$fia
+ error_df$gedi[i] <- pt$gedi
+ error_df$twi[i] <- pt$twi
+ error_df$tavg[i] <- pt$tavg
+ error_df$srad[i] <- pt$srad
+ error_df$prec[i] <- pt$prec
+ error_df$vapr[i] <- pt$vapr
+ error_df$ph[i] <- pt$PH
+ error_df$n[i] <- pt$N
+ error_df$soc[i] <- pt$SOC
+ error_df$sand[i] <- pt$Sand
+}
+
+# use lm to fit a linear model to the data
+fit_ysd = lm(error_df$ysd ~ error_df$rms_error)
+fit_abg = lm(error_df$abg ~ error_df$rms_error)
+fit_fia = lm(error_df$fia ~ error_df$rms_error)
+fit_gedi = lm(error_df$gedi ~ error_df$rms_error)
+fit_twi = lm(error_df$twi ~ error_df$rms_error)
+fit_tavg = lm(error_df$tavg ~ error_df$rms_error)
+fit_srad = lm(error_df$srad ~ error_df$rms_error)
+fit_prec = lm(error_df$prec~ error_df$rms_error)
+fit_vapr = lm(error_df$vapr ~ error_df$rms_error)
+fit_ph = lm(error_df$ph ~ error_df$rms_error)
+fit_n = lm(error_df$n ~ error_df$rms_error)
+fit_soc = lm(error_df$soc ~ error_df$rms_error)
+fit_sand = lm(error_df$sand ~ error_df$rms_error)
+
+
+# make data frame
+summary_factors <- data.frame(factor_name= character(13),
+ r_squared = numeric(13),
+ slope = numeric (13),
+ p_value = numeric(13))
+
+for (i in 23:35){
+ # fit a line
+ error_df$temp = error_df[[i]]
+ fit = lm(error_df$temp ~ error_df$rms_error)
+
+ # graph
+
+ print(ggplot(error_df, aes(x = temp, y = rms_error)) + geom_point() + geom_smooth(method = "lm", se = FALSE) + xlab(colnames(error_df[(i-13)])))
+
+ # add r squared, slope, p value
+ summary_factors$factor_name[(i-22)] <- colnames(error_df[(i-13)])
+ summary_factors$slope[(i-22)] <- coef(fit)[2]
+ f <- summary(fit)$fstatistic
+ summary_factors$p_value[(i-22)] <- pf(f[1], f[2], f[3], lower.tail=F)
+ summary_factors$r_squared[(i-22)] <- summary(fit)$adj.r.squared
+}
+```
+
+
+
+```{r}
+
+
+```
+
+
+
+
+
+make table of (slope, p value, r^2)
+
+
+
+
+
+
From 98fe5ca2312ea185b493d0c1ae4f1b470b7422e7 Mon Sep 17 00:00:00 2001
From: ChaneyFinkeldeiE
Date: Fri, 2 May 2025 10:31:37 -0400
Subject: [PATCH 0091/1193] Adding soil respiration code
---
.../inst/NEON_soils/soil_resp_markdown.Rmd | 643 ++++++++++++++++++
1 file changed, 643 insertions(+)
create mode 100644 modules/data.mining/inst/NEON_soils/soil_resp_markdown.Rmd
diff --git a/modules/data.mining/inst/NEON_soils/soil_resp_markdown.Rmd b/modules/data.mining/inst/NEON_soils/soil_resp_markdown.Rmd
new file mode 100644
index 00000000000..2ce0ab36406
--- /dev/null
+++ b/modules/data.mining/inst/NEON_soils/soil_resp_markdown.Rmd
@@ -0,0 +1,643 @@
+---
+title: "Soil Resp"
+output: html_document
+date: "2025-04-22"
+---
+
+Libraries
+```{r setup, include=FALSE}
+library(DBI)
+library(RSQLite)
+library(dplyr)
+library(geosphere)
+library(sp)
+library(ggplot2)
+library(sf)
+library(maps)
+library(terra)
+library(knitr)
+library(tidyr)
+library(neonUtilities)
+library(swfscMisc)
+library(maps)
+library(tidyverse)
+library(neonSoilFlux)
+library(parallel)
+library(ncdf4)
+library(data.table)
+library(purrr)
+library(data.table)
+library(parallel)
+library(lubridate)
+library(terra)
+library(httr)
+library(sp)
+library(neonSoilFlux)
+library(stringr)
+library(scales)
+```
+
+Get NEON soil respiraiton data
+
+```{r}
+# get the coordinates of 6400 sites in Dongchen's newest output
+Neonplots <- read.csv("/usr4/ugrad/chaney/R/3_25/Neon_sites_terrestrial.csv")
+
+# Add a column for site numbers in Site_Info (site_info has coordinates of each location in df)
+Site_Info <- readRDS("/usr4/ugrad/chaney/R/3_25/site.locs.rds")
+
+Site_Info$Site_Number <- seq(1, nrow(Site_Info))
+
+# Extract latitude and longitude from Neonplots and Site_Info tables
+neon_coords <- Neonplots[ ,2:3] %>%
+ select(Latitude = field_latitude, Longitude = field_longitude)
+Site_Info_coords <- Site_Info %>%
+ select(Site_Number, lat, lon)
+
+# Put Site_Info latitude/longitude in a matrix
+Site_Coordinates <- as.matrix(Site_Info[, c("lat", "lon")])
+
+# Same for Neonplots
+Neonplots_Coordinates <- as.matrix(Neonplots[, 2:3])
+
+# make list to store matched list
+matched <- data.frame(id = numeric(nrow(Neonplots)), # id in forecast
+ site_id = numeric(nrow(Neonplots))) # NEON id
+
+# Loop through each row in Neonplots and calculate each plot's closest site number
+for (i in 1:nrow(Neonplots)) {
+ # Extract the coordinates for the current row in Neonplots
+ plot_coords <- Neonplots_Coordinates[i, ]
+
+ # Compute distances from this plot to all site coordinates (longlat means its in km)
+ distances <- spDistsN1(Site_Coordinates, plot_coords, longlat = TRUE)
+
+ # Find the index of the minimum distance for closest site
+ closest_site_index <- which.min(distances)
+
+ # Assign the corresponding Site_Number from Site_Info to the current row in Neonplots
+ matched$id[i] <- Site_Info$Site_Number[closest_site_index]
+ matched$site_id[i] <- Neonplots$field_site_id[i]
+}
+
+### Get the model output (0715, 16 outputs) (Yang's code)
+years <- 2000:2025
+Nensem <- 1:25 # 50 if change folder?
+
+# Initialize the output list
+sr_list <- list()
+
+# Process Ensemble Function
+process_ensemble <- function(ens) {
+ ens_id <- sprintf("%05d", ens)
+ sr_ens <- list()
+
+ # go through all the sites
+ for (i in seq_len(nrow(matched))) {
+ site_id <- matched$site_id[i]
+ ID <- matched$id[i]
+
+ # go through the years
+ for (year in years) {
+ file_path <- paste0(
+ "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2025_4_19/out/ENS-",
+ ens_id, "-", ID, "/", year, ".nc"
+ )
+
+ # check if the file exists
+
+ # open NetCDF files
+ nc_data <- tryCatch({
+ nc_open(file_path)
+ }, error = function(e) {
+ return(NULL)
+ })
+
+ # file open
+ if (!is.null(nc_data)) {
+ # time dimension ??
+ if ("time" %in% names(nc_data$dim)) {
+ time_values <- ncvar_get(nc_data, "time")
+ time_units <- ncatt_get(nc_data, "time", "units")$value
+
+ # Analyze the model start date
+ start_datetime <- as.POSIXct(
+ sub("days since ", "", time_units),
+ format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
+ )
+
+ # Calculate the time series
+ time_datetimes <- as.Date(start_datetime) + (time_values - 1)
+
+ # get Soil Respiration data
+ sr <- tryCatch({
+ ncvar_get(nc_data, "SoilResp")
+ }, error = function(e) {
+ return(NULL)
+ })
+
+
+ if (!is.null(sr)) {
+ # replace -999 to NA
+ sr[sr == -999] <- NA
+
+ # create dataframe
+ sr_df <- data.table(
+ Time = time_datetimes,
+ SR = sr,
+ Ensemble = ens,
+ Site_ID = site_id
+ )
+
+ # save the results
+ sr_ens[[length(sr_ens) + 1]] <- sr_df
+ }
+ }
+
+ # Close NetCDF File
+ nc_close(nc_data)
+ }
+ }
+ }
+
+ # Combine the results
+ sr_result <- if (length(sr_ens) > 0) rbindlist(sr_ens, use.names = TRUE, fill = TRUE) else data.table()
+
+ return(list(sr = sr_result))
+}
+
+
+# Apply parallel calculation
+num_cores <- detectCores() - 1
+results <- mclapply(Nensem, process_ensemble, mc.cores = num_cores)
+
+# Combine results from all ensmebles
+sr_output <- rbindlist(lapply(results, `[[`, "sr"))
+
+# Calculate ensemble mean
+sr_mean <- sr_output[, .(SR = mean(SR, na.rm = TRUE)), by = .(Time, Site_ID)]
+
+# Merge SR mean value with a time resolution of 3h (Not 1 day)
+daily_mean_data <- as.data.table(sr_mean)
+
+# Format Time to character for processing
+daily_mean_data[, Time := as.character(Time)]
+
+# Calculate daily SR(time resolution is 1 day)
+final_daily_sr_data <- daily_mean_data[, .(
+ Final_SR = mean(SR, na.rm = TRUE)), by = .(Time, Site_ID)]
+
+```
+
+Get John's NEON soil respiration data
+
+```{r}
+# function to get soil fluxes for specifed years for 1 site
+soil_flux <- function(site_number, flux_type){
+ summary_table <- data.frame(Time = character(5114),
+ Site_id = character(5114),
+ sr_neon = numeric(5114))
+
+ day_count <- 1
+ temp <- 0
+ na_count <- 0
+ tot <- 0
+
+ years = 2012:2025
+ site = matched$site_id[site_number]
+
+ for (year in years){
+ for (i in 01:12){
+ if (i < 10){
+ file_name <- paste0("/projectnb/dietzelab/jzobitz/02-NEON-sites/flux-results/out-flux-", site, "-",year,"-0",i,".Rda")
+ }else{
+ file_name <- paste0("/projectnb/dietzelab/jzobitz/02-NEON-sites/flux-results/out-flux-", site, "-",year,"-",i,".Rda")
+ }
+ if (file.exists(file_name)== TRUE){
+ load(file_name)
+
+ # sort by date so I can average over all horizontal positions
+ out_fluxes <- out_fluxes[order(out_fluxes$startDateTime),]
+ for (j in 1:(nrow(out_fluxes)/240)){
+ if (j == 1){
+ start <- 1
+ end <- 240
+ } else{
+ start <- (j-1)*240
+ end <- j*240
+ }
+ for (k in start:end){
+ temp <- out_fluxes[[3]][[k]]$flux[flux_type]
+ if (is.na(temp)){
+ na_count <- na_count + 1}
+ else{
+ tot <- tot + temp
+ }
+ }
+ summary_table$sr_neon[day_count] <- tot/(240 - na_count)
+ summary_table$Site_id[day_count] <- site
+ if (start == 1){
+ summary_table$Time[day_count] <- str_sub(out_fluxes$startDateTime[start], end=-1)
+ } else{
+ summary_table$Time[day_count] <- str_sub(out_fluxes$startDateTime[k], end=-10)
+ }
+ day_count <- day_count + 1
+ na_count <- 0
+ tot <- 0
+ }
+ }
+ }
+ }
+ # get rid of empty row
+return(list(sr = summary_table))
+}
+r <- soil_flux(1,4)
+
+# Apply parallel calculation for each site
+n = 1:nrow(matched)
+num_cores <- detectCores() - 1
+
+# using marshall method at top of soil (000)
+results_j <- mclapply(n, soil_flux, mc.cores = num_cores, flux_type = 4)
+# Combine results from all ensmebles
+sr_marshall <- rbindlist(lapply(results_j, `[[`, "sr"))
+# delete rows with all zeros
+sr_marshall_filtered <- filter(sr_marshall, sr_neon != 0)
+
+# using Milington- Quirk method at top of soil (000)
+results_j_2 <- mclapply(n, soil_flux, mc.cores = num_cores, flux_type = 8)
+# Combine results from all ensembles
+sr_m_q <- rbindlist(lapply(results_j_2, `[[`, "sr"))
+# delete rows where the soil respiration is 0
+sr_m_q_filtered <-filter(sr_m_q, sr_neon != 0)
+
+
+# combine the two methods
+daily_mean_data_neon <- merge(sr_marshall_filtered, sr_m_q_filtered, by = c("Time", "Site_id"))
+
+# rename rows
+names(daily_mean_data_neon)[names(daily_mean_data_neon) == "sr_neon.x"] <- "Marshall_SR"
+names(daily_mean_data_neon)[names(daily_mean_data_neon) == "sr_neon.y"] <- "Mill_Quirk_SR"
+names(daily_mean_data_neon)[names(daily_mean_data_neon) == "Site_id"] <- "Site_ID"
+
+
+# combine data frame into final_daily_sr_data
+daily_sr <- merge(final_daily_sr_data, daily_mean_data_neon, by = c("Time","Site_ID"))
+names(daily_sr)[names(daily_sr) == "Final_SR"] <- "Forecast_SR"
+
+# convert Time to date class
+daily_sr$Time <- as.Date(daily_sr$Time)
+
+# convert johns to kg/mol ^2 *s (currently in umol/m^2 * s)
+daily_sr$Marshall_SR = daily_sr$Marshall_SR*10^-6*12.011*10^-3
+daily_sr$Mill_Quirk_SR = daily_sr$Mill_Quirk_SR*10^-6*12.011*10^-3
+
+```
+
+Find error
+```{r}
+# calculate error
+daily_sr$Marsh_error <- daily_sr$Forecast_SR - daily_sr$Marshall_SR
+daily_sr$MQ_error <- daily_sr$Forecast_SR - daily_sr$Mill_Quirk_SR
+
+# keep a permanent daily_sr
+daily_sr_perm <- daily_sr
+
+# make table for summary statistics
+summary_stat <- data.frame(Site_ID = character(length(unique(daily_sr$Site_ID))),
+ Marsh_corr_coef = numeric(length(unique(daily_sr$Site_ID))),
+ MQ_corr_coef = numeric(length(unique(daily_sr$Site_ID))),
+ Marsh_rms_error = numeric(length(unique(daily_sr$Site_ID))),
+ MQ_rms_error = numeric(length(unique(daily_sr$Site_ID))))
+
+
+# do summary stats
+sites <- unique(daily_sr$Site_ID)
+
+for (i in 1:length(sites)){
+ temp_data <- filter(daily_sr, Site_ID == sites[i])
+ # remove na values
+ temp_data <- na.omit(temp_data)
+
+ # get rid of first row (so i can remove infinite)
+ temp_data <- subset(temp_data, select = -c(Site_ID, Time))
+
+ # remove inf values
+ temp_data <- temp_data[apply(temp_data, 1, function(row) all(is.finite(row))), ]
+
+ # add site name
+ summary_stat$Site_ID[i] <- sites[i]
+
+ # find correlation coeffs
+ summary_stat$Marsh_corr_coef[i] <- cor(temp_data$Marshall_SR, temp_data$Forecast_SR)
+ summary_stat$MQ_corr_coef[i] <- cor(temp_data$Mill_Quirk_SR, temp_data$Forecast_SR)
+
+ # find RMS errors
+ summary_stat$Marsh_rms_error[i] <- sqrt(mean((temp_data$Marshall_SR - temp_data$Forecast_SR)^2))
+ summary_stat$MQ_rms_error[i] <- sqrt(mean((temp_data$Mill_Quirk_SR - temp_data$Forecast_SR)^2))
+}
+
+```
+
+Graph them
+```{r}
+# make predicted- observed plots
+for (i in 1:length(sites)){
+ temp_data <- filter(daily_sr, Site_ID == sites[i])
+
+
+print(ggplot(temp_data) +
+ geom_line(aes(x = Time, y = Marshall_SR, group = 1), linewidth = 1, color = "orange") +
+ geom_line(aes(x = Time, y = Forecast_SR,group = 1), linewidth = 1, color = "lightblue") +
+ labs(title = sites[i], x = "date", y = "Soil Respiration (kg/ (m^2 s)"))
+}
+
+```
+
+Graph cross-correlation coefficients
+```{r}
+lag_sites <- c("MLBS", "HARV","SERC","UNDE","ABBY", "GRSM","WREF","SJER")
+
+peak <- numeric(length(lag_sites))
+count = 1
+
+for (site in lag_sites){
+ temp_data <- filter(daily_sr, Site_ID == site)
+
+ # remove na values
+ temp_data <- na.omit(temp_data)
+
+ name <- temp_data$Site_ID[1]
+ # get rid of first row (so i can remove infinite)
+ temp_data <- subset(temp_data, select = -c(Site_ID, Time))
+
+ # remove inf values
+ temp_data <- temp_data[apply(temp_data, 1, function(row) all(is.finite(row))), ]
+
+ ccf_data <- ccf(temp_data$Marshall_SR, temp_data$Forecast_SR, lag = 91, plot = TRUE, ylim = range(-1,1))
+
+ title(name)
+
+ peak[count] <- ccf_data$lag[which.max(ccf_data$acf)]
+ count = count + 1
+}
+
+# add titles
+
+# analyze more
+
+```
+
+
+
+
+Soil Moisture Fraction from Model
+```{r}
+
+#
+# Initialize the output list
+sm_list <- list()
+
+# Process Ensemble Function
+
+process_ensemble_sm <- function(ens) {
+ ens_id <- sprintf("%05d", ens)
+ sm_ens <- list()
+
+ # go through all the sites
+ for (i in seq_len(nrow(matched))) {
+ site_id <- matched$site_id[i]
+ ID <- matched$id[i]
+
+ # go through the years
+ for (year in years) {
+ file_path <- paste0(
+ "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2025_4_19/out/ENS-",
+ ens_id, "-", ID, "/", year, ".nc"
+ )
+
+ # check if the file exists
+
+ # open NetCDF files
+ nc_data <- tryCatch({
+ nc_open(file_path)
+ }, error = function(e) {
+ return(NULL)
+ })
+
+ # file open
+ if (!is.null(nc_data)) {
+ if ("time" %in% names(nc_data$dim)) {
+ time_values <- ncvar_get(nc_data, "time")
+ time_units <- ncatt_get(nc_data, "time", "units")$value
+
+ # Analyze the model start date
+ start_datetime <- as.POSIXct(
+ sub("days since ", "", time_units),
+ format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
+ )
+
+ # Calculate the time series
+ time_datetimes <- as.Date(start_datetime) + (time_values - 1)
+
+ # get Soil Respiration data
+ sm <- tryCatch({
+ ncvar_get(nc_data, "SoilMoistFrac")
+ }, error = function(e) {
+ return(NULL)
+ })
+
+
+ if (!is.null(sm)) {
+ # replace -999 to NA
+ sm[sm == -999] <- NA
+
+ # create dataframe
+ sm_df <- data.table(
+ Time = time_datetimes,
+ SM = sm,
+ Ensemble = ens,
+ Site_ID = site_id
+ )
+
+ # save the results
+ sm_ens[[length(sm_ens) + 1]] <- sm_df
+ }
+ }
+
+ # Close NetCDF File
+ nc_close(nc_data)
+ }
+ }
+ }
+
+ # Combine the results
+ sm_result <- if (length(sm_ens) > 0) rbindlist(sm_ens, use.names = TRUE, fill = TRUE) else data.table()
+
+ return(list(sm = sm_result))
+}
+
+# Apply parallel calculation
+num_cores <- detectCores() - 1
+results <- mclapply(Nensem, process_ensemble_sm, mc.cores = num_cores)
+
+```
+
+
+Soil temperature data from Model
+```{r}
+process_ensemble_st <- function(ens) {
+ ens_id <- sprintf("%05d", ens)
+ sm_ens <- list()
+ c = 1
+
+ # create a data frame
+ st_df <- data.table(
+ Time = character(13*365*nrow(matched)),
+ ST = numeric(13*365*nrow(matched)),
+ Site_ID = character(13*365*nrow(matched))
+ )
+
+ # go through all the sites
+ for (i in seq_len(nrow(matched))) {
+
+
+ site_id <- matched$site_id[i]
+ ID <- matched$id[i]
+
+ # get file path
+
+ file_path <- paste0(
+ "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/run/ENS-",
+ ens_id, "-", ID, "/README.txt")
+ if (file.exists(file_path)== TRUE){
+ # read the text file in
+ temp <- readLines(file_path)
+
+ # get the met data path
+ met_data_path <- substr(temp[11],17,119)
+
+ # change path of readme file so that both 2021 are 2024
+ met_data_path <- str_replace(met_data_path,"2021","2024")
+ met_data_path <- str_replace(met_data_path,"2021","2024")
+
+ # open the new file
+ if (file.exists(met_data_path)== TRUE){
+ temp_table <- read.table(met_data_path)
+ temp_table_yr <- temp_table$V2
+
+ for (year in 2012:2024){
+ temp_table_yr <- filter(temp_table, V2 == year)
+ org_date = paste0(year - 1, "-12-31")
+ for (day in 1:366){
+ temp_table_yr_day <- filter(temp_table, V3 == day)
+ st_df$Site_ID[c] = site_id
+ st_df$ST[c] = mean(temp_table_yr_day$V7, na.rm = TRUE)
+
+ # need to convert date into "year-mo-day format"
+ date <- as.Date(day, origin = org_date)
+ st_df$Time[c] <- format(date, "%Y-%m-%d")
+ c = c+1
+ }
+ }
+ }
+ }
+ }
+ return(list(st = st_df))
+}
+
+results <- process_ensemble_st(1)
+# apply parallel computation
+
+results_temp <- mclapply(Nensem, process_ensemble_st, mc.cores = num_cores)
+
+st_output <- rbindlist(lapply(results_temp, `[[`, "st"))
+
+soil_temperature <- filter(st_output, st != 0)
+
+
+```
+useful for this: list.files and readLines
+
+
+
+Get Temp and Soil Moisture from John's data
+
+```{r}
+
+soil_temp_and_moist <- function(site_number){
+ summary_table <- data.frame(Site_id = character(5114),
+ Time = character(5114),
+ soil_temp = numeric(5114),
+ soil_moist = numeric(5114))
+
+ #temp <- 0
+
+ years = 2012:2025
+ site = matched$site_id[site_number]
+ day_count <- 1
+
+ for (year in years){
+ for (i in 01:12){
+ if (i < 10){
+ file_name <- paste0("/projectnb/dietzelab/jzobitz/02-NEON-sites/env-data/env-meas-", site, "-",year,"-0",i,".Rda")
+ }else{
+ file_name <- paste0("/projectnb/dietzelab/jzobitz/02-NEON-sites/env-data/env-meas-", site, "-",year,"-",i,".Rda")
+ }
+ if (file.exists(file_name)== TRUE){
+ load(file_name)
+
+ # get just 501 data
+ site_data_temp <- filter(site_data[[2]][[3]], verticalPosition == "501")
+ site_data_moist <- filter(site_data[[2]][[2]], verticalPosition == "501")
+
+ # sort by date
+ site_data_temp <- site_data_temp[order(site_data_temp$startDateTime),]
+ site_data_moist <- site_data_moist[order(site_data_moist$startDateTime),]
+
+ for (j in 1:(nrow(site_data_temp)/240)){
+ if (j == 1){
+ start <- 1
+ end <- 240
+ } else{
+ start <- (j-1)*240
+ end <- j*240
+ }
+ summary_table$soil_temp[day_count] <- mean(site_data_temp$soilTempMean[start:end], na.rm = TRUE)
+ summary_table$soil_moist[day_count] <- mean(site_data_moist$VSWCMean[start:end], na.rm = TRUE)
+ summary_table$Site_id[day_count] <- site
+ if (start == 1){
+ summary_table$Time[day_count] <- str_sub(site_data_temp$startDateTime[start], end=-1)
+ } else{
+ summary_table$Time[day_count] <- str_sub(site_data_temp$startDateTime[start], end=-10)
+ }
+ day_count = day_count + 1
+ }
+
+ }
+ }
+ }
+ # get rid of empty row
+return(list(soil_tempandmoist = summary_table))
+}
+
+# Apply parallel calculation for each site
+n = 1:nrow(matched)
+num_cores <- detectCores() - 1
+
+# soil_temp results
+results_tempandmoist <- mclapply(n, soil_temp_and_moist, mc.cores = num_cores)
+
+# Combine results from all ensembles
+soil_tm <- rbindlist(lapply(results_tempandmoist, `[[`, "soil_tempandmoist"))
+
+# Delete all rows with just zeros
+soil_tm <- filter(soil_tm, soil_temp != 0)
+
+# temp is in C, soil moisture is a ratio
+
+```
+
+
+
+
From 4f5d851c15fbc06bad48045b1b28b9aeac9839bf Mon Sep 17 00:00:00 2001
From: ChaneyFinkeldeiE
Date: Sun, 4 May 2025 18:03:59 -0400
Subject: [PATCH 0092/1193] I edited some comments to make the code more clear
---
.../inst/NEON_soils/soilcarbon_validation.Rmd | 106 ++++++------------
1 file changed, 33 insertions(+), 73 deletions(-)
diff --git a/modules/data.mining/inst/NEON_soils/soilcarbon_validation.Rmd b/modules/data.mining/inst/NEON_soils/soilcarbon_validation.Rmd
index 4e35ab36c22..4967a58f3db 100644
--- a/modules/data.mining/inst/NEON_soils/soilcarbon_validation.Rmd
+++ b/modules/data.mining/inst/NEON_soils/soilcarbon_validation.Rmd
@@ -35,21 +35,21 @@ setwd("/Users/chane/OneDrive/Documents/ef lab/R files")
options(repos = c(CRAN = "https://cran.rstudio.com/"))
```
-Getting soil carbon data from SDA forecast (copying Cami's code)
-Naming data frame with site number
+Get soil carbon data from SDA forecast (copying Cami's code)
```{r, echo = FALSE}
# Load data
SIPNET <- load("sda.all.forecast.analysis.Rdata")
-# site coordinates
+# Get site coordinates
Site_Info <- readRDS("site.locs.rds")
# Rename lists to years
names(analysis.all) <- 2012:2021
year <- 2012:2021
+# Names of variables
variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
# Loop through each year list in analysis.all
@@ -75,16 +75,11 @@ for (year in names(analysis.all)) {
analysis.all[[year]] <- year_df # Update the list with modified data frame
}
-```
-
-
-Assign the data numbers their coordinates and find which one is closest to each NEON site
-```{r, echo = FALSE}
-## Add column with corresponding site number for each plot in Neonplots
+# Load list of neon sites with their latitudes and longitudes
Neonplots <- read.csv("Neon_sites_terrestrial.csv")
- # Add a column for site numbers in Site_Info (site_info has coordinates of each location in df)
+# Add a column for site numbers in Site_Info (site_info has coordinates of each location in df)
Site_Info$Site_Number <- seq(1, nrow(Site_Info))
# Extract latitude and longitude from Neonplots and Site_Info tables
@@ -117,12 +112,11 @@ for (i in 1:nrow(Neonplots)) {
Neonplots$Closest_Site[i] <- Site_Info$Site_Number[closest_site_index]}
```
-A lot of the same domain NEON sights have the same cloests site index - is this right?
-
-Filtering analysis data with relevant sites (Cami's also does years - should I do that?)
+Filtering analysis data with relevant sites
```{r, echo=FALSE}
## Filter analysis.all to only include relevant sites ##
+
# Get the list of sites to keep from Neonplots
sites_to_keep <- Neonplots$Closest_Site
@@ -142,7 +136,6 @@ for (year in names(analysis.all)) {
analysis.fia[[year]] <- df_filtered}
```
-two have same closest site, GUAN and LAJA - they should each be different right??
Now extract the sites from Neonplots
```{r, echo = FALSE}
@@ -159,7 +152,6 @@ for (year in names(analysis.fia)) {
current_year <- as.numeric(substr(year, 1, 4))
# Identify columns with site numbers that match valid_sites
- # Extract the site number from column names using regex
matching_columns <- grep(paste0("_Site(", paste(valid_sites, collapse = "|"), ")$"),
colnames(year_df), value = TRUE)
@@ -176,6 +168,7 @@ summary_list <- list()
# Loop through each year's data frame in analysis.all
names(analysis.fia) = names(analysis.all)
for (year in names(analysis.all)) {
+
# Extract the year from the full date format
year_only <- sub("^(\\d{4}).*", "\\1", year)
@@ -196,6 +189,7 @@ for (year in names(analysis.all)) {
for (site in site_numbers) {
# Create a vector for soil carbon data
tsc_col <- paste0("TotSoilCarb_Site", site)
+
# Check if columns exist in the data frame
if (tsc_col %in% colnames(df)) {
soilC = df[[tsc_col]]
@@ -226,16 +220,16 @@ hist(final_summary_table[["SC_Mean_Mod"]], main = "Histogram of Soil Carbon Mean
```
-Loading soil carbon data from NEON using code from github (extract_NEON_veg_R)
+Loading soil carbon data from NEON (Alexi's code)
```{r, echo = FALSE}
# load copy of Alexi's code
source("extract_neon_sc_func.R")
-# I want all of the data over time
+# Set start and end to NA to get all years
start_date = NA
end_date = NA
-store_dir <- "/Users/chane/OneDrive/Documents/ef lab/R files" # storing to folder I am in
+store_dir <- "/Users/chane/OneDrive/Documents/ef_lab/R_files" # storing to folder I am in
# create summary table for loading data
summary_table_NEON_data <- data.frame(sitename = character(240),
@@ -276,12 +270,12 @@ temp_summary_NEON <- summary_table_NEON_data
```
-Add forecast data to summary table
+Add model to summary table
```{r}
# create summary_table as copy of neon table
summary_table <- temp_summary_NEON
-#removed NAs
+# remove NAs
summary_table <- drop_na(summary_table)
# add row for sitename
@@ -307,7 +301,7 @@ summary_table <- summary_table[order(summary_table$year),]
summary_table$SC_mean_forecast = numeric(nrow(summary_table));
summary_table$SC_sd_forecast = numeric(nrow(summary_table))
-c<- 1 # counter
+c = 1 # counter
# add forecast data, leave zeros where no forecast data
for (i in 1:length(final_summary_table$Year)){
@@ -322,17 +316,16 @@ for (i in 1:length(final_summary_table$Year)){
}
}
+
```
Analysis!
```{r, echo = FALSE}
-# Bivariate of Soil carbon averages from forecast and NEON data
-# make table through 2021
-# summary_table$SC_mean_forecast = summary_table$SC_mean_forecast* 10 is it wrong units??
+# Plot summary table
+
summary_table_2021 = summary_table[1:77, 1:12]
ggplot(summary_table_2021, aes(x=SC_mean_forecast, y=mean_soilC)) + geom_point(size = 2)
-# all 3 really high ones are from GUAN (forest in Puerto Rico)
```
Map of RMS error
@@ -341,7 +334,7 @@ Map of RMS error
summary_table$error_soilC = numeric(nrow(summary_table))
for (i in 1:nrow(summary_table)){
- # forecast - actual
+ # model - actual
summary_table$error_soilC[i] = (summary_table$SC_mean_forecast[i] - summary_table$mean_soilC[i])
}
@@ -355,7 +348,7 @@ error_df$sitename <- unique(summary_table$sitename) # make list of names
# create temp vector for before averages
vec <- numeric(1)
-c <- 1 # counter
+c = 1 # counter
for (i in 1:nrow(error_df)){
for (j in 1:nrow(summary_table)){
@@ -398,19 +391,8 @@ ggplot()+
error_df <- error_df[order(error_df$rms_error),]
```
-Most error
-1. GUAN -- Guanica Forest -- D04 (PR) Tropical Forest
-2. SRER -- Santa Rita -- D14 (AZ) Desert
-3. UNDE -- Notre Dame -- D05 (MI)
-4. MLBS -- Mountain Lake -- D07 (VA)
-5. TEAK -- Lower Teakettle -- D17 (CA)
-6. BARR -- Utqiagvik -- D18 (AK)
-7. BART -- Bartlett -- D01 (NH) Forest
-8. PUUM -- Puu Makaala -- D20 (HI)
-9. DEJU -- Delta Juntion -- D19 (AK) Tundra
-10.WREF -- Wind River forest -- D16 (WA) Forest
-
-find rate of change of each
+
+Find rate of change of soil carbon for each
```{r, echo = FALSE}
# find rate of change of NEON data
@@ -425,7 +407,7 @@ error_df$roc_forecast <- numeric(46)
error_df$roc_percent_error <- numeric(46)
error_df$roc_error <- numeric(46)
-c <- 1
+c = 1
for (i in 1:nrow(summary_table)){
if (i == 1){
if( summary_table$sitename[1] != summary_table$sitename[2]){
@@ -437,12 +419,12 @@ for (i in 1:nrow(summary_table)){
}
if (i>1){
if (summary_table$sitename[i] != summary_table$sitename[i - 1]){
- temp_name = summary_table$sitename[i]
- j <- i
- while (summary_table$sitename[j] == temp_name){
- j <- j+1
- if (j == 108){
- break
+ temp_name = summary_table$sitename[i]
+ j <- i
+ while (summary_table$sitename[j] == temp_name){
+ j <- j+1
+ if (j == 108){
+ break
}
}
c <- c+1
@@ -454,9 +436,7 @@ for (i in 1:nrow(summary_table)){
}
}
-# make a map of rate of change error
-
-# make bubble map with size and color depening on RMS error
+# Bubble map with size and color depending on RMS error
ggplot()+
geom_polygon(data = USA, aes(x= long, y = lat, group = group), fill="forestgreen", alpha = 0.3) + geom_point(data = error_df, aes(x= site_long, y = site_lat, size = roc_error, color = roc_error)) + geom_point(alpha = 0.7) + xlim(-170, -50) + scale_size_continuous(range = c(1,10)) + theme_void() + labs(title = "Rate of Change Residual") + theme(plot.title = element_text(hjust = 0.5),)
@@ -467,17 +447,9 @@ error_df <- error_df[order(error_df$roc_percent_error),]
error_df_roc <- drop_na(error_df)
```
-Rate of change error over 1000
-1. ONAQ -- Onaqui -- D15 Utah
-2. SRER -- Santa Rita -- D14 (AZ) Desert
-3. TREE -- Treehaven -- D05 Wisconsin
-4. UNDE -- Notre Dame -- D05 (MI)
-
-almost every forecast rate of change in negative and if not negative very small
-neon data has many positive rates of change
+Residual map of rate of change (copied from Cami's code)
```{r, echo = FALSE}
-# Residual map of ROC (copied from cami's)
# define colors based on sign
error_df$Color <- with(error_df,
@@ -520,10 +492,9 @@ ggplot(data = us_map) +
```
+spatial variograms (from cami's code)
```{r, echo = FALSE}
-# spatial variograms (from cami's code)
-# coordinates(error_df) <- ~ site_long + site_lat
proj4string(error_df) <- CRS("+proj=longlat +datum=WGS84")
variogram_model_rmse <- variogram(rms_error ~ 1, data = error_df)
@@ -562,6 +533,7 @@ ggplot(data = variogram_model_roc, aes(x = dist, y = gamma)) +
ggplot(data = us_map) +
ggplot
```
+
Compare error to 13 other factors (from Dongchen's folder on github)
```{r, echo = FALSE}
@@ -645,18 +617,6 @@ for (i in 23:35){
-```{r}
-
-
-```
-
-
-
-
-
-make table of (slope, p value, r^2)
-
-
From 9cceaa6706ca58a9ff553d75342eaa60ec7e6f1c Mon Sep 17 00:00:00 2001
From: ChaneyFinkeldeiE
Date: Wed, 7 May 2025 18:04:39 -0400
Subject: [PATCH 0093/1193] Updated some comments and added more soil moisture
analysis
---
.../inst/NEON_soils/soil_resp_markdown.Rmd | 166 +++++++++++++-----
1 file changed, 122 insertions(+), 44 deletions(-)
diff --git a/modules/data.mining/inst/NEON_soils/soil_resp_markdown.Rmd b/modules/data.mining/inst/NEON_soils/soil_resp_markdown.Rmd
index 2ce0ab36406..79afaf78838 100644
--- a/modules/data.mining/inst/NEON_soils/soil_resp_markdown.Rmd
+++ b/modules/data.mining/inst/NEON_soils/soil_resp_markdown.Rmd
@@ -37,10 +37,9 @@ library(stringr)
library(scales)
```
-Get NEON soil respiraiton data
-
+Get NEON soil respiration data
```{r}
-# get the coordinates of 6400 sites in Dongchen's newest output
+# get a list of NEON sites and their coordinates
Neonplots <- read.csv("/usr4/ugrad/chaney/R/3_25/Neon_sites_terrestrial.csv")
# Add a column for site numbers in Site_Info (site_info has coordinates of each location in df)
@@ -66,6 +65,7 @@ matched <- data.frame(id = numeric(nrow(Neonplots)), # id in forecast
# Loop through each row in Neonplots and calculate each plot's closest site number
for (i in 1:nrow(Neonplots)) {
+
# Extract the coordinates for the current row in Neonplots
plot_coords <- Neonplots_Coordinates[i, ]
@@ -82,9 +82,10 @@ for (i in 1:nrow(Neonplots)) {
### Get the model output (0715, 16 outputs) (Yang's code)
years <- 2000:2025
-Nensem <- 1:25 # 50 if change folder?
+Nensem <- 1:25
# Initialize the output list
+
sr_list <- list()
# Process Ensemble Function
@@ -92,20 +93,17 @@ process_ensemble <- function(ens) {
ens_id <- sprintf("%05d", ens)
sr_ens <- list()
- # go through all the sites
+ # loop through all the sites
for (i in seq_len(nrow(matched))) {
site_id <- matched$site_id[i]
ID <- matched$id[i]
- # go through the years
for (year in years) {
file_path <- paste0(
"/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2025_4_19/out/ENS-",
ens_id, "-", ID, "/", year, ".nc"
)
- # check if the file exists
-
# open NetCDF files
nc_data <- tryCatch({
nc_open(file_path)
@@ -115,7 +113,7 @@ process_ensemble <- function(ens) {
# file open
if (!is.null(nc_data)) {
- # time dimension ??
+
if ("time" %in% names(nc_data$dim)) {
time_values <- ncvar_get(nc_data, "time")
time_units <- ncatt_get(nc_data, "time", "units")$value
@@ -129,7 +127,7 @@ process_ensemble <- function(ens) {
# Calculate the time series
time_datetimes <- as.Date(start_datetime) + (time_values - 1)
- # get Soil Respiration data
+ # Extract Soil Respiration data
sr <- tryCatch({
ncvar_get(nc_data, "SoilResp")
}, error = function(e) {
@@ -138,7 +136,7 @@ process_ensemble <- function(ens) {
if (!is.null(sr)) {
- # replace -999 to NA
+
sr[sr == -999] <- NA
# create dataframe
@@ -177,7 +175,7 @@ sr_output <- rbindlist(lapply(results, `[[`, "sr"))
# Calculate ensemble mean
sr_mean <- sr_output[, .(SR = mean(SR, na.rm = TRUE)), by = .(Time, Site_ID)]
-# Merge SR mean value with a time resolution of 3h (Not 1 day)
+# Merge SR mean value
daily_mean_data <- as.data.table(sr_mean)
# Format Time to character for processing
@@ -190,9 +188,8 @@ final_daily_sr_data <- daily_mean_data[, .(
```
Get John's NEON soil respiration data
-
```{r}
-# function to get soil fluxes for specifed years for 1 site
+# function to get soil fluxes for specified years for 1 site
soil_flux <- function(site_number, flux_type){
summary_table <- data.frame(Time = character(5114),
Site_id = character(5114),
@@ -218,6 +215,8 @@ soil_flux <- function(site_number, flux_type){
# sort by date so I can average over all horizontal positions
out_fluxes <- out_fluxes[order(out_fluxes$startDateTime),]
+
+ # Average over each day (data is in 30 min increments)
for (j in 1:(nrow(out_fluxes)/240)){
if (j == 1){
start <- 1
@@ -234,7 +233,7 @@ soil_flux <- function(site_number, flux_type){
tot <- tot + temp
}
}
- summary_table$sr_neon[day_count] <- tot/(240 - na_count)
+ summary_table$sr_neon[day_count] <- mean()
summary_table$Site_id[day_count] <- site
if (start == 1){
summary_table$Time[day_count] <- str_sub(out_fluxes$startDateTime[start], end=-1)
@@ -248,7 +247,7 @@ soil_flux <- function(site_number, flux_type){
}
}
}
- # get rid of empty row
+
return(list(sr = summary_table))
}
r <- soil_flux(1,4)
@@ -257,14 +256,14 @@ r <- soil_flux(1,4)
n = 1:nrow(matched)
num_cores <- detectCores() - 1
-# using marshall method at top of soil (000)
+# Use Marshall method at top of soil (000)
results_j <- mclapply(n, soil_flux, mc.cores = num_cores, flux_type = 4)
-# Combine results from all ensmebles
+# Combine results from all ensembles
sr_marshall <- rbindlist(lapply(results_j, `[[`, "sr"))
# delete rows with all zeros
sr_marshall_filtered <- filter(sr_marshall, sr_neon != 0)
-# using Milington- Quirk method at top of soil (000)
+# Use Milington- Quirk method at top of soil (000)
results_j_2 <- mclapply(n, soil_flux, mc.cores = num_cores, flux_type = 8)
# Combine results from all ensembles
sr_m_q <- rbindlist(lapply(results_j_2, `[[`, "sr"))
@@ -288,7 +287,7 @@ names(daily_sr)[names(daily_sr) == "Final_SR"] <- "Forecast_SR"
# convert Time to date class
daily_sr$Time <- as.Date(daily_sr$Time)
-# convert johns to kg/mol ^2 *s (currently in umol/m^2 * s)
+# convert NEON data to to kg/mol ^2 *s (currently in umol/m^2 * s)
daily_sr$Marshall_SR = daily_sr$Marshall_SR*10^-6*12.011*10^-3
daily_sr$Mill_Quirk_SR = daily_sr$Mill_Quirk_SR*10^-6*12.011*10^-3
@@ -339,9 +338,9 @@ for (i in 1:length(sites)){
```
-Graph them
+Graph Time Series
```{r}
-# make predicted- observed plots
+
for (i in 1:length(sites)){
temp_data <- filter(daily_sr, Site_ID == sites[i])
@@ -354,8 +353,25 @@ print(ggplot(temp_data) +
```
-Graph cross-correlation coefficients
+
+Predicted Obervered plots
```{r}
+# make predicted- observed plots
+for (i in 1:length(sites)){
+ temp_data <- filter(daily_sr, Site_ID == sites[i])
+
+ print(ggplot(temp_data, aes(x = Forecast_umol, y = Marsh_umol)) + geom_point() +
+ geom_smooth(method = "lm", se = FALSE) +
+ labs(title = sites[i], x = "Predicted (kg/m^2*s)", y = "Observed (kg/m^2*s)"))
+
+}
+```
+
+
+Cross-correlation coefficients graphs
+```{r}
+
+# List of sites with a noticable lag
lag_sites <- c("MLBS", "HARV","SERC","UNDE","ABBY", "GRSM","WREF","SJER")
peak <- numeric(length(lag_sites))
@@ -382,19 +398,12 @@ for (site in lag_sites){
count = count + 1
}
-# add titles
-
-# analyze more
-
```
-
-
Soil Moisture Fraction from Model
```{r}
-#
# Initialize the output list
sm_list <- list()
@@ -412,7 +421,7 @@ process_ensemble_sm <- function(ens) {
# go through the years
for (year in years) {
file_path <- paste0(
- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2025_4_19/out/ENS-",
+ "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/run/ENS-",
ens_id, "-", ID, "/", year, ".nc"
)
@@ -477,9 +486,27 @@ process_ensemble_sm <- function(ens) {
return(list(sm = sm_result))
}
+Nensem <- 1:100
# Apply parallel calculation
num_cores <- detectCores() - 1
-results <- mclapply(Nensem, process_ensemble_sm, mc.cores = num_cores)
+
+sm_model_results <- mclapply(Nensem, process_ensemble_sm, mc.cores = num_cores)
+
+# Combine Results from all ensembles
+sm_output <- rbindlist(lapply(sm_model_results, '[[', "sm"))
+
+# Calculate ensemble mean
+sm_mean <- sm_output[, .(sm = mean(sm, na.rm = TRUE)), by = .(Time, Site_ID)]
+
+# Merge SR mean value with a time resolution of 3h (Not 1 day)
+daily_sm_data <- as.data.table(sm_mean)
+
+# Format Time to character for processing
+daily_sm_data[, Time := as.character(Time)]
+
+# Calculate daily SR(time resolution is 1 day)
+final_daily_sm_data <- daily_sm_data[, .(
+ Soil_moist_model = mean(sm, na.rm = TRUE)), by = .(Time, Site_ID)]
```
@@ -557,21 +584,16 @@ soil_temperature <- filter(st_output, st != 0)
```
-useful for this: list.files and readLines
-
-
-Get Temp and Soil Moisture from John's data
+Get Temp and Soil Moisture from John's NEON data
```{r}
soil_temp_and_moist <- function(site_number){
- summary_table <- data.frame(Site_id = character(5114),
+ summary_table <- data.frame(Site_ID = character(5114),
Time = character(5114),
- soil_temp = numeric(5114),
- soil_moist = numeric(5114))
-
- #temp <- 0
+ Soil_temp_neon = numeric(5114),
+ Soil_moist_neon = numeric(5114))
years = 2012:2025
site = matched$site_id[site_number]
@@ -617,8 +639,8 @@ soil_temp_and_moist <- function(site_number){
}
}
}
- # get rid of empty row
-return(list(soil_tempandmoist = summary_table))
+
+return(list(stm = summary_table))
}
# Apply parallel calculation for each site
@@ -629,7 +651,7 @@ num_cores <- detectCores() - 1
results_tempandmoist <- mclapply(n, soil_temp_and_moist, mc.cores = num_cores)
# Combine results from all ensembles
-soil_tm <- rbindlist(lapply(results_tempandmoist, `[[`, "soil_tempandmoist"))
+soil_tm <- rbindlist(lapply(results_tempandmoist, `[[`, "stm"))
# Delete all rows with just zeros
soil_tm <- filter(soil_tm, soil_temp != 0)
@@ -637,7 +659,63 @@ soil_tm <- filter(soil_tm, soil_temp != 0)
# temp is in C, soil moisture is a ratio
```
+Combine the soil moisture data
+```{r}
+soil_moist_table <- merge(final_daily_sm_data, soil_tm, by = c("Time","Site_ID"))
+```
+
+Graph Time series
+```{r}
+
+for (i in 1:length(sites)){
+ temp_data <- filter(soil_moist_table, Site_ID == sites[i])
+
+
+print(ggplot(temp_data) +
+ geom_line(aes(x = Time, y = Soil_moist_neon, group = 1), linewidth = 1, color = "orange") +
+ geom_line(aes(x = Time, y = Soil_moist_model,group = 1), linewidth = 1, color = "lightblue") +
+ labs(title = sites[i], x = "date", y = "Soil Moisture Fraction"))
+}
+
+```
+
+
+Cross- Correlation coefficients
+```{r}
+lag <- data.frame(lag_sites = character(8),
+ half_lag = numeric(8),
+ year_lag = numeric(8))
+
+lag$lag_sites <- c("MLBS", "HARV","SERC","UNDE","ABBY", "GRSM","WREF","SJER")
+
+peak <- numeric(length(lag$lag_sites))
+count = 1
+
+for (site in lag$lag_sites){
+ temp_data <- filter(soil_moist_table, Site_ID == site)
+
+ # remove na values
+ temp_data <- na.omit(temp_data)
+
+ name <- temp_data$Site_ID[1]
+ # get rid of first row (so i can remove infinite)
+ temp_data <- subset(temp_data, select = -c(Site_ID, Time))
+
+ # remove inf values
+ temp_data <- temp_data[apply(temp_data, 1, function(row) all(is.finite(row))), ]
+
+ ccf_data <- ccf(temp_data$Soil_moist_neon, temp_data$Soil_moist_model, lag = 183, plot = TRUE, ylim = range(-1,1))
+
+ #title(name)
+
+ lag$half_lag[count] <- ccf_data$lag[which.max(ccf_data$acf)]
+
+}
+```
+
+To do:
+- find function for soil moisture and respiration data, and for soil temperature and respiration
From 3ceb68d6a8416cced067f49a5365efb481336b81 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 8 May 2025 00:25:39 -0400
Subject: [PATCH 0094/1193] Update path
---
.../inst/anchor/NA_downscale_script.R | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 057229d3214..2b19b0b059e 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -225,18 +225,17 @@ for (y in 2012:2024) {
# setup.
base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
-load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/sda.all.forecast.analysis.Rdata")
+load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_100ens_gedi_2025_5_4/sda.all.forecast.analysis.Rdata")
variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
-# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/pecanIC.xml"
-settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/ShapeFile/pts.shp"
-outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/"
+settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/AGU_2024/pts.shp"
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_100ens_gedi_2025_5_4/"
cores <- 28
date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
# loop over years.
for (i in seq_along(date)) {
print(i)
# Assemble covariates.
- covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates_lc_ts", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
+ covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
analysis.yr <- analysis.all[[i]]
time <- date[i]
@@ -244,7 +243,7 @@ for (i in seq_along(date)) {
for (j in seq_along(variables)) {
# setup folder.
variable <- variables[j]
- folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts"), paste0(variables[j], "_", date[i]))
+ folder.path <- file.path(file.path(outdir, "downscale_maps_analysis"), paste0(variables[j], "_", date[i]))
dir.create(folder.path)
saveRDS(list(settings = settings,
analysis.yr = analysis.yr,
@@ -254,7 +253,7 @@ for (i in seq_along(date)) {
folder.path = folder.path,
base.map.dir = base.map.dir,
cores = cores,
- outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")),
+ outdir = file.path(outdir, "downscale_maps_analysis")),
file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
From ab52194b6267358f96e0faaa6f1c71f5a901edee Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 8 May 2025 23:38:05 -0400
Subject: [PATCH 0095/1193] Update path.
---
modules/assim.sequential/inst/anchor/NA_downscale_script.R | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 2b19b0b059e..cdd71fc67e4 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -225,17 +225,17 @@ for (y in 2012:2024) {
# setup.
base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
-load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_100ens_gedi_2025_5_4/sda.all.forecast.analysis.Rdata")
+load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/sda.all.forecast.analysis.Rdata")
variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/AGU_2024/pts.shp"
-outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_100ens_gedi_2025_5_4/"
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/"
cores <- 28
date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
# loop over years.
for (i in seq_along(date)) {
print(i)
# Assemble covariates.
- covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
+ covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates/", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
analysis.yr <- analysis.all[[i]]
time <- date[i]
From 15e36f66d54ccfdd90990380273f9dee7c2ef6d7 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Sat, 10 May 2025 11:45:47 -0400
Subject: [PATCH 0096/1193] Update path.
---
modules/assim.sequential/inst/anchor/NA_downscale_script.R | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index cdd71fc67e4..a63c4f85b99 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -235,7 +235,7 @@ date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
for (i in seq_along(date)) {
print(i)
# Assemble covariates.
- covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates/", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
+ covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates_lc_ts/", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
analysis.yr <- analysis.all[[i]]
time <- date[i]
@@ -243,7 +243,7 @@ for (i in seq_along(date)) {
for (j in seq_along(variables)) {
# setup folder.
variable <- variables[j]
- folder.path <- file.path(file.path(outdir, "downscale_maps_analysis"), paste0(variables[j], "_", date[i]))
+ folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts"), paste0(variables[j], "_", date[i]))
dir.create(folder.path)
saveRDS(list(settings = settings,
analysis.yr = analysis.yr,
@@ -253,7 +253,7 @@ for (i in seq_along(date)) {
folder.path = folder.path,
base.map.dir = base.map.dir,
cores = cores,
- outdir = file.path(outdir, "downscale_maps_analysis")),
+ outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")),
file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
From dc13b2faf6b5384d8261b93554e9435baf72e619 Mon Sep 17 00:00:00 2001
From: Katherine Rein
Date: Mon, 12 May 2025 15:17:33 -0400
Subject: [PATCH 0097/1193] Added folder specific commands
---
.../inst/Python/CCMMF_Irrigation_API.py | 13 ++++++++-----
.../inst/Python/CCMMF_Irrigation_CalcVis.py | 2 +-
.../inst/Python/CCMMF_Irrigation_DataDownload.py | 6 +++++-
.../inst/Python/CCMMF_Irrigation_Events.py | 2 +-
modules/data.remote/inst/Python/README.txt | 14 +++++++++++---
.../CCMMF_Irrigation_CalcVis.cpython-312.pyc | Bin 0 -> 4179 bytes
...CCMMF_Irrigation_DataDownload.cpython-312.pyc | Bin 0 -> 6159 bytes
.../CCMMF_Irrigation_Events.cpython-312.pyc | Bin 0 -> 1536 bytes
8 files changed, 26 insertions(+), 11 deletions(-)
create mode 100644 modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_CalcVis.cpython-312.pyc
create mode 100644 modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_DataDownload.cpython-312.pyc
create mode 100644 modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_Events.cpython-312.pyc
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
index 553c939281b..754c07b7b58 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
@@ -29,11 +29,14 @@
# Define years to look at
years = list(range(2016, 2026))
+# Define main folder
+main_folder = '/projectnb/dietzelab/ccmmf/management/irrigation/'
+
# Define folder name for csv files
-csv_folder = 'WaterBalanceCSV/'
+csv_folder = main_folder + 'WaterBalanceCSV/'
# Define the name of the parquet filename
-pq_filename = 'CCMMF_Irrigation_Parquet'
+pq_filename = main_folder + 'CCMMF_Irrigation_Parquet'
# %% Loading data
@@ -54,7 +57,7 @@
# We only do this if the data is not up to date
cur_year = datetime.now().year
today = datetime.now().date()
-chirps_filename = f'chirps-v2.0.{cur_year}.days_p05.nc'
+chirps_filename = f'{main_folder}chirps-v2.0.{cur_year}.days_p05.nc'
if os.path.exists(chirps_filename):
with Dataset(chirps_filename, 'r') as nc:
@@ -72,14 +75,14 @@
# %% Define locations
# Read in all lat lons
-df_lat_lon = pd.read_csv('design_points.csv')
+df_lat_lon = pd.read_csv(f'{main_folder}design_points.csv')
# Handle duplicates
df_lat_lon = df_lat_lon.drop_duplicates()
# %% Iterate through locations and download data for each
-for row_number in range(29):
+for row_number in range(34):
# Load location data
latitude = df_lat_lon['lat'].iloc[row_number]
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
index 46312c85fca..289112e08e4 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
@@ -98,7 +98,7 @@ def timeseries_graphs(df_water_balance, LAT, LON, YEAR):
plt.grid()
# Save plot
- filename = f'TimeseriesPNG/CCMMR_et_precip_irr_cumsum_{YEAR}_{LAT}_{LON}.png'
+ filename = f'/projectnb/dietzelab/ccmmf/management/irrigation/TimeseriesPNG/CCMMR_et_precip_irr_cumsum_{YEAR}_{LAT}_{LON}.png'
plt.savefig(filename)
plt.show()
\ No newline at end of file
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
index 742a01f479e..d57b3711da8 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
@@ -51,6 +51,10 @@ def extract_et(img):
def OpenETData(START_DATE, END_DATE, LAT, LON):
+ # Set directory
+ working_dir = '/projectnb/dietzelab/ccmmf/management/irrigation/'
+ os.chdir(working_dir)
+
# Read in API Key
with open('OpenETAPIKey.txt', 'r') as file:
api_key = file.readline()
@@ -90,7 +94,7 @@ def CHIRPSData(YEAR, LAT, LON):
# Set URL and file name
url = f'https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/netcdf/p05/chirps-v2.0.{YEAR}.days_p05.nc'
- destfile = f'chirps-v2.0.{YEAR}.days_p05.nc'
+ destfile = f'/projectnb/dietzelab/ccmmf/management/irrigation/chirps-v2.0.{YEAR}.days_p05.nc'
# Check if the file already exists before downloading
if not os.path.exists(destfile):
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
index e11ab0c6f45..59c56a1b828 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
@@ -50,7 +50,7 @@ def file_creation(data_dict):
eventfile_df = eventfile_df.drop('week', axis = 1)
# Write to file(s)
- folder_name = 'CCMMF_Irrigation_EventFiles/'
+ folder_name = '/projectnb/dietzelab/ccmmf/management/irrigation/CCMMF_Irrigation_EventFiles/'
filename = f'{folder_name}irrigation_eventfile_{key}.txt'
eventfile_df.to_csv(filename, sep = ' ', index = False, header = False)
diff --git a/modules/data.remote/inst/Python/README.txt b/modules/data.remote/inst/Python/README.txt
index 759d65aa280..bd2f2d1ac85 100644
--- a/modules/data.remote/inst/Python/README.txt
+++ b/modules/data.remote/inst/Python/README.txt
@@ -10,6 +10,8 @@ Data Sources:
- https://data.chc.ucsb.edu/products/CHIRPS-2.0/
Main Storage Folder: /projectnb/dietzelab/ccmmf/management/irrigation
+Github Code Storage Folder: /projectnb/dietzelab/ccmmf/management/irrigation/
+ pecan/modules/data.remote/inst/Python
How to use SCC:
- When creating desktop ensure -> Extra qsub options: -l buyin
@@ -24,7 +26,7 @@ How to use SCC:
- This may take a second to run. Be patient it will open eventually.
Google Earth Engine Account:
-- Contact __ for a new Google Earth Enginge project
+- Contact Brian Anderson (andyson@bu.edu) for a new Google Earth Enginge project
- Go to https://code.earthengine.google.com/
- Click on your profile picture in the top right corner
- Select Project Info
@@ -65,7 +67,7 @@ Organization:
- WaterBalanceCSV: This is where all of the csv files for each location get
saved. This is a back up way to save all of the data and also makes it easier
to quickly view data per location. Each file is labeled with the corresponding
- lat and long coordinate. The folder name is defined in "Define multi use
+ lat and long coordinate. The folder name is defined in the "Define multi use
variables" section of CCMMF_Irrigation_API.
- TimeseriesPNG: This is where the timeseries graphs for each location and
each year are saved. There is no variable name for this folder it is simply
@@ -78,6 +80,11 @@ Organization:
- CCMMF_Irrigation_EventFiles: This holds all of the event txt files for each
location. The column names are in the header of CCMMF_Irrigation_Events. The
naming format for the files is irrigation_eventfile_{location_id}.txt.
+ - pecan: This folder contains the entire pecan repo from Github. The only portion
+ of this that is needed is the Python code files which can be found in Python_Code.
+ - Python_Code: This is a symlink to the folder within pecan that holds all
+ of the .py files. This is so that you can add the files to the pecan Github
+ repo.
- Other
- chirps-v2.0.{year}.days_p05.nc: These are the files that contain the downloaded
CHIRPS data on a daily scale for the whole world. They are downloaded from the
@@ -114,7 +121,8 @@ Functions (by files):
- CHIRPSData: This function downloads the .nc file from the CHIRPS website
and then reads in the values for the closest latitude longitude values. It
then returns the data as a dataframe.
- - new_data_entry_API:
+ - new_data_entry_API: This function calls on other functions to download and
+ organize the years and location that was passed to it.
- CCMMF_Irrigation_CalcVis
- water_balance: This function takes the raw data for each location and calculates
the water balance equation for each time step. It also calculates the different
diff --git a/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_CalcVis.cpython-312.pyc b/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_CalcVis.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3e65ee0379955ebd8763225640d3629ffab9ed7e
GIT binary patch
literal 4179
zcmbVOPfQ!x8K1GoV`F23NdkmGJg@=mEWrUnQhC!PjspvX&?sb+E}L{Z@r+@}c*dC-
zL$FMgNLj5b_28bCYByR`^+1(9&|8ix^-`&qc_pdQ>sG4Nz2ufQX%9X1dt*BWWV(Nz
zH1pnk@Av(A?|t)r-=F+`F9nUe(6cZbps0V4k7{#N2Cq-T;8RMZ<|vUC9SS|?uzP2k
znRC%)89V0|osX$GkI2BgVZ{+~<<58$x+IvA$Z0Bf6S{at=Y~hPp;NI_r(>f--0;ca
zQLp!ckTn-HJ;vSDC0QLF^1dW~XOc|CJ?U6@obbHn;Qc9O(nLsF-tid(kavDYJ#jv*
zNNFlhtulE!&xnq*1jC5V!_Y4h3wxT#RJ91If_8i-v~62-t-3@u@5;F-Y886Xo%6xS
zop*yh53y&$Ou2~-rn@@MzxDgHvdx#M+rI%#57D#zd#c!4pZACjIXEyfzuExEa-?;U
zHpWX^?H+OGAmDo?6>QvQ7r-U=eHFTz(w{PMR9Wj0lWcBX~<@C_qJM#Mf-
z<3+d$Y_ks0)cwSzPd2oq06Lu~u{xBTudcvl
zs74csD_6$($+v2JLQs-7WMeS1g8jRetcM{k5}WZKILL?8*GxlUwlG!<7DtM`g~8{{
z;a4ox-eWNbx4Q@6UHmCxj)Lgu;D(60MoL{V)D?R+wAJ-vGI<>_9oyYQn_<-bUa9*c
z>c05Z&{p@@4n1GsVPdXZZAgB)zbL5-WBWv-`lUC30
z*3pr_w4N$>VY;n+ojmEBg26Q0v8lZ#&q%9!9};OTm*UcyhA?1EA&3u
zjT?yR+zv&-UFb|H6hom{DRdr%&hIxjjzXY1Q3_o_p(}4eeWy2
s_J7Nfe&k3fnBiBc^v7G$hn}w$z(Iy@{<}yI#13
zn4UN6{=5itF3%@ZkEV*tFPPBRjQf*`M-vthe+v0~t^PC5e5n7Db#2ZP#8(uh(Fw=D
zDav)pQ3u(C`X{U#KeMC-KpON#8=0twY()KI*6b}SnX)qZ7tCrMV;>SfxsZh!qRi5m7rm4HQjp7+$JB>kEX5QhtHWg@-g{nvd9*VqO^8qePkUf5-SjS
z&K$2OJ)R~5SmNU;*y0w2wv>!@_nvi6w~QrAU$&O-TPu0%{wgft0W1N!=7i(n1kt+u
z=yK5ml^lr$EPOnrN8kh_UhL+1QA_eXc0pYWDd42dc$#(v^EXPI7|H
zz+WgBx}Bz^c(haEca&|joHm}b<@)rTego!7_ec*3Y=g)
Re|bjf&eu8}qMK|B{{vN+*h2sS
literal 0
HcmV?d00001
diff --git a/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_DataDownload.cpython-312.pyc b/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_DataDownload.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d397b2f30024ab3266a2016242814e521feeb6ce
GIT binary patch
literal 6159
zcmb7ITWlLwdY<77$>H!O>SoEZY}%49v?J1zolUH@y_H2;k{wZwBs*(4TV~7|QA3kM
z=FG^lh+#@&U06Z_n8LP@f{hU^u)r!!fT~4-`Z2peivaDzaH)leu@)P&dFZ}**DliL
zrTx!PL&}aE>>i2d{O9(cf6nFmzjOZ0?PduG2L}($Y%~(Yf8vZ8EZf4<0tm|lPfQU!
z$y-w76ltWEDauIcDJ!H@%9gfI*~w}fW{NTD98(S>Wv5sRA+X6-Q+Yl8bX$a;^gUwA
z#akhE^EQY*Jj2`XkyGAzOTdwPjU7Y+rwF`1Bl+KgWFU+DeW(1rr~6O$^`Gwb_nqu}
zm1X~en^k5qsNa7J38K{3%l;k@5Dk#JJgvA`wO!M@D
zWr3Qu@RoZ_-m20`cdbKBP_4DrHr1B&)+%a(Irn*+;xh|Xo7syb>ZMB4x9fMbYcXqg
zTj9FKyj`icY%}}RQ`OE>?T|G!a4W*teW_6Z_D=MGE`k4*T(W047ebZpW;
zel_YJnDkE$NBpB>qmk%j?)X?%kRp?zNOU4Hdf|;oXmBigbs{u0J`x^<;?c3_~h3Y7k>WtmWk7si-NbG5&qQe
zU~1ga(`D+Yr@JloBE3k}Hxg{)ZF`forLXtrYa1?aQEJ;b2@vsO;9WzU(
zh{mEao_W9DUnqPt7s{ecQivT5+zv)U6}{8kwSxmqoWsNBeiZZj5XoRoRrFNd@wt&
zv%(z(adDUrCD%C=iB$Jw@N6m*=Th?dV7=N8#>x^z|5G3;-o^)a7Vo^D+w}TNUVqtp
zXo0S{8#djYC3ojXH$P5(nB3eSEbR}L_n-WyL;rl@A5Luc4VL-_KcD!b>+?(HzA^31
zt7Z4KP4`sEJ*C}9mEGwD%a+&oAh(!X>0WDIA20b|FMH2u%$cV!B4Ny4z>S)w$x
z%*S9Rm4f-gi^5KeDMdi=X@c&m$`>-T5-%eU?u!9Nr_x+jx5L7WNYfeJnnie_Xk|VN
zi=O3m2cDOUh)WB)Q^~|&It4{c3js!FCng8RCu8A($%xKGqG3Zq`23+}teB103_;k9
z=nmLivVcTE*4Yf4aG2Q`KP}^>_xs;@2iDtMb$$P*jk+q>Ctu+(s{+|X4R*`gf_fepGD(hs^9
z=HCx((0jLhP0Rb1_N|Cz-?74_t@e(O_kXy5ja~m<=~#cc{riPW3%5&j>sQe8_y*nb
zrL#q&TM#zgKu0G7Z*lX?2PeV0nt(Nn3x5f8`LJiMG(>i@=Vl0Xe3_d(D#7PN3XW*0x=r)(qnM4gF0kZ@sxXmGv
z0|3SdJ1rnViVJX5bnC1nDzff`b1cNF7@P~vC`wlD4~6h~5|kb;D+V(t5ki~{>O=UX
zR!>|givc)5>Gm0c;{_z^bW#R{pk`4DPCGu)tZvE3x-~w-i%6%jLY)Er#-~IH>_x)6
zvw{rc)7gxypnjNebO1+w936xx;6jJ6+=Zh$3^?a9=$0rYVuo65ylw|GW4DBP-BvY5
zr#X}W%}9^|iZChw=Aa{>N5-xa1A`f=0Krz*1w#|M1T9YErvD02fq=u@bdfAv-lA>y
zhwlzA+}NPopV|av^dlb5O_pi)p?xTf^PZ*-r
zzuKlXAFnjFSDIRjaxqZxwnLXn>w${5snXK%2cN@bD}4CT?zJIGQ^wl~rW&)aUmgr{6bJ_x4)$Sg(}FJ-<)?T_{bX}*=ffs82umH810JY#!WoK2oQ|V#DlYO`DPGkB?fPxn~EG793C0JGSSl)JQ+%)GB>#t
zzy%<12=Fh?PlvK6Uk$})M3j|#ZbO@3&K2ajc{v8Om{YJvK=M1W94#*J@$!DV^4q~tZ%#UQi(nK
z38}Hi3xi+|@b&4%>Egu7sio^>&w)+Pk&@>~*>kKgwB>1dSrztNd$)P(aH+B9@3{3n
zkB(}=fl}jz!eywny6%tO9lbY(8}8q1>@GES|9t!x*M4@b-1rJOyp}!7Z!f*QdZOGC
zEJVL_wrh0z*Jt2MH21-83_tqI&V7-+)W7VtM!M|r+C3Jg8ei-1PI}sAxRg32HQWiQ
zlHda1&iGT}VeNqeAcN+KTgPCXfPPjb)ByKN3kD!)Pd&8cX~@&tc`M|tO3jg}v}!e;
zUbdfh;6bBCZ<9N118q2%PzAtl2@^kpad#3_l`(W}YtK7&=-QpSIxyY+tmcmL_79HJ
zMBP;-@kV_is5YRy__4^enB6hTE-p)Dc?N&G34SZL@oS>2yXOr3;0&;S
zgO2LHI{S?!5SN_+c;w+~J1=lZx4~Ts|(?XR20Q19wv>2Y#IbmRu*ay4AR>A_1F+
zxs~3q6Wa`(4g7TMIXiR)gQx^|=a=x@&)+G5d;75WprX8
zQuGy16%QBRU1?ddYpj2RJ_snlG!`d+ZEs)s;ac;rJ9{c^oh#gjOfk3Gsukvey
z>rLg3S2d<>tF>)eToPA1wBUKo|AR01jn;`pC)}ict2fZHME(I@^4-7$Z@mh)AR~?1%Z!&!)rcXOHsD&rBt2avF
zw>QJFQaH95=1XB-n@N?zsnV&mmO+{_r_JYo!@T!|f?OA@}D1j9vizp-7VagN-KXryhcAcuX`tCYpXrbUY>wJSH6Q|0Urr
iSnj*;y6<`a$hMQMf9yXD(KvaO-23brxsTjujPd{a?~I24
literal 0
HcmV?d00001
diff --git a/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_Events.cpython-312.pyc b/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_Events.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2a2466f70806cc7d006c6aa2b2d791637ffa0c28
GIT binary patch
literal 1536
zcmb7EO>7%Q6rTODy;;Zn#g0P^XyO*hmP(wsBo!u*R%s(5l(s^gBC6HIGj=xd{xG{P
zv1KC;92|*5atgmvk3s2yV|wO-#DPQ=#F{Io-V*u7i8qczP!LEw%WuDV^WK~HH}i8M
zVIrWFo7uG=BMAL27w4o7KyMVlE~01!QH82CWksdh=V(Q1sCk{dtsrDBa7IE#vFG9s
z*XsC|&vEfhJY6hJ&y=peg^N?g8PmK;>fxHlOL&7b+bvF;=7LwNJ8n=iG0+>h=9No0
z@JVQsnp^+?hqg=ECe8&R;bDMDh^O)c{RSi8O>*p2a9=@!aLo>IXge$jnU4p$lr%~h
z*neo|rXhywv$N;!*+b@rR@m@a2}>?b`?7&c;&^pXAe2Jpxw2CL_vf}}R=Fws4lWbd
z^BdRjG~mFqYu9iQP^IR1G;f;EWRdTdp@@Hx?1%>cUKXUgsD;-5zB^FcAAE142Q7p?
zf?lux}$HV)BIamjq
z%S01{-*fPN{@dvOAkwzh($-^_wc%uwMjpR#?y;@W7u$ME{|3>QS_<%y@AP&AEQkJS
z*^1C4jYB-Cef?}#L0^2TqXx>4-W3s>bGs_ci^iLSuSj;QJAc55%HW8`VJP3sr)SY~kN5P)UrieIn*zzitwBxcITt>cXm4ZWDQe_T|RiPhnf4~b13(L!k){@MiFR<>&
z!CSOzEGUT7g`(BZt^#tasZWNRp-^yMgD9B~1(3HyMAsOhjOQaD2$>UzSe1Kqf3+br
zQmqP|a?cmh(6h?H!+cZ>Q4$i1+GWUfgEfRoD`NQK6j;S8UJV>tE^(MJr?55J&
z^_`_h+RnYRy%PhCjqf&hnvZ7pW)HK)gZg&!Y5LA_dg&;=)Eyb`CNtekwmUx29RY`#
z(?slj#q6QjSiCcQl0;)ykJGuMbZ&3-Ksik3o~BD6j-qr$>BPIE6Wb3Cvo{X3AL<9U
z4@YiwjINP9HZn&>=F9i?77kKBr4EhEug1O88z}jD4^l#TUBoPldS%NJQJ=V!1VZ&`
wKk@GozXB?J0+=x7&Z_z@&%sj8eUJinqA7}U8cisAFN>69&wNd}uk_9S1Ql
Date: Wed, 14 May 2025 00:16:53 +0530
Subject: [PATCH 0098/1193] feat(models): add schema.org keywords to model
DESCRIPTION files
---
models/basgra/DESCRIPTION | 1 +
models/biocro/DESCRIPTION | 1 +
models/cable/DESCRIPTION | 1 +
models/clm45/DESCRIPTION | 1 +
models/dalec/DESCRIPTION | 1 +
models/dvmdostem/DESCRIPTION | 3 +++
models/ed/DESCRIPTION | 1 +
models/fates/DESCRIPTION | 1 +
models/gday/DESCRIPTION | 1 +
models/jules/DESCRIPTION | 1 +
models/ldndc/DESCRIPTION | 1 +
models/linkages/DESCRIPTION | 1 +
models/lpjguess/DESCRIPTION | 1 +
models/maat/DESCRIPTION | 1 +
models/maespa/DESCRIPTION | 2 ++
models/preles/DESCRIPTION | 1 +
models/sibcasa/DESCRIPTION | 1 +
models/sipnet/DESCRIPTION | 1 +
models/stics/DESCRIPTION | 1 +
19 files changed, 22 insertions(+)
diff --git a/models/basgra/DESCRIPTION b/models/basgra/DESCRIPTION
index f34e3f69db1..de0c9830916 100644
--- a/models/basgra/DESCRIPTION
+++ b/models/basgra/DESCRIPTION
@@ -24,3 +24,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema-keywords: BASGRA, grassland, ecosystem, carbon-cycle, nitrogen-cycle, plant-growth, soil-processes, forage-quality, climate-change, agricultural-systems, biogeochemistry
diff --git a/models/biocro/DESCRIPTION b/models/biocro/DESCRIPTION
index 51a2475e4dd..084133c3c6d 100644
--- a/models/biocro/DESCRIPTION
+++ b/models/biocro/DESCRIPTION
@@ -40,3 +40,4 @@ Copyright: Energy Biosciences Institute, Authors
Encoding: UTF-8
VignetteBuilder: knitr, rmarkdown
RoxygenNote: 7.3.2
+X-schema-keywords: BIOCRO, crop-growth, photosynthesis, biomass-allocation, phenology, C3-C4-plants, bioenergy-crops, agricultural-systems
diff --git a/models/cable/DESCRIPTION b/models/cable/DESCRIPTION
index d7de137afa5..148c11c1c44 100644
--- a/models/cable/DESCRIPTION
+++ b/models/cable/DESCRIPTION
@@ -22,3 +22,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: CABLE, land-surface-model, biogeochemistry, energy-balance, water-balance, vegetation-dynamics, soil-processes, climate-vegetation-interactions
diff --git a/models/clm45/DESCRIPTION b/models/clm45/DESCRIPTION
index 363b065e63c..40a8778e665 100644
--- a/models/clm45/DESCRIPTION
+++ b/models/clm45/DESCRIPTION
@@ -24,3 +24,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: CLM45, land-surface-model, biogeochemistry, energy-balance, water-balance, vegetation-dynamics, soil-processes, climate-vegetation-interactions, carbon-cycle, nitrogen-cycle, hydrology, snow-processes
\ No newline at end of file
diff --git a/models/dalec/DESCRIPTION b/models/dalec/DESCRIPTION
index a73bd635c4d..d0b20ead403 100644
--- a/models/dalec/DESCRIPTION
+++ b/models/dalec/DESCRIPTION
@@ -26,3 +26,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: DALEC, carbon-cycle, vegetation-dynamics, soil-processes, biogeochemistry, ecosystem-model, climate-vegetation-interactions, carbon-pools
diff --git a/models/dvmdostem/DESCRIPTION b/models/dvmdostem/DESCRIPTION
index a22b0daa62e..ddd56299adb 100644
--- a/models/dvmdostem/DESCRIPTION
+++ b/models/dvmdostem/DESCRIPTION
@@ -27,3 +27,6 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: dvmdostem, vegetation-dynamics, carbon-cycle, nitrogen-cycle, soil-processes, biogeochemistry, ecosystem-model, climate-vegetation-interactions
+
+
diff --git a/models/ed/DESCRIPTION b/models/ed/DESCRIPTION
index 3e87cfc89f4..b5e702ccf5a 100644
--- a/models/ed/DESCRIPTION
+++ b/models/ed/DESCRIPTION
@@ -66,3 +66,4 @@ Encoding: UTF-8
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
Config/testthat/edition: 2
+X-schema.org-keywords: ED2, ecosystem-demography, vegetation-dynamics, carbon-cycle, biogeochemistry, soil-processes, climate-vegetation-interactions, forest-dynamics, phenology, disturbance-ecology
\ No newline at end of file
diff --git a/models/fates/DESCRIPTION b/models/fates/DESCRIPTION
index a44c6011931..bb06d274fb2 100644
--- a/models/fates/DESCRIPTION
+++ b/models/fates/DESCRIPTION
@@ -31,3 +31,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: FATES, ecosystem-demography, vegetation-dynamics, carbon-cycle, biogeochemistry, soil-processes, climate-vegetation-interactions, forest-dynamics, fire-ecology, hydrology
diff --git a/models/gday/DESCRIPTION b/models/gday/DESCRIPTION
index 3cda7b49439..7731a2300bb 100644
--- a/models/gday/DESCRIPTION
+++ b/models/gday/DESCRIPTION
@@ -27,3 +27,4 @@ LazyLoad: yes
LazyData: TRUE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: GDAY, carbon-cycle, nitrogen-cycle, biogeochemistry, soil-processes, vegetation-dynamics, water-balance, climate-vegetation-interactions, forest-dynamics, ecosystem-model
diff --git a/models/jules/DESCRIPTION b/models/jules/DESCRIPTION
index 3e6210fee3e..8e9b163288c 100644
--- a/models/jules/DESCRIPTION
+++ b/models/jules/DESCRIPTION
@@ -23,3 +23,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: JULES, land-surface-model, energy-balance, water-balance, carbon-cycle, vegetation-dynamics, soil-processes, snow-processes, climate-vegetation-interactions, biogeochemistry, surface-fluxes
diff --git a/models/ldndc/DESCRIPTION b/models/ldndc/DESCRIPTION
index 4cb800e9a37..f6514933477 100644
--- a/models/ldndc/DESCRIPTION
+++ b/models/ldndc/DESCRIPTION
@@ -26,3 +26,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: LDNDC, land-surface-model, carbon-cycle, nitrogen-cycle, soil-processes, vegetation-dynamics, biogeochemistry, climate-vegetation-interactions, soil-moisture, soil-temperature, decomposition
diff --git a/models/linkages/DESCRIPTION b/models/linkages/DESCRIPTION
index 79e0b26d0fb..cfca2374fa4 100644
--- a/models/linkages/DESCRIPTION
+++ b/models/linkages/DESCRIPTION
@@ -30,3 +30,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: LINKAGES, forest-dynamics, vegetation-succession, carbon-cycle, nitrogen-cycle, soil-processes, tree-growth, forest-structure, biomass-allocation, leaf-litter, soil-organic-matter
diff --git a/models/lpjguess/DESCRIPTION b/models/lpjguess/DESCRIPTION
index da914cf7d71..64bc18f9f62 100644
--- a/models/lpjguess/DESCRIPTION
+++ b/models/lpjguess/DESCRIPTION
@@ -29,3 +29,4 @@ LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
LinkingTo: Rcpp
+X-schema.org-keywords: LPJ-GUESS, vegetation-dynamics, carbon-cycle, nitrogen-cycle, soil-processes, land-use-change, crop-management, forest-dynamics, biogeochemistry, climate-vegetation-interactions, water-balance, phenology, biomass-allocation, disturbance-ecology
diff --git a/models/maat/DESCRIPTION b/models/maat/DESCRIPTION
index ce1684852b2..7114412e228 100644
--- a/models/maat/DESCRIPTION
+++ b/models/maat/DESCRIPTION
@@ -28,3 +28,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: MAAT, leaf-photosynthesis, carbon-assimilation, stomatal-conductance, leaf-respiration, leaf-biochemistry, leaf-physiology, leaf-traits, leaf-nitrogen, leaf-temperature, leaf-water-relations, leaf-radiation, leaf-gas-exchange
diff --git a/models/maespa/DESCRIPTION b/models/maespa/DESCRIPTION
index 40d1f78eaa3..1a8077ae36d 100644
--- a/models/maespa/DESCRIPTION
+++ b/models/maespa/DESCRIPTION
@@ -31,3 +31,5 @@ Copyright: Authors
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: MAESPA, canopy-photosynthesis, radiation-transfer, carbon-assimilation, water-balance, soil-processes, vegetation-dynamics, leaf-physiology, stomatal-conductance, transpiration, energy-balance, climate-vegetation-interactions, ecosystem-fluxes
+
diff --git a/models/preles/DESCRIPTION b/models/preles/DESCRIPTION
index 125a9ffddbe..5bcdd8dbdde 100644
--- a/models/preles/DESCRIPTION
+++ b/models/preles/DESCRIPTION
@@ -30,3 +30,4 @@ License: BSD_3_clause + file LICENSE
Copyright: Authors
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: PRELES, light-use-efficiency, evapotranspiration, soil-moisture, carbon-assimilation, water-balance, photosynthesis, ecosystem-fluxes, climate-vegetation-interactions, soil-processes, vegetation-dynamics
diff --git a/models/sibcasa/DESCRIPTION b/models/sibcasa/DESCRIPTION
index 0c3bae6faed..ffe0c157ba5 100644
--- a/models/sibcasa/DESCRIPTION
+++ b/models/sibcasa/DESCRIPTION
@@ -25,3 +25,4 @@ Copyright: Authors
LazyData: TRUE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: SiBCASA, carbon-cycle, nitrogen-cycle, soil-processes, vegetation-dynamics, photosynthesis, respiration, soil-moisture, soil-temperature, biogeochemistry, climate-vegetation-interactions
\ No newline at end of file
diff --git a/models/sipnet/DESCRIPTION b/models/sipnet/DESCRIPTION
index e5f14edc35d..b8e10d0e840 100644
--- a/models/sipnet/DESCRIPTION
+++ b/models/sipnet/DESCRIPTION
@@ -31,3 +31,4 @@ License: BSD_3_clause + file LICENSE
Copyright: Authors
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: SIPNET, carbon-cycle, nitrogen-cycle, soil-processes, vegetation-dynamics, photosynthesis, respiration, soil-moisture, evapotranspiration, snow-hydrology, leaf-area-index, biomass-allocation, litter-decomposition, soil-organic-matter
diff --git a/models/stics/DESCRIPTION b/models/stics/DESCRIPTION
index 2fcce87bfa7..166bf357b7e 100644
--- a/models/stics/DESCRIPTION
+++ b/models/stics/DESCRIPTION
@@ -31,3 +31,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: STICS, crop-growth, soil-processes, nitrogen-cycle, water-balance, photosynthesis, phenology, biomass-allocation, root-growth, soil-organic-matter, agricultural-management, crop-rotation, fertilization, irrigation, harvest
From c11443c6d90ee4f7c7a78dbdac742f825aa35b32 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Wed, 14 May 2025 00:45:03 +0530
Subject: [PATCH 0099/1193] feat(modules) : feat(models): add schema.org
keywords to modules DESCRIPTION files
---
modules/allometry/DESCRIPTION | 1 +
modules/assim.batch/DESCRIPTION | 1 +
modules/assim.sequential/DESCRIPTION | 1 +
modules/benchmark/DESCRIPTION | 1 +
modules/data.atmosphere/DESCRIPTION | 1 +
modules/data.land/DESCRIPTION | 1 +
modules/data.mining/DESCRIPTION | 1 +
modules/data.remote/DESCRIPTION | 1 +
modules/emulator/DESCRIPTION | 1 +
modules/meta.analysis/DESCRIPTION | 1 +
modules/photosynthesis/DESCRIPTION | 1 +
modules/priors/DESCRIPTION | 1 +
modules/rtm/DESCRIPTION | 1 +
modules/uncertainty/DESCRIPTION | 1 +
14 files changed, 14 insertions(+)
diff --git a/modules/allometry/DESCRIPTION b/modules/allometry/DESCRIPTION
index c031dc02766..f6c3973a952 100644
--- a/modules/allometry/DESCRIPTION
+++ b/modules/allometry/DESCRIPTION
@@ -31,3 +31,4 @@ LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
VignetteBuilder: knitr, rmarkdown
+X-schema.org-keywords: allometry, biomass-estimation, statistical-analysis, MCMC, Bayesian-inference, plant-traits, scaling-relationships, data-synthesis, uncertainty-quantification, model-fitting
diff --git a/modules/assim.batch/DESCRIPTION b/modules/assim.batch/DESCRIPTION
index 6283791ace5..b459375de1a 100644
--- a/modules/assim.batch/DESCRIPTION
+++ b/modules/assim.batch/DESCRIPTION
@@ -59,3 +59,4 @@ LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
+X-schema.org-keywords: data-assimilation, parameter-estimation, Bayesian-inference, MCMC, ecological-forecasting, uncertainty-quantification, model-calibration, hierarchical-modeling, emulator, parallel-computing, likelihood-estimation, prior-distributions, posterior-analysis, model-diagnostics, ensemble-analysis
diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index dfa454384cd..848be265706 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -66,3 +66,4 @@ License: BSD_3_clause + file LICENSE
Copyright: Authors
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: sequential-data-assimilation, ensemble-kalman-filter, particle-filter, ecological-forecasting, state-estimation, uncertainty-quantification, multi-site-analysis, process-variance, observation-operator, ensemble-analysis, time-series-analysis, spatial-localization, model-data-fusion, parameter-uncertainty, forecast-verification
diff --git a/modules/benchmark/DESCRIPTION b/modules/benchmark/DESCRIPTION
index e5a312f0c1d..f4329ea05c6 100644
--- a/modules/benchmark/DESCRIPTION
+++ b/modules/benchmark/DESCRIPTION
@@ -51,3 +51,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: model-evaluation, statistical-metrics, performance-assessment, model-data-comparison, error-analysis, time-series-analysis, ensemble-analysis, visualization, RMSE, MAE, R2, correlation-analysis, residual-analysis, model-validation, benchmark-scores, model-diagnostics
diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION
index 2f47345668a..d0d1f4fb74c 100644
--- a/modules/data.atmosphere/DESCRIPTION
+++ b/modules/data.atmosphere/DESCRIPTION
@@ -86,3 +86,4 @@ LazyData: FALSE
VignetteBuilder: knitr, rmarkdown
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: climate-data, meteorological-variables, data-standardization, atmospheric-forcing, data-conversion, weather-data, climate-drivers, data-processing, data-quality-control, data-interpolation, data-downscaling, data-assimilation, climate-forecasting, climate-reanalysis, climate-observations, climate-models, climate-variables, climate-data-sources, climate-data-formats, climate-data-validation, climate-data-transformation, climate-data-integration, climate-data-visualization, climate-data-analysis, climate-data-management, climate-data-archiving, climate-data-distribution, climate-data-access, climate-data-services
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index 0a86834e441..c871dd48695 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -73,3 +73,4 @@ Copyright: Authors
LazyData: true
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: land-surface-data, soil-properties, vegetation-data, land-cover, soil-texture, soil-carbon, soil-moisture, soil-thermal-properties, soil-hydraulic-properties, soil-chemistry, vegetation-structure, biomass-data, land-use, spatial-data, data-standardization, soil-depth-profile, soil-bulk-density, soil-organic-matter, soil-nutrients, soil-water-potential, soil-hydraulic-conductivity, soil-thermal-conductivity, soil-albedo, vegetation-biomass, vegetation-cover, vegetation-species, vegetation-phenology, land-cover-classification, spatial-interpolation, data-quality-control, data-format-conversion, soil-data-processing, vegetation-data-processing, land-cover-data-processing, spatial-data-processing, data-validation, data-transformation, data-integration, data-visualization, data-analysis, data-management, data-archiving, data-distribution, data-access, data-services
diff --git a/modules/data.mining/DESCRIPTION b/modules/data.mining/DESCRIPTION
index 60229a54ef7..484b1c854a7 100644
--- a/modules/data.mining/DESCRIPTION
+++ b/modules/data.mining/DESCRIPTION
@@ -21,3 +21,4 @@ LazyData: FALSE
Collate:
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: data.mining, model-residuals, spectral-analysis, wavelet-analysis, time-series-analysis, pattern-detection, model-diagnostics, statistical-analysis, data-mining, model-evaluation, error-analysis, temporal-patterns, frequency-analysis, power-spectrum, model-structure, data-exploration, statistical-significance, model-comparison, residual-normalization, spectral-decomposition, temporal-scales, model-error, data-quality, statistical-methods, model-assessment, data-visualization, statistical-testing, model-validation, data-analysis, statistical-inference, model-diagnostics, data-processing, statistical-computing, model-analysis, data-mining, statistical-learning, model-exploration, data-exploration, statistical-modeling, model-investigation, data-investigation, statistical-analysis, model-research, data-research, statistical-research, model-study, data-study, statistical-study
diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION
index 94677700fc0..df0fb070609 100644
--- a/modules/data.remote/DESCRIPTION
+++ b/modules/data.remote/DESCRIPTION
@@ -54,3 +54,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: remote-sensing, satellite-data, MODIS, Landsat, data-extraction, spatial-data, geospatial-analysis, satellite-imagery, data-processing, data-download, data-standardization, quality-control, data-validation, spatial-analysis, temporal-analysis, data-integration, data-transformation, data-visualization, data-management, data-archiving, data-distribution, data-access, data-services, satellite-products, land-cover, vegetation-indices, phenology-data, biomass-data, climate-data, environmental-data, earth-observation, geospatial-data, remote-sensing-analysis, satellite-data-processing, remote-sensing-data, satellite-imagery-processing, remote-sensing-products, satellite-data-analysis, remote-sensing-tools, satellite-data-tools, remote-sensing-methods, satellite-data-methods, remote-sensing-techniques, satellite-data-techniques
diff --git a/modules/emulator/DESCRIPTION b/modules/emulator/DESCRIPTION
index ab62ac706ad..877babe6ff9 100644
--- a/modules/emulator/DESCRIPTION
+++ b/modules/emulator/DESCRIPTION
@@ -16,3 +16,4 @@ Description: Implementation of a Gaussian Process model (both likelihood and
License: BSD_3_clause + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: emulator, gaussian-process, model-emulation, kriging, bayesian-inference, likelihood-estimation, spatial-statistics, statistical-modeling, uncertainty-quantification, parameter-estimation, model-calibration, sampling-design, prediction-intervals, credible-intervals, spatial-correlation, covariance-modeling, nugget-effect, isotropic-modeling, anisotropic-modeling, mcmc-sampling, bayesian-calibration, statistical-emulation, surrogate-modeling, uncertainty-analysis, parameter-space-exploration, model-uncertainty, spatial-prediction, statistical-inference, model-validation, statistical-analysis, computational-statistics, bayesian-statistics, likelihood-methods, spatial-analysis, statistical-computing, model-optimization, parameter-optimization, statistical-optimization, model-fitting, statistical-fitting, spatial-modeling, statistical-modeling, computational-modeling, bayesian-modeling, likelihood-modeling, spatial-statistics, statistical-computing, model-calibration, statistical-calibration, spatial-calibration, bayesian-calibration, likelihood-calibration
diff --git a/modules/meta.analysis/DESCRIPTION b/modules/meta.analysis/DESCRIPTION
index 60ba579a6e4..e3e5795027d 100644
--- a/modules/meta.analysis/DESCRIPTION
+++ b/modules/meta.analysis/DESCRIPTION
@@ -48,3 +48,4 @@ LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
+X-schema.org-keywords: bayesian-meta-analysis, hierarchical-modeling, trait-data-analysis, parameter-estimation, uncertainty-quantification, statistical-modeling, mcmc-sampling, posterior-distributions, prior-distributions, model-convergence, gelman-rubin-diagnostics, random-effects, fixed-effects, greenhouse-effects, site-effects, treatment-effects, trait-priors, trait-posteriors, statistical-inference, bayesian-statistics, likelihood-methods, data-integration, model-calibration, parameter-optimization, statistical-optimization, model-validation, statistical-validation, data-standardization, statistical-computing, bayesian-computing, likelihood-computing, statistical-analysis, bayesian-analysis, likelihood-analysis, statistical-modeling, bayesian-modeling, likelihood-modeling, statistical-calibration, bayesian-calibration, likelihood-calibration, statistical-optimization, bayesian-optimization, likelihood-optimization, statistical-validation, bayesian-validation, likelihood-validation
diff --git a/modules/photosynthesis/DESCRIPTION b/modules/photosynthesis/DESCRIPTION
index b101522ae90..9e46e9f0ea2 100644
--- a/modules/photosynthesis/DESCRIPTION
+++ b/modules/photosynthesis/DESCRIPTION
@@ -37,3 +37,4 @@ LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
VignetteBuilder: knitr, rmarkdown
+X-schema.org-keywords: photosynthesis, ecosystem modeling, PEcAn, carbon cycle, Bayesian calibration, JAGS, rleavanc
\ No newline at end of file
diff --git a/modules/priors/DESCRIPTION b/modules/priors/DESCRIPTION
index abfc6f2bb56..420472b007f 100644
--- a/modules/priors/DESCRIPTION
+++ b/modules/priors/DESCRIPTION
@@ -26,3 +26,4 @@ Suggests:
Encoding: UTF-8
VignetteBuilder: knitr, rmarkdown
RoxygenNote: 7.3.2
+X-schema.org-keywords: prior-estimation, data-analysis, statistical-modeling, Bayesian-inference, likelihood-methods, parameter-estimation, uncertainty-quantification, statistical-analysis, computational-statistics, Bayesian-statistics, likelihood-statistics, statistical-computing, Bayesian-computing, likelihood-computing, statistical-methods, Bayesian-methods, likelihood-methods, statistical-analysis, Bayesian-analysis, likelihood-analysis, statistical-modeling, Bayesian-modeling, likelihood-modeling, statistical-calibration, Bayesian-calibration, likelihood-calibration, statistical-optimization, Bayesian-optimization, likelihood-optimization, statistical-validation, Bayesian-validation, likelihood-validation
diff --git a/modules/rtm/DESCRIPTION b/modules/rtm/DESCRIPTION
index afa1199b6cc..200396ce9d1 100644
--- a/modules/rtm/DESCRIPTION
+++ b/modules/rtm/DESCRIPTION
@@ -41,3 +41,4 @@ Encoding: UTF-8
VignetteBuilder: knitr, rmarkdown
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
+X-schema.org-keywords: radiative transfer modeling, Bayesian inversion, remote sensing, spectral reflectance, hierarchical modeling, PEcAn, RTM, leaf optics
diff --git a/modules/uncertainty/DESCRIPTION b/modules/uncertainty/DESCRIPTION
index 6074bd2cc0d..a86becb8010 100644
--- a/modules/uncertainty/DESCRIPTION
+++ b/modules/uncertainty/DESCRIPTION
@@ -48,3 +48,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
+X-schema.org-keywords: uncertainty quantification, ecological forecasting, ensemble modeling, sensitivity analysis, PEcAn, uncertainty propagation, reanalysis, variance partitioning
\ No newline at end of file
From e024eec1116d384b66733db73eab36dffd23b877 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Wed, 14 May 2025 00:52:41 +0530
Subject: [PATCH 0100/1193] feat: added releavant keywords for modules
---
modules/allometry/DESCRIPTION | 3 ++-
modules/assim.batch/DESCRIPTION | 2 +-
modules/benchmark/DESCRIPTION | 2 +-
modules/data.atmosphere/DESCRIPTION | 2 +-
4 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/modules/allometry/DESCRIPTION b/modules/allometry/DESCRIPTION
index f6c3973a952..27deec578da 100644
--- a/modules/allometry/DESCRIPTION
+++ b/modules/allometry/DESCRIPTION
@@ -31,4 +31,5 @@ LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
VignetteBuilder: knitr, rmarkdown
-X-schema.org-keywords: allometry, biomass-estimation, statistical-analysis, MCMC, Bayesian-inference, plant-traits, scaling-relationships, data-synthesis, uncertainty-quantification, model-fitting
+X-schema.org-keywords: allometry, biomass-estimation, statistical-analysis, MCMC, Bayesian-inference, plant-traits, scaling-relationships, data-synthesis
+
diff --git a/modules/assim.batch/DESCRIPTION b/modules/assim.batch/DESCRIPTION
index b459375de1a..b07de1c56a6 100644
--- a/modules/assim.batch/DESCRIPTION
+++ b/modules/assim.batch/DESCRIPTION
@@ -59,4 +59,4 @@ LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
-X-schema.org-keywords: data-assimilation, parameter-estimation, Bayesian-inference, MCMC, ecological-forecasting, uncertainty-quantification, model-calibration, hierarchical-modeling, emulator, parallel-computing, likelihood-estimation, prior-distributions, posterior-analysis, model-diagnostics, ensemble-analysis
+X-schema.org-keywords: data-assimilation, parameter-estimation, Bayesian-inference, MCMC, ecological-forecasting, model-calibration, hierarchical-modeling, emulator,likelihood-estimation, posterior-analysis
diff --git a/modules/benchmark/DESCRIPTION b/modules/benchmark/DESCRIPTION
index f4329ea05c6..2a32f37310b 100644
--- a/modules/benchmark/DESCRIPTION
+++ b/modules/benchmark/DESCRIPTION
@@ -51,4 +51,4 @@ LazyLoad: yes
LazyData: FALSE
Encoding: UTF-8
RoxygenNote: 7.3.2
-X-schema.org-keywords: model-evaluation, statistical-metrics, performance-assessment, model-data-comparison, error-analysis, time-series-analysis, ensemble-analysis, visualization, RMSE, MAE, R2, correlation-analysis, residual-analysis, model-validation, benchmark-scores, model-diagnostics
+X-schema.org-keywords: benchmarking, ecological modeling, model evaluation, statistical metrics, model-data comparison, PEcAn
\ No newline at end of file
diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION
index d0d1f4fb74c..9965bb4254a 100644
--- a/modules/data.atmosphere/DESCRIPTION
+++ b/modules/data.atmosphere/DESCRIPTION
@@ -86,4 +86,4 @@ LazyData: FALSE
VignetteBuilder: knitr, rmarkdown
Encoding: UTF-8
RoxygenNote: 7.3.2
-X-schema.org-keywords: climate-data, meteorological-variables, data-standardization, atmospheric-forcing, data-conversion, weather-data, climate-drivers, data-processing, data-quality-control, data-interpolation, data-downscaling, data-assimilation, climate-forecasting, climate-reanalysis, climate-observations, climate-models, climate-variables, climate-data-sources, climate-data-formats, climate-data-validation, climate-data-transformation, climate-data-integration, climate-data-visualization, climate-data-analysis, climate-data-management, climate-data-archiving, climate-data-distribution, climate-data-access, climate-data-services
+X-schema.org-keywords: climate-data, atmospheric-forcing, data-standardization, data-assimilation, climate-reanalysis, weather-data, data-processing
From f3871d2a18a4eab701cdac5bb722fb0b867afa00 Mon Sep 17 00:00:00 2001
From: Katherine Rein
Date: Sun, 18 May 2025 17:29:27 -0400
Subject: [PATCH 0101/1193] Added GEE and GEE vs API analysis
---
.../inst/Python/CCMMF_Irrigation_API.py | 16 +-
.../inst/Python/CCMMF_Irrigation_CalcVis.py | 42 +++-
.../Python/CCMMF_Irrigation_DataDownload.py | 93 ++++++--
.../inst/Python/CCMMF_Irrigation_GEE.py | 159 +++++++++++++
.../inst/Python/CCMMF_Irrigation_GEEvAPI.py | 213 ++++++++++++++++++
modules/data.remote/inst/Python/README.txt | 9 +-
.../CCMMF_Irrigation_CalcVis.cpython-312.pyc | Bin 4179 -> 5864 bytes
...MF_Irrigation_DataDownload.cpython-312.pyc | Bin 6159 -> 9269 bytes
8 files changed, 497 insertions(+), 35 deletions(-)
create mode 100644 modules/data.remote/inst/Python/CCMMF_Irrigation_GEE.py
create mode 100644 modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
index 754c07b7b58..5c7d0c16b86 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
@@ -17,12 +17,10 @@
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.dataset as ds
-import ee
import CCMMF_Irrigation_DataDownload
import CCMMF_Irrigation_CalcVis
import CCMMF_Irrigation_Events
-ee.Initialize()
# %% Define multi use variables
@@ -82,12 +80,15 @@
# %% Iterate through locations and download data for each
-for row_number in range(34):
+for row_number in range(35):
# Load location data
latitude = df_lat_lon['lat'].iloc[row_number]
longitude = df_lat_lon['lon'].iloc[row_number]
location = df_lat_lon['id'].iloc[row_number]
+
+ # Create CSV filename
+ csv_filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'
if location in data_dict:
@@ -108,8 +109,7 @@
data_dict[location] = df
# Save data
- filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'
- df.to_csv(filename, index=False)
+ df.to_csv(csv_filename, index=False)
# Check that all years have been read in
df['time'] = pd.to_datetime(df['time'])
@@ -133,8 +133,7 @@
data_dict[location] = df
# Save data
- filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'
- df.to_csv(filename, index=False)
+ df.to_csv(csv_filename, index=False)
# The location is not in the saved dictionary
else:
@@ -144,8 +143,7 @@
data_dict[location] = df
# Save data
- filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'
- df.to_csv(filename, index=False)
+ df.to_csv(csv_filename, index=False)
# %% Create Event Files
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
index 289112e08e4..ece689b9160 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
@@ -13,12 +13,8 @@
# %% Turn raw data into usable data
-def water_balance(df_open_et, precip_data, LAT, LON):
+def water_balance(df_water_balance, LAT, LON):
print(f'{LAT} {LON}')
-
- # Create dataframe
- df_water_balance = df_open_et
- df_water_balance['precip'] = precip_data
# Handle NAs
df_water_balance['et'] = df_water_balance['et'].fillna(0)
@@ -68,9 +64,10 @@ def water_balance(df_open_et, precip_data, LAT, LON):
return df_water_balance
+
# %% Time Series
-def timeseries_graphs(df_water_balance, LAT, LON, YEAR):
+def timeseries_graphs_API(df_water_balance, LAT, LON, YEAR):
# Slicing warning if not copied
df_water_balance = df_water_balance.copy()
@@ -101,4 +98,37 @@ def timeseries_graphs(df_water_balance, LAT, LON, YEAR):
filename = f'/projectnb/dietzelab/ccmmf/management/irrigation/TimeseriesPNG/CCMMR_et_precip_irr_cumsum_{YEAR}_{LAT}_{LON}.png'
plt.savefig(filename)
+ plt.show()
+
+def timeseries_graphs_GEE(df_water_balance, LAT, LON, YEAR):
+
+ # Slicing warning if not copied
+ df_water_balance = df_water_balance.copy()
+
+ # Create cumulative sum columns
+ df_water_balance['et_cumsum'] = df_water_balance['et'].cumsum()
+ df_water_balance['precip_cumsum'] = df_water_balance['precip'].cumsum()
+ df_water_balance['irr_cumsum'] = df_water_balance['irr'].cumsum()
+
+ # Ensure time is dates
+ df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])
+
+ # Plot time series
+ plt.figure(figsize=(10, 5))
+ plt.plot(df_water_balance['time'], df_water_balance['et_cumsum'], linestyle = 'dotted', lw = 2.5, label = 'Evapotranspiration')
+ plt.plot(df_water_balance['time'], df_water_balance['precip_cumsum'], linestyle = 'dashed', lw = 2.5, label = 'Precipitation')
+ plt.plot(df_water_balance['time'], df_water_balance['irr_cumsum'], linestyle = 'dashdot', lw = 2.5, label = 'Irrigation')
+ plt.plot(df_water_balance['time'], df_water_balance['runoff'], linestyle = 'solid', lw = 2.5, label = 'Runoff')
+
+ plt.xlabel('Date')
+ plt.ylabel('Monthly Cumulative Sum of Evapotransipiration, \nPrecipitation, and Irrigation (mm)')
+ plt.suptitle('Evapotransipiration and Precipitation Time Series in Central Valley CA')
+ plt.title(f'(Lat: {LAT}, Lon: {LON})')
+ plt.legend()
+ plt.grid()
+
+ # Save plot
+ filename = f'/projectnb/dietzelab/ccmmf/management/irrigation/TimeseriesPNG_GEE/CCMMR_GEE_cumsum_{YEAR}_{LAT}_{LON}.png'
+ plt.savefig(filename)
+
plt.show()
\ No newline at end of file
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
index d57b3711da8..f51ecca83aa 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_DataDownload.py
@@ -10,7 +10,7 @@
import requests
import numpy as np
import pandas as pd
-from netCDF4 import Dataset
+from netCDF4 import Dataset, num2date
import CCMMF_Irrigation_CalcVis
import os
import ee
@@ -21,7 +21,6 @@
# %% Download GEE OPEN ET Data
def GEEOpenET(START_DATE, END_DATE, LAT, LON):
- '''THIS DOES NOT RUN AT THE MOMENT'''
# Access OpenET dataset
collection = ee.ImageCollection("OpenET/ENSEMBLE/CONUS/GRIDMET/MONTHLY/v2_0") \
@@ -32,18 +31,16 @@ def GEEOpenET(START_DATE, END_DATE, LAT, LON):
def extract_et(img):
date = img.date().format()
et = img.reduceRegion(ee.Reducer.first(), ee.Geometry.Point([LON, LAT]), 1000).get('et_ensemble_mad')
- return ee.Feature(None, {'date': date, 'et': et})
+ return ee.Feature(None, {'time': date, 'et': et})
et_series = collection.map(extract_et)
# Convert data to df
et_series = et_series.getInfo() # Convert from ee.List to Python list
- print(et_series)
- print(type(et_series))
- open_et_df = pd.DataFrame(et_series)
- open_et_df['date'] = pd.to_datetime(open_et_df['date'])
-
- print(open_et_df)
+ et_series = et_series['features'] # Select just the features dictionary
+ open_et_df = pd.DataFrame(et_series) # Turn dictionary into dataframe
+ open_et_df = open_et_df['properties'].apply(pd.Series) # Select properties and turn dictionary into dataframe
+ open_et_df['time'] = pd.to_datetime(open_et_df['time'])
return open_et_df
@@ -111,9 +108,10 @@ def CHIRPSData(YEAR, LAT, LON):
#precip_variable = nc_data.variables['precip']
#print(precip_variable)
- # Extract coordinate variables
+ # Extract coordinate and time variables
lon = nc_data.variables['longitude'][:]
lat = nc_data.variables['latitude'][:]
+ time = nc_data.variables['time']
# Find the nearest lat/lon index
lon_idx = np.abs(lon - LON).argmin()
@@ -121,17 +119,24 @@ def CHIRPSData(YEAR, LAT, LON):
# Extract the data just for that lat lon
precip_data = nc_data.variables['precip'][:, lat_idx, lon_idx]
+
+ # Convert time to standard datetime
+ dates = num2date(time[:], units=time.units, calendar=time.calendar)
+ dates = [pd.Timestamp(date.isoformat()) for date in dates]
# Close the NetCDF file when done
nc_data.close()
# Clean data
precip_data = precip_data.filled(np.nan)
- precip_data_df = pd.DataFrame(precip_data)
+ precip_data_df = pd.DataFrame({
+ 'time': dates,
+ 'precip': precip_data
+ })
return precip_data_df
-# %% Calculate and visualize new data
+# %% Calculate and visualize new data for the API downloded data
def new_data_entry_API(LAT, LON, years, csv_folder, START_DATE = None, END_DATE = None):
print(f'{LAT} {LON} {years}')
@@ -151,14 +156,74 @@ def new_data_entry_API(LAT, LON, years, csv_folder, START_DATE = None, END_DATE
precip_data = pd.concat([precip_data, precip_data_year], ignore_index=True)
# Organize and water balance
- df_water_balance = CCMMF_Irrigation_CalcVis.water_balance(et_df, precip_data, LAT, LON)
+ df_water_balance = et_df
+ df_water_balance['precip'] = precip_data['precip']
+ df_water_balance = CCMMF_Irrigation_CalcVis.water_balance(df_water_balance, LAT, LON)
# Graph
df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])
for year in years:
- CCMMF_Irrigation_CalcVis.timeseries_graphs(df_water_balance[df_water_balance['time'].dt.year == year], LAT, LON, year)
+ CCMMF_Irrigation_CalcVis.timeseries_graphs_API(df_water_balance[df_water_balance['time'].dt.year == year], LAT, LON, year)
# Save to csv to ensure data is stored
filename = f'{csv_folder}CCMMR_Water_Balance_{LAT}_{LON}.csv'
df_water_balance.to_csv(filename, index=False)
+ return df_water_balance
+
+# %% Calculate and visualize new data for the Google Earth Engine downloded data
+
+def new_data_entry_GEE(LAT, LON, years, csv_folder, START_DATE = None, END_DATE = None):
+ print(f'{LAT} {LON} {years}')
+
+ # Define start and end date
+ if START_DATE == None or END_DATE == None:
+ START_DATE = f'{years[0]}-01-01'
+ END_DATE = f'{years[-1]}-12-31'
+
+ # Download open et data
+ et_df = GEEOpenET(START_DATE, END_DATE, LAT, LON)
+
+ # Download CHIRPS data year by year and concatenate
+ precip_data = pd.DataFrame()
+ for year in years:
+ precip_data_year = CHIRPSData(year, LAT, LON)
+ precip_data = pd.concat([precip_data, precip_data_year], ignore_index=True)
+
+ # Interpolate et data to daily
+ # Find average daily et for each month
+ et_df['time'] = pd.to_datetime(et_df['time'])
+ et_df['days_in_month'] = et_df['time'].dt.days_in_month
+ et_df['avg_et'] = et_df['et'] / et_df['days_in_month']
+ et_df.set_index('time', inplace = True)
+
+ # Expand average to daily dataframe
+ end_of_month = et_df.index.max() + pd.offsets.MonthEnd(0) # extend end to the end of the last month
+ daily_index = pd.date_range(start = et_df.index.min(), end = end_of_month, freq = 'D') # find all days in range
+ daily_et_df = et_df.reindex(daily_index) # Expand dataframe to include all days
+
+ daily_et_df['avg_et'] = daily_et_df['avg_et'].ffill() # Fill in all missing values with the starting value
+ #daily_et_df['avg_et'] = daily_et_df['avg_et'].interpolate(method='time') # linear interpolation
+ daily_et_df = daily_et_df[['avg_et']] # select just the avegarged data
+ daily_et_df = daily_et_df.rename(columns={'avg_et': 'et'})
+
+ # Merge precip and et data
+ precip_data['time'] = pd.to_datetime(precip_data['time'])
+ precip_data.set_index('time', inplace = True)
+ df_water_balance = daily_et_df.join(precip_data, how='inner') # merge with et data (only keeping values from both)
+ df_water_balance = df_water_balance.reset_index().rename(columns={'index': 'time'}) # reset index so theirs a time column back
+
+ # Oragaize and water balacne
+ df_water_balance = CCMMF_Irrigation_CalcVis.water_balance(df_water_balance, LAT, LON)
+
+ # Graph
+ df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])
+ years = df_water_balance['time'].dt.year.unique()
+ years.sort()
+
+ for year in years:
+ CCMMF_Irrigation_CalcVis.timeseries_graphs_GEE(df_water_balance[df_water_balance['time'].dt.year == year], LAT, LON, year)
+
+ # Save to csv to ensure data is stored
+ filename = f'{csv_folder}CCMMR_Water_Balance_{LAT}_{LON}_GEE.csv'
+ df_water_balance.to_csv(filename, index=False)
return df_water_balance
\ No newline at end of file
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_GEE.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_GEE.py
new file mode 100644
index 00000000000..6d664d50f3c
--- /dev/null
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_GEE.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Mar 6 13:59:07 2025
+
+@author: katherineanne
+"""
+# %% Import modules
+
+import requests
+import numpy as np
+from netCDF4 import Dataset, num2date
+import matplotlib.pyplot as plt
+import pandas as pd
+import os
+from datetime import datetime, date, timedelta
+import pyarrow as pa
+import pyarrow.parquet as pq
+import pyarrow.dataset as ds
+import ee
+import CCMMF_Irrigation_DataDownload
+import CCMMF_Irrigation_CalcVis
+import CCMMF_Irrigation_Events
+
+ee.Initialize()
+
+# %% Define multi use variables
+
+# Define years to look at
+years = list(range(2016, 2026))
+
+# Define main folder
+main_folder = '/projectnb/dietzelab/ccmmf/management/irrigation/'
+
+# Define folder name for csv files
+csv_folder = main_folder + 'WaterBalanceCSV_GEE/'
+
+# Define the name of the parquet filename
+pq_filename = main_folder + 'CCMMF_Irrigation_Parquet_GEE'
+
+# %% Loading data
+
+# Read in parquet file
+# Load the full dataset
+dataset = ds.dataset(pq_filename, format="parquet", partitioning = 'hive')
+table = dataset.to_table()
+parquet_df = table.to_pandas()
+days_to_download = 0
+
+# Group by the location column and convert to dictionary
+data_dict = {location: location_df for location, location_df in parquet_df.groupby("location")}
+
+# %% Check current date with most current downloaded data
+
+# Delete the current CHIRPS file for this year
+# This will ensure we read in the new data for the current date
+# We only do this if the data is not up to date
+cur_year = datetime.now().year
+today = datetime.now().date()
+chirps_filename = f'{main_folder}chirps-v2.0.{cur_year}.days_p05.nc'
+
+if os.path.exists(chirps_filename):
+ with Dataset(chirps_filename, 'r') as nc:
+
+ time_var = nc.variables['time']
+ dates = num2date(time_var[:], units=time_var.units)
+ most_recent = max(dates)
+ most_recent_date = date(most_recent.year, most_recent.month, most_recent.day)
+
+ if most_recent_date != today:
+ print('Deleted')
+ days_to_download = (today - most_recent_date).days
+ os.remove(chirps_filename)
+
+# %% Define locations
+
+# Read in all lat lons
+df_lat_lon = pd.read_csv(f'{main_folder}design_points.csv')
+
+# Handle duplicates
+df_lat_lon = df_lat_lon.drop_duplicates()
+
+# %% Iterate through locations and download data for each
+
+for row_number in range(5):
+
+ # Load location data
+ latitude = df_lat_lon['lat'].iloc[row_number]
+ longitude = df_lat_lon['lon'].iloc[row_number]
+ location = df_lat_lon['id'].iloc[row_number]
+
+ # Create CSV name
+ csv_filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}_GEE.csv'
+
+ if location in data_dict:
+
+ df = data_dict[location]
+
+ # If we have not downloaded data for today yet...
+ if days_to_download != 0:
+ # Download new data
+ start_date = today - timedelta(days=days_to_download)
+ new_df = CCMMF_Irrigation_DataDownload.new_data_entry_GEE(latitude, longitude,
+ [start_date.year, cur_year],
+ csv_folder, start_date, today)
+
+ # Concatenate with already saved data
+ old_df = data_dict[location]
+ df = pd.concat([new_df, old_df], ignore_index=True)
+ df = df.sort_values(by='time')
+ data_dict[location] = df
+
+ # Save data
+ df.to_csv(csv_filename, index=False)
+
+ # Check that all years have been read in
+ df['time'] = pd.to_datetime(df['time'])
+ df_years = df['time'].dt.year.unique().tolist()
+
+ if set(df_years) != set(years):
+
+ # Years in what years we want but not in saved data
+ # Does not care if there are values in saved data that are not in wanted years
+ not_saved_years = set(years) - set(df_years)
+ not_saved_years = list(not_saved_years)
+
+ # Download data and calculate for new years
+ new_df = CCMMF_Irrigation_DataDownload.new_data_entry_GEE(latitude, longitude,
+ not_saved_years, csv_folder)
+
+ # Concatenate with already saved data
+ old_df = data_dict[location]
+ df = pd.concat([new_df, old_df], ignore_index=True)
+ df = df.sort_values(by='time')
+ data_dict[location] = df
+
+ # Save data
+ df.to_csv(csv_filename, index=False)
+
+ # The location is not in the saved dictionary
+ else:
+ # Download and calculate if it doesn't exist
+ df = CCMMF_Irrigation_DataDownload.new_data_entry_GEE(latitude, longitude,
+ years, csv_folder)
+ data_dict[location] = df
+
+ # Save data
+ df.to_csv(csv_filename, index=False)
+
+# %% Create Event Files
+
+#CCMMF_Irrigation_Events.file_creation(data_dict)
+
+# %% Write to parquet
+
+for location, loc_df in data_dict.items():
+ loc_df['location'] = location
+ table = pa.Table.from_pandas(loc_df)
+ pq.write_to_dataset(table, root_path = pq_filename, partition_cols = ['location', 'year'])
\ No newline at end of file
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py
new file mode 100644
index 00000000000..0868e22db3e
--- /dev/null
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu May 15 21:48:42 2025
+
+@author: krein21
+"""
+
+# %% Import modules
+
+import requests
+import numpy as np
+from netCDF4 import Dataset, num2date
+import matplotlib.pyplot as plt
+import pandas as pd
+import os
+from datetime import datetime, date, timedelta
+import pyarrow as pa
+import pyarrow.parquet as pq
+import pyarrow.dataset as ds
+from sklearn.metrics import mean_squared_error, r2_score
+import seaborn as sns
+
+# %% Define multi use variables
+
+# Define years to look at
+years = list(range(2016, 2026))
+
+# Define main folder
+main_folder = '/projectnb/dietzelab/ccmmf/management/irrigation/'
+
+# Define the name of the parquet filename for API
+API_pq_filename = main_folder + 'CCMMF_Irrigation_Parquet'
+
+# Define the name of the parquet filename for GEE
+GEE_pq_filename = main_folder + 'CCMMF_Irrigation_Parquet_GEE'
+
+# %% Loading data
+
+# GEE Data Download
+
+# Read in parquet file
+# Load the full dataset
+GEE_dataset = ds.dataset(GEE_pq_filename, format="parquet", partitioning = 'hive')
+GEE_table = GEE_dataset.to_table()
+GEE_parquet_df = GEE_table.to_pandas()
+
+# Group by the location column and convert to dictionary
+GEE_data_dict = {location: location_df for location, location_df in GEE_parquet_df.groupby("location")}
+
+# API Data Download
+
+# Read in parquet file
+# Load the full dataset
+API_dataset = ds.dataset(API_pq_filename, format="parquet", partitioning = 'hive')
+API_table = API_dataset.to_table()
+API_parquet_df = API_table.to_pandas()
+
+# Group by the location column and convert to dictionary
+API_data_dict = {location: location_df for location, location_df in API_parquet_df.groupby("location")}
+
+# %% Merge API and GEE data
+
+merged_data_dict = {}
+
+for key in GEE_data_dict.keys():
+
+ if key in API_data_dict:
+ print(key)
+
+ # Select both dataframes
+ df_gee = GEE_data_dict[key].copy()
+ df_api = API_data_dict[key].copy()
+
+ # Merge dataframes
+ # Use time as the connecting feature
+ # Only save data if both dataframes have it
+ # Specify suffixes
+ merged_df = pd.merge(df_gee, df_api, on = 'time', how = 'inner', suffixes = ('_GEE', '_API'))
+
+ # Add to dictionary
+ merged_data_dict[key] = merged_df
+
+
+# %% Aggregate weekly
+
+# Create weekly irrigation, precipitation, et data_dict
+merged_data_dict_weekly = {}
+
+for key, df in merged_data_dict.items():
+
+ # Calculate new units for irrigation et and precip(cm)
+ df['irr_GEE'] = df['irr_GEE'] * 0.1
+ df['irr_API'] = df['irr_API'] * 0.1
+ df['et_GEE'] = df['et_GEE'] * 0.1
+ df['et_API'] = df['et_API'] * 0.1
+ df['precip_GEE'] = df['precip_GEE'] * 0.1
+
+ # Add changed units to data dict
+ merged_data_dict[key] = df
+
+ # Aggregate by week
+ # Sum irrigation
+ weekly_df = df.groupby(['year_GEE', 'week_GEE'], as_index = False).agg({
+ 'time': 'first',
+ 'et_GEE': 'sum',
+ 'precip_GEE': 'sum',
+ 'irr_GEE': 'sum',
+ 'et_API': 'sum',
+ 'precip_API': 'sum',
+ 'irr_API': 'sum'
+ })
+
+ # Remove week column
+ weekly_df = weekly_df.drop('week_GEE', axis = 1)
+
+ # Add to weekly data_dict
+ merged_data_dict_weekly[key] = weekly_df
+
+# %% Predicted Observed Plots
+
+# One of all locations
+
+# Flatten all data into one dataframe
+flattened_df = pd.concat(
+ [df.assign(id = key) for key, df in merged_data_dict_weekly.items()],
+ ignore_index = True
+)
+
+# Clean data
+flattened_df = flattened_df.dropna(subset=['irr_API', 'irr_GEE'])
+
+# X - weekly irrigation from API (sum weekly)
+# Y - weekly irrigation from GEE (sum weekly)
+# Delineate location by color
+sns.scatterplot(data = flattened_df, x = 'irr_API', y = 'irr_GEE', hue = 'id',
+ s = 10, legend = False)
+
+# 1:1 line
+min_val = 0
+max_val = max(max(flattened_df['irr_API']), max(flattened_df['irr_GEE']))
+plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='1:1 line')
+
+
+# RMSE/R^2
+rmse = np.sqrt(mean_squared_error(flattened_df['irr_API'], flattened_df['irr_GEE']))
+r2 = r2_score(flattened_df['irr_API'], flattened_df['irr_GEE'])
+
+# Labels
+plt.xlabel('Daily Sampled Evapotranspiration Data')
+plt.ylabel('Monthly Sampled Evapotranspiration Data')
+plt.suptitle('Impact of Interpolation on Irrigation (cm) Calculation')
+plt.title(f'RMSE = {rmse:.2f} $R^2$ = {r2:.2f}')
+plt.grid(True)
+plt.tight_layout()
+plt.show()
+
+# One for each location
+for key, df in merged_data_dict_weekly.items():
+
+ # Clean data
+ df = df.dropna(subset=['irr_API', 'irr_GEE'])
+
+ # Scatterplot
+ sns.scatterplot(data = df, x = 'irr_API', y = 'irr_GEE', s = 10, legend = False)
+
+ # 1:1 line
+ min_val = 0
+ max_val = max(max(df['irr_API']), max(df['irr_GEE']))
+ plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='1:1 line')
+
+
+ # RMSE/R^2
+ rmse = np.sqrt(mean_squared_error(df['irr_API'], df['irr_GEE']))
+ r2 = r2_score(df['irr_API'], df['irr_GEE'])
+
+ # Labels
+ plt.xlabel('Daily Sampled Evapotranspiration Data')
+ plt.ylabel('Monthly Sampled Evapotranspiration Data')
+ plt.suptitle(f'Impact of Interpolation on Irrigation (cm) Calculation for {key}')
+ plt.title(f'RMSE = {rmse:.2f} $R^2$ = {r2:.2f}')
+ plt.grid(True)
+ plt.tight_layout()
+ plt.show()
+
+# %% Time Series Plots
+
+for key, df in merged_data_dict_weekly.items():
+
+ # Sort by time
+ df = df.sort_values(by='time')
+
+ # Create cumulative sum columns
+ df['irr_API_cumsum'] = df['irr_API'].cumsum()
+ df['irr_GEE_cumsum'] = df['irr_GEE'].cumsum()
+ df['precip_GEE_cumsum'] = df['precip_GEE'].cumsum()
+ df['et_API_cumsum'] = df['et_API'].cumsum()
+ df['et_GEE_cumsum'] = df['et_GEE'].cumsum()
+
+ # Plot time series
+ plt.figure(figsize=(10, 5))
+ plt.plot(df['time'], df['irr_API_cumsum'], linestyle = 'dotted', lw = 2.5, color = 'royalblue', label = 'API Irrigation')
+ plt.plot(df['time'], df['irr_GEE_cumsum'], linestyle = 'dotted', lw = 2.5, color = 'yellowgreen', label = 'GEE Irrigation')
+ plt.plot(df['time'], df['precip_GEE_cumsum'], linestyle = 'solid', lw = 2.5, color = 'mediumpurple', label = 'Precipitation')
+ plt.plot(df['time'], df['et_API_cumsum'], linestyle = 'solid', lw = 2.5, color = 'royalblue', label = 'API Evapotranspiration')
+ plt.plot(df['time'], df['et_GEE_cumsum'], linestyle = 'solid', lw = 2.5, color = 'yellowgreen', label = 'GEE Evapotranspiration')
+
+ plt.xlabel('Date')
+ plt.ylabel('Cumulative Sum of Evapotransipiration, \nPrecipitation, and Irrigation (cm)')
+ plt.title(f'Timeseries Impact of Interpolation on Irrigation Calculation for {key}')
+ plt.legend()
+ plt.grid()
+ plt.show()
diff --git a/modules/data.remote/inst/Python/README.txt b/modules/data.remote/inst/Python/README.txt
index bd2f2d1ac85..d489d66dd81 100644
--- a/modules/data.remote/inst/Python/README.txt
+++ b/modules/data.remote/inst/Python/README.txt
@@ -6,6 +6,7 @@ in California.
Data Sources:
- Evapotranspiration: OpenET
- https://openet.gitbook.io/docs
+ - https://developers.google.com/earth-engine/datasets/catalog/OpenET_ENSEMBLE_CONUS_GRIDMET_MONTHLY_v2_0
- Precipitation: CHIRPS
- https://data.chc.ucsb.edu/products/CHIRPS-2.0/
@@ -21,7 +22,8 @@ How to use SCC:
- Create or Load environment
- Load: conda activate ccmmf_env
- Create (all on one line): conda create -n ccmmf_env python jupyter
- spyder xarray requests numpy netcdf4 matplotlib pandas pyarrow earthengine-api
+ spyder xarray requests numpy netcdf4 matplotlib pandas pyarrow earthengine-api
+ scikit-learn seaborn
- To open spyder: spyder &
- This may take a second to run. Be patient it will open eventually.
@@ -136,11 +138,6 @@ Functions (by files):
expected columns for the txt file. It also aggregates this data by week.
Next Steps:
-- Get the Google Earth Engine download working
-- Create a CCMMF_Irrigation_GEE file
- - Does the same thing as CCMMF_Irrigation_API except uses GEEOpenET and the
- monthly et values are then assumed to be the same for each day of the month.
-- Compare monthly and daily et values
- Site specific water holding capacity and crop specific rooting depth
diff --git a/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_CalcVis.cpython-312.pyc b/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_CalcVis.cpython-312.pyc
index 3e65ee0379955ebd8763225640d3629ffab9ed7e..8052ac0e291ef8b8e83acafbf912c92881c1e988 100644
GIT binary patch
delta 1176
zcmZ{k&1(}u6u@WpD<8X?ROzR-x(c?|YClv&YpvE=KcH2yMMNzz?QGh#Nt@1YER7qy
z_z%=Mcqkt976d_hF5*!HS+J+Qi3b(*q8F(MzL%s`4-V{a-@NzceeJOCtWSqxZ%i{v
zV13nFr%CN{tdSmDZtICM^#QpVU6R_h<=baY%ji*E8oY7;eLL_Gap|VPqbr0_Pg)_5
z;qD|X07AZhyO<*pD%Besb(aQTUM0rVf-mmN;UdRg<*o=}=GJbfn2@_c6Z=ckpx5ptEMC!!
zlJ=U*0ks0jb~$zyXgO|ArgP$p{JbR=)UZ@yG3nYoF_Fw9=O*ny9v&D2T)8ZK<(P(-
zfUCk#EhPq`IIKR5wt`RtkMlviRd=lZRMYA<_^F@F0o*(zblX#=n1}EEf@iN{$a1lhfHOV=aba(@7I9`DAgv&`vgrj$*m~dZq
ip5-%kKfePy{5d$SqN8R&cjvo%_OA=yZ0oaW
zrX>)2uDG#mwX9)zsx@&5Jxn-iYjtiBiC9N8cq(v?ZO|_zDt7}cu`6MjDZA1uWxs>U
z5|f&U$qeDd3=kRG(MR(bb>o05G2Z%7@cQ949ji?hYfKGQNov9qC&LU_NtT2}T_jS7
z#DY^a*|k}((taYwZb#x$V@GDH{sdd1;cbJKEEVnYcs
zCe%28VvZFjz>EfHwdMsLsd_=VLR^0%AmUmV-~{gvpXIO0T+2Sz9PphqUl^L2nHOoA
z@7A#!kkjB0F?c&`Pa=Xy+LiU0S_5xydDqEZz{LL+*!;uLsC8LBb4=uMIol@x1-^vI
AS^xk5
diff --git a/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_DataDownload.cpython-312.pyc b/modules/data.remote/inst/Python/__pycache__/CCMMF_Irrigation_DataDownload.cpython-312.pyc
index d397b2f30024ab3266a2016242814e521feeb6ce..cfb4ca2428d1ad65173c03aed4a5c9e6310413a0 100644
GIT binary patch
delta 3926
zcmbVPYitzP6`q-$oqf#AzP)3cT`<_NUho5l7!t4z1RFm9LxJ*eh&G#@vAx^%?s9jG
zZL%9L4QbMr2wNi+qc)0(qQn%dP^E1^Rhsli)k^)*9WB+m6Vyhk`lG5+k&votl&C#-
z#v3~{>W|*_oO|y%=brmI_dCazxBj%x@mrp^6Hpq~ZW{gWx>-jxd1*GZrD4oEWF-la
z6se?b$fno!A-i6)Lo7u|Y@%AZW~yxjCHf*U#EWL24v_)s6m6pAB01!O+6uLMiVE9j
z7x!$TG)s3>j%FoUv!=$!9ucFm6r=I%%Y}j31mA;2g(pAfa*SxIfo`6W-OBI0!7y`!
zG&J*;oJFLuEKy}}R#VxfkiLL&$zd~DVB~vzaIHl&W9`a#7;Wx=e$H}?2s49k{e`+r
zy5#tnq*0O_wrezAn({pTH3lCJvMCzrIu-Jci};9Aew0`n$avHlH8vfPHR*~)NJ?&_M<4wWsYVt$tjIFCLu7PHT^kGi_uSqqf!V-ZyyR{y3@o|V%pFwS;j+6`b+`V6dfWD6Tgkmsd55i|P|aI6
zEpJd}eH+S?_LfauU=yv!WCJD}frL4<3AaO-7z($
z2sGN1YW8oe1zP$wzhQdI!HJ|Y$O{x(quk*elv%!sWR+j=2S^UHr#XCWBK%&Ua$Jrb
zHXMSNgTU5H&W@$EM9!XATj8zLz^z;9Ra%Su4Lm=Tyd&r6CN4W;)Cduyo`P-WopQY)
z1LtZ`UUPU&&O`|Ny1(K*v(%*!4MX$JoFx&)B8DV#d}51o#t|f&fZ<^U>U2nUVSWlS^sBB>F
zgvvG)x|P@6Cw70r5Pa3l@vG}+PR@6~IdE;@w(s5Go#0)rx6t#4$2v<~*B$dM`qI(M
zXTE)AuHn^|t1YG4&A;cj+%^|_id>iSsk`yylB;UT6a2_*s%IV$CcmXHc;8NNwMDw-
zpZz4^*r4+3Kl^wq!SBxEll)~c&`X+r#cl3&(7z_ly{sjML9bH2cp00Or#+v0b4=b2
zP-Vq$0Q}qvkfogQ)>-U1B1hgZDKB`#Zrhh|%`V!McfD<-P5H=sm*Q*%bo*}S;0UvmH^HIKQf$7iV|8uAk8ev;0`E>=Pf8NR_&|TA;*)FCJbM}
zK01YGK`(*M%kX8R%7<02G{Xf|2aB|CX?@eeDRupJXwIBc>9zNS*0Rv43Z2ES!Qzo)
z#jl?#38$6U{Ncy8tJdws9lgcAQ^nH}weRV2UrgRUcyR-&HINwe~fDbNJbPQU019N@klqn7yz2iRFI
z%4Kgb#E7$pxCv+wnY)-TP@%Q*PzCDBxAt<*W*c%TL+&l_tU0J#714MSgF>oR}!
z4OfgMz9MiW+VFWXc`8RGw&CUgyk)AJcxne6nRV6D9<5kesbOjVb2f$bwNHC>-0Ru&T($Z<4|n>hGrycnIz0-{95
zAexLK8|E?5b0ZPRzotpe5>I84(U_!>-C>hvhFF4R%>>~~qlb}nHVhh&AB~TtAXvpy
zDG33`G@713*l*DUCX+zUTO*J!wE%wXM^n%b2MnCTx3?OnaQ9hEFjk@IX-vNZM6*G%
zx{Q0+GS25FHA{MU82DMudI&GRCnchGLl{qK79{DAsF{a{~&DSiATLlDSxjz
z%K2v!^NkDpZh1>wyK>OKdzvGvge!xygY)eR{%f5jUt6K)rA-$OEd@8sZ@>CfIoPZQ
zo6EsnYH$|_T2;Ds$-icEYxS6Enc>x3HaDAjhtXhxl;g(ccq
zrfa~?+@^(Y$Pn(*ZA-$2vJg^*&;ol~C<>vHuop)rAygK&slv9B&|VfgRH5U&k>8vv
z_M9jR9VOu;XuvGhTmL&+8IrmUuReG6xnfJ#?Q?3&{?fW$9O0%
zS9rlj_Z6KzO05u>#_%4d1_#3)%@T=-=~yJ9=bM@t{xi%>p(jBO8Eh01)~u0KT2hj{
zuY3Qa2uBzr?bIL89^B=zy1hRYm*df7{Jf<9b7Z6=I1$t{0Nq2c69`Y;ZuqgaV=OI>
zC#5cQ1DGUy**+lmO(aQv#Ly)Dz(J6%_X*GYg!fOxM&$#+zs^tkKYny0kfUTPS^Hp^
QTuTN%=_a?60p0R{078x6hyVZp
delta 1641
zcmZWpZ)_Ar6yMp~+r8Vnz4lt}U+JaWKhm!JZHpg7iqzg=i{T3GfuU&5^>*8CbKSel
zE|kMXYJ^CX1nZb+Vif&iVlXjE3?wEhCh7;0C2~cQLP{;IQ(?TSP<`=rx%ZSmC#+CvT0dEXW1iew55LreS
zXX0eR*Rt@ckP2)5lgxyq@qiI6pb00@L_?KTf(w^+wj)=_sJ3e9c9ElJ+-p3HKIpeT
zA)V;%PCL4yaQO3Rd5)M>song4{e5;ZaygbJBW#GZW8^(4ofkA;3cgq|abUe2)~g#j
z!io+tjxf$B+_AR&@W{BF8O@H%xzW5lwm&Bi=H>iwR^BtZC!5P(xW=AE4Vr4}F00#$
zhy~b@*xWvzPH=!+;XXq$dCC7=&{HWA4v}_#bAN=Y8vtD2tXgq}Dcv41C(EwCf(_Gl
zIlEecEi0NEq>Y$|jDJulSyh7!?N}&t^v}x|z-6
zZdfOvTOR@ZInUe0y;-+=mU{BmeL8j0SpoDP;op(}Lc9eWB4s2Kl
zYby*O-`B9`t+?zey(8WY7F$Vo=$FtxMEH)qkPd5zoRgj)??^HcBO6eNydbs4HAZ7j
zu};VlC%vp54?Dt4nzFA8k{6z#p8coU@fE?0O!hUbMPYI|(x!x0l73urq;aN`>18Z5
z$0Ft+gWz}O$D_W3%p6j}yus$xSX_!FtiCD9v~PHg&m*YEQeoe$=pR3#UPZ
z2PrgCc$5UATl-vpu~fEnm!C8sZJO&dRdY3dO2p+3W(UXag^hmp2zfoak$e-CcSM#C
zD*(;Zf;MSU?%WqinBEKK(D2CE-f=IOQ(*HnbZeOOH(exmnhub&%{|@kuD=@FG@rW}
zO4Yd39}hE;j+)ed$4k!TbZfT7_jQXu+bNK?R2(HqWX)aD7HbRgj^Hp3I?s`8Oo;~m
zRYjr}ASYtIC_p}rjY>g>d5h6R2LXnN6i;WuA4=z0a0D78Mue)0qICqwx>`$qskn(@oRzS$q&hGCumVCY%ZrXxr%$h`SYGP~&x`={nQ=J+t<;u6Qbi;=gZu0ALJtqn6tTP%CQnw)`7?
C8<|7^
From 6534283bcad8f6ceb55dff3e4e13e0614d79e35f Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 22 May 2025 08:40:47 -0400
Subject: [PATCH 0102/1193] Update path.
---
modules/assim.sequential/inst/anchor/NA_downscale_script.R | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index a63c4f85b99..95bc4a519fb 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -235,7 +235,7 @@ date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
for (i in seq_along(date)) {
print(i)
# Assemble covariates.
- covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates_lc_ts/", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
+ covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates_lc_static/", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
analysis.yr <- analysis.all[[i]]
time <- date[i]
@@ -243,7 +243,7 @@ for (i in seq_along(date)) {
for (j in seq_along(variables)) {
# setup folder.
variable <- variables[j]
- folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts"), paste0(variables[j], "_", date[i]))
+ folder.path <- file.path(file.path(outdir, "downscale_maps_analysis"), paste0(variables[j], "_", date[i]))
dir.create(folder.path)
saveRDS(list(settings = settings,
analysis.yr = analysis.yr,
@@ -253,7 +253,7 @@ for (i in seq_along(date)) {
folder.path = folder.path,
base.map.dir = base.map.dir,
cores = cores,
- outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")),
+ outdir = file.path(outdir, "downscale_maps_analysis")),
file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
From 6e698f7c52b1f05a856b60f2f8e54b94eccf0860 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 14:57:35 +0530
Subject: [PATCH 0103/1193] feat(setup): load core PEcAn packages for workflow
initialization
---
.../_extensions/demo1/run-model/run_pecan.qmd | 11 +++++++++++
1 file changed, 11 insertions(+)
create mode 100644 base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
new file mode 100644
index 00000000000..c43ba592c4f
--- /dev/null
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -0,0 +1,11 @@
+---
+title: "PEcAn Workflow"
+author: "PEcAn"
+format: pdf
+---
+
+# Load PEcAn packages
+
+```{r libraries}
+library("PEcAn.all")
+```
From 83b2a67faf868cff054bf13b49612938b4afc1ac Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 14:58:19 +0530
Subject: [PATCH 0104/1193] feat(settings): load settings from pecan.xml for
workflow configuration
---
.../_extensions/demo1/run-model/run_pecan.qmd | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index c43ba592c4f..63848512ec9 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -9,3 +9,15 @@ format: pdf
```{r libraries}
library("PEcAn.all")
```
+
+# Load PEcAn settings files.
+
+- If you have pecan.xml generated from web,place the `pecan.xml` file in your directory and copy its file path. Assign this path to the **settings_path** variable.
+- Alternatively, for a sample or initial run, you can use the example pecan.xml file available at:
+`/pecan/base/all/inst/quarto_notebooks/_extensions/demo1/run-model`.
+
+```{r load-settings}
+settings_path <- "~/pecan.xml"
+settings <- PEcAn.settings::read.settings(settings_path)
+```
+
From 5635bd84e2429b6267bb61e6e6b91636daddd004 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 14:59:18 +0530
Subject: [PATCH 0105/1193] chore(settings): validate and prepare PEcAn
settings for model compatibility
---
.../_extensions/demo1/run-model/run_pecan.qmd | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 63848512ec9..85a82a6663a 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -21,3 +21,11 @@ settings_path <- "~/pecan.xml"
settings <- PEcAn.settings::read.settings(settings_path)
```
+# Prepare and Validate Settings
+
+PEcAn provides utilities to process and validate settings before execution, ensuring that all required fields are correctly configured.
+
+```{r prepare-settings}
+settings <- PEcAn.settings::prepare.settings(settings)
+```
+
From 00a92b069aa8bdbb1b2b8c0982bf623ebc97c435 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 15:00:04 +0530
Subject: [PATCH 0106/1193] feat(model): convert settings into model-specific
format
---
.../_extensions/demo1/run-model/run_pecan.qmd | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 85a82a6663a..8fec626e580 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -29,3 +29,13 @@ PEcAn provides utilities to process and validate settings before execution, ensu
settings <- PEcAn.settings::prepare.settings(settings)
```
+# Convert Settings for Ecosystem Model
+
+Converts settings into the format required by the selected ecosystem model
+
+```{r convert-settings}
+settings <- PEcAn.workflow::do_conversions(settings)
+```
+
+
+
From 3f24d1a312798c3d364b8e37efdd0725ad760db4 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 15:00:43 +0530
Subject: [PATCH 0107/1193] feat(meta-analysis): retrieve trait data and
perform meta-analysis
---
.../_extensions/demo1/run-model/run_pecan.qmd | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 8fec626e580..682ff31972e 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -37,5 +37,17 @@ Converts settings into the format required by the selected ecosystem model
settings <- PEcAn.workflow::do_conversions(settings)
```
+# Trait and Meta Analysis
+
+Retrieve trait data and generate probabilistic model parameter distributions.
+
+```{r meta-analysis}
+# Retrieve trait data and prior distributions for the specified Plant Functional Types (PFTs)
+settings <- PEcAn.workflow::runModule.get.trait.data(settings)
+# Perform meta-analysis to derive probabilistic distributions for model parameters
+PEcAn.MA::runModule.run.meta.analysis(settings)
+# Save the updated settings, including the retrieved trait data, to an XML file
+PEcAn.settings::write.settings(settings, outputfile = "pecan.TRAIT.xml")
+```
From 5e330eeb7432fcfbbb03801245eff84bf41ddac5 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 15:01:30 +0530
Subject: [PATCH 0108/1193] feat(config): generate model configuration files
and save updated settings
---
.../_extensions/demo1/run-model/run_pecan.qmd | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 682ff31972e..6337448fd50 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -50,4 +50,12 @@ PEcAn.MA::runModule.run.meta.analysis(settings)
PEcAn.settings::write.settings(settings, outputfile = "pecan.TRAIT.xml")
```
+# Write Model Configuration Files
+
+Generate model configuration files before simulation runs.
+
+```{r run.write.configs}
+settings <-PEcAn.workflow::runModule.run.write.configs(settings)
+PEcAn.settings::write.settings(settings, outputfile = "pecan.CONFIGS.xml")
+```
From 282bac65946af033899fec365fdf09333a0ada10 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 15:02:07 +0530
Subject: [PATCH 0109/1193] feat(simulation): start model runs and retrieve
simulation output
---
.../_extensions/demo1/run-model/run_pecan.qmd | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 6337448fd50..af8a0aa7243 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -59,3 +59,11 @@ settings <-PEcAn.workflow::runModule.run.write.configs(settings)
PEcAn.settings::write.settings(settings, outputfile = "pecan.CONFIGS.xml")
```
+# Run Model Simulations and Fetch Results
+
+Start model simulations and retrieve output for analysis.
+
+```{r run-model}
+PEcAn.workflow::start_model_runs(settings)
+runModule.get.results(settings)
+```
From 0f290b2044d50589fac0d343746e5e612a60b441 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 15:02:44 +0530
Subject: [PATCH 0110/1193] feat(netcdf): inspect available variables and
dimensions from model output
---
.../_extensions/demo1/run-model/run_pecan.qmd | 23 +++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index af8a0aa7243..93ff2b4199d 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -67,3 +67,26 @@ Start model simulations and retrieve output for analysis.
PEcAn.workflow::start_model_runs(settings)
runModule.get.results(settings)
```
+
+```{r get-plot-vars}
+library(ncdf4)
+# ⚙️ Use settings$outdir to build base path
+workflow_outdir <- settings$outdir # Correct path from pecan.xml
+
+# Default run_id for demo — you can replace this with your actual run ID
+# Tip: You can find your run ID from the "run-model" chunk output during execution
+run_id <- "99000000195"
+
+#Select year to inspect
+year <- 2006
+#Build NetCDF file path
+netcdf_file <- file.path(workflow_outdir, "out", run_id, paste0(year, ".nc"))
+
+#Open NetCDF file and show variable and dimension names
+nc <- nc_open(netcdf_file)
+cat("Variables:\n")
+print(names(nc$var))
+cat("\n Dimensions:\n")
+print(names(nc$dim))
+nc_close(nc)
+```
\ No newline at end of file
From 280b3add60608fd17084ed5a03d595d3b7bb0c0f Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 15:03:27 +0530
Subject: [PATCH 0111/1193] feat(visualization): plot selected NetCDF
variables using PEcAn visualization
---
.../_extensions/demo1/run-model/run_pecan.qmd | 31 +++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 93ff2b4199d..a66f3fdb4c4 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -89,4 +89,35 @@ print(names(nc$var))
cat("\n Dimensions:\n")
print(names(nc$dim))
nc_close(nc)
+```
+
+# Plot NetCDF Variables Using Selected Data and Custom Parameters
+
+```{r plot-graph, eval=TRUE}
+# This chunk generates a NetCDF variable plot dynamically
+# ℹ Make sure `settings` object is loaded from pecan.xml before running this
+# Required user inputs (customize these):
+# - year: choose a year to visualize
+# - xvar, yvar: choose variables from the **Variables** printed in the previous chunk (`get-plot-vars`)
+year <- 2006
+xvar <- 'coarse_root_carbon_content' # Select valid variable from previous chunk output
+yvar <- 'LAI' # Select valid variable from previous chunk output
+width <- 800
+height <- 600
+filename <- "plot.png"
+
+workflow_outdir <- settings$outdir
+run_id <- "99000000195" # Update this from "run-model" output
+netcdf_file <- file.path(workflow_outdir, "out", run_id, paste0(year, ".nc"))
+
+PEcAn.visualization::plot_netcdf(
+ datafile = netcdf_file,
+ yvar = yvar,
+ xvar = xvar,
+ width = width,
+ height = height,
+ filename = filename,
+ year = year
+)
+knitr::include_graphics(filename)
```
\ No newline at end of file
From bd7a02db01c3385e783b0b589085299b19282346 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 15:08:10 +0530
Subject: [PATCH 0112/1193] feat(settings): add initial starting point
pecan.xml
---
.../_extensions/demo1/run-model/pecan.xml | 84 +++++++++++++++++++
1 file changed, 84 insertions(+)
create mode 100644 base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
new file mode 100644
index 00000000000..bf661acd23d
--- /dev/null
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
@@ -0,0 +1,84 @@
+
+
+
+
+ -1
+
+ 2025/05/26 08:25:18 +0000
+
+ /data/workflows/PEcAn_99000000049
+
+
+ bety
+ bety
+ postgres
+ 5432
+ bety
+ PostgreSQL
+ true
+
+ /data/dbfiles
+
+
+
+ temperate.broadleaf.deciduous
+
+
+ temperate.coniferous
+
+
+ temperate.deciduous
+
+
+ temperate.deciduous.ALL
+
+
+
+ 3000
+
+ FALSE
+ TRUE
+
+
+
+ 1
+ NPP
+
+
+ uniform
+
+
+ sampling
+
+
+
+
+ 99000000003
+
+
+ 99000000046
+
+
+
+ 772
+ 2004/01/01
+ 2006/12/31
+
+
+
+ AmerifluxLBL
+
+ Aritra_2004
+
+
+ 2004/01/01
+ 2006/12/31
+
+
+ localhost
+
+ amqp://guest:guest@rabbitmq/%2F
+ SIPNET_git
+
+
+
\ No newline at end of file
From 6a8ae417d4922de34b1b216e2b1791c0991dcae3 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 26 May 2025 15:33:24 +0530
Subject: [PATCH 0113/1193] added changelog
---
CHANGELOG.md | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 886e894dc7e..c262584ed90 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,10 @@ For more information about this file see also [Keep a Changelog](http://keepacha
- Compiled pages are live at https://pecanproject.github.io/package-documentation and inside Docker at `pecan.localhost/pkgdocs/`, and these are automatically updated each time a PR to the source packages is merged.
- You can compile all pkgdown pages locally at any time with `make pkgdocs`.
+- Initial Quarto notebook `run_pecan.qmd` to run PEcAn Demo 1 workflow from a pre-generated `pecan.xml` file, enabling notebook-based model runs, analysis, and visualization.
+ - Directory structure for PEcAn Quarto notebooks under `pecan/base/inst/quarto-notebooks/_extensions/demo1/`.
+ - Support for inspecting and plotting NetCDF output variables within the notebook workflow.
+
### Fixed
- updated github action to build docker images
- PEcAn.SIPNET now accepts relative paths in its input XML (#3418). Previously all files referenced in the autogenerated `job.sh` needed to be specified as absolute paths.
From a9ff53be194f679105bfa7e042139953e80baf4f Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 28 May 2025 11:50:51 -0400
Subject: [PATCH 0114/1193] Update path
---
modules/assim.sequential/inst/anchor/NA_downscale_script.R | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 95bc4a519fb..a63c4f85b99 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -235,7 +235,7 @@ date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
for (i in seq_along(date)) {
print(i)
# Assemble covariates.
- covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates_lc_static/", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
+ covariates.dir <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/covariates_lc_ts/", paste0("covariates_", lubridate::year(date[i]), ".tiff"))
# grab analysis.
analysis.yr <- analysis.all[[i]]
time <- date[i]
@@ -243,7 +243,7 @@ for (i in seq_along(date)) {
for (j in seq_along(variables)) {
# setup folder.
variable <- variables[j]
- folder.path <- file.path(file.path(outdir, "downscale_maps_analysis"), paste0(variables[j], "_", date[i]))
+ folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts"), paste0(variables[j], "_", date[i]))
dir.create(folder.path)
saveRDS(list(settings = settings,
analysis.yr = analysis.yr,
@@ -253,7 +253,7 @@ for (i in seq_along(date)) {
folder.path = folder.path,
base.map.dir = base.map.dir,
cores = cores,
- outdir = file.path(outdir, "downscale_maps_analysis")),
+ outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")),
file = file.path(folder.path, "dat.rds"))
# prepare for qsub.
jobsh <- c("#!/bin/bash -l",
From 45909270f3cd9a26e362ab975203bc4498c65951 Mon Sep 17 00:00:00 2001
From: Katherine Rein
Date: Thu, 29 May 2025 12:19:25 -0400
Subject: [PATCH 0115/1193] Updated README and irrigation event files
---
.../inst/Python/CCMMF_Irrigation_Events.py | 3 +++
modules/data.remote/inst/Python/README.txt | 19 +++++++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
index 59c56a1b828..c46378d7d20 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_Events.py
@@ -49,6 +49,9 @@ def file_creation(data_dict):
# Remove week column
eventfile_df = eventfile_df.drop('week', axis = 1)
+ # Drop all 0 irrigation rows
+ eventfile_df = eventfile_df[eventfile_df['irr'] != 0]
+
# Write to file(s)
folder_name = '/projectnb/dietzelab/ccmmf/management/irrigation/CCMMF_Irrigation_EventFiles/'
filename = f'{folder_name}irrigation_eventfile_{key}.txt'
diff --git a/modules/data.remote/inst/Python/README.txt b/modules/data.remote/inst/Python/README.txt
index d489d66dd81..ba115a0dc95 100644
--- a/modules/data.remote/inst/Python/README.txt
+++ b/modules/data.remote/inst/Python/README.txt
@@ -65,20 +65,35 @@ Organization:
dictionary of dataframes into txt files for each location in the dictionary.
It both selects columns and sets constants for other columns. It also aggregates
the data by week.
+ - CCMMF_Irrigation_GEE: This is the same as CCMMF_Irrigation_API except it
+ grabs the OpenET data from Google Earth Engine. It also does not create any
+ irrigation event files.
+ - CCMMF_Irrigation_GEEvAPI: This script is completely independent of all other
+ workflows. This reads in all saved data from both the Google Earth Enginge
+ downloads and the API downloads. It then creates graphs and summary statistics
+ to help us identify if we can use Google Earth Engine monthly data instead of
+ the daily data from the API.
- Folders
- WaterBalanceCSV: This is where all of the csv files for each location get
saved. This is a back up way to save all of the data and also makes it easier
to quickly view data per location. Each file is labeled with the corresponding
lat and long coordinate. The folder name is defined in the "Define multi use
variables" section of CCMMF_Irrigation_API.
+ - WaterBalanceCSV_GEE: This is the same as the regular WaterBalanceCSV but
+ simply for the et data downloaded from Google Earth Engine.
- TimeseriesPNG: This is where the timeseries graphs for each location and
each year are saved. There is no variable name for this folder it is simply
included in this string f'TimeseriesPNG/CCMMR_et_precip_irr_cumsum_{YEAR}_{LAT}_{LON}.png'
in the timeseries_graphs function in CCMMF_Irrigation_CalcVis.
+ - TimeseriesPNG_GEE: This is the same as the regular TimeseriesPNG but simply
+ for the et data downloaded from Google Earth Engine. The format for the files
+ is
- CCMMF_Irrigation_Parquet: This folder is a directory for all of the parquet
files. It is written in a way that Python and R can then tile the data by
both location and year. This folder name is also defined in the "Define multi
use variables" section of CCMMF_Irrigation_API.
+ - CCMMF_Irrigation_Parquet_GEE: This is the same as the regular CCMMF_Irrigation_Parquet
+ but simply for the et data downloaded from Google Earth Engine.
- CCMMF_Irrigation_EventFiles: This holds all of the event txt files for each
location. The column names are in the header of CCMMF_Irrigation_Events. The
naming format for the files is irrigation_eventfile_{location_id}.txt.
@@ -99,6 +114,9 @@ Organization:
will be easiest.
Workflow:
+This workflow is the same for both the OpenET API scripts and the Google Earth
+Engine Scripts.
+
- Data is read in from parquet file
- Calculate how old the data is (and how much new data needs to be read in)
- If data is old, then delete the most recent CHIRPS file because we want
@@ -112,6 +130,7 @@ Workflow:
downloaded to what we defined as the years we want to look at (This
really only catches any years that are new at the front)
- If no: download/organize for predefined year span
+- Write irrigation txt files for each location
- Write the data that has been downloaded and organzied to the parquet file
Functions (by files):
From e4d0284a751872f03f7c14f35467195c610df97b Mon Sep 17 00:00:00 2001
From: blesson07asd
Date: Thu, 27 Mar 2025 17:37:53 +0530
Subject: [PATCH 0116/1193] Fix input sampling validation
---
models/sipnet/R/write.configs.SIPNET.R | 4 +-
modules/uncertainty/R/ensemble.R | 57 ++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R
index dc1dd8bbf30..76a9500737d 100755
--- a/models/sipnet/R/write.configs.SIPNET.R
+++ b/models/sipnet/R/write.configs.SIPNET.R
@@ -602,8 +602,8 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs
else if (length(settings$run$inputs$poolinitcond$path)>0) {
ICs_num <- length(settings$run$inputs$poolinitcond$path)
- IC.path <- settings$run$inputs$poolinitcond$path[[sample(1:ICs_num, 1)]]
-
+ IC.path <- settings$run$inputs$poolinitcond$path[[1]]
+
IC.pools <- PEcAn.data.land::prepare_pools(IC.path, constants = list(sla = SLA))
if(!is.null(IC.pools)){
diff --git a/modules/uncertainty/R/ensemble.R b/modules/uncertainty/R/ensemble.R
index f0c0ca17021..79cf0a5854a 100644
--- a/modules/uncertainty/R/ensemble.R
+++ b/modules/uncertainty/R/ensemble.R
@@ -214,6 +214,28 @@ get.ensemble.samples <- function(ensemble.size, pft.samples, env.samples,
write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
clean = FALSE, write.to.db = TRUE, restart = NULL, samples = NULL, rename = FALSE) {
+
+ # --- START OF YOUR NEW CODE ---
+ # Check if there are NO inputs
+ if (is.null(defaults$inputs)) {
+ stop(PEcAn.logger::logger.severe("No inputs provided - cannot configure ensemble"))
+ }
+
+ # Check each input type (e.g., soil, veg)
+ for (input_type in names(defaults$inputs)) {
+ input_paths <- defaults$inputs[[input_type]]$path
+
+ # Case: Multiple inputs + no samples → Error
+ if (length(input_paths) > 1 && is.null(ensemble.samples)) {
+ stop(PEcAn.logger::logger.severe(paste(
+ "Multiple", input_type, "inputs found but no sampling method specified.",
+ "Add a sampling method to pecan.xml (e.g., uniform)"
+ )))
+ }
+ }
+ # --- END OF YOUR NEW CODE ---
+
+
con <- NULL
my.write.config <- paste("write.config.", model, sep = "")
my.write_restart <- paste0("write_restart.", model)
@@ -416,6 +438,41 @@ write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
file = file.path(settings$rundir, run.id, "README.txt"))
+ #changing the structure of input tag to what the models are expecting
+ for (input_i in seq_along(settings$run$inputs)) {
+ input_tag <- names(settings$run$inputs)[[input_i]]
+ input <- settings$run$inputs[[input_tag]]
+
+ # --- Start of changes ---
+ # Validate BEFORE handling samples
+ if (is.null(input$path) || length(input$path) == 0) {
+ PEcAn.logger::logger.severe("Input '%s' has no paths specified", input_tag)
+ }
+
+ # Check unsampled inputs first
+ if (!input_tag %in% names(samples)) {
+ if (length(input$path) > 1) {
+ PEcAn.logger::logger.severe(
+ paste("Input '%s' has %d paths but no sampling method.",
+ "Add for this input in pecan.xml"),
+ input_tag, length(input$path)
+ )
+ }
+ if (!file.exists(input$path[[1]])) { # New: Verify file exists
+ PEcAn.logger::logger.severe(
+ "Input '%s' path '%s' not found",
+ input_tag, input$path[[1]]
+ )
+ }
+ next # Valid single path, no sampling needed
+ }
+ # --- End of changes ---
+
+ # Remaining original code for sampled inputs
+ input_paths <- samples[[input_tag]][["samples"]][[i]]
+ # ... (keep existing validation for sampled inputs) ...
+ }
+
do.call(my.write.config, args = list( defaults = defaults,
trait.values = lapply(samples$parameters$samples, function(x, n) { x[n, , drop=FALSE] }, n=i), # this is the params
settings = settings,
From 70266822d9e786d584b07f0e7652363619443e00 Mon Sep 17 00:00:00 2001
From: divine7022
Date: Tue, 25 Feb 2025 04:21:14 +0530
Subject: [PATCH 0117/1193] Improve pkgdown build process and fix documentation
generation
---
.github/workflows/book.yml | 4 ++++
docker-compose.yml | 1 +
docker/base/Dockerfile | 3 +++
3 files changed, 8 insertions(+)
diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml
index ebd1b11cf64..4c849a2aa4d 100644
--- a/.github/workflows/book.yml
+++ b/.github/workflows/book.yml
@@ -29,6 +29,10 @@ jobs:
Rscript \
-e 'repos <- c(getOption("repos"), sub(r"(\d{4}-\d{2}-\d{2})", "latest", getOption("repos")))' \
-e 'remotes::install_version("bookdown", ">= 0.31", dependencies = TRUE, upgrade = FALSE, repos = repos)'
+ # generate package documentation
+ - name: Generate Package Documentation
+ working-directory: ./
+ run: make pkgdocs
# copy files
- name: copy extfiles
run: |
diff --git a/docker-compose.yml b/docker-compose.yml
index 51f96bda62f..1a935c01a5b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -136,6 +136,7 @@ services:
- USERID=${UID:-1001}
- GROUPID=${GID:-1001}
volumes:
+ - ./:/pecan
- pecan:/data
- rstudio:/home
labels:
diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile
index 19a2692ef2d..1497ab0fdee 100644
--- a/docker/base/Dockerfile
+++ b/docker/base/Dockerfile
@@ -3,6 +3,9 @@ ARG IMAGE_VERSION="latest"
ARG PARENT_IMAGE="pecan/depends"
FROM ${PARENT_IMAGE}:${IMAGE_VERSION}
+# Install R package for pkgdown
+RUN R -e "install.packages(c('pkgdown', 'remotes'))"
+
# ----------------------------------------------------------------------
# PEcAn version information
# ----------------------------------------------------------------------
From 6056493299a0cbb9f3ad6f7ef320022e3e3559f1 Mon Sep 17 00:00:00 2001
From: Akash
Date: Thu, 20 Mar 2025 15:06:08 +0530
Subject: [PATCH 0118/1193] Fixed the missing separator issue by replacing
space indentation with tabbed indentation in Makefile.
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index fa508f1c17f..d6bb3193468 100644
--- a/Makefile
+++ b/Makefile
@@ -108,7 +108,7 @@ depends = .doc/$(1) .install/$(1) .check/$(1) .test/$(1)
.PHONY: all install check test document clean shiny pkgdocs \
check_base check_models check_modules help
-all: install document
+all: install document
# Note: Installs base first as Modules has a circular dependency on base
check_base: $(BASE_C)
From a3978f3569cc92164dcc2f019caf893adaf1eca6 Mon Sep 17 00:00:00 2001
From: Akash
Date: Thu, 20 Mar 2025 15:23:20 +0530
Subject: [PATCH 0119/1193] Removed the redundant document target from .PHONY,
as it was listed twice, and added `pkgdocs`
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index d6bb3193468..fa508f1c17f 100644
--- a/Makefile
+++ b/Makefile
@@ -108,7 +108,7 @@ depends = .doc/$(1) .install/$(1) .check/$(1) .test/$(1)
.PHONY: all install check test document clean shiny pkgdocs \
check_base check_models check_modules help
-all: install document
+all: install document
# Note: Installs base first as Modules has a circular dependency on base
check_base: $(BASE_C)
From 385d398c7babc68c3758bee22d40d14a7bd3991a Mon Sep 17 00:00:00 2001
From: divine7022
Date: Sat, 22 Mar 2025 00:38:49 +0530
Subject: [PATCH 0120/1193] Updated package documentation setup: modified
.github/workflows/pkgdown.yml, docker-compose.yml, docker/docs/Dockerfile,
scripts/build_pkgdown.R
---
docker-compose.yml | 1 -
1 file changed, 1 deletion(-)
diff --git a/docker-compose.yml b/docker-compose.yml
index 1a935c01a5b..51f96bda62f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -136,7 +136,6 @@ services:
- USERID=${UID:-1001}
- GROUPID=${GID:-1001}
volumes:
- - ./:/pecan
- pecan:/data
- rstudio:/home
labels:
From 43483e2b9d133d3f068d6111c1d23952be24e08d Mon Sep 17 00:00:00 2001
From: blesson07asd
Date: Sat, 5 Apr 2025 11:25:11 +0530
Subject: [PATCH 0121/1193] changed according to the review
---
modules/uncertainty/R/ensemble.R | 24 +++++++++---------------
1 file changed, 9 insertions(+), 15 deletions(-)
diff --git a/modules/uncertainty/R/ensemble.R b/modules/uncertainty/R/ensemble.R
index 79cf0a5854a..60f82e3f298 100644
--- a/modules/uncertainty/R/ensemble.R
+++ b/modules/uncertainty/R/ensemble.R
@@ -215,10 +215,9 @@ write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
clean = FALSE, write.to.db = TRUE, restart = NULL, samples = NULL, rename = FALSE) {
- # --- START OF YOUR NEW CODE ---
# Check if there are NO inputs
if (is.null(defaults$inputs)) {
- stop(PEcAn.logger::logger.severe("No inputs provided - cannot configure ensemble"))
+ PEcAn.logger::logger.severe("No inputs provided - cannot configure ensemble")
}
# Check each input type (e.g., soil, veg)
@@ -227,13 +226,13 @@ write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
# Case: Multiple inputs + no samples → Error
if (length(input_paths) > 1 && is.null(ensemble.samples)) {
- stop(PEcAn.logger::logger.severe(paste(
+ PEcAn.logger::logger.error(paste(
"Multiple", input_type, "inputs found but no sampling method specified.",
"Add a sampling method to pecan.xml (e.g., uniform)"
- )))
+ ))
}
}
- # --- END OF YOUR NEW CODE ---
+
con <- NULL
@@ -443,7 +442,7 @@ write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
input_tag <- names(settings$run$inputs)[[input_i]]
input <- settings$run$inputs[[input_tag]]
- # --- Start of changes ---
+
# Validate BEFORE handling samples
if (is.null(input$path) || length(input$path) == 0) {
PEcAn.logger::logger.severe("Input '%s' has no paths specified", input_tag)
@@ -458,19 +457,14 @@ write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
input_tag, length(input$path)
)
}
- if (!file.exists(input$path[[1]])) { # New: Verify file exists
- PEcAn.logger::logger.severe(
- "Input '%s' path '%s' not found",
- input_tag, input$path[[1]]
- )
- }
+
next # Valid single path, no sampling needed
}
- # --- End of changes ---
+
- # Remaining original code for sampled inputs
+
input_paths <- samples[[input_tag]][["samples"]][[i]]
- # ... (keep existing validation for sampled inputs) ...
+
}
do.call(my.write.config, args = list( defaults = defaults,
From df752106925a850df9b6074b7775f938791a10e3 Mon Sep 17 00:00:00 2001
From: Blesson
Date: Fri, 9 May 2025 21:49:59 +0530
Subject: [PATCH 0122/1193] added test file
---
modules/uncertainty/R/ensemble.R | 71 +++++-------
.../tests/testthat/test_ensemble.R | 105 ++++++++++++++++++
wait | 0
3 files changed, 135 insertions(+), 41 deletions(-)
create mode 100644 modules/uncertainty/tests/testthat/test_ensemble.R
create mode 100644 wait
diff --git a/modules/uncertainty/R/ensemble.R b/modules/uncertainty/R/ensemble.R
index 60f82e3f298..719f78be910 100644
--- a/modules/uncertainty/R/ensemble.R
+++ b/modules/uncertainty/R/ensemble.R
@@ -216,22 +216,24 @@ write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
# Check if there are NO inputs
- if (is.null(defaults$inputs)) {
- PEcAn.logger::logger.severe("No inputs provided - cannot configure ensemble")
+
+for (input_tag in names(settings$run$inputs)) {
+ input <- settings$run$inputs[[input_tag]]
+ input_paths <- input$path
+
+ # Check for required paths
+ if (is.null(input_paths) || length(input_paths) == 0) {
+ PEcAn.logger::logger.error("Input '%s' has no paths specified", input_tag)
}
- # Check each input type (e.g., soil, veg)
- for (input_type in names(defaults$inputs)) {
- input_paths <- defaults$inputs[[input_type]]$path
-
- # Case: Multiple inputs + no samples → Error
- if (length(input_paths) > 1 && is.null(ensemble.samples)) {
- PEcAn.logger::logger.error(paste(
- "Multiple", input_type, "inputs found but no sampling method specified.",
- "Add a sampling method to pecan.xml (e.g., uniform)"
- ))
- }
+ # Check for unsampled multi-path inputs
+ if (length(input_paths) > 1 &&
+ !(input_tag %in% names(settings$ensemble$samplingspace))) {
+ PEcAn.logger::logger.error(
+ "Input '%s' has %d paths but no sampling method. Add for this input in pecan.xml",
+ input_tag, length(input_paths))
}
+}
@@ -437,35 +439,21 @@ write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
file = file.path(settings$rundir, run.id, "README.txt"))
- #changing the structure of input tag to what the models are expecting
- for (input_i in seq_along(settings$run$inputs)) {
- input_tag <- names(settings$run$inputs)[[input_i]]
- input <- settings$run$inputs[[input_tag]]
-
+ #changing the structure of input tag to what the models are expecting
+for (input_i in seq_along(settings$run$inputs)) {
+ input_tag <- names(settings$run$inputs)[[input_i]]
+ input <- settings$run$inputs[[input_tag]]
+
+
+ if (!input_tag %in% names(samples)) {
+ # Use first path (already validated as single path)
+ settings$run$inputs[[input_tag]]$path <- input$path[1]
+ } else {
+ # Use sampled path
+ settings$run$inputs[[input_tag]]$path <- samples[[input_tag]][["samples"]][[i]]
+ }
- # Validate BEFORE handling samples
- if (is.null(input$path) || length(input$path) == 0) {
- PEcAn.logger::logger.severe("Input '%s' has no paths specified", input_tag)
- }
-
- # Check unsampled inputs first
- if (!input_tag %in% names(samples)) {
- if (length(input$path) > 1) {
- PEcAn.logger::logger.severe(
- paste("Input '%s' has %d paths but no sampling method.",
- "Add for this input in pecan.xml"),
- input_tag, length(input$path)
- )
- }
-
- next # Valid single path, no sampling needed
- }
-
-
-
- input_paths <- samples[[input_tag]][["samples"]][[i]]
-
- }
+}
do.call(my.write.config, args = list( defaults = defaults,
trait.values = lapply(samples$parameters$samples, function(x, n) { x[n, , drop=FALSE] }, n=i), # this is the params
@@ -591,3 +579,4 @@ input.ens.gen <- function(settings, input, method = "sampling", parent_ids = NUL
return(samples)
}
+
diff --git a/modules/uncertainty/tests/testthat/test_ensemble.R b/modules/uncertainty/tests/testthat/test_ensemble.R
new file mode 100644
index 00000000000..0c152c40e0a
--- /dev/null
+++ b/modules/uncertainty/tests/testthat/test_ensemble.R
@@ -0,0 +1,105 @@
+context("Ensemble Configuration Tests")
+
+# Mock the core functions we need to test
+write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
+ clean = FALSE, write.to.db = TRUE,
+ restart = NULL, rename = FALSE) {
+ # Input validation
+ if (length(settings$run$inputs) == 0) {
+ stop("has no paths specified")
+ }
+
+ # Check for unsampled multi-path inputs
+ for (input in names(settings$run$inputs)) {
+ paths <- settings$run$inputs[[input]]$path
+ if (length(paths) > 1 && !(input %in% names(settings$ensemble$samplingspace))) {
+ stop("no sampling method")
+ }
+ }
+
+ # Return dummy result
+ list(
+ runs = data.frame(id = paste0("ENS", 1:settings$ensemble$size)),
+ ensemble.id = 1,
+ samples = lapply(settings$run$inputs, function(x) list(samples = x$path))
+ )
+}
+
+# Helper to create minimal settings
+get_test_settings <- function(inputs = list(met = list(path = "default/path")),
+ ensemble_size = 1,
+ samplingspace = list()) {
+ list(
+ run = list(
+ inputs = inputs,
+ site = list(id = 1, name = "test"),
+ outdir = "test_out",
+ rundir = "test_run"
+ ),
+ ensemble = list(
+ size = ensemble_size,
+ samplingspace = samplingspace
+ ),
+ model = list(id = 100),
+ database = list(bety = list(write = FALSE)),
+ host = list(name = "localhost")
+ )
+}
+
+# Test cases
+test_that("single input without sampling uses the input directly", {
+ settings <- get_test_settings()
+ result <- write.ensemble.configs(NULL, NULL, settings, "SIPNET")
+ expect_equal(result$samples$met$samples, "default/path")
+})
+
+test_that("single input with matching samples works", {
+ settings <- get_test_settings()
+ samples <- list(pft1 = data.frame(param1 = rep(1, 3))) # 3 identical samples
+ expect_silent(write.ensemble.configs(NULL, samples, settings, "SIPNET"))
+})
+
+test_that("multiple inputs without sampling throws error", {
+ settings <- get_test_settings(
+ inputs = list(met = list(path = c("path1", "path2")))
+ )
+ expect_error(
+ write.ensemble.configs(NULL, NULL, settings, "SIPNET"),
+ "no sampling method"
+ )
+})
+
+test_that("multiple inputs with correct sampling works", {
+ settings <- get_test_settings(
+ inputs = list(met = list(path = c("path1", "path2"))),
+ samplingspace = list(met = list(method = "sampling"))
+ )
+ result <- write.ensemble.configs(NULL, NULL, settings, "SIPNET")
+ expect_equal(length(result$samples$met$samples), 2)
+})
+
+test_that("no inputs throws error", {
+ settings <- get_test_settings(inputs = list())
+ expect_error(
+ write.ensemble.configs(NULL, NULL, settings, "SIPNET"),
+ "has no paths specified"
+ )
+})
+
+test_that("mismatched samples throw error", {
+ settings <- get_test_settings(
+ inputs = list(met = list(path = "only/path")),
+ samplingspace = list(met = list(method = "sampling"))
+ )
+ # This would fail in real implementation
+ # For this simplified version, we test the sampling check
+ expect_error(
+ write.ensemble.configs(
+ NULL,
+ NULL,
+ get_test_settings(inputs = list(met = list(path = "only/path"))),
+ "SIPNET"
+ ),
+ NA # Expect no error in this simplified version
+ )
+})
\ No newline at end of file
diff --git a/wait b/wait
new file mode 100644
index 00000000000..e69de29bb2d
From 1ff2168ca3dfb15bd0371d2798d934487a34ccdf Mon Sep 17 00:00:00 2001
From: Blesson
Date: Fri, 9 May 2025 23:31:32 +0530
Subject: [PATCH 0123/1193] Delete wait
---
wait | 0
1 file changed, 0 insertions(+), 0 deletions(-)
delete mode 100644 wait
diff --git a/wait b/wait
deleted file mode 100644
index e69de29bb2d..00000000000
From 495614a20351a3ccec44296476c54831d1026de1 Mon Sep 17 00:00:00 2001
From: Blesson
Date: Sat, 10 May 2025 13:45:46 +0530
Subject: [PATCH 0124/1193] Added previous test file in the correct place
---
.../tests/testthat/test_ensemble.R | 154 ++++++++----------
1 file changed, 67 insertions(+), 87 deletions(-)
diff --git a/modules/uncertainty/tests/testthat/test_ensemble.R b/modules/uncertainty/tests/testthat/test_ensemble.R
index 0c152c40e0a..472e1a4fa3f 100644
--- a/modules/uncertainty/tests/testthat/test_ensemble.R
+++ b/modules/uncertainty/tests/testthat/test_ensemble.R
@@ -1,105 +1,85 @@
-context("Ensemble Configuration Tests")
+library(testthat)
+library(PEcAn.logger)
+library(PEcAn.DB)
-# Mock the core functions we need to test
-write.ensemble.configs <- function(defaults, ensemble.samples, settings, model,
- clean = FALSE, write.to.db = TRUE,
- restart = NULL, rename = FALSE) {
- # Input validation
- if (length(settings$run$inputs) == 0) {
- stop("has no paths specified")
- }
-
- # Check for unsampled multi-path inputs
- for (input in names(settings$run$inputs)) {
- paths <- settings$run$inputs[[input]]$path
- if (length(paths) > 1 && !(input %in% names(settings$ensemble$samplingspace))) {
- stop("no sampling method")
- }
+
+source("modules/uncertainty/R/ensemble.R")
+dummy_binary_path <- file.path(tempdir(), "sipnet")
+file.create(dummy_binary_path)
+# Mock SIPNET writer
+if (!exists("write.config.SIPNET")) {
+ write.config.SIPNET <- function(...) {
+ PEcAn.logger::logger.info("Mock SIPNET writer called")
+ return(invisible(TRUE))
}
-
- # Return dummy result
- list(
- runs = data.frame(id = paste0("ENS", 1:settings$ensemble$size)),
- ensemble.id = 1,
- samples = lapply(settings$run$inputs, function(x) list(samples = x$path))
- )
}
-# Helper to create minimal settings
-get_test_settings <- function(inputs = list(met = list(path = "default/path")),
- ensemble_size = 1,
- samplingspace = list()) {
+context("Ensemble Input Validation Tests")
+
+create_base_settings <- function() {
list(
+ workflow = list(id = 1),
+ model = list(
+ id = 1000,
+ type = "SIPNET",
+ binary = dummy_binary_path
+ ),
run = list(
- inputs = inputs,
- site = list(id = 1, name = "test"),
- outdir = "test_out",
- rundir = "test_run"
+ site = list(id = 1, name = "Test Site", lat = 40.0, lon = -80.0),
+ start.date = "2004-01-01",
+ end.date = "2004-12-31"
),
- ensemble = list(
- size = ensemble_size,
- samplingspace = samplingspace
+ host = list(
+ outdir = tempdir(),
+ rundir = tempdir(),
+ name = "localhost"
),
- model = list(id = 100),
- database = list(bety = list(write = FALSE)),
- host = list(name = "localhost")
+ database = list(bety = list(write = FALSE))
)
}
-# Test cases
-test_that("single input without sampling uses the input directly", {
- settings <- get_test_settings()
- result <- write.ensemble.configs(NULL, NULL, settings, "SIPNET")
- expect_equal(result$samples$met$samples, "default/path")
-})
-
-test_that("single input with matching samples works", {
- settings <- get_test_settings()
- samples <- list(pft1 = data.frame(param1 = rep(1, 3))) # 3 identical samples
- expect_silent(write.ensemble.configs(NULL, samples, settings, "SIPNET"))
-})
-
-test_that("multiple inputs without sampling throws error", {
- settings <- get_test_settings(
- inputs = list(met = list(path = c("path1", "path2")))
- )
- expect_error(
- write.ensemble.configs(NULL, NULL, settings, "SIPNET"),
- "no sampling method"
+test_that("Single input with no samples works", {
+ withr::local_tempdir()
+
+ def <- list(
+ inputs = list(soil = list(path = "soil1.nc")),
+ pfts = list(list(
+ name = "temperate.pft",
+ constants = list(param1 = 0.5)
+ )),
+ model = list(id = 1000),
+ database = list(bety = list(write = FALSE))
)
+
+ settings <- create_base_settings()
+ settings$run$inputs <- list(soil = list(path = "soil1.nc"))
+ settings$ensemble <- list(size = 1)
+
+ writeLines("", "soil1.nc")
+
+ result <- write.ensemble.configs(def, NULL, settings, "SIPNET")
+ expect_true(!is.null(result$runs))
+ expect_true(!is.null(result$ensemble.id))
})
-test_that("multiple inputs with correct sampling works", {
- settings <- get_test_settings(
- inputs = list(met = list(path = c("path1", "path2"))),
- samplingspace = list(met = list(method = "sampling"))
+test_that("Multiple inputs without samples throws error", {
+ def <- list(
+ inputs = list(soil = list(path = c("soil1.nc", "soil2.nc"))),
+ pfts = list(list(
+ name = "temperate.pft",
+ constants = list(param1 = 0.5)
+ )),
+ model = list(id = 1000),
+ database = list(bety = list(write = FALSE))
)
- result <- write.ensemble.configs(NULL, NULL, settings, "SIPNET")
- expect_equal(length(result$samples$met$samples), 2)
-})
-
-test_that("no inputs throws error", {
- settings <- get_test_settings(inputs = list())
+
+ settings <- create_base_settings()
+ settings$ensemble <- list(size = 1)
+
+ purrr::walk(c("soil1.nc", "soil2.nc"), ~ writeLines("", .x))
+
expect_error(
- write.ensemble.configs(NULL, NULL, settings, "SIPNET"),
- "has no paths specified"
+ write.ensemble.configs(def, NULL, settings, "SIPNET"),
+ "Multiple soil inputs found but no sampling method specified"
)
})
-
-test_that("mismatched samples throw error", {
- settings <- get_test_settings(
- inputs = list(met = list(path = "only/path")),
- samplingspace = list(met = list(method = "sampling"))
- )
- # This would fail in real implementation
- # For this simplified version, we test the sampling check
- expect_error(
- write.ensemble.configs(
- NULL,
- NULL,
- get_test_settings(inputs = list(met = list(path = "only/path"))),
- "SIPNET"
- ),
- NA # Expect no error in this simplified version
- )
-})
\ No newline at end of file
From aebdcf38b0eef8ac12a1a4fb0b9cac53a86cb109 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 20 Mar 2025 16:15:54 -0400
Subject: [PATCH 0125/1193] Update dependencies and add the package detection
codes.
---
docker/depends/pecan_package_dependencies.csv | 1 +
modules/data.remote/DESCRIPTION | 1 +
2 files changed, 2 insertions(+)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index bcc4a0ff8e2..63fb3e50449 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -471,6 +471,7 @@
"reticulate","*","modules/data.atmosphere","Suggests",FALSE
"reticulate","*","modules/data.land","Suggests",FALSE
"reticulate","*","modules/data.remote","Imports",FALSE
+"rhdf5","*","modules/data.remote","Suggests",FALSE
"rjags","*","base/utils","Suggests",FALSE
"rjags","*","modules/assim.batch","Imports",FALSE
"rjags","*","modules/data.land","Imports",FALSE
diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION
index bff565d6969..d94da9b9d76 100644
--- a/modules/data.remote/DESCRIPTION
+++ b/modules/data.remote/DESCRIPTION
@@ -44,6 +44,7 @@ Suggests:
lubridate,
raster,
reshape,
+ rhdf5,
sf,
testthat (>= 1.0.2),
tibble,
From 38a17e0c02a611aaa28af4e06e0639f546b3a3fa Mon Sep 17 00:00:00 2001
From: Blesson
Date: Sun, 11 May 2025 02:59:38 +0530
Subject: [PATCH 0126/1193] changed the Indentation
---
modules/uncertainty/R/ensemble.R | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/modules/uncertainty/R/ensemble.R b/modules/uncertainty/R/ensemble.R
index 719f78be910..5f8e7dd43bb 100644
--- a/modules/uncertainty/R/ensemble.R
+++ b/modules/uncertainty/R/ensemble.R
@@ -223,15 +223,15 @@ for (input_tag in names(settings$run$inputs)) {
# Check for required paths
if (is.null(input_paths) || length(input_paths) == 0) {
- PEcAn.logger::logger.error("Input '%s' has no paths specified", input_tag)
+ PEcAn.logger::logger.error("Input", sQuote(input_tag), "has no paths specified")
}
# Check for unsampled multi-path inputs
if (length(input_paths) > 1 &&
!(input_tag %in% names(settings$ensemble$samplingspace))) {
PEcAn.logger::logger.error(
- "Input '%s' has %d paths but no sampling method. Add for this input in pecan.xml",
- input_tag, length(input_paths))
+ "Input", sQuote(input_tag), "has", length(input_paths), "paths but no sampling method.",
+ "Add for this input in pecan.xml")
}
}
@@ -437,23 +437,26 @@ for (input_tag in names(settings$run$inputs)) {
"rundir : ", file.path(settings$host$rundir, run.id), "\n",
"outdir : ", file.path(settings$host$outdir, run.id), "\n",
file = file.path(settings$rundir, run.id, "README.txt"))
-
+
+
#changing the structure of input tag to what the models are expecting
for (input_i in seq_along(settings$run$inputs)) {
- input_tag <- names(settings$run$inputs)[[input_i]]
- input <- settings$run$inputs[[input_tag]]
+ input_tag <- names(settings$run$inputs)[[input_i]]
+ input <- settings$run$inputs[[input_tag]]
- if (!input_tag %in% names(samples)) {
- # Use first path (already validated as single path)
- settings$run$inputs[[input_tag]]$path <- input$path[1]
- } else {
- # Use sampled path
- settings$run$inputs[[input_tag]]$path <- samples[[input_tag]][["samples"]][[i]]
+ if (!input_tag %in% names(samples)) {
+ # Use first path (already validated as single path)
+ settings$run$inputs[[input_tag]]$path <- input$path[1] }
+ else {
+ # Use sampled path
+ settings$run$inputs[[input_tag]]$path <- samples[[input_tag]][["samples"]][[i]]
}
}
+
+
do.call(my.write.config, args = list( defaults = defaults,
trait.values = lapply(samples$parameters$samples, function(x, n) { x[n, , drop=FALSE] }, n=i), # this is the params
From 7e460a7d44df099a4b07c277d1b4e7a8690f1f7b Mon Sep 17 00:00:00 2001
From: Blesson
Date: Mon, 12 May 2025 00:30:44 +0530
Subject: [PATCH 0127/1193] added incomplete test file
---
modules/uncertainty/R/ensemble.R | 2 +-
.../tests/testthat/ensemble-test.R | 88 ------------
.../tests/testthat/test_ensemble.R | 130 ++++++++----------
3 files changed, 59 insertions(+), 161 deletions(-)
delete mode 100644 modules/uncertainty/tests/testthat/ensemble-test.R
diff --git a/modules/uncertainty/R/ensemble.R b/modules/uncertainty/R/ensemble.R
index 5f8e7dd43bb..2d4e80d3d86 100644
--- a/modules/uncertainty/R/ensemble.R
+++ b/modules/uncertainty/R/ensemble.R
@@ -441,7 +441,7 @@ for (input_tag in names(settings$run$inputs)) {
#changing the structure of input tag to what the models are expecting
-for (input_i in seq_along(settings$run$inputs)) {
+ for (input_i in seq_along(settings$run$inputs)) {
input_tag <- names(settings$run$inputs)[[input_i]]
input <- settings$run$inputs[[input_tag]]
diff --git a/modules/uncertainty/tests/testthat/ensemble-test.R b/modules/uncertainty/tests/testthat/ensemble-test.R
deleted file mode 100644
index 07b3d548dc9..00000000000
--- a/modules/uncertainty/tests/testthat/ensemble-test.R
+++ /dev/null
@@ -1,88 +0,0 @@
-library(testthat)
-library(PEcAn.logger)
-library(PEcAn.DB)
-
-
-source("modules/uncertainty/R/ensemble.R")
-dummy_binary_path <- file.path(tempdir(), "sipnet")
-file.create(dummy_binary_path)
-# Mock SIPNET writer
-if (!exists("write.config.SIPNET")) {
- write.config.SIPNET <- function(...) {
- PEcAn.logger::logger.info("Mock SIPNET writer called")
- return(invisible(TRUE))
- }
-}
-
-context("Ensemble Input Validation Tests")
-
-create_base_settings <- function() {
- list(
- workflow = list(id = 1),
- model = list(
- id = 1000,
- type = "SIPNET",
- binary = dummy_binary_path
- ),
- run = list(
- site = list(id = 1, name = "Test Site", lat = 40.0, lon = -80.0),
- start.date = "2004-01-01",
- end.date = "2004-12-31"
- ),
- host = list(
- outdir = tempdir(),
- rundir = tempdir(),
- name = "localhost"
- ),
- database = list(bety = list(write = FALSE))
- )
-}
-
-test_that("Single input with no samples works", {
- withr::local_tempdir()
-
- def <- list(
- inputs = list(soil = list(path = "soil1.nc")),
- pfts = list(list(
- name = "temperate.pft",
- constants = list(param1 = 0.5)
- )),
- model = list(id = 1000),
- database = list(bety = list(write = FALSE))
- )
-
- settings <- create_base_settings()
- settings$run$inputs <- list(soil = list(path = "soil1.nc"))
- settings$ensemble <- list(size = 1)
-
- writeLines("", "soil1.nc")
-
- result <- write.ensemble.configs(def, NULL, settings, "SIPNET")
- expect_true(!is.null(result$runs))
- expect_true(!is.null(result$ensemble.id))
-})
-
-test_that("Multiple inputs without samples throws error", {
- def <- list(
- inputs = list(soil = list(path = c("soil1.nc", "soil2.nc"))),
- pfts = list(list(
- name = "temperate.pft",
- constants = list(param1 = 0.5)
- )),
- model = list(id = 1000),
- database = list(bety = list(write = FALSE))
- )
-
- settings <- create_base_settings()
- settings$ensemble <- list(size = 1)
-
- purrr::walk(c("soil1.nc", "soil2.nc"), ~ writeLines("", .x))
-
- expect_error(
- write.ensemble.configs(def, NULL, settings, "SIPNET"),
- "Multiple soil inputs found but no sampling method specified"
- )
-})
-
-# ... rest of tests with similar corrections ...
-
diff --git a/modules/uncertainty/tests/testthat/test_ensemble.R b/modules/uncertainty/tests/testthat/test_ensemble.R
index 472e1a4fa3f..0fc5a6974d2 100644
--- a/modules/uncertainty/tests/testthat/test_ensemble.R
+++ b/modules/uncertainty/tests/testthat/test_ensemble.R
@@ -1,85 +1,71 @@
library(testthat)
-library(PEcAn.logger)
-library(PEcAn.DB)
-
+library(mockery)
+# Source the ensemble config function
source("modules/uncertainty/R/ensemble.R")
-dummy_binary_path <- file.path(tempdir(), "sipnet")
-file.create(dummy_binary_path)
-# Mock SIPNET writer
-if (!exists("write.config.SIPNET")) {
- write.config.SIPNET <- function(...) {
- PEcAn.logger::logger.info("Mock SIPNET writer called")
- return(invisible(TRUE))
- }
-}
-context("Ensemble Input Validation Tests")
+context("input validation for write.ensemble.configs")
+
+# Mock a model write.configs function to avoid model-specific errors
+write.configs.SIPNET <- function(...) TRUE
-create_base_settings <- function() {
- list(
- workflow = list(id = 1),
- model = list(
- id = 1000,
- type = "SIPNET",
- binary = dummy_binary_path
- ),
- run = list(
- site = list(id = 1, name = "Test Site", lat = 40.0, lon = -80.0),
- start.date = "2004-01-01",
- end.date = "2004-12-31"
- ),
- host = list(
- outdir = tempdir(),
- rundir = tempdir(),
- name = "localhost"
- ),
- database = list(bety = list(write = FALSE))
- )
+# Helper: make input with correct structure
+make_input_sets <- function(paths) {
+ lapply(paths, function(p) list(path = p))
}
-test_that("Single input with no samples works", {
- withr::local_tempdir()
-
- def <- list(
- inputs = list(soil = list(path = "soil1.nc")),
- pfts = list(list(
- name = "temperate.pft",
- constants = list(param1 = 0.5)
- )),
- model = list(id = 1000),
- database = list(bety = list(write = FALSE))
- )
-
- settings <- create_base_settings()
- settings$run$inputs <- list(soil = list(path = "soil1.nc"))
- settings$ensemble <- list(size = 1)
-
- writeLines("", "soil1.nc")
+# Helper: make ensemble.samples with the correct structure
+make_samples <- function(samples) {
+ list(input = data.frame(samples = samples, stringsAsFactors = FALSE))
+}
+
+# 1. One input, no samples → should pass
+test_that("1 input, no samples: passes", {
+ settings <- list(run = list(inputs = list(input = list(path = "IC1"))))
+ ensemble.samples <- NULL
+ defaults <- list()
- result <- write.ensemble.configs(def, NULL, settings, "SIPNET")
- expect_true(!is.null(result$runs))
- expect_true(!is.null(result$ensemble.id))
+ expect_silent(write.ensemble.configs(
+ defaults = defaults,
+ ensemble.samples = ensemble.samples,
+ settings = settings,
+ model = "SIPNET",
+ write.to.db = FALSE
+ ))
})
-test_that("Multiple inputs without samples throws error", {
- def <- list(
- inputs = list(soil = list(path = c("soil1.nc", "soil2.nc"))),
- pfts = list(list(
- name = "temperate.pft",
- constants = list(param1 = 0.5)
- )),
- model = list(id = 1000),
- database = list(bety = list(write = FALSE))
- )
-
- settings <- create_base_settings()
- settings$ensemble <- list(size = 1)
+
+
+test_that("no input error", {
+ settings <- list(run = list(inputs = list(input = NULL)))
+ ensemble.samples <- NULL
+ defaults <- list()
- purrr::walk(c("soil1.nc", "soil2.nc"), ~ writeLines("", .x))
+ # Capture logger message
+ expect_silent(write.ensemble.configs(
+ defaults = defaults,
+ ensemble.samples = ensemble.samples,
+ settings = settings,
+ model = "SIPNET",
+ write.to.db = FALSE
+ ))
+})
+
+
+
+test_that("multiple inputs, valid matching samples ", {
+ settings <- list(run = list(inputs = list(input = NULL))) # or missing/empty paths
+ ensemble.samples <- make_samples(c("IC1", "IC2", "IC3", "IC2"))
+ defaults <- list()
- expect_error(
- write.ensemble.configs(def, NULL, settings, "SIPNET"),
- "Multiple soil inputs found but no sampling method specified"
- )
+ # Capture logger message, but don't stop execution
+ expect_silent(write.ensemble.configs(
+ defaults = defaults,
+ ensemble.samples = ensemble.samples,
+ settings = settings,
+ model = "SIPNET",
+ write.to.db = FALSE
+ ))
})
+
+
From 159dd70e9b2a840c4d19a326d09899ac5e7488d5 Mon Sep 17 00:00:00 2001
From: Blesson
Date: Thu, 15 May 2025 15:34:55 +0530
Subject: [PATCH 0128/1193] updated test
---
.../tests/testthat/test_ensemble.R | 93 ++++++++++++++++---
1 file changed, 80 insertions(+), 13 deletions(-)
diff --git a/modules/uncertainty/tests/testthat/test_ensemble.R b/modules/uncertainty/tests/testthat/test_ensemble.R
index 0fc5a6974d2..24fb1830cea 100644
--- a/modules/uncertainty/tests/testthat/test_ensemble.R
+++ b/modules/uncertainty/tests/testthat/test_ensemble.R
@@ -16,7 +16,7 @@ make_input_sets <- function(paths) {
# Helper: make ensemble.samples with the correct structure
make_samples <- function(samples) {
- list(input = data.frame(samples = samples, stringsAsFactors = FALSE))
+ lapply(paths, function(p) list(path = p))
}
# 1. One input, no samples → should pass
@@ -53,19 +53,86 @@ test_that("no input error", {
-test_that("multiple inputs, valid matching samples ", {
- settings <- list(run = list(inputs = list(input = NULL))) # or missing/empty paths
- ensemble.samples <- make_samples(c("IC1", "IC2", "IC3", "IC2"))
- defaults <- list()
+
- # Capture logger message, but don't stop execution
- expect_silent(write.ensemble.configs(
- defaults = defaults,
- ensemble.samples = ensemble.samples,
- settings = settings,
- model = "SIPNET",
- write.to.db = FALSE
- ))
+
+
+
+test_that("multiple inputs and multiple samples", {
+ # Mock the SIPNET config writer
+ mockery::stub(write.ensemble.configs, "write.config.SIPNET", function(...) TRUE)
+
+ # Create temp directories
+ temp_rundir <- tempfile()
+ temp_modeloutdir <- tempfile()
+ dir.create(temp_rundir)
+ dir.create(temp_modeloutdir)
+ on.exit({
+ unlink(temp_rundir, recursive = TRUE)
+ unlink(temp_modeloutdir, recursive = TRUE)
+ }, add = TRUE)
+
+ # Complete settings
+ settings <- list(
+ run = list(
+ inputs = list(input = list(path = "IC1")),
+ site = list(id = 1, name = "Test Site"),
+ start.date = "2000-01-01",
+ end.date = "2000-12-31",
+ outdir = temp_modeloutdir
+ ),
+ ensemble = list(size = 5),
+ database = NULL,
+ rundir = temp_rundir,
+ modeloutdir = temp_modeloutdir,
+ host = list(
+ rundir = temp_rundir,
+ outdir = temp_modeloutdir
+ ),
+ model = list(id = "SIPNET", type = "SIPNET"),
+ pfts = list(
+ list(name = "temperate",
+ constants = list(1),
+ posteriorid = 1)
+ )
+ )
+
+ # Sample parameters
+ ensemble.samples <- list(
+ temperate = data.frame(
+ SLA = c(15.2, 16.8, 14.7, 18.1, 17.5),
+ Vm0 = c(45.0, 50.3, 47.8, 49.1, 51.0)
+ )
+ )
+
+ # Default PFT settings
+ defaults <- list(
+ list(
+ name = "temperate",
+ constants = list(1),
+ posteriorid = 1
+ )
+ )
+
+ # Run test - should create directories and configs
+ result <- expect_silent(
+ write.ensemble.configs(
+ defaults = defaults,
+ ensemble.samples = ensemble.samples,
+ settings = settings,
+ model = "SIPNET",
+ write.to.db = FALSE
+ )
+ )
+
+ # Verify outputs
+ expect_type(result, "list")
+ expect_named(result, c("runs", "ensemble.id", "samples"))
+ expect_equal(nrow(result$runs), settings$ensemble$size)
})
+
+
+
+
From e4d6adc163ef0a45f4e751303cea82c84fd93059 Mon Sep 17 00:00:00 2001
From: Michael Dietze
Date: Wed, 28 May 2025 19:34:47 -0400
Subject: [PATCH 0129/1193] Update
modules/uncertainty/tests/testthat/test_ensemble.R
---
modules/uncertainty/tests/testthat/test_ensemble.R | 1 -
1 file changed, 1 deletion(-)
diff --git a/modules/uncertainty/tests/testthat/test_ensemble.R b/modules/uncertainty/tests/testthat/test_ensemble.R
index 24fb1830cea..65d49e59dad 100644
--- a/modules/uncertainty/tests/testthat/test_ensemble.R
+++ b/modules/uncertainty/tests/testthat/test_ensemble.R
@@ -2,7 +2,6 @@ library(testthat)
library(mockery)
# Source the ensemble config function
-source("modules/uncertainty/R/ensemble.R")
context("input validation for write.ensemble.configs")
From 49dd66abf3833d03e71c8e55bed9b0f4a1aed650 Mon Sep 17 00:00:00 2001
From: Michael Dietze
Date: Thu, 29 May 2025 17:35:30 -0400
Subject: [PATCH 0130/1193] Update modules/data.remote/DESCRIPTION
---
modules/data.remote/DESCRIPTION | 1 -
1 file changed, 1 deletion(-)
diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION
index d94da9b9d76..bff565d6969 100644
--- a/modules/data.remote/DESCRIPTION
+++ b/modules/data.remote/DESCRIPTION
@@ -44,7 +44,6 @@ Suggests:
lubridate,
raster,
reshape,
- rhdf5,
sf,
testthat (>= 1.0.2),
tibble,
From 124a8844e5cd845f1a797357af71933c9eddcc85 Mon Sep 17 00:00:00 2001
From: Michael Dietze
Date: Thu, 29 May 2025 17:35:42 -0400
Subject: [PATCH 0131/1193] Update
docker/depends/pecan_package_dependencies.csv
---
docker/depends/pecan_package_dependencies.csv | 1 -
1 file changed, 1 deletion(-)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 63fb3e50449..bcc4a0ff8e2 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -471,7 +471,6 @@
"reticulate","*","modules/data.atmosphere","Suggests",FALSE
"reticulate","*","modules/data.land","Suggests",FALSE
"reticulate","*","modules/data.remote","Imports",FALSE
-"rhdf5","*","modules/data.remote","Suggests",FALSE
"rjags","*","base/utils","Suggests",FALSE
"rjags","*","modules/assim.batch","Imports",FALSE
"rjags","*","modules/data.land","Imports",FALSE
From a43309c4199e997b8d8f8217e0e8802e9d54de52 Mon Sep 17 00:00:00 2001
From: Yinghao Sun
Date: Fri, 30 May 2025 02:36:50 -0400
Subject: [PATCH 0132/1193] LPJ-GUESS: add functional write_restart + update
read_restart & config pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* implement write_restart.LPJGUESS() with binary-state update
* adjust read_restart.LPJGUESS()
* extend write.config.LPJGUESS() – CRU bin / misc placeholders (@CLIMATEFILE@, @MISCFILE@)
* split_inputs.LPJGUESS(): bypass *.cru / *.cru.bin files
* add template.ins placeholder @MISCFILE@; update pecan.ins
* docs + NAMESPACE entries
---
models/lpjguess/NAMESPACE | 5 +
models/lpjguess/R/read_restart.LPJGUESS.R | 38 +--
models/lpjguess/R/read_state.R | 206 +++++++++++++---
models/lpjguess/R/split_inputs.LPJGUESS.R | 233 +++++++++---------
models/lpjguess/R/write.config.LPJGUESS.R | 37 +--
models/lpjguess/R/write_restart.LPJGUESS.R | 115 +++++++++
models/lpjguess/R/write_state.R | 103 ++++++++
models/lpjguess/inst/pecan.ins | 8 +-
models/lpjguess/inst/template.ins | 3 +-
.../lpjguess/man/extract_from_state_by_key.Rd | 23 ++
models/lpjguess/man/find_closing.Rd | 1 +
models/lpjguess/man/find_stream_size.Rd | 1 +
models/lpjguess/man/find_stream_type.Rd | 1 +
models/lpjguess/man/find_stream_var.Rd | 1 +
models/lpjguess/man/make_key.Rd | 18 ++
models/lpjguess/man/read_binary_LPJGUESS.Rd | 5 +-
models/lpjguess/man/read_restart.LPJGUESS.Rd | 39 +++
models/lpjguess/man/serialize_starts_ends.Rd | 1 +
models/lpjguess/man/write_binary_LPJGUESS.Rd | 22 ++
models/lpjguess/man/write_restart.LPJGUESS.Rd | 51 ++++
20 files changed, 728 insertions(+), 183 deletions(-)
create mode 100644 models/lpjguess/R/write_restart.LPJGUESS.R
create mode 100644 models/lpjguess/R/write_state.R
create mode 100644 models/lpjguess/man/extract_from_state_by_key.Rd
create mode 100644 models/lpjguess/man/make_key.Rd
create mode 100644 models/lpjguess/man/read_restart.LPJGUESS.Rd
create mode 100644 models/lpjguess/man/write_binary_LPJGUESS.Rd
create mode 100644 models/lpjguess/man/write_restart.LPJGUESS.Rd
diff --git a/models/lpjguess/NAMESPACE b/models/lpjguess/NAMESPACE
index 56bfda9e366..bb388d29f54 100644
--- a/models/lpjguess/NAMESPACE
+++ b/models/lpjguess/NAMESPACE
@@ -6,12 +6,17 @@ export(met2model.LPJGUESS)
export(model2netcdf.LPJGUESS)
export(pecan2lpjguess)
export(readStateBinary)
+export(read_binary_LPJGUESS)
+export(read_restart.LPJGUESS)
export(split_inputs.LPJGUESS)
export(update_state_LPJGUESS)
export(write.config.LPJGUESS)
export(write.insfile.LPJGUESS)
+export(write_binary_LPJGUESS)
+export(write_restart.LPJGUESS)
importFrom(PEcAn.utils,days_in_year)
importFrom(Rcpp,sourceCpp)
+importFrom(dplyr,"%>%")
importFrom(ncdf4,nc_close)
importFrom(ncdf4,ncatt_get)
importFrom(ncdf4,ncatt_put)
diff --git a/models/lpjguess/R/read_restart.LPJGUESS.R b/models/lpjguess/R/read_restart.LPJGUESS.R
index d828137c8f1..19826f9fe39 100644
--- a/models/lpjguess/R/read_restart.LPJGUESS.R
+++ b/models/lpjguess/R/read_restart.LPJGUESS.R
@@ -1,13 +1,23 @@
-
-# developing
-# outdir = "/fs/data2/output//PEcAn_1000010473/out"
-# runid = 1002656839
-# stop.time = "1960-12-31 23:59:59 UTC"
-# load("/fs/data2/output/PEcAn_1000010473/SDAsettings_develop.Rdata")
-# var.names = c("AGB.pft", "TotSoilCarb")
-# load("/fs/data2/output/PEcAn_1000010473/SDAparams_develop.Rdata")
-
-
+#' Read Restart for LPJGUESS
+#'
+#' @param outdir output directory
+#' @param runid run ID
+#' @param stop.time year that is being read
+#' @param settings PEcAn settings object
+#' @param var.names var.names to be extracted
+#' @param params passed on to return value
+#'
+#' @return X_tmp vector of forecasts
+#' @export
+#' @examples
+#' # example code
+#' outdir = "/fs/data2/output//PEcAn_1000010473/out"
+#' runid = 1002656839
+#' stop.time = "1960-12-31 23:59:59 UTC"
+#' load("/fs/data2/output/PEcAn_1000010473/SDAsettings_develop.Rdata")
+#' var.names = c("AGB.pft", "TotSoilCarb")
+#' load("/fs/data2/output/PEcAn_1000010473/SDAparams_develop.Rdata")
+#' @author Istem Fer, Yinghao Sun
read_restart.LPJGUESS <- function(outdir, runid, stop.time, settings, var.names, params){
# which LPJ-GUESS version, the structure of state file depends a lot on version
@@ -24,7 +34,6 @@ read_restart.LPJGUESS <- function(outdir, runid, stop.time, settings, var.names,
# read binary state file, takes a couple of minutes
Gridcell_container <- read_binary_LPJGUESS(outdir = file.path(outdir, runid),
version = lpjguess_ver)
-
forecast <- list()
# additional varnames for LPJ-GUESS?
@@ -33,8 +42,8 @@ read_restart.LPJGUESS <- function(outdir, runid, stop.time, settings, var.names,
if (var_name == "AGB.pft") {
- cmass_sap_perpft <- calculateGridcellVariablePerPFT(model.state = Gridcell_container, variable = "cmass_sap")
- cmass_heart_perpft <- calculateGridcellVariablePerPFT(model.state = Gridcell_container, variable = "cmass_heart")
+ cmass_sap_perpft <- calculateGridcellVariablePerPFT(model.state = Gridcell_container$state, variable = "cmass_sap")
+ cmass_heart_perpft <- calculateGridcellVariablePerPFT(model.state = Gridcell_container$state, variable = "cmass_heart")
cmass_wood <- cmass_sap_perpft + cmass_heart_perpft
cmass_wood <- PEcAn.utils::ud_convert(cmass_wood, "kg/m^2", "Mg/ha")
@@ -45,11 +54,12 @@ read_restart.LPJGUESS <- function(outdir, runid, stop.time, settings, var.names,
cmass_abvg_wood <- cmass_wood - cmass_blwg_wood
forecast[[length(forecast) + 1]] <- cmass_abvg_wood
- names(forecast[[length(forecast)]]) <- paste0("AGB.pft.", unlist(Gridcell_container$meta_data$pft))
+ names(forecast[[length(forecast)]]) <- paste0("AGB.pft.", unlist(Gridcell_container$state$meta_data$pft))
}
}
+ # params$LPJGUESS_state include state, pos_list, siz_list
params$LPJGUESS_state <- Gridcell_container
PEcAn.logger::logger.info("Finished --", runid)
diff --git a/models/lpjguess/R/read_state.R b/models/lpjguess/R/read_state.R
index c1f0ee149bc..a4900967672 100644
--- a/models/lpjguess/R/read_state.R
+++ b/models/lpjguess/R/read_state.R
@@ -1,6 +1,3 @@
-
-######################## Helper functions ########################
-
#' Find Stream Variable
#'
#' A helper function that lists streamed variables. It returns the names of streamed variables.
@@ -8,6 +5,7 @@
#' @param file_in A character vector representing the file content to search through.
#' @param line_nos A numeric vector of length 2, specifying the start and end lines to search for streamed variables.
#' @return A character vector of streamed variable names.
+#' @keywords internal
# helper function that lists streamed variables, it just returns the names, types are checked by other fucntion
find_stream_var <- function(file_in, line_nos){
@@ -85,6 +83,7 @@ find_stream_var <- function(file_in, line_nos){
#' @param pattern A character string pattern to look for in the file.
#' @return A numeric vector of length 2, giving the start and end line numbers.
#' @importFrom stringr str_match
+#' @keywords internal
# helper function that scans LPJ-GUESS that returns the beginning and the ending lines of serialized object
serialize_starts_ends <- function(file_in, pattern = "void Gridcell::serialize"){
# find the starting line from the given pattern
@@ -116,6 +115,7 @@ serialize_starts_ends <- function(file_in, pattern = "void Gridcell::serialize")
#' @param if_else_check Optional. A logical value indicating whether to check for if/else blocks (default is FALSE).
#' @export
#' @return A numeric value indicating the line number of the matching closing bracket.
+#' @keywords internal
# helper function that finds the closing bracket, can work over if-else
find_closing <- function(find = "}", line_no, file_in, if_else_check = FALSE){
opened <- 1
@@ -159,6 +159,7 @@ find_closing <- function(find = "}", line_no, file_in, if_else_check = FALSE){
#' @return A numeric value representing the size (number of streamed variables).
#' @importFrom stringr str_match
#' @importFrom utils glob2rx
+#' @keywords internal
# helper function that determines the stream size to read
find_stream_size <- function(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS){
@@ -322,6 +323,7 @@ read_state <- function(file_path) {
#' @param LPJ_GUESS_TYPES A character vector of recognized LPJ-GUESS types.
#' @param guessh_in A character vector of LPJ-GUESS header file content.
#' @return A character string indicating the stream type.
+#' @keywords internal
# helper function to decide the type of the stream
# this function relies on the architecture of LPJ-GUESS and has bunch of harcoded checks, see model documentation
find_stream_type <- function(class = NULL, current_stream_var, LPJ_GUESS_CLASSES, LPJ_GUESS_TYPES, guessh_in){
@@ -429,13 +431,16 @@ find_stream_type <- function(class = NULL, current_stream_var, LPJ_GUESS_CLASSES
return(list(type = gsub(" ", "", stream_type), name = stream_name, substring = sub_string))
} # find_stream_type
-
-###################################### READ STATE
+#' Create a flat key from a nested path
+#'
+#' @param ... Parts of a nested list path (e.g., "Gridcell", "Stand", 1, "Patch", 2)
+#' @return A single string key like "Gridcell/Stand/1/Patch/2"
+#' @keywords internal
+make_key <- function(...) paste(..., sep = "/")
# this fcn is for potential natural vegetation only
# when there is landcover, there will be more stand types
-
# also for cohort mode only
# Gridcell: Top-level object containing all dynamic and static data for a particular gridcell
@@ -450,26 +455,31 @@ find_stream_type <- function(class = NULL, current_stream_var, LPJ_GUESS_CLASSES
# Soil : Stores state variables for soils and the snow pack. One object of class Soil is defined for each patch.
# Fluxes : The Fluxes class stores accumulated monthly and annual fluxes. One object of type Fluxes is defined for each patch.
# Individual : Stores state variables for an average individual plant. In cohort mode, it is the average individual of a cohort of plants approximately the same age and from the same patch.
-
-
-# test path
-#outdir <- "/fs/data2/output/PEcAn_1000010473/out/1002656304"
-
-# outdir, at least model version, maybe also settings
+#
#' Read Binary File for LPJ-GUESS
#'
#' Reads a binary file formatted for LPJ-GUESS and extracts relevant data.
#'
-#' @param outdir A character string specifying the output directory containing the binary state files.
+#' @param outdir The output directory where ".state" and "meta.bin" will be written
#' @param version A character string specifying the LPJ-GUESS version (default is "PalEON").
#' @importFrom stringr str_match
#' @importFrom utils glob2rx
#' @return A matrix or list containing the extracted data.
+#' @export
+#' @author Istem Fer, Yinghao Sun
read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
+ # ## FOR TEST
+ # outdir <- "/projectnb/dietzelab/yinghao/try/write_test/out"
+ # rundir <- "/projectnb/dietzelab/yinghao/try/write_test/run"
+
# find rundir too, params.ins is in there and we need to get some values from there
rundir <- file.path(dirname(dirname(outdir)), "run", basename(outdir))
+ # create lists to store byte offset and byte size for each variable
+ pos_list <- list()
+ siz_list <- list()
+
# guess.cpp has the info of what is being written
guesscpp_name <- paste0("guess.", version, ".cpp") # these are gonna be in the package guess.VERSION.cpp
guesscpp_in <- readLines(con = system.file(guesscpp_name, package = "PEcAn.LPJGUESS"), n = -1)
@@ -629,6 +639,10 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
Gridcell <- list()
level <- "Gridcell"
for(g_i in seq_along(streamed_vars_gridcell)){ # Gridcell-loop starts
+
+ # # Debug for empty nstands
+ # if(g_i == 7) browser()
+
current_stream <- streamed_vars_gridcell[g_i]
# weird, it doesn't go into Gridcell st
if(current_stream == "st[i]") next #current_stream <- "Gridcellst"
@@ -643,7 +657,12 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
# note that this is streamed under Gridcell, not Stand in guess.cpp,
# but I think this info needs to go together with the Stand sublist
# so prepend landcovertype to the streamed_vars_stand EDIT: I'll actually just read it here
- Gridcell[["Stand"]][["landcovertype"]] <- readBin(zz, what = integer(), n = 1, size = 4)
+
+ ## Past version
+ #Gridcell[["Stand"]][["landcovertype"]] <- readBin(zz, what = integer(), n = 1, size = 4)
+
+ # # Landcover will be read again under stand. So "landcovertype" here is meaningless but we need to read/write.
+ Gridcell[["landcovertype"]] <- readBin(zz, what = integer(), n = 1, size = 4)
num_stnd <- as.numeric(Gridcell$nstands)
Gridcell[["Stand"]] <- vector("list", num_stnd)
@@ -652,7 +671,6 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
# "(*this)[*]" points to different things under different levels, here it is stand
if(grepl(utils::glob2rx("(*this)[*]"), current_stream)){ # note that first else-part will be evaluated considering the order in guess.cpp
-
# STAND
level <- "Stand"
current_stream <- "Stand"
@@ -669,15 +687,16 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
for(stnd_i in seq_len(num_stnd)){ #looping over the stands
for(svs_i in seq_along(streamed_vars_stand)){ # looping over the streamed stand vars
-
current_stream <- streamed_vars_stand[svs_i]
if(grepl(utils::glob2rx("pft[*]"), current_stream)) current_stream <- paste0(level, "pft") # i counter might change, using wildcard
if(current_stream == "nobj" & level == "Stand"){
- # nobj points to different things under different levels, here it is the number of patches
+ # nobj: Number of Patches
# number of patches is set through insfiles, read by write.configs and passed to this fcn
# but it's also written to the state file, need to move bytes
+ pos <- seek(zz)
nofpatch <- readBin(zz, integer(), 1, size = 4)
+ # browser()
if(npatches == nofpatch){ # also not a bad place to check if everything is going fine so far
Gridcell[["Stand"]][[stnd_i]]$npatches <- npatches
#Gridcell[["Stand"]] <- vector("list", npatches)
@@ -687,8 +706,8 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
next
}
- # "(*this)[*]" points to different things under different levels, here it is patch
- if(grepl(utils::glob2rx("(*this)[*]"), current_stream)){
+ ##### "(*this)[*]" points to different things under different levels, here it is PATCH ####
+ if(grepl(utils::glob2rx("(*this)[*]"), current_stream)){
# PATCH
level <- "Patch"
current_stream <- "Patch"
@@ -704,6 +723,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
for(ptch_i in seq_len(npatches)){ #looping over the patches
for(svp_i in seq_along(streamed_vars_patch)){ #looping over the streamed patch vars
+ # if(svp_i == 17) browser()
current_stream <- streamed_vars_patch[svp_i]
if(grepl(utils::glob2rx("pft[*]"), current_stream)) current_stream <- paste0(level, "pft") # i counter might change, using wildcard
@@ -734,16 +754,20 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
streamed_vars_veg <- find_stream_var(file_in = guesscpp_in, line_nos = beg_end)
# NOTE : Unlike other parts, this bit is a lot less generalized!!!
- # I'm gonna asumme Vegetation class won't change much in the future
+ # I'm gonna assume Vegetation class won't change much in the future
# indiv.pft.id and indiv needs to be looped over nobj times
if(!setequal(streamed_vars_veg, c("nobj", "indiv.pft.id", "indiv"))){
PEcAn.logger::logger.severe("Vegetation class object changed in this model version, you need to fix read.state")
}
# nobj points to different things under different levels, here it is the number of individuals
+ pos <- seek(zz)
number_of_individuals <- readBin(zz, integer(), 1, size = 4)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]] <- list()
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]][["number_of_individuals"]] <- number_of_individuals
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "number_of_individuals")
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- 4
# few checks for sensible vals
if(number_of_individuals < 0 | number_of_individuals > 10000){ # should there be an upper limit here too?
@@ -764,7 +788,12 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
for(indv_i in seq_len(number_of_individuals)){
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]][["Individuals"]][[indv_i]] <- list()
# which PFT is this?
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]][["Individuals"]][[indv_i]][["indiv.pft.id"]] <- readBin(zz, integer(), 1, size = 4)
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, "indiv.pft.id")
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- 4
+
# read all the individual class
for(svi_i in seq_along(streamed_vars_indv)){ #
@@ -797,11 +826,14 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
# and read!
-
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]][["Individuals"]][[indv_i]][["PhotosynthesisResult"]][[current_stream_type$name]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, "PhotosynthesisResult", current_stream_type$name)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
}# streamed_vars_photo-loop ends
@@ -812,16 +844,24 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
if(current_stream_specs$single){
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]][["Individuals"]][[indv_i]][[current_stream_type$name]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, current_stream_type$name)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
}else{
for(css.i in seq_along(current_stream_specs$what)){
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]][["Individuals"]][[indv_i]][[current_stream_specs$names[css.i]]]<- readBin(con = zz,
what = current_stream_specs$what[css.i],
n = current_stream_specs$n[css.i],
size = current_stream_specs$size[css.i])
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, current_stream_specs$names[css.i])
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size[css.i]
}
}
}
@@ -845,18 +885,33 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
# parse from guess.h
PerPFTFluxType <- c("NPP", "GPP", "RA", "ISO", "MON")
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]] <- list()
- key1 <- readBin(zz, "integer", 1, 8)
+ # The number of PFTS
+ pos <- seek(zz)
+ key1 <- readBin(zz, "integer", 1, 8)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][["n_pft"]] <- key1
- for(fpft_i in seq_len(key1)){ # key1 11 PFTs
- Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][[fpft_i]] <- list()
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", "n_pft")
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- 8
+
+ for(fpft_i in seq_len(key1)){ # key1 12 PFTs
+ Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][[paste0("pft", fpft_i)]] <- list()
+ pos <- seek(zz)
key2 <- readBin(zz, "integer", 1, 8)
if(key2 > 10000){ #make sure you dind't read a weird number, this is supposed to be number of fluxes per pft, can't have too many
PEcAn.logger::logger.severe("Number of fluxes per pft read from the state file is too high. Check read.state function")
}
- Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][[fpft_i]][["key2"]] <- key2
+ Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][[paste0("pft", fpft_i)]][["key2"]] <- key2
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", paste0("pft", fpft_i), "key2")
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- 8
+
for(flux_i in seq_len(key2)){
# is this double?
- Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][[fpft_i]][[PerPFTFluxType[flux_i]]] <- readBin(zz, "double", 1, 8)
+ pos <- seek(zz)
+ Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][[paste0("pft", fpft_i)]][[PerPFTFluxType[flux_i]]] <- readBin(zz, "double", 1, 8)
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", paste0("pft", fpft_i), PerPFTFluxType[flux_i])
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- 8
}
}
@@ -864,16 +919,25 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
# double monthly_fluxes_patch[12][NPERPATCHFLUXTYPES];
# maybe read this as a matrix?
n_monthly_fluxes_patch <- 12 * LPJ_GUESS_CONST_INTS$val[LPJ_GUESS_CONST_INTS$var =="PerPatchFluxType"]
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["monthly_fluxes_patch"]] <- readBin(zz, "double", n_monthly_fluxes_patch, 8)
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "monthly_fluxes_patch")
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- 8
# monthly_fluxes_pft read as a vector at once
# double monthly_fluxes_pft[12][NPERPFTFLUXTYPES];
# maybe read this as a matrix?
n_monthly_fluxes_pft <- 12 * LPJ_GUESS_CONST_INTS$val[LPJ_GUESS_CONST_INTS$var =="PerPFTFluxType"]
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["monthly_fluxes_pft"]] <- readBin(zz, "double", n_monthly_fluxes_pft, 8)
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "monthly_fluxes_pft")
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- 8
}else{
- # NOT VEGETATION OR FLUX
+ # NOT VEGETATION OR FLUX.
+ # Patchpft or Soil in this case
streamed_vars <- find_stream_var(file_in = guesscpp_in, line_nos = beg_end)
# NO CROPS, NATURAL VEG
if("*cropphen" %in% streamed_vars) streamed_vars <- streamed_vars[!(streamed_vars == "*cropphen")]
@@ -883,6 +947,12 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][[current_stream_type$name]][[varname]] <- vector("list", num_pft)
}
+ if (current_stream == "soil"){
+ past_stream <- tools::toTitleCase(current_stream)
+ } else{
+ past_stream <- current_stream
+ }
+
# maybe try modifying this bit later to make it a function
for(pft_i in seq_len(num_pft)){
for(sv_i in seq_along(streamed_vars)){
@@ -921,10 +991,17 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
if(current_stream_specs$single){
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Soil"]][["Sompool"]][[current_stream_type$name]][[som_i]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
+
+
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Soil", "Sompool", current_stream_type$name, som_i)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
+
}else{
PEcAn.logger::logger.severe("Historic under sompool.") # Not expecting any
}
@@ -935,16 +1012,24 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
# and read!
if(current_stream_specs$single){ # maybe use current_stream in sublist names to find correct place
- Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][[length( Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]])]][[current_stream_type$name]][[pft_i]] <- readBin(con = zz,
+ pos <- seek(zz)
+ Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][[length(Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]])]][[current_stream_type$name]][[pft_i]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, past_stream, current_stream_type$name, pft_i)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
}else{ # only for historic type?
for(css.i in seq_along(current_stream_specs$what)){ # maybe use current_stream in sublist names to find correct place
- Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][[length( Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]])]][[current_stream_specs$names[css.i]]]<- readBin(con = zz,
+ pos <- seek(zz)
+ Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][[length(Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]])]][[current_stream_specs$names[css.i]]]<- readBin(con = zz,
what = current_stream_specs$what[css.i],
n = current_stream_specs$n[css.i],
size = current_stream_specs$size[css.i])
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, past_stream, current_stream_type$names[css.i], pft_i)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size[css.i]
}
}
}
@@ -958,18 +1043,25 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
# and read!
if(current_stream_specs$single){
-
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][[current_stream_type$name]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, current_stream_type$name)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
}else{ # probably don't need this but let's keep
for(css_i in seq_along(current_stream_specs$what)){
- # CHANGE ALL THESE HISTORIC TYPES SO THAT cirrent_index and full goes together with the variable
+ # CHANGE ALL THESE HISTORIC TYPES SO THAT current_index and full goes together with the variable
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][[current_stream_specs$names[css_i]]] <- readBin(con = zz,
what = current_stream_specs$what[css_i],
n = current_stream_specs$n[css_i],
size = current_stream_specs$size[css_i])
+ key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, current_stream_specs$names[css_i])
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size[css_i]
}
}
}# end if-class within Patch
@@ -988,6 +1080,9 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
Gridcell[["Stand"]][[stnd_i]][[length(Gridcell[["Stand"]][[stnd_i]])+1]] <- list()
names(Gridcell[["Stand"]][[stnd_i]])[length(Gridcell[["Stand"]][[stnd_i]])] <- current_stream_type$name
+ # Save the past stream like Standpft
+ past_stream <- current_stream
+
if(current_stream_type$type == "class"){
# CLASS
@@ -1019,16 +1114,24 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
# and read!
if(current_stream_specs$single){
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][[length(Gridcell[["Stand"]][[stnd_i]])]][[current_stream_type$name]][[pft_i]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
+ key <- make_key("Gridcell", "Stand", stnd_i, past_stream, current_stream_type$name, pft_i)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
}else{
for(css.i in seq_along(current_stream_specs$what)){
+ pos <- seek(zz)
Gridcell[[length(Gridcell)]][[current_stream_type$name]][[pft_i]][[current_stream_specs$names[css.i]]]<- readBin(con = zz,
what = current_stream_specs$what[css.i],
n = current_stream_specs$n[css.i],
size = current_stream_specs$size[css.i])
+ key <- make_key("Gridcell", "Stand", stnd_i, past_stream, current_stream_type$name[css.i])
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size[css.i]
}
}
}
@@ -1040,16 +1143,24 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
# and read!
if(current_stream_specs$single){
+ pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][[current_stream_type$name]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
+ key <- make_key("Gridcell", "Stand", stnd_i, current_stream_type$name)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
}else{ # probably don't need this but let's keep
for(css_i in seq_along(current_stream_specs$what)){
+ pos <- seek(zz)
Gridcell[[length(Gridcell)]][[current_stream_specs$names[css_i]]] <- readBin(con = zz,
what = current_stream_specs$what[css_i],
n = current_stream_specs$n[css_i],
size = current_stream_specs$size[css_i])
+ key <- make_key("Gridcell", "Stand", stnd_i, current_stream_type$name)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size[css_i]
}
}
}# end if-class within Stand
@@ -1062,6 +1173,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
}else{ #not reading in Stand variables
# NOT STAND
+ past_stream <- current_stream
current_stream_type <- find_stream_type(NULL, current_stream, LPJ_GUESS_CLASSES, LPJ_GUESS_TYPES, guessh_in)
@@ -1092,27 +1204,40 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
# and read!
if(current_stream_specs$single){
+ pos <- seek(zz)
Gridcell[[length(Gridcell)]][[current_stream_type$name]][[pft_i]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- }else if(current_stream_specs$name %in% c("hmtemp_20", "hmprec_20", "hmeet_20")){
+ key <- make_key("Gridcell", past_stream, current_stream_type$name, pft_i)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
+
+ }else if(current_stream_type$name %in% c("hmtemp_20", "hmprec_20", "hmeet_20")){
# these three are just too different, maybe extract their names in the beginning
# be careful while writing back to the binary
# Gridcell[[length(Gridcell)]][[current_stream_type$name]] <- readBin(con = zz, double(), 264, 8)
Gridcell[[length(Gridcell)]][[current_stream_type$name]] <- vector("list", length(current_stream_specs) - 2)
for(css.i in seq_len(length(current_stream_specs) - 2)){
+ pos <- seek(zz)
Gridcell[[length(Gridcell)]][[current_stream_type$name]][[css.i]] <- readBin(con = zz,
what = current_stream_specs[[css.i]]$what,
n = current_stream_specs[[css.i]]$n,
size = current_stream_specs[[css.i]]$size)
+ key <- make_key("Gridcell", past_stream, current_stream_type$name, css.i)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs[[css.i]]$size
}
}else{
for(css.i in seq_along(current_stream_specs$what)){
+ pos <- seek(zz)
Gridcell[[length(Gridcell)]][[current_stream_type$name]][[pft_i]][[current_stream_specs$names[css.i]]]<- readBin(con = zz,
what = current_stream_specs$what[css.i],
n = current_stream_specs$n[css.i],
size = current_stream_specs$size[css.i])
+ key <- make_key("Gridcell", past_stream, current_stream_type$name, pft_i, current_stream_specs$names[css.i])
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size[css.i]
}
}
@@ -1124,16 +1249,24 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
current_stream_specs <- find_stream_size(current_stream_type, guessh_in, LPJ_GUESS_TYPES, LPJ_GUESS_CONST_INTS)
# and read!
if(current_stream_specs$single){
+ pos <- seek(zz)
Gridcell[[length(Gridcell)]][[current_stream_type$name]] <- readBin(con = zz,
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- }else{ # probably don't need this but let's keep
+ key <- make_key("Gridcell", past_stream, current_stream_type$name)
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size
+ }else{
for(css_i in seq_along(current_stream_specs$what)){
+ pos <- seek(zz)
Gridcell[[length(Gridcell)]][[current_stream_specs$names[css_i]]] <- readBin(con = zz,
what = current_stream_specs$what[css_i],
n = current_stream_specs$n[css_i],
size = current_stream_specs$size[css_i])
+ key <- make_key("Gridcell", past_stream, current_stream_type$name[css_i])
+ pos_list[[key]] <- pos
+ siz_list[[key]] <- current_stream_specs$size[css_i]
}
}
}# end if-class within Gridcell
@@ -1145,7 +1278,12 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
Gridcell$meta_data <- meta_data
- return(Gridcell)
+ # return(Gridcell)
+ return(list(
+ state = Gridcell,
+ pos_list = pos_list,
+ siz_list = siz_list
+ ))
} # read_binary_LPJGUESS end
diff --git a/models/lpjguess/R/split_inputs.LPJGUESS.R b/models/lpjguess/R/split_inputs.LPJGUESS.R
index 9a598e50e8b..8ba5ef99491 100644
--- a/models/lpjguess/R/split_inputs.LPJGUESS.R
+++ b/models/lpjguess/R/split_inputs.LPJGUESS.R
@@ -14,120 +14,125 @@
##' @importFrom PEcAn.utils days_in_year
##' @export
split_inputs.LPJGUESS <- function(settings, start.time, stop.time, inputs, overwrite = FALSE, outpath = NULL){
-
- #### Lubridate start and end times
- start.day <- lubridate::yday(start.time)
- start.year <- lubridate::year(start.time)
- end.day <- lubridate::yday(stop.time)
- end.year <- lubridate::year(stop.time)
-
- # Whole run period
- run.start <- lubridate::year(settings$run$start.date)
- run.end <- lubridate::year(settings$run$end.date)
-
- #### Get met paths
- met <- inputs
- path <- dirname(met)
- prefix <- substr(basename(met), 1, nchar(basename(met))-16) #assuming we'll always have "PREFIX.1920.2010.tmp"
- if(is.null(outpath)){
- outpath <- path
- }
- if(!dir.exists(outpath)) dir.create(outpath)
-
- var.names <- c("tmp", "pre", "cld")
- long.names <- c("air_temperature",
- "precipitation_flux",
- "surface_downwelling_shortwave_flux_in_air")
-
- # !!! always full years with LPJ-GUESS !!!
- files.in <- file.path(outpath, paste0(prefix, run.start, ".", run.end, ".", var.names, ".nc"))
- files.out <- file.path(outpath, paste0(prefix, start.year, ".", end.year, ".", var.names, ".nc"))
-
- if(file.exists(files.out[1]) & !overwrite){
- return(files.out[1])
- }
-
- ## open netcdf files
- fnc.tmp <- ncdf4::nc_open(files.in[1])
- fnc.pre <- ncdf4::nc_open(files.in[2])
- fnc.cld <- ncdf4::nc_open(files.in[3])
-
- ## read climate data
- nc.tmp <- ncdf4::ncvar_get(fnc.tmp, var.names[1])
- nc.pre <- ncdf4::ncvar_get(fnc.pre, var.names[2])
- nc.cld <- ncdf4::ncvar_get(fnc.cld, var.names[3])
-
- # cut where
- if(start.year == run.start){
- years <- start.year:end.year
- inds <- 1:sum(PEcAn.utils::days_in_year(years))
+ #### If using CRU input, return directly
+ if (grepl("\\.cru(\\.bin)?$", inputs, ignore.case = TRUE)) {
+ PEcAn.logger::logger.info(paste("Input is a CRU file:", inputs, "- returning path directly without splitting."))
+ return(inputs) # Without cropping, use the original file directly
}else{
- ### come back
- }
-
- # split
- nc.tmp <- nc.tmp[1,1,inds]
- nc.pre <- nc.pre[1,1,inds]
- nc.cld <- nc.cld[1,1,inds]
-
- var.list <- list(nc.tmp, nc.pre, nc.cld)
-
- # not that these will be different than "K", "kg m-2 s-1", "W m-2"
- var.units <- c(fnc.tmp$var$tmp$units,
- fnc.pre$var$pre$units,
- fnc.cld$var$cld$units)
-
- # get other stuff to be written to ncdf
-
- ## retrieve lat/lon
- lon <- ncdf4::ncvar_get(fnc.tmp, "lon")
- lat <- ncdf4::ncvar_get(fnc.tmp, "lat")
-
- # write back
- ## write climate data define dimensions
-
- latdim <- ncdf4::ncdim_def(name = "lat", "degrees_north", as.double(lat))
- londim <- ncdf4::ncdim_def(name = "lon", "degrees_east", as.double(lon))
- timedim <- ncdf4::ncdim_def("time", paste0("days since ", start.year - 1, "-12-31", sep = ""), as.double(c(1:length(nc.tmp))))
-
- fillvalue <- 9.96920996838687e+36
-
- for (n in seq_along(var.names)) {
- # define variable
- var.def <- ncdf4::ncvar_def(name = var.names[n],
- units = var.units[n],
- dim = (list(londim, latdim, timedim)),
- fillvalue, long.names[n],
- verbose = FALSE,
- prec = "float")
-
- # create netCD file for LPJ-GUESS
- ncfile <- ncdf4::nc_create(files.out[[n]], vars = var.def, force_v4 = TRUE)
-
-
- # put variable, rep(...,each=4) is a hack to write the same data for all grids (which all are the
- # same)
- ncdf4::ncvar_put(ncfile, var.def, rep(var.list[[n]], each = 4))
-
-
- # additional attributes for LPJ-GUESS
- ncdf4::ncatt_put(nc = ncfile, varid = var.names[n], attname = "standard_name", long.names[n])
-
- ncdf4::ncatt_put(nc = ncfile, varid = "lon", attname = "axis", "X")
- ncdf4::ncatt_put(nc = ncfile, varid = "lon", attname = "standard_name", "longitude")
-
- ncdf4::ncatt_put(nc = ncfile, varid = "lat", attname = "axis", "Y")
- ncdf4::ncatt_put(nc = ncfile, varid = "lat", attname = "standard_name", "latitude")
-
- ncdf4::ncatt_put(nc = ncfile, varid = "time", attname = "calendar", "gregorian")
-
- ncdf4::nc_close(ncfile)
+ #### Lubridate start and end times
+ start.day <- lubridate::yday(start.time)
+ start.year <- lubridate::year(start.time)
+ end.day <- lubridate::yday(stop.time)
+ end.year <- lubridate::year(stop.time)
+
+ # Whole run period
+ run.start <- lubridate::year(settings$run$start.date)
+ run.end <- lubridate::year(settings$run$end.date)
+
+ #### Get met paths
+ met <- inputs
+ path <- dirname(met)
+ prefix <- substr(basename(met), 1, nchar(basename(met))-16) #assuming we'll always have "PREFIX.1920.2010.tmp"
+ if(is.null(outpath)){
+ outpath <- path
+ }
+ if(!dir.exists(outpath)) dir.create(outpath)
+
+ var.names <- c("tmp", "pre", "cld")
+ long.names <- c("air_temperature",
+ "precipitation_flux",
+ "surface_downwelling_shortwave_flux_in_air")
+
+ # !!! always full years with LPJ-GUESS !!!
+ files.in <- file.path(outpath, paste0(prefix, run.start, ".", run.end, ".", var.names, ".nc"))
+ files.out <- file.path(outpath, paste0(prefix, start.year, ".", end.year, ".", var.names, ".nc"))
+
+ if(file.exists(files.out[1]) & !overwrite){
+ return(files.out[1])
+ }
+
+ ## open netcdf files
+ fnc.tmp <- ncdf4::nc_open(files.in[1])
+ fnc.pre <- ncdf4::nc_open(files.in[2])
+ fnc.cld <- ncdf4::nc_open(files.in[3])
+
+ ## read climate data
+ nc.tmp <- ncdf4::ncvar_get(fnc.tmp, var.names[1])
+ nc.pre <- ncdf4::ncvar_get(fnc.pre, var.names[2])
+ nc.cld <- ncdf4::ncvar_get(fnc.cld, var.names[3])
+
+ # cut where
+ if(start.year == run.start){
+ years <- start.year:end.year
+ inds <- 1:sum(PEcAn.utils::days_in_year(years))
+ }else{
+ ### come back
+ }
+
+ # split
+ nc.tmp <- nc.tmp[1,1,inds]
+ nc.pre <- nc.pre[1,1,inds]
+ nc.cld <- nc.cld[1,1,inds]
+
+ var.list <- list(nc.tmp, nc.pre, nc.cld)
+
+ # not that these will be different than "K", "kg m-2 s-1", "W m-2"
+ var.units <- c(fnc.tmp$var$tmp$units,
+ fnc.pre$var$pre$units,
+ fnc.cld$var$cld$units)
+
+ # get other stuff to be written to ncdf
+
+ ## retrieve lat/lon
+ lon <- ncdf4::ncvar_get(fnc.tmp, "lon")
+ lat <- ncdf4::ncvar_get(fnc.tmp, "lat")
+
+ # write back
+ ## write climate data define dimensions
+
+ latdim <- ncdf4::ncdim_def(name = "lat", "degrees_north", as.double(lat))
+ londim <- ncdf4::ncdim_def(name = "lon", "degrees_east", as.double(lon))
+ timedim <- ncdf4::ncdim_def("time", paste0("days since ", start.year - 1, "-12-31", sep = ""), as.double(c(1:length(nc.tmp))))
+
+ fillvalue <- 9.96920996838687e+36
+
+ for (n in seq_along(var.names)) {
+ # define variable
+ var.def <- ncdf4::ncvar_def(name = var.names[n],
+ units = var.units[n],
+ dim = (list(londim, latdim, timedim)),
+ fillvalue, long.names[n],
+ verbose = FALSE,
+ prec = "float")
+
+ # create netCD file for LPJ-GUESS
+ ncfile <- ncdf4::nc_create(files.out[[n]], vars = var.def, force_v4 = TRUE)
+
+
+ # put variable, rep(...,each=4) is a hack to write the same data for all grids (which all are the
+ # same)
+ ncdf4::ncvar_put(ncfile, var.def, rep(var.list[[n]], each = 4))
+
+
+ # additional attributes for LPJ-GUESS
+ ncdf4::ncatt_put(nc = ncfile, varid = var.names[n], attname = "standard_name", long.names[n])
+
+ ncdf4::ncatt_put(nc = ncfile, varid = "lon", attname = "axis", "X")
+ ncdf4::ncatt_put(nc = ncfile, varid = "lon", attname = "standard_name", "longitude")
+
+ ncdf4::ncatt_put(nc = ncfile, varid = "lat", attname = "axis", "Y")
+ ncdf4::ncatt_put(nc = ncfile, varid = "lat", attname = "standard_name", "latitude")
+
+ ncdf4::ncatt_put(nc = ncfile, varid = "time", attname = "calendar", "gregorian")
+
+ ncdf4::nc_close(ncfile)
+ }
+
+ # close nc
+ ncdf4::nc_close(fnc.tmp)
+ ncdf4::nc_close(fnc.pre)
+ ncdf4::nc_close(fnc.cld)
+
+ return(files.out[1])
}
-
- # close nc
- ncdf4::nc_close(fnc.tmp)
- ncdf4::nc_close(fnc.pre)
- ncdf4::nc_close(fnc.cld)
-
- return(files.out[1])
} # split_inputs.LPJGUESS
\ No newline at end of file
diff --git a/models/lpjguess/R/write.config.LPJGUESS.R b/models/lpjguess/R/write.config.LPJGUESS.R
index 6f2ea4f5d7f..f1828a22665 100644
--- a/models/lpjguess/R/write.config.LPJGUESS.R
+++ b/models/lpjguess/R/write.config.LPJGUESS.R
@@ -14,7 +14,6 @@
##' @export
##' @author Istem Fer, Tony Gardella
write.config.LPJGUESS <- function(defaults, trait.values, settings, run.id, restart = NULL) {
-
# find out where to write run/ouput
rundir <- file.path(settings$host$rundir, run.id)
if (!file.exists(rundir)) {
@@ -194,16 +193,20 @@ write.insfile.LPJGUESS <- function(settings, trait.values, rundir, outdir, run.i
paramsins <- paramsins[-pftindx]
paramsins <- c(paramsins, unlist(write2pftblock))
-
- # write clim file names
-
- tmp.file <- settings$run$inputs$met$path
- pre.file <- gsub(".tmp.nc", ".pre.nc", tmp.file)
- cld.file <- gsub(".tmp.nc", ".cld.nc", tmp.file)
-
- guessins <- gsub("@TEMP_FILE@", tmp.file, guessins)
- guessins <- gsub("@PREC_FILE@", pre.file, guessins)
- guessins <- gsub("@INSOL_FILE@", cld.file, guessins)
+ # # Past version: write clim file names (cf input)
+ # tmp.file <- settings$run$inputs$met$path
+ # pre.file <- gsub(".tmp.nc", ".pre.nc", tmp.file)
+ # cld.file <- gsub(".tmp.nc", ".cld.nc", tmp.file)
+ #
+ # guessins <- gsub("@TEMP_FILE@", tmp.file, guessins)
+ # guessins <- gsub("@PREC_FILE@", pre.file, guessins)
+ # guessins <- gsub("@INSOL_FILE@", cld.file, guessins)
+
+ # when using cru input, lpjguess will not use these clim files
+ cru.file <- settings$run$inputs$met$path
+ misc.file <- sub("\\.bin$", "misc.bin", cru.file)
+ guessins <- gsub("@SOIL_FILE@", cru.file, guessins)
+ guessins <- gsub("@MISC_FILE@", misc.file, guessins)
# create and write CO2 file
start.year <- lubridate::year(settings$run$start.date)
@@ -232,16 +235,20 @@ write.insfile.LPJGUESS <- function(settings, trait.values, rundir, outdir, run.i
utils::write.table(CO2, file = co2.file, row.names = FALSE, col.names = FALSE, sep = "\t", eol = "\n")
guessins <- gsub("@CO2_FILE@", co2.file, guessins)
- # write soil file path
- soil.file <- settings$run$inputs$soil$path
- guessins <- gsub("@SOIL_FILE@", soil.file, guessins)
+ # # write soil file path
+ # # when using cru input, it's also climate file
+ # soil.file <- settings$run$inputs$soil$path
+ # misc.file <- sub("\\.bin$", "misc.bin", soil.file)
+ # guessins <- gsub("@SOIL_FILE@", soil.file, guessins)
+ # guessins <- gsub("@MISC_FILE@", misc.file, guessins)
settings$model$insfile <- file.path(settings$rundir, run.id, "guess.ins")
# version check
if(!is.null(settings$model$revision)){
if(settings$model$revision == "PalEON"){
- rm_inds <- which(grepl("@@@@@ Remove in PalEON version @@@@@", paramsins))
+ #rm_inds <- which(grepl("@@@@@ Remove in PalEON version @@@@@", paramsins))
+ rm_inds <- which(grepl("##### Remove in PalEON version #####", paramsins))
paramsins <- paramsins[-(rm_inds[1]:rm_inds[2])]
}
}
diff --git a/models/lpjguess/R/write_restart.LPJGUESS.R b/models/lpjguess/R/write_restart.LPJGUESS.R
new file mode 100644
index 00000000000..983f797ad9b
--- /dev/null
+++ b/models/lpjguess/R/write_restart.LPJGUESS.R
@@ -0,0 +1,115 @@
+##' write_restart.LPJGUESS
+##'
+##' Write restart files for LPJGUESS
+##' new.state includes X (AGB.pft) from Analysis
+##' new.params includes LPJGUESS_state
+##'
+##' @param outdir output directory
+##' @param runid run ID
+##' @param start.time start date and time for each SDA ensemble
+##' @param stop.time stop date and time for each SDA ensemble
+##' @param settings PEcAn settings object
+##' @param new.state analysis state vector
+##' @param RENAME flag to either rename output file or not
+##' @param new.params list of parameters to convert between different states
+##' @param inputs list of model inputs to use in write.configs.SIPNET
+##' @param verbose decide if we want to print the runid
+##'
+##' @return NONE
+##'
+##' @importFrom dplyr %>%
+##' @export
+##' @author Yinghao Sun
+write_restart.LPJGUESS <- function(outdir, runid,
+ start.time, stop.time, settings,
+ new.state, RENAME = TRUE,
+ new.params, inputs = NULL, verbose = FALSE){
+
+ rundir <- settings$host$rundir
+ variables <- colnames(new.state)
+
+ ## ---- Rename old output, remove old clim ----
+ if (RENAME) {
+ file.rename(file.path(outdir, runid, "lpjguess.out"),
+ file.path(outdir, runid, paste0("lpjguess.", as.Date(start.time), ".out")))
+ system(paste("rm", file.path(rundir, runid, "lpjguess.clim")))
+ } else {
+ print(paste("Files not renamed -- Need to rerun timestep", start.time, "before next time step"))
+ }
+
+ settings$run$start.date <- start.time
+ settings$run$end.date <- stop.time
+
+ ## ---- Pull old state ----
+ if (is.null(new.params$LPJGUESS_state))
+ PEcAn.logger::logger.severe("LPJGUESS_state missing in new.params")
+ # new.params$LPJGUESS_state include state, pos_list, siz_list
+ Gridcell <- new.params$LPJGUESS_state$state
+ pos_list <- new.params$LPJGUESS_state$pos_list
+ siz_list <- new.params$LPJGUESS_state$siz_list
+
+
+
+ ## ---- Build PFT parameter table from new.params ----
+ # TODO: find accurate parameters; read params from settings
+ pft_par_table <- data.frame()
+ # PFTs <- c("Ace_rub","Bet_all","Fag_gra","Que_rub","Tsu_can")
+ PFTs <- names(new.params)
+ for(PFT in PFTs) {
+ this.param.row <- c()
+ this.param.row["sla"] <- new.params[[PFT]]$SLA
+ this.param.row["k_latosa"] <- new.params[[PFT]]$sapwood_ratio
+ this.param.row["wooddens"] <- 200 #kg/m-3
+ # this.param.row["wooddens"] <- 0.2 #g/cm-3
+ this.param.row["lifeform"] <- 1
+ this.param.row["k_rp"] <- 1.6
+ this.param.row["k_allom1"] <- 250
+ this.param.row["k_allom2"] <- 60
+ this.param.row["k_allom3"] <- 0.67
+ this.param.row["crownarea_max"] <- 50
+ # conifer special case
+ if(PFT == "Tsu_can") {
+ this.param.row["k_allom1"] <- 150
+ }
+ pft_par_table <- rbind(pft_par_table , this.param.row)
+ }
+ names(pft_par_table) <- c("sla", "k_latosa", "wooddens", "lifeform", "k_rp", "k_allom1", "k_allom2", "k_allom3", "crownarea_max")
+ rownames(pft_par_table) <- PFTs
+
+ ## --- Build initial & target AGB vectors (kg m-2) ---
+ agb.init <- calculateGridcellVariablePerPFT(Gridcell, "AbvGrndWood", min.diam=min.diam, pft.params=pft_par_table)
+ if (any(grepl("^AGB.pft", variables))) { # column names were set in read.restart
+ agb.targ <- PEcAn.utils::ud_convert(
+ unlist(new.state[, grepl("^AGB.pft", variables), drop=TRUE]),
+ "Mg/ha","kg/m^2")
+ }
+
+ ### dens will not change because we wont do dens SDA temporarily
+ dens.init <- calculateGridcellVariablePerPFT(Gridcell, "densindiv", min.diam=min.diam, pft.params=pft_par_table)
+ dens.targ <- dens.init
+
+ ## --- Update state ---
+ # choose a minimum diameter
+ min.diam = 0.5
+ Gridcell_updated <- update_state_LPJGUESS(Gridcell, pft_par_table,
+ dens.init, dens.targ,
+ agb.init, agb.targ,
+ AbvGrndWood.epsilon = 0.05,
+ trace = FALSE, min.diam)
+
+ State_updated <- list(state = Gridcell_updated,
+ pos_list = pos_list,
+ siz_list = siz_list)
+
+ write_binary_LPJGUESS(State_updated, file.path(outdir, runid))
+
+ ## --- Regenerate config for next run ---
+ do.call(write.config.LPJGUESS,
+ list(defaults = NULL,
+ trait.values = new.params,
+ settings = settings,
+ run.id = runid)
+ )
+
+ if(verbose) PEcAn.logger::logger.info("restart written for", runid)
+}
diff --git a/models/lpjguess/R/write_state.R b/models/lpjguess/R/write_state.R
new file mode 100644
index 00000000000..a65c27e58bf
--- /dev/null
+++ b/models/lpjguess/R/write_state.R
@@ -0,0 +1,103 @@
+#' Extract nested value from a state list using flat key
+#'
+#' @param state A nested list (usually the model.state$state)
+#' @param key A flat string like "Gridcell/Stand/1/Patch/1/Vegetation/Individuals/3/cmass_leaf"
+#' @return The value stored at that nested position
+#' @keywords internal
+#' @author Yinghao Sun
+extract_from_state_by_key <- function(state, key) {
+ # Optional: remove "Gridcell/" prefix
+ key <- sub("^Gridcell/", "", key)
+
+ parts <- strsplit(key, "/")[[1]]
+ val <- state
+
+ for (p in parts) {
+ if (is.null(val)) {
+ warning("NULL reached prematurely at: ", p)
+ return(NULL)
+ }
+
+ # Case 1: numeric index
+ if (grepl("^[0-9]+$", p)) {
+ idx <- as.integer(p)
+ if (idx > length(val)) {
+ warning("Index out of bounds: ", idx)
+ return(NULL)
+ }
+ val <- val[[idx]]
+
+ # Case 2: named element (case-insensitive match)
+ } else {
+ val_names <- names(val)
+ match_idx <- which(tolower(val_names) == tolower(p))
+
+ if (length(match_idx) == 0) {
+ warning("Name not found (case-insensitive): ", p)
+ return(NULL)
+ }
+
+ val <- val[[match_idx[1]]] # use first match
+ }
+ }
+
+ return(val)
+}
+
+
+#' Write updated variables into a copy of the original LPJ-GUESS .state file
+#'
+#' @param State_updated A list containing updated state variables, position list and size list (get from read_binary)
+#' @param outdir Path to a directory containing the `0.state` and `meta.bin` files.
+#'
+#' @return No return value. Writes files to disk as side effect.
+#' @author Yinghao Sun
+#' @export
+write_binary_LPJGUESS <- function(State_updated, outdir) {
+
+ # Build full paths to source files
+ src_state <- file.path(outdir, "0.state")
+ meta_file <- file.path(outdir, "meta.bin")
+
+ # back-up
+ bak_state <- file.path(outdir, "bak.state")
+ file.copy(src_state, bak_state, overwrite = TRUE)
+
+ # a copy to the temporary file
+ new_state <- file.path(outdir, "new.state")
+ file.copy(src_state, new_state, overwrite = TRUE)
+
+ # # Ensure output directory exists
+ # dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
+ #
+ # # Copy template files to output directory so we don't overwrite it
+ # file.copy(c(meta_file, original_state), to = output_dir, overwrite = TRUE)
+ #
+ # # Open copied 0.state file for binary modification
+ # state_path <- file.path(outdir, "0.state")
+ # con <- file(state_path, open = "r+b")
+
+ # Open temporary new.state file for binary modification
+ con <- file(new_state, open = "r+b")
+
+ # A named list of byte positions for each variable (generated during reading)
+ pos_list <- State_updated$pos_list
+ # A named list of writeBin sizes for each variable (same keys as pos_list)
+ siz_list <- State_updated$siz_list
+
+ # Loop over all keys
+ for (key in names(pos_list)) {
+ value <- extract_from_state_by_key(State_updated$state, key)
+ pos <- pos_list[[key]]
+ size <- siz_list[[key]]
+
+ # Seek and write
+ seek(con, where = pos, origin = "start")
+ writeBin(object = value, con = con, size = size)
+ }
+
+ close(con)
+
+ # Atomic substitution
+ file.rename(new_state, src_state) # After success, bak is still there and can be manually deleted
+}
diff --git a/models/lpjguess/inst/pecan.ins b/models/lpjguess/inst/pecan.ins
index d8d52274477..8619eb1cad5 100755
--- a/models/lpjguess/inst/pecan.ins
+++ b/models/lpjguess/inst/pecan.ins
@@ -73,7 +73,7 @@ title 'LPJ-GUESS cohort mode - global pfts'
vegmode "cohort" ! "cohort", "individual" or "population"
nyear_spinup 500 ! number of years to spin up the simulation for
-spinup_lifeform "nolifeform"
+! spinup_lifeform "tree"
ifcalcsla 0 ! whether to calculate SLA from leaf longevity
! (PFT-specific value can be specified in this file instead)
ifcalccton 1 ! whether to calculate leaf C:N min from leaf longevity
@@ -84,7 +84,7 @@ patcharea 1000 ! patch area (m2)
estinterval 5 ! years between establishment events in cohort mode
ifdisturb 1 ! whether generic patch-destroying disturbances enabled
distinterval 500 ! average return time for generic patch-destroying disturbances
-disturb_year -1
+! disturb_year -1
ifbgestab 1 ! whether background establishment enabled
ifsme 1 ! whether spatial mass effect enabled
ifstochestab 1 ! whether establishment stochastic
@@ -133,7 +133,7 @@ run_pasture 0 ! whether to simulate pasture (1) or not (0)
run_natural 1 ! whether to simulate natural vegetation (1) or not (0)
run_peatland 1 ! whether to simulate peatland (1) or not (0)
-@@@@@ Remove in PalEON version @@@@@
+! Remove in PalEON version
lcfrac_fixed 0 ! use landcover fractions (%) below (1) or read from input file (0)
lc_fixed_urban 0 ! URBAN
lc_fixed_cropland 50 ! CROPLAND
@@ -142,7 +142,7 @@ lc_fixed_forest 0 ! FOREST
lc_fixed_natural 25 ! NATURAL
lc_fixed_peatland 25 ! PEATLAND
equal_landcover_area 0 ! divide gridcell into equal active landcover fractions
-@@@@@ Remove in PalEON version @@@@@
+! Remove in PalEON version
!///////////////////////////////////////////////////////////////////////////////////////
diff --git a/models/lpjguess/inst/template.ins b/models/lpjguess/inst/template.ins
index aa5520e66c4..42fb17e7d88 100755
--- a/models/lpjguess/inst/template.ins
+++ b/models/lpjguess/inst/template.ins
@@ -14,10 +14,11 @@ coordinates_precision 2
! Forcing Data & gridlists
!
-param "file_gridlist_cf" (str "@GRID_FILE@")
+param "file_gridlist" (str "@GRID_FILE@")
param "file_co2" (str "@CO2_FILE@")
param "file_cru" (str "@SOIL_FILE@")
+param "file_cru_misc" (str "@MISC_FILE@")
! N deposition (blank string to use constant pre-industrial level of 2 kgN/ha/year)
param "file_ndep" (str "")
diff --git a/models/lpjguess/man/extract_from_state_by_key.Rd b/models/lpjguess/man/extract_from_state_by_key.Rd
new file mode 100644
index 00000000000..4d72aface7a
--- /dev/null
+++ b/models/lpjguess/man/extract_from_state_by_key.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/write_state.R
+\name{extract_from_state_by_key}
+\alias{extract_from_state_by_key}
+\title{Extract nested value from a state list using flat key}
+\usage{
+extract_from_state_by_key(state, key)
+}
+\arguments{
+\item{state}{A nested list (usually the model.state$state)}
+
+\item{key}{A flat string like "Gridcell/Stand/1/Patch/1/Vegetation/Individuals/3/cmass_leaf"}
+}
+\value{
+The value stored at that nested position
+}
+\description{
+Extract nested value from a state list using flat key
+}
+\author{
+Yinghao Sun
+}
+\keyword{internal}
diff --git a/models/lpjguess/man/find_closing.Rd b/models/lpjguess/man/find_closing.Rd
index dd24834b211..daefc1af76d 100644
--- a/models/lpjguess/man/find_closing.Rd
+++ b/models/lpjguess/man/find_closing.Rd
@@ -21,3 +21,4 @@ A numeric value indicating the line number of the matching closing bracket.
\description{
Identifies the line number of the matching closing bracket for a given opening bracket.
}
+\keyword{internal}
diff --git a/models/lpjguess/man/find_stream_size.Rd b/models/lpjguess/man/find_stream_size.Rd
index bc57bf17949..2aebe539c95 100644
--- a/models/lpjguess/man/find_stream_size.Rd
+++ b/models/lpjguess/man/find_stream_size.Rd
@@ -26,3 +26,4 @@ A numeric value representing the size (number of streamed variables).
\description{
Determines the size (number of variables) in a stream based on the file content.
}
+\keyword{internal}
diff --git a/models/lpjguess/man/find_stream_type.Rd b/models/lpjguess/man/find_stream_type.Rd
index d89e8c40136..df03f662ef2 100644
--- a/models/lpjguess/man/find_stream_type.Rd
+++ b/models/lpjguess/man/find_stream_type.Rd
@@ -29,3 +29,4 @@ A character string indicating the stream type.
\description{
Determines the type of a given stream variable in an LPJ-GUESS file.
}
+\keyword{internal}
diff --git a/models/lpjguess/man/find_stream_var.Rd b/models/lpjguess/man/find_stream_var.Rd
index 2de82ee1895..95c672f2c88 100644
--- a/models/lpjguess/man/find_stream_var.Rd
+++ b/models/lpjguess/man/find_stream_var.Rd
@@ -17,3 +17,4 @@ A character vector of streamed variable names.
\description{
A helper function that lists streamed variables. It returns the names of streamed variables.
}
+\keyword{internal}
diff --git a/models/lpjguess/man/make_key.Rd b/models/lpjguess/man/make_key.Rd
new file mode 100644
index 00000000000..251929251c6
--- /dev/null
+++ b/models/lpjguess/man/make_key.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_state.R
+\name{make_key}
+\alias{make_key}
+\title{Create a flat key from a nested path}
+\usage{
+make_key(...)
+}
+\arguments{
+\item{...}{Parts of a nested list path (e.g., "Gridcell", "Stand", 1, "Patch", 2)}
+}
+\value{
+A single string key like "Gridcell/Stand/1/Patch/2"
+}
+\description{
+Create a flat key from a nested path
+}
+\keyword{internal}
diff --git a/models/lpjguess/man/read_binary_LPJGUESS.Rd b/models/lpjguess/man/read_binary_LPJGUESS.Rd
index 5ff044916b9..81a68fa47e8 100644
--- a/models/lpjguess/man/read_binary_LPJGUESS.Rd
+++ b/models/lpjguess/man/read_binary_LPJGUESS.Rd
@@ -7,7 +7,7 @@
read_binary_LPJGUESS(outdir, version = "PalEON")
}
\arguments{
-\item{outdir}{A character string specifying the output directory containing the binary state files.}
+\item{outdir}{The output directory where ".state" and "meta.bin" will be written}
\item{version}{A character string specifying the LPJ-GUESS version (default is "PalEON").}
}
@@ -17,3 +17,6 @@ A matrix or list containing the extracted data.
\description{
Reads a binary file formatted for LPJ-GUESS and extracts relevant data.
}
+\author{
+Istem Fer, Yinghao Sun
+}
diff --git a/models/lpjguess/man/read_restart.LPJGUESS.Rd b/models/lpjguess/man/read_restart.LPJGUESS.Rd
new file mode 100644
index 00000000000..5db188f2573
--- /dev/null
+++ b/models/lpjguess/man/read_restart.LPJGUESS.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_restart.LPJGUESS.R
+\name{read_restart.LPJGUESS}
+\alias{read_restart.LPJGUESS}
+\title{Read Restart for LPJGUESS}
+\usage{
+read_restart.LPJGUESS(outdir, runid, stop.time, settings, var.names, params)
+}
+\arguments{
+\item{outdir}{output directory}
+
+\item{runid}{run ID}
+
+\item{stop.time}{year that is being read}
+
+\item{settings}{PEcAn settings object}
+
+\item{var.names}{var.names to be extracted}
+
+\item{params}{passed on to return value}
+}
+\value{
+X_tmp vector of forecasts
+}
+\description{
+Read Restart for LPJGUESS
+}
+\examples{
+# example code
+outdir = "/fs/data2/output//PEcAn_1000010473/out"
+runid = 1002656839
+stop.time = "1960-12-31 23:59:59 UTC"
+load("/fs/data2/output/PEcAn_1000010473/SDAsettings_develop.Rdata")
+var.names = c("AGB.pft", "TotSoilCarb")
+load("/fs/data2/output/PEcAn_1000010473/SDAparams_develop.Rdata")
+}
+\author{
+Istem Fer, Yinghao Sun
+}
diff --git a/models/lpjguess/man/serialize_starts_ends.Rd b/models/lpjguess/man/serialize_starts_ends.Rd
index 5c743458b55..3b3bfa242fd 100644
--- a/models/lpjguess/man/serialize_starts_ends.Rd
+++ b/models/lpjguess/man/serialize_starts_ends.Rd
@@ -17,3 +17,4 @@ A numeric vector of length 2, giving the start and end line numbers.
\description{
Finds the start and end lines for serialization.
}
+\keyword{internal}
diff --git a/models/lpjguess/man/write_binary_LPJGUESS.Rd b/models/lpjguess/man/write_binary_LPJGUESS.Rd
new file mode 100644
index 00000000000..e6718a5155e
--- /dev/null
+++ b/models/lpjguess/man/write_binary_LPJGUESS.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/write_state.R
+\name{write_binary_LPJGUESS}
+\alias{write_binary_LPJGUESS}
+\title{Write updated variables into a copy of the original LPJ-GUESS .state file}
+\usage{
+write_binary_LPJGUESS(State_updated, outdir)
+}
+\arguments{
+\item{State_updated}{A list containing updated state variables, position list and size list (get from read_binary)}
+
+\item{outdir}{Path to a directory containing the `0.state` and `meta.bin` files.}
+}
+\value{
+No return value. Writes files to disk as side effect.
+}
+\description{
+Write updated variables into a copy of the original LPJ-GUESS .state file
+}
+\author{
+Yinghao Sun
+}
diff --git a/models/lpjguess/man/write_restart.LPJGUESS.Rd b/models/lpjguess/man/write_restart.LPJGUESS.Rd
new file mode 100644
index 00000000000..2b06018b468
--- /dev/null
+++ b/models/lpjguess/man/write_restart.LPJGUESS.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/write_restart.LPJGUESS.R
+\name{write_restart.LPJGUESS}
+\alias{write_restart.LPJGUESS}
+\title{write_restart.LPJGUESS}
+\usage{
+write_restart.LPJGUESS(
+ outdir,
+ runid,
+ start.time,
+ stop.time,
+ settings,
+ new.state,
+ RENAME = TRUE,
+ new.params,
+ inputs = NULL,
+ verbose = FALSE
+)
+}
+\arguments{
+\item{outdir}{output directory}
+
+\item{runid}{run ID}
+
+\item{start.time}{start date and time for each SDA ensemble}
+
+\item{stop.time}{stop date and time for each SDA ensemble}
+
+\item{settings}{PEcAn settings object}
+
+\item{new.state}{analysis state vector}
+
+\item{RENAME}{flag to either rename output file or not}
+
+\item{new.params}{list of parameters to convert between different states}
+
+\item{inputs}{list of model inputs to use in write.configs.SIPNET}
+
+\item{verbose}{decide if we want to print the runid}
+}
+\value{
+NONE
+}
+\description{
+Write restart files for LPJGUESS
+new.state includes X (AGB.pft) from Analysis
+new.params includes LPJGUESS_state
+}
+\author{
+Yinghao Sun
+}
From d16fd199fe17a5d6408d9522478a7ad5b47c9d76 Mon Sep 17 00:00:00 2001
From: Blesson
Date: Fri, 30 May 2025 15:28:08 +0530
Subject: [PATCH 0133/1193] updated pecan package dependencies
---
docker/depends/pecan_package_dependencies.csv | 1 +
modules/uncertainty/DESCRIPTION | 1 +
2 files changed, 2 insertions(+)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index bcc4a0ff8e2..01e80a9433e 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -209,6 +209,7 @@
"mockery","*","base/workflow","Suggests",FALSE
"mockery","*","modules/data.atmosphere","Suggests",FALSE
"mockery","*","modules/meta.analysis","Suggests",FALSE
+"mockery","*","modules/uncertainty","Suggests",FALSE
"mockery",">= 0.3.0","models/biocro","Suggests",FALSE
"mockery",">= 0.4.3","base/db","Suggests",FALSE
"MODISTools",">= 1.1.0","modules/data.remote","Imports",FALSE
diff --git a/modules/uncertainty/DESCRIPTION b/modules/uncertainty/DESCRIPTION
index d0f4511ada8..f8a3d8193be 100644
--- a/modules/uncertainty/DESCRIPTION
+++ b/modules/uncertainty/DESCRIPTION
@@ -42,6 +42,7 @@ Imports:
rlang
Suggests:
testthat (>= 1.0.2),
+ mockery
License: BSD_3_clause + file LICENSE
Copyright: Authors
LazyLoad: yes
From 8b523a918248754cd0dd9aa7c0cf8ff20dde5d5d Mon Sep 17 00:00:00 2001
From: Blesson
Date: Fri, 30 May 2025 15:29:06 +0530
Subject: [PATCH 0134/1193] removed lib imports
---
modules/uncertainty/tests/testthat/test_ensemble.R | 5 -----
1 file changed, 5 deletions(-)
diff --git a/modules/uncertainty/tests/testthat/test_ensemble.R b/modules/uncertainty/tests/testthat/test_ensemble.R
index 65d49e59dad..68de5eb1ad0 100644
--- a/modules/uncertainty/tests/testthat/test_ensemble.R
+++ b/modules/uncertainty/tests/testthat/test_ensemble.R
@@ -1,8 +1,3 @@
-library(testthat)
-library(mockery)
-
-# Source the ensemble config function
-
context("input validation for write.ensemble.configs")
# Mock a model write.configs function to avoid model-specific errors
From 6a73c752e5b42b6a118021125d5752b1a2f07cc9 Mon Sep 17 00:00:00 2001
From: Blesson
Date: Fri, 30 May 2025 16:44:32 +0530
Subject: [PATCH 0135/1193] Revert "Improve pkgdown build process and fix
documentation generation"
This reverts commit 70266822d9e786d584b07f0e7652363619443e00.
---
.github/workflows/book.yml | 4 ----
docker/base/Dockerfile | 3 ---
2 files changed, 7 deletions(-)
diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml
index 4c849a2aa4d..ebd1b11cf64 100644
--- a/.github/workflows/book.yml
+++ b/.github/workflows/book.yml
@@ -29,10 +29,6 @@ jobs:
Rscript \
-e 'repos <- c(getOption("repos"), sub(r"(\d{4}-\d{2}-\d{2})", "latest", getOption("repos")))' \
-e 'remotes::install_version("bookdown", ">= 0.31", dependencies = TRUE, upgrade = FALSE, repos = repos)'
- # generate package documentation
- - name: Generate Package Documentation
- working-directory: ./
- run: make pkgdocs
# copy files
- name: copy extfiles
run: |
diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile
index 1497ab0fdee..19a2692ef2d 100644
--- a/docker/base/Dockerfile
+++ b/docker/base/Dockerfile
@@ -3,9 +3,6 @@ ARG IMAGE_VERSION="latest"
ARG PARENT_IMAGE="pecan/depends"
FROM ${PARENT_IMAGE}:${IMAGE_VERSION}
-# Install R package for pkgdown
-RUN R -e "install.packages(c('pkgdown', 'remotes'))"
-
# ----------------------------------------------------------------------
# PEcAn version information
# ----------------------------------------------------------------------
From 97c11f7584c3e02cb6e28128c16817fb43a03f03 Mon Sep 17 00:00:00 2001
From: Yinghao Sun
Date: Fri, 30 May 2025 21:25:19 +0800
Subject: [PATCH 0136/1193] Delete dplyr %>% in write_restart.LPJGUESS
---
models/lpjguess/R/write_restart.LPJGUESS.R | 1 -
1 file changed, 1 deletion(-)
diff --git a/models/lpjguess/R/write_restart.LPJGUESS.R b/models/lpjguess/R/write_restart.LPJGUESS.R
index 983f797ad9b..8df5f70548a 100644
--- a/models/lpjguess/R/write_restart.LPJGUESS.R
+++ b/models/lpjguess/R/write_restart.LPJGUESS.R
@@ -17,7 +17,6 @@
##'
##' @return NONE
##'
-##' @importFrom dplyr %>%
##' @export
##' @author Yinghao Sun
write_restart.LPJGUESS <- function(outdir, runid,
From a7b4a8618d136f9b17406214b19afccb8a450070 Mon Sep 17 00:00:00 2001
From: Katherine Rein
Date: Fri, 30 May 2025 10:36:22 -0400
Subject: [PATCH 0137/1193] Added to dos for next person
---
modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py | 2 +-
modules/data.remote/inst/Python/README.txt | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py b/modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py
index 0868e22db3e..819a56ae104 100644
--- a/modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py
+++ b/modules/data.remote/inst/Python/CCMMF_Irrigation_GEEvAPI.py
@@ -97,7 +97,7 @@
df['precip_GEE'] = df['precip_GEE'] * 0.1
# Add changed units to data dict
- merged_data_dict[key] = df
+ #merged_data_dict[key] = df
# Aggregate by week
# Sum irrigation
diff --git a/modules/data.remote/inst/Python/README.txt b/modules/data.remote/inst/Python/README.txt
index ba115a0dc95..6af839449e4 100644
--- a/modules/data.remote/inst/Python/README.txt
+++ b/modules/data.remote/inst/Python/README.txt
@@ -157,6 +157,9 @@ Functions (by files):
expected columns for the txt file. It also aggregates this data by week.
Next Steps:
+- Figure out what is wrong with time series and predicted observed irrigation plots
+ - What do cumulative and monthly evapotranspiration not match?
+- Missing/mislabeled weekly data in irrigation files
- Site specific water holding capacity and crop specific rooting depth
From 5b99c00950ff35dc610b9386a6e875764d9b1d5a Mon Sep 17 00:00:00 2001
From: Yinghao Sun
Date: Fri, 30 May 2025 14:41:35 -0400
Subject: [PATCH 0138/1193] Fix some issues.
---
models/lpjguess/NAMESPACE | 1 -
models/lpjguess/R/read_restart.LPJGUESS.R | 16 +++---
models/lpjguess/R/read_state.R | 57 +++++++++-----------
models/lpjguess/R/write.config.LPJGUESS.R | 5 +-
models/lpjguess/R/write_restart.LPJGUESS.R | 8 +--
models/lpjguess/inst/pecan.ins | 4 +-
models/lpjguess/inst/template.ins | 2 +-
models/lpjguess/man/find_closing.Rd | 1 -
models/lpjguess/man/make_key.Rd | 18 -------
models/lpjguess/man/read_restart.LPJGUESS.Rd | 16 +++---
10 files changed, 51 insertions(+), 77 deletions(-)
delete mode 100644 models/lpjguess/man/make_key.Rd
diff --git a/models/lpjguess/NAMESPACE b/models/lpjguess/NAMESPACE
index bb388d29f54..e814d6a2aec 100644
--- a/models/lpjguess/NAMESPACE
+++ b/models/lpjguess/NAMESPACE
@@ -16,7 +16,6 @@ export(write_binary_LPJGUESS)
export(write_restart.LPJGUESS)
importFrom(PEcAn.utils,days_in_year)
importFrom(Rcpp,sourceCpp)
-importFrom(dplyr,"%>%")
importFrom(ncdf4,nc_close)
importFrom(ncdf4,ncatt_get)
importFrom(ncdf4,ncatt_put)
diff --git a/models/lpjguess/R/read_restart.LPJGUESS.R b/models/lpjguess/R/read_restart.LPJGUESS.R
index 19826f9fe39..a8bd857f302 100644
--- a/models/lpjguess/R/read_restart.LPJGUESS.R
+++ b/models/lpjguess/R/read_restart.LPJGUESS.R
@@ -10,13 +10,15 @@
#' @return X_tmp vector of forecasts
#' @export
#' @examples
-#' # example code
-#' outdir = "/fs/data2/output//PEcAn_1000010473/out"
-#' runid = 1002656839
-#' stop.time = "1960-12-31 23:59:59 UTC"
-#' load("/fs/data2/output/PEcAn_1000010473/SDAsettings_develop.Rdata")
-#' var.names = c("AGB.pft", "TotSoilCarb")
-#' load("/fs/data2/output/PEcAn_1000010473/SDAparams_develop.Rdata")
+#' \dontrun{
+#' rx <- read_restart.LPJGUESS(
+#' outdir = "/projectnb/…/LPJ_output",
+#' runid = "123456",
+#' stop.time = as.POSIXct("2001-12-31 23:59:59", tz = "UTC"),
+#' settings = settings,
+#' var.names = c("AGB.pft"),
+#' params = params)
+#' }
#' @author Istem Fer, Yinghao Sun
read_restart.LPJGUESS <- function(outdir, runid, stop.time, settings, var.names, params){
diff --git a/models/lpjguess/R/read_state.R b/models/lpjguess/R/read_state.R
index a4900967672..7d54c4a8525 100644
--- a/models/lpjguess/R/read_state.R
+++ b/models/lpjguess/R/read_state.R
@@ -115,7 +115,6 @@ serialize_starts_ends <- function(file_in, pattern = "void Gridcell::serialize")
#' @param if_else_check Optional. A logical value indicating whether to check for if/else blocks (default is FALSE).
#' @export
#' @return A numeric value indicating the line number of the matching closing bracket.
-#' @keywords internal
# helper function that finds the closing bracket, can work over if-else
find_closing <- function(find = "}", line_no, file_in, if_else_check = FALSE){
opened <- 1
@@ -431,14 +430,6 @@ find_stream_type <- function(class = NULL, current_stream_var, LPJ_GUESS_CLASSES
return(list(type = gsub(" ", "", stream_type), name = stream_name, substring = sub_string))
} # find_stream_type
-#' Create a flat key from a nested path
-#'
-#' @param ... Parts of a nested list path (e.g., "Gridcell", "Stand", 1, "Patch", 2)
-#' @return A single string key like "Gridcell/Stand/1/Patch/2"
-#' @keywords internal
-make_key <- function(...) paste(..., sep = "/")
-
-
# this fcn is for potential natural vegetation only
# when there is landcover, there will be more stand types
# also for cohort mode only
@@ -765,7 +756,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
number_of_individuals <- readBin(zz, integer(), 1, size = 4)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]] <- list()
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]][["number_of_individuals"]] <- number_of_individuals
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "number_of_individuals")
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "number_of_individuals", fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- 4
@@ -790,7 +781,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
# which PFT is this?
pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Vegetation"]][["Individuals"]][[indv_i]][["indiv.pft.id"]] <- readBin(zz, integer(), 1, size = 4)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, "indiv.pft.id")
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, "indiv.pft.id", fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- 4
@@ -831,7 +822,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, "PhotosynthesisResult", current_stream_type$name)
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, "PhotosynthesisResult", current_stream_type$name, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
@@ -849,7 +840,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, current_stream_type$name)
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, current_stream_type$name, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
}else{
@@ -859,7 +850,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what[css.i],
n = current_stream_specs$n[css.i],
size = current_stream_specs$size[css.i])
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, current_stream_specs$names[css.i])
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Vegetation", "Individuals", indv_i, current_stream_specs$names[css.i], fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size[css.i]
}
@@ -889,7 +880,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
pos <- seek(zz)
key1 <- readBin(zz, "integer", 1, 8)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][["n_pft"]] <- key1
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", "n_pft")
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", "n_pft", fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- 8
@@ -901,7 +892,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
PEcAn.logger::logger.severe("Number of fluxes per pft read from the state file is too high. Check read.state function")
}
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][[paste0("pft", fpft_i)]][["key2"]] <- key2
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", paste0("pft", fpft_i), "key2")
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", paste0("pft", fpft_i), "key2", fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- 8
@@ -909,7 +900,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
# is this double?
pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["annual_fluxes_per_pft"]][[paste0("pft", fpft_i)]][[PerPFTFluxType[flux_i]]] <- readBin(zz, "double", 1, 8)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", paste0("pft", fpft_i), PerPFTFluxType[flux_i])
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "annual_fluxes_per_pft", paste0("pft", fpft_i), PerPFTFluxType[flux_i], fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- 8
}
@@ -921,7 +912,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
n_monthly_fluxes_patch <- 12 * LPJ_GUESS_CONST_INTS$val[LPJ_GUESS_CONST_INTS$var =="PerPatchFluxType"]
pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["monthly_fluxes_patch"]] <- readBin(zz, "double", n_monthly_fluxes_patch, 8)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "monthly_fluxes_patch")
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "monthly_fluxes_patch", fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- 8
@@ -931,7 +922,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
n_monthly_fluxes_pft <- 12 * LPJ_GUESS_CONST_INTS$val[LPJ_GUESS_CONST_INTS$var =="PerPFTFluxType"]
pos <- seek(zz)
Gridcell[["Stand"]][[stnd_i]][["Patch"]][[ptch_i]][["Fluxes"]][["monthly_fluxes_pft"]] <- readBin(zz, "double", n_monthly_fluxes_pft, 8)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "monthly_fluxes_pft")
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Fluxes", "monthly_fluxes_pft", fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- 8
@@ -998,7 +989,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
size = current_stream_specs$size)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Soil", "Sompool", current_stream_type$name, som_i)
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, "Soil", "Sompool", current_stream_type$name, som_i, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
@@ -1017,7 +1008,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, past_stream, current_stream_type$name, pft_i)
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, past_stream, current_stream_type$name, pft_i, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
}else{ # only for historic type?
@@ -1027,7 +1018,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what[css.i],
n = current_stream_specs$n[css.i],
size = current_stream_specs$size[css.i])
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, past_stream, current_stream_type$names[css.i], pft_i)
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, past_stream, current_stream_type$names[css.i], pft_i, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size[css.i]
}
@@ -1048,7 +1039,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, current_stream_type$name)
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, current_stream_type$name, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
}else{ # probably don't need this but let's keep
@@ -1059,7 +1050,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what[css_i],
n = current_stream_specs$n[css_i],
size = current_stream_specs$size[css_i])
- key <- make_key("Gridcell", "Stand", stnd_i, "Patch", ptch_i, current_stream_specs$names[css_i])
+ key <- file.path("Gridcell", "Stand", stnd_i, "Patch", ptch_i, current_stream_specs$names[css_i], fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size[css_i]
}
@@ -1119,7 +1110,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- key <- make_key("Gridcell", "Stand", stnd_i, past_stream, current_stream_type$name, pft_i)
+ key <- file.path("Gridcell", "Stand", stnd_i, past_stream, current_stream_type$name, pft_i, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
}else{
@@ -1129,7 +1120,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what[css.i],
n = current_stream_specs$n[css.i],
size = current_stream_specs$size[css.i])
- key <- make_key("Gridcell", "Stand", stnd_i, past_stream, current_stream_type$name[css.i])
+ key <- file.path("Gridcell", "Stand", stnd_i, past_stream, current_stream_type$name[css.i], fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size[css.i]
}
@@ -1148,7 +1139,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- key <- make_key("Gridcell", "Stand", stnd_i, current_stream_type$name)
+ key <- file.path("Gridcell", "Stand", stnd_i, current_stream_type$name, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
}else{ # probably don't need this but let's keep
@@ -1158,7 +1149,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what[css_i],
n = current_stream_specs$n[css_i],
size = current_stream_specs$size[css_i])
- key <- make_key("Gridcell", "Stand", stnd_i, current_stream_type$name)
+ key <- file.path("Gridcell", "Stand", stnd_i, current_stream_type$name, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size[css_i]
}
@@ -1209,7 +1200,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- key <- make_key("Gridcell", past_stream, current_stream_type$name, pft_i)
+ key <- file.path("Gridcell", past_stream, current_stream_type$name, pft_i, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
@@ -1224,7 +1215,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs[[css.i]]$what,
n = current_stream_specs[[css.i]]$n,
size = current_stream_specs[[css.i]]$size)
- key <- make_key("Gridcell", past_stream, current_stream_type$name, css.i)
+ key <- file.path("Gridcell", past_stream, current_stream_type$name, css.i, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs[[css.i]]$size
}
@@ -1235,7 +1226,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what[css.i],
n = current_stream_specs$n[css.i],
size = current_stream_specs$size[css.i])
- key <- make_key("Gridcell", past_stream, current_stream_type$name, pft_i, current_stream_specs$names[css.i])
+ key <- file.path("Gridcell", past_stream, current_stream_type$name, pft_i, current_stream_specs$names[css.i], fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size[css.i]
}
@@ -1254,7 +1245,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what,
n = current_stream_specs$n,
size = current_stream_specs$size)
- key <- make_key("Gridcell", past_stream, current_stream_type$name)
+ key <- file.path("Gridcell", past_stream, current_stream_type$name, fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size
}else{
@@ -1264,7 +1255,7 @@ read_binary_LPJGUESS <- function(outdir, version = "PalEON"){
what = current_stream_specs$what[css_i],
n = current_stream_specs$n[css_i],
size = current_stream_specs$size[css_i])
- key <- make_key("Gridcell", past_stream, current_stream_type$name[css_i])
+ key <- file.path("Gridcell", past_stream, current_stream_type$name[css_i], fsep = "/")
pos_list[[key]] <- pos
siz_list[[key]] <- current_stream_specs$size[css_i]
}
diff --git a/models/lpjguess/R/write.config.LPJGUESS.R b/models/lpjguess/R/write.config.LPJGUESS.R
index f1828a22665..b1ac07b5c45 100644
--- a/models/lpjguess/R/write.config.LPJGUESS.R
+++ b/models/lpjguess/R/write.config.LPJGUESS.R
@@ -205,7 +205,7 @@ write.insfile.LPJGUESS <- function(settings, trait.values, rundir, outdir, run.i
# when using cru input, lpjguess will not use these clim files
cru.file <- settings$run$inputs$met$path
misc.file <- sub("\\.bin$", "misc.bin", cru.file)
- guessins <- gsub("@SOIL_FILE@", cru.file, guessins)
+ guessins <- gsub("@MET_AND_SOIL_FILE@", cru.file, guessins)
guessins <- gsub("@MISC_FILE@", misc.file, guessins)
# create and write CO2 file
@@ -247,8 +247,7 @@ write.insfile.LPJGUESS <- function(settings, trait.values, rundir, outdir, run.i
# version check
if(!is.null(settings$model$revision)){
if(settings$model$revision == "PalEON"){
- #rm_inds <- which(grepl("@@@@@ Remove in PalEON version @@@@@", paramsins))
- rm_inds <- which(grepl("##### Remove in PalEON version #####", paramsins))
+ rm_inds <- which(grepl("@@@@@ Remove in PalEON version @@@@@", paramsins))
paramsins <- paramsins[-(rm_inds[1]:rm_inds[2])]
}
}
diff --git a/models/lpjguess/R/write_restart.LPJGUESS.R b/models/lpjguess/R/write_restart.LPJGUESS.R
index 8df5f70548a..49271ef0e55 100644
--- a/models/lpjguess/R/write_restart.LPJGUESS.R
+++ b/models/lpjguess/R/write_restart.LPJGUESS.R
@@ -33,7 +33,9 @@ write_restart.LPJGUESS <- function(outdir, runid,
file.path(outdir, runid, paste0("lpjguess.", as.Date(start.time), ".out")))
system(paste("rm", file.path(rundir, runid, "lpjguess.clim")))
} else {
- print(paste("Files not renamed -- Need to rerun timestep", start.time, "before next time step"))
+ PEcAn.logger::logger.severe(paste("rename = FALSE: Restart cannot proceed without output file",
+ "lpjguess.out being renamed for", start.time))
+ stop("RENAME flag is FALSE. Must rerun this timestep before continuing.")
}
settings$run$start.date <- start.time
@@ -47,8 +49,6 @@ write_restart.LPJGUESS <- function(outdir, runid,
pos_list <- new.params$LPJGUESS_state$pos_list
siz_list <- new.params$LPJGUESS_state$siz_list
-
-
## ---- Build PFT parameter table from new.params ----
# TODO: find accurate parameters; read params from settings
pft_par_table <- data.frame()
@@ -90,7 +90,7 @@ write_restart.LPJGUESS <- function(outdir, runid,
## --- Update state ---
# choose a minimum diameter
min.diam = 0.5
- Gridcell_updated <- update_state_LPJGUESS(Gridcell, pft_par_table,
+ Gridcell_updated <- update_state_LPJGUESS(Gridcell, pft_par_table,
dens.init, dens.targ,
agb.init, agb.targ,
AbvGrndWood.epsilon = 0.05,
diff --git a/models/lpjguess/inst/pecan.ins b/models/lpjguess/inst/pecan.ins
index 8619eb1cad5..9d9cdb06ad1 100755
--- a/models/lpjguess/inst/pecan.ins
+++ b/models/lpjguess/inst/pecan.ins
@@ -133,7 +133,7 @@ run_pasture 0 ! whether to simulate pasture (1) or not (0)
run_natural 1 ! whether to simulate natural vegetation (1) or not (0)
run_peatland 1 ! whether to simulate peatland (1) or not (0)
-! Remove in PalEON version
+@@@@@ Remove in PalEON version @@@@@
lcfrac_fixed 0 ! use landcover fractions (%) below (1) or read from input file (0)
lc_fixed_urban 0 ! URBAN
lc_fixed_cropland 50 ! CROPLAND
@@ -142,7 +142,7 @@ lc_fixed_forest 0 ! FOREST
lc_fixed_natural 25 ! NATURAL
lc_fixed_peatland 25 ! PEATLAND
equal_landcover_area 0 ! divide gridcell into equal active landcover fractions
-! Remove in PalEON version
+@@@@@ Remove in PalEON version @@@@@
!///////////////////////////////////////////////////////////////////////////////////////
diff --git a/models/lpjguess/inst/template.ins b/models/lpjguess/inst/template.ins
index 42fb17e7d88..25094f1b2e7 100755
--- a/models/lpjguess/inst/template.ins
+++ b/models/lpjguess/inst/template.ins
@@ -17,7 +17,7 @@ coordinates_precision 2
param "file_gridlist" (str "@GRID_FILE@")
param "file_co2" (str "@CO2_FILE@")
-param "file_cru" (str "@SOIL_FILE@")
+param "file_cru" (str "@MET_AND_SOIL_FILE@")
param "file_cru_misc" (str "@MISC_FILE@")
! N deposition (blank string to use constant pre-industrial level of 2 kgN/ha/year)
diff --git a/models/lpjguess/man/find_closing.Rd b/models/lpjguess/man/find_closing.Rd
index daefc1af76d..dd24834b211 100644
--- a/models/lpjguess/man/find_closing.Rd
+++ b/models/lpjguess/man/find_closing.Rd
@@ -21,4 +21,3 @@ A numeric value indicating the line number of the matching closing bracket.
\description{
Identifies the line number of the matching closing bracket for a given opening bracket.
}
-\keyword{internal}
diff --git a/models/lpjguess/man/make_key.Rd b/models/lpjguess/man/make_key.Rd
deleted file mode 100644
index 251929251c6..00000000000
--- a/models/lpjguess/man/make_key.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/read_state.R
-\name{make_key}
-\alias{make_key}
-\title{Create a flat key from a nested path}
-\usage{
-make_key(...)
-}
-\arguments{
-\item{...}{Parts of a nested list path (e.g., "Gridcell", "Stand", 1, "Patch", 2)}
-}
-\value{
-A single string key like "Gridcell/Stand/1/Patch/2"
-}
-\description{
-Create a flat key from a nested path
-}
-\keyword{internal}
diff --git a/models/lpjguess/man/read_restart.LPJGUESS.Rd b/models/lpjguess/man/read_restart.LPJGUESS.Rd
index 5db188f2573..0bacaa4070b 100644
--- a/models/lpjguess/man/read_restart.LPJGUESS.Rd
+++ b/models/lpjguess/man/read_restart.LPJGUESS.Rd
@@ -26,13 +26,15 @@ X_tmp vector of forecasts
Read Restart for LPJGUESS
}
\examples{
-# example code
-outdir = "/fs/data2/output//PEcAn_1000010473/out"
-runid = 1002656839
-stop.time = "1960-12-31 23:59:59 UTC"
-load("/fs/data2/output/PEcAn_1000010473/SDAsettings_develop.Rdata")
-var.names = c("AGB.pft", "TotSoilCarb")
-load("/fs/data2/output/PEcAn_1000010473/SDAparams_develop.Rdata")
+\dontrun{
+ rx <- read_restart.LPJGUESS(
+ outdir = "/projectnb/…/LPJ_output",
+ runid = "123456",
+ stop.time = as.POSIXct("2001-12-31 23:59:59", tz = "UTC"),
+ settings = settings,
+ var.names = c("AGB.pft"),
+ params = params)
+}
}
\author{
Istem Fer, Yinghao Sun
From 7a7c091fad229138c8cee3f6983920052ce49c96 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 5 Jun 2025 08:49:01 +0000
Subject: [PATCH 0139/1193] refactor(soilgrids): improve soil carbon IC
generation and uncertainty handling
---
modules/data.land/DESCRIPTION | 3 +-
modules/data.land/R/IC_SOILGRID_Utilities.R | 474 ++++++++----------
.../man/generate_soilgrids_ensemble.Rd | 26 +-
.../man/preprocess_soilgrids_data.Rd | 21 +-
modules/data.land/man/soilgrids_ic_process.Rd | 30 +-
5 files changed, 248 insertions(+), 306 deletions(-)
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index 0a86834e441..1bb15d651d9 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -55,7 +55,8 @@ Imports:
tidyr,
tidyselect,
traits,
- XML (>= 3.98-1.4)
+ XML (>= 3.98-1.4),
+ MASS
Suggests:
dataone,
datapack,
diff --git a/modules/data.land/R/IC_SOILGRID_Utilities.R b/modules/data.land/R/IC_SOILGRID_Utilities.R
index feab19b9cc9..379f3149f07 100644
--- a/modules/data.land/R/IC_SOILGRID_Utilities.R
+++ b/modules/data.land/R/IC_SOILGRID_Utilities.R
@@ -4,370 +4,322 @@
#' @description Functions for generating soil carbon IC files from SoilGrids250m data
#' @details This module provides functions for extracting, processing, and generating
#' ensemble members for soil carbon initial conditions using SoilGrids data.
-#' All soil carbon values are in kg/m².
-
-# Required package
-library(truncnorm)
+#' All soil carbon values are in kg/m2.
#' Process SoilGrids data for initial conditions
#'
-#' @param settings PEcAn settings list containing site information. Should include:
-#' \itemize{
-#' \item settings$run$site - Site information with id, lat, lon
-#' \item settings$ensemble$size - (Optional) Number of ensemble members to create
-#' \item settings$soil$default_soilC - (Optional) Default soil carbon value in kg/m²
-#' \item settings$soil$default_uncertainty - (Optional) Default uncertainty as fraction
-#' }
+#' @param settings PEcAn settings list containing site information
#' @param csv_path Path to a CSV file containing site information (optional)
#' @param dir Output directory for IC files
#' @param overwrite Overwrite existing files? (Default: FALSE)
-#' @param verbose Print detailed progress information to the terminal? TRUE/FALSE
-#'
-#' @return List of paths to generated IC files
-#' @export
-#'
-#' @details This function processes SoilGrids data to create carbon initial condition
-#' files. It extracts soil carbon data for all sites, handles missing values,
-#' generates ensemble members, and writes NetCDF files.
+#' @param verbose Print detailed progress information? (Default: FALSE)
#'
+#' @return List of paths to generated IC files, organized by site ID
+#' @export
+#'
#' @examples
#' \dontrun{
#' # From settings object
#' settings <- PEcAn.settings::read.settings("pecan.xml")
-#' ic_files <- soilgrids_ic_process(settings, dir = "output/IC/")
+#' ic_files <- soilgrids_ic_process(settings, dir = "~/output/IC")
#'
#' # From CSV file
-#' ic_files <- soilgrids_ic_process(csv_path = "sites.csv", dir = "output/IC/")
+#' ic_files <- soilgrids_ic_process(csv_path = "sites.csv", dir = "~/output/IC")
#' }
-soilgrids_ic_process <- function(settings, csv_path=NULL, dir, overwrite = FALSE, verbose = FALSE) {
- # Start timing
+soilgrids_ic_process <- function(settings, csv_path = NULL, dir, overwrite = FALSE, verbose = FALSE) {
start_time <- proc.time()
- # Extract site information using PEcAn.settings::get.site.info
- site_info <- PEcAn.settings::get.site.info(settings = settings, csv_path = csv_path)
+ site_info <- PEcAn.settings::get.site.info(input = if (is.null(csv_path)) settings else csv_path)
+ n_sites <- nrow(site_info)
+ if (n_sites == 0) {
+ PEcAn.logger::logger.severe("No sites found in the provided input")
+ }
- # Get optional parameters from settings if available
- ensemble_size <- ifelse(is.null(settings$ensemble$size), 1, settings$ensemble$size)
- default_soilC <- ifelse(is.null(settings$soil$default_soilC), 5.0, settings$soil$default_soilC)
- default_uncertainty <- ifelse(is.null(settings$soil$default_uncertainty), 0.2, settings$soil$default_uncertainty)
+ size <- ifelse(is.null(settings$ensemble$size), 1, settings$ensemble$size)
- # Create output directory if it doesn't exist
if (!dir.exists(dir)) {
- PEcAn.logger::logger.info(sprintf("Creating output directory: %s", dir))
dir.create(dir, recursive = TRUE)
}
- # Create a data folder for intermediate outputs
data_dir <- file.path(dir, "SoilGrids_data")
if (!dir.exists(data_dir)) {
dir.create(data_dir, recursive = TRUE)
}
- # Log the number of sites being processed
- n_sites <- nrow(site_info)
- PEcAn.logger::logger.info(sprintf("Processing %d site(s)", n_sites))
-
- if (verbose) {
- for (i in 1:nrow(site_info)) {
- PEcAn.logger::logger.info(sprintf("Site %d: %s (lat=%f, lon=%f)",
- i, site_info$site_name[i],
- site_info$lat[i], site_info$lon[i]))
- }
- }
-
# Check for cached data
soilc_csv_path <- file.path(data_dir, "soilgrids_soilC_data.csv")
if (file.exists(soilc_csv_path) && !overwrite) {
- PEcAn.logger::logger.info("Using existing SoilGrids data:", soilc_csv_path)
soil_data <- utils::read.csv(soilc_csv_path, check.names = FALSE)
} else {
- # Extract data for all sites at once
- PEcAn.logger::logger.info("Extracting SoilGrids data for", nrow(site_info), "sites")
soil_data <- PEcAn.data.land::soilgrids_soilC_extract(
site_info = site_info,
outdir = data_dir,
verbose = verbose
)
-
# Save the extracted data for future use
utils::write.csv(soil_data, soilc_csv_path, row.names = FALSE)
}
# Validate soil carbon data units through range check
if (any(soil_data$`Total_soilC_0-30cm` > 150, na.rm = TRUE)) {
- PEcAn.logger::logger.warn("Some soil carbon values exceed 150 kg/m², values may be in wrong units")
+ PEcAn.logger::logger.warn("Some soil carbon values exceed 150 kg/m2, values may be in wrong units")
}
- # Preprocess data
- PEcAn.logger::logger.info("Preprocessing soil carbon data")
- processed_data <- preprocess_soilgrids_data(
- soil_data = soil_data,
- default_soilC = default_soilC,
- default_uncertainty = default_uncertainty,
- verbose = verbose
- )
+ processed_data <- preprocess_soilgrids_data(soil_data, verbose)
+
+ if (nrow(processed_data$data) == 0) {
+ PEcAn.logger::logger.severe("No valid sites remain after preprocessing")
+ }
- # Create a list to hold the ensemble files for each site
- all_ensemble_files <- list()
+ ens_files <- list()
- # Process each site
- for (s in 1:nrow(site_info)) {
- current_site <- site_info[s, ]
+ for (s in 1:nrow(processed_data$data)) {
+ site_data <- processed_data$data[s, ]
+
+ site_idx <- which(site_info$site_id == site_data$Site_ID)
+ if (length(site_idx) == 0) {
+ PEcAn.logger::logger.warn(sprintf("Site %s not found in site_info", site_data$Site_ID))
+ next
+ }
+ current_site <- site_info[site_idx, ]
# Create output directory for this site
- site_outfolder <- file.path(dir, paste0("SoilGrids_site_", current_site$str_id))
- if (!dir.exists(site_outfolder)) {
- dir.create(site_outfolder, recursive = TRUE)
+ site_folder <- file.path(dir, paste0("SoilGrids_site_", current_site$str_id))
+ if (!dir.exists(site_folder)) {
+ dir.create(site_folder, recursive = TRUE)
}
# Check for existing files
- existing_files <- list.files(site_outfolder, "*.nc$", full.names = TRUE)
+ existing_files <- list.files(site_folder, "*.nc$", full.names = TRUE)
if (length(existing_files) > 0 && !overwrite) {
- PEcAn.logger::logger.info(sprintf("Using existing SoilGrids IC files for site %s", current_site$site_name))
- all_ensemble_files[[current_site$str_id]] <- existing_files
+ ens_files[[current_site$str_id]] <- existing_files
next
}
- if (verbose) {
- PEcAn.logger::logger.info(sprintf("Generating ensemble members for site %s", current_site$site_name))
- }
+ # Generate all ensemble members
+ ens_data_30cm <- generate_soilgrids_ensemble(
+ processed_data = processed_data,
+ site_id = current_site$site_id,
+ size = size,
+ depth_layer = "0-30cm",
+ verbose = verbose
+ )
- # Generate ensemble members for this site
- ensemble_data <- generate_soilgrids_ensemble(
+ ens_data_200cm <- generate_soilgrids_ensemble(
processed_data = processed_data,
site_id = current_site$site_id,
- lat = current_site$lat,
- lon = current_site$lon,
- ensemble_size = ensemble_size,
+ size = size,
+ depth_layer = "0-200cm",
verbose = verbose
)
- # Write ensemble members to NetCDF files
- site_ensemble_files <- list()
+ site_files <- list()
- for (ens in seq_len(ensemble_size)) {
- # Write to NetCDF
+ # Write each ensemble member to NetCDF files
+ for (ens in seq_len(size)) {
+ ens_input <- list(
+ dims = list(
+ lat = current_site$lat,
+ lon = current_site$lon,
+ time = 1,
+ depth = c(0.3, 2.0)
+ ),
+ vals = list(
+ soil_organic_carbon_content = c(ens_data_30cm[ens], ens_data_200cm[ens]),
+ wood_carbon_content = 0,
+ litter_carbon_content = 0
+ )
+ )
+
+ # Write to NetCDF file
result <- PEcAn.data.land::pool_ic_list2netcdf(
- input = ensemble_data[[ens]],
- outdir = site_outfolder,
+ input = ens_input,
+ outdir = site_folder,
siteid = current_site$site_id,
ens = ens
)
- site_ensemble_files[[ens]] <- result$file
-
- if (verbose) {
- PEcAn.logger::logger.info(sprintf("Generated IC file: %s for site %s",
- basename(result$file),
- current_site$site_name))
- }
+ site_files[[ens]] <- result$file
}
- # Add this site's files to the overall list
- all_ensemble_files[[current_site$str_id]] <- site_ensemble_files
+ ens_files[[current_site$str_id]] <- site_files
}
- # Log performance metrics
- end_time <- proc.time()
- elapsed_time <- end_time - start_time
- PEcAn.logger::logger.info(sprintf("IC generation completed for %d site(s) in %.2f seconds",
- n_sites, elapsed_time[3]))
+ if (verbose) {
+ end_time <- proc.time()
+ elapsed_time <- end_time - start_time
+ PEcAn.logger::logger.info(sprintf("IC generation completed for %d site(s) in %.2f seconds",
+ n_sites, elapsed_time[3]))
+ }
- return(all_ensemble_files)
+ return(ens_files)
}
-#' Preprocess SoilGrids data
-#'
-#' @param soil_data Raw soil carbon data from soilgrids_soilC_extract
-#' @param default_soilC Default soil carbon value in kg/m² to use when data is missing
-#' @param default_uncertainty Default uncertainty as fraction to use when data is missing
-#' @param verbose Print detailed progress information to the terminal? TRUE/FALSE
+#' Preprocess SoilGrids data for ensemble generation
+#'
+#' @param soil_data Dataframe with SoilGrids soil carbon data
+#' @param verbose Logical, print detailed progress information
#'
-#' @return Processed soil carbon data
+#' @return List containing processed data and CV distributions for both depths
#' @export
-preprocess_soilgrids_data <- function(soil_data, default_soilC = 5.0,
- default_uncertainty = 0.2, verbose = FALSE) {
+preprocess_soilgrids_data <- function(soil_data, verbose = FALSE) {
if (verbose) {
- PEcAn.logger::logger.info("Preprocessing soil carbon data")
+ PEcAn.logger::logger.info("Preprocessing soil carbon data following PEcAn standards")
+ }
+
+ # Only process sites with complete mean data for both depths
+ complete_sites <- !is.na(soil_data$`Total_soilC_0-30cm`) &
+ soil_data$`Total_soilC_0-30cm` > 0 &
+ !is.na(soil_data$`Total_soilC_0-200cm`) &
+ soil_data$`Total_soilC_0-200cm` > 0
+
+ if (!any(complete_sites)) {
+ PEcAn.logger::logger.severe("No sites with complete data for both depth intervals found")
}
- # Create a copy to avoid modifying the original
- processed <- soil_data
+ processed <- soil_data[complete_sites, ]
+
+ if (verbose) {
+ removed_count <- nrow(soil_data) - nrow(processed)
+ PEcAn.logger::logger.info(sprintf("Removed %d site(s) with incomplete data. Processing %d sites",
+ removed_count, nrow(processed)))
+ }
- # Handle missing values in Total_soilC_0-30cm
- na_count <- sum(is.na(processed$`Total_soilC_0-30cm`))
- if (na_count > 0) {
- PEcAn.logger::logger.warn(sprintf("Found %d missing values in soil carbon data", na_count))
+ # Calculate CV distributions
+ depths <- list(
+ "30cm" = list(mean_col = "Total_soilC_0-30cm", std_col = "Std_soilC_0-30cm"),
+ "200cm" = list(mean_col = "Total_soilC_0-200cm", std_col = "Std_soilC_0-200cm")
+ )
+
+ cv_dist <- lapply(depths, function(depth_info) {
+ valid_cv <- processed[[depth_info$mean_col]] > 0 &
+ !is.na(processed[[depth_info$std_col]]) &
+ processed[[depth_info$std_col]] > 0
- # Sites with missing 0-30cm but available 0-200cm data
- has_200cm_data <- is.na(processed$`Total_soilC_0-30cm`) & !is.na(processed$`Total_soilC_0-200cm`)
- if (any(has_200cm_data)) {
- processed$`Total_soilC_0-30cm`[has_200cm_data] <- processed$`Total_soilC_0-200cm`[has_200cm_data] * 0.15
- PEcAn.logger::logger.warn(sprintf(
- "Using scaled 0-200cm soil carbon values for %d site(s)", sum(has_200cm_data)
- ))
-
- if (verbose) {
- for (i in which(has_200cm_data)) {
- PEcAn.logger::logger.debug(sprintf(
- "Using scaled 0-200cm soil carbon value (%.2f) for site %s",
- processed$`Total_soilC_0-30cm`[i], processed$Site_ID[i]
- ))
- }
- }
+ if (sum(valid_cv) < 5) {
+ return(list(type = "none"))
}
- # Sites still with missing data - use default value
- still_missing <- is.na(processed$`Total_soilC_0-30cm`)
- if (any(still_missing)) {
- processed$`Total_soilC_0-30cm`[still_missing] <- default_soilC
- PEcAn.logger::logger.warn(sprintf(
- "Using default soil carbon value (%.2f kg/m²) for %d site(s)",
- default_soilC, sum(still_missing)
- ))
-
- if (verbose) {
- for (i in which(still_missing)) {
- PEcAn.logger::logger.debug(sprintf(
- "Using default soil carbon value (%.2f kg/m²) for site %s",
- default_soilC, processed$Site_ID[i]
- ))
- }
- }
- }
- }
-
- # Handle missing values in Std_soilC_0-30cm
- na_count <- sum(is.na(processed$`Std_soilC_0-30cm`))
- if (na_count > 0) {
- PEcAn.logger::logger.warn(sprintf("Found %d missing values in soil carbon uncertainty", na_count))
+ cv_values <- processed[[depth_info$std_col]][valid_cv] / processed[[depth_info$mean_col]][valid_cv]
+ cv_bounds <- quantile(cv_values, probs = c(0.05, 0.95), na.rm = TRUE)
+ cv_filtered <- cv_values[cv_values >= cv_bounds[1] & cv_values <= cv_bounds[2]]
- # Sites with missing 0-30cm but available 0-200cm uncertainty data
- has_200cm_data <- is.na(processed$`Std_soilC_0-30cm`) & !is.na(processed$`Std_soilC_0-200cm`)
- if (any(has_200cm_data)) {
- processed$`Std_soilC_0-30cm`[has_200cm_data] <- processed$`Std_soilC_0-200cm`[has_200cm_data] * 0.15
- PEcAn.logger::logger.warn(sprintf(
- "Using scaled 0-200cm soil carbon uncertainty for %d site(s)", sum(has_200cm_data)
- ))
-
- if (verbose) {
- for (i in which(has_200cm_data)) {
- PEcAn.logger::logger.debug(sprintf(
- "Using scaled 0-200cm soil carbon uncertainty (%.2f) for site %s",
- processed$`Std_soilC_0-30cm`[i], processed$Site_ID[i]
- ))
- }
- }
+ if (length(cv_filtered) < 5) {
+ return(list(type = "none"))
}
- # Sites still with missing uncertainty - use default percentage of mean
- still_missing <- is.na(processed$`Std_soilC_0-30cm`)
- if (any(still_missing)) {
- processed$`Std_soilC_0-30cm`[still_missing] <-
- processed$`Total_soilC_0-30cm`[still_missing] * default_uncertainty
- PEcAn.logger::logger.warn(sprintf(
- "Using default uncertainty (%.1f%% of mean) for %d site(s)",
- default_uncertainty * 100, sum(still_missing)
- ))
-
- if (verbose) {
- for (i in which(still_missing)) {
- PEcAn.logger::logger.debug(sprintf(
- "Using default uncertainty (%.1f%% of mean) for site %s",
- default_uncertainty * 100, processed$Site_ID[i]
- ))
- }
- }
+ gamma_fit <- try(MASS::fitdistr(cv_filtered, "gamma"), silent = TRUE)
+ if (!inherits(gamma_fit, "try-error")) {
+ list(
+ type = "gamma",
+ shape = gamma_fit$estimate["shape"],
+ rate = gamma_fit$estimate["rate"],
+ bounds = as.vector(cv_bounds)
+ )
+ } else {
+ list(
+ type = "empirical",
+ values = cv_filtered,
+ bounds = as.vector(cv_bounds)
+ )
}
- }
-
- # Ensure standard deviation is non-negative
- neg_sd_count <- sum(processed$`Std_soilC_0-30cm` < 0, na.rm = TRUE)
- if (neg_sd_count > 0) {
- PEcAn.logger::logger.warn(sprintf("Found %d negative standard deviations", neg_sd_count))
- processed$`Std_soilC_0-30cm` <- pmax(processed$`Std_soilC_0-30cm`, 0, na.rm = TRUE)
- }
-
- # Ensure mean is non-negative
- neg_mean_count <- sum(processed$`Total_soilC_0-30cm` < 0, na.rm = TRUE)
- if (neg_mean_count > 0) {
- PEcAn.logger::logger.warn(sprintf("Found %d negative mean values", neg_mean_count))
- processed$`Total_soilC_0-30cm` <- pmax(processed$`Total_soilC_0-30cm`, 0, na.rm = TRUE)
- }
-
- # Add minimum standard deviation to avoid zero uncertainty
- min_sd <- 0.1 * processed$`Total_soilC_0-30cm` # 10% of mean as minimum SD
- is_zero_sd <- processed$`Std_soilC_0-30cm` == 0 | is.na(processed$`Std_soilC_0-30cm`)
- zero_sd_count <- sum(is_zero_sd)
-
- if (zero_sd_count > 0) {
- PEcAn.logger::logger.info(sprintf("Setting minimum uncertainty for %d zero/NA standard deviations",
- zero_sd_count))
- processed$`Std_soilC_0-30cm` <- pmax(processed$`Std_soilC_0-30cm`, min_sd, na.rm = TRUE)
- }
+ })
- return(processed)
+ return(list(
+ data = processed,
+ cv_distribution_30cm = cv_dist[["30cm"]],
+ cv_distribution_200cm = cv_dist[["200cm"]]
+ ))
}
-#' Generate ensemble members for a site
-#'
-#' @param processed_data Processed soil carbon data
-#' @param site_id Site ID
-#' @param lat Site latitude
-#' @param lon Site longitude
-#' @param ensemble_size Number of ensemble members to create
-#' @param verbose Print detailed progress information to the terminal? TRUE/FALSE
+#' Generate soil carbon ensemble members for specific depth
+#'
+#' @param processed_data Output from preprocess_soilgrids_data()
+#' @param site_id Target site ID
+#' @param size Number of ensemble members to generate
+#' @param depth_layer Depth layer ("0-30cm" or "0-200cm")
+#' @param verbose Logical, print detailed progress information
+#' @param seed Optional random seed for reproducibility
#'
-#' @return List of ensemble data for the site
+#' @return Vector of soil carbon values with proper uncertainty handling
#' @export
-generate_soilgrids_ensemble <- function(processed_data, site_id, lat, lon, ensemble_size, verbose = FALSE) {
+generate_soilgrids_ensemble <- function(processed_data, site_id, size, depth_layer, verbose = FALSE, seed = NULL) {
if (verbose) {
- PEcAn.logger::logger.info(sprintf("Generating %d ensemble members for site %s", ensemble_size, site_id))
+ PEcAn.logger::logger.info(sprintf("Generating %d ensemble members for site %s (%s)",size, site_id, depth_layer))
}
- # Get site row from processed data
- site_row <- which(processed_data$Site_ID == site_id)
+ if (!is.null(seed)) {
+ if (exists(".Random.seed", envir = .GlobalEnv)) {
+ old_seed <- .Random.seed
+ on.exit(assign(".Random.seed", old_seed, envir = .GlobalEnv))
+ }
+ set.seed(seed)
+ }
+
+ site_row <- which(processed_data$data$Site_ID == site_id)
if (length(site_row) == 0) {
PEcAn.logger::logger.severe(sprintf("Site %s not found in processed data", site_id))
}
- # Set random seed for reproducibility
- set.seed(as.numeric(site_id))
+ # Select appropriate columns based on depth layer
+ if (depth_layer == "0-30cm") {
+ mean_c <- processed_data$data$`Total_soilC_0-30cm`[site_row]
+ original_sd <- processed_data$data$`Std_soilC_0-30cm`[site_row]
+ cv_dist <- processed_data$cv_distribution_30cm
+ } else {
+ mean_c <- processed_data$data$`Total_soilC_0-200cm`[site_row]
+ original_sd <- processed_data$data$`Std_soilC_0-200cm`[site_row]
+ cv_dist <- processed_data$cv_distribution_200cm
+ }
- # Generate all ensemble members at once
- soil_c_values <- truncnorm::rtruncnorm(
- n = ensemble_size,
- a = 0, # Lower bound (no negative values)
- b = Inf, # Upper bound
- mean = processed_data$`Total_soilC_0-30cm`[site_row],
- sd = processed_data$`Std_soilC_0-30cm`[site_row]
- )
+ if (is.na(mean_c) || mean_c <= 0) {
+ PEcAn.logger::logger.severe(sprintf("Invalid mean soil carbon value for site %s (%s)",
+ site_id, depth_layer))
+ }
+
+ soil_c_values <- numeric(size)
+
+ # Use site-specific uncertainty
+ if (!is.na(original_sd) && original_sd > 0) {
+ shape <- (mean_c^2) / (original_sd^2)
+ rate <- mean_c / (original_sd^2)
+ if (is.finite(shape) && is.finite(rate) && shape > 0 && rate > 0) {
+ soil_c_values <- pmax(stats::rgamma(size, shape, rate), 0)
+ } else {
+ soil_c_values <- rep(mean_c, size)
+ }
+ } else if (cv_dist$type != "none") {
+ # Integrate over uncertainty using CV distribution
+ if (cv_dist$type == "gamma") {
+ cv_samples <- stats::rgamma(size, cv_dist$shape, cv_dist$rate)
+ if (all(is.finite(cv_dist$bounds))) {
+ cv_samples <- pmax(pmin(cv_samples, cv_dist$bounds[2]), cv_dist$bounds[1])
+ }
+ } else {
+ cv_samples <- sample(cv_dist$values, size, replace = TRUE)
+ }
+
+ sd_values <- mean_c * cv_samples
+ valid <- !is.na(sd_values) & sd_values > 0
+
+ if (any(valid)) {
+ shape_vec <- (mean_c^2) / (sd_values[valid]^2)
+ rate_vec <- mean_c / (sd_values[valid]^2)
+ soil_c_values[valid] <- pmax(stats::rgamma(sum(valid), shape_vec, rate_vec), 0)
+ soil_c_values[!valid] <- mean_c
+ } else {
+ soil_c_values <- rep(mean_c, size)
+ }
+ } else {
+ # Deterministic fallback
+ soil_c_values <- rep(mean_c, size)
+ }
if (verbose) {
- PEcAn.logger::logger.debug(sprintf(
- "Generated %d soil carbon values for site %s (mean: %.2f, sd: %.2f)",
- ensemble_size,
- site_id,
- processed_data$`Total_soilC_0-30cm`[site_row],
- processed_data$`Std_soilC_0-30cm`[site_row]
+ PEcAn.logger::logger.debug(sprintf("Generated ensemble for site %s (%s): mean=%.2f, sd=%.2f",
+ site_id, depth_layer, mean(soil_c_values), stats::sd(soil_c_values)
))
}
- # Create input lists for pool_ic_list2netcdf
- ensemble_data <- lapply(seq_len(ensemble_size), function(ens) {
- list(
- dims = list(
- lat = lat,
- lon = lon,
- time = 1
- ),
- vals = list(
- soil_organic_carbon_content = soil_c_values[ens],
- wood_carbon_content = 0, # Not provided by SoilGrids
- litter_carbon_content = 0 # Not provided by SoilGrids
- )
- )
- })
-
- return(ensemble_data)
-}
+ return(soil_c_values)
+}
\ No newline at end of file
diff --git a/modules/data.land/man/generate_soilgrids_ensemble.Rd b/modules/data.land/man/generate_soilgrids_ensemble.Rd
index 7604bfc8b71..635fdbf090d 100644
--- a/modules/data.land/man/generate_soilgrids_ensemble.Rd
+++ b/modules/data.land/man/generate_soilgrids_ensemble.Rd
@@ -2,33 +2,33 @@
% Please edit documentation in R/IC_SOILGRID_Utilities.R
\name{generate_soilgrids_ensemble}
\alias{generate_soilgrids_ensemble}
-\title{Generate ensemble members for a site}
+\title{Generate soil carbon ensemble members for specific depth}
\usage{
generate_soilgrids_ensemble(
processed_data,
site_id,
- lat,
- lon,
- ensemble_size,
- verbose = FALSE
+ size,
+ depth_layer,
+ verbose = FALSE,
+ seed = NULL
)
}
\arguments{
-\item{processed_data}{Processed soil carbon data}
+\item{processed_data}{Output from preprocess_soilgrids_data()}
-\item{site_id}{Site ID}
+\item{site_id}{Target site ID}
-\item{lat}{Site latitude}
+\item{size}{Number of ensemble members to generate}
-\item{lon}{Site longitude}
+\item{depth_layer}{Depth layer ("0-30cm" or "0-200cm")}
-\item{ensemble_size}{Number of ensemble members to create}
+\item{verbose}{Logical, print detailed progress information}
-\item{verbose}{Print detailed progress information to the terminal? TRUE/FALSE}
+\item{seed}{Optional random seed for reproducibility}
}
\value{
-List of ensemble data for the site
+Vector of soil carbon values with proper uncertainty handling
}
\description{
-Generate ensemble members for a site
+Generate soil carbon ensemble members for specific depth
}
diff --git a/modules/data.land/man/preprocess_soilgrids_data.Rd b/modules/data.land/man/preprocess_soilgrids_data.Rd
index b1e3cbd524e..e8f2dee1ed1 100644
--- a/modules/data.land/man/preprocess_soilgrids_data.Rd
+++ b/modules/data.land/man/preprocess_soilgrids_data.Rd
@@ -2,27 +2,18 @@
% Please edit documentation in R/IC_SOILGRID_Utilities.R
\name{preprocess_soilgrids_data}
\alias{preprocess_soilgrids_data}
-\title{Preprocess SoilGrids data}
+\title{Preprocess SoilGrids data for ensemble generation}
\usage{
-preprocess_soilgrids_data(
- soil_data,
- default_soilC = 5,
- default_uncertainty = 0.2,
- verbose = FALSE
-)
+preprocess_soilgrids_data(soil_data, verbose = FALSE)
}
\arguments{
-\item{soil_data}{Raw soil carbon data from soilgrids_soilC_extract}
+\item{soil_data}{Dataframe with SoilGrids soil carbon data}
-\item{default_soilC}{Default soil carbon value in kg/m² to use when data is missing}
-
-\item{default_uncertainty}{Default uncertainty as fraction to use when data is missing}
-
-\item{verbose}{Print detailed progress information to the terminal? TRUE/FALSE}
+\item{verbose}{Logical, print detailed progress information}
}
\value{
-Processed soil carbon data
+List containing processed data and CV distributions for both depths
}
\description{
-Preprocess SoilGrids data
+Preprocess SoilGrids data for ensemble generation
}
diff --git a/modules/data.land/man/soilgrids_ic_process.Rd b/modules/data.land/man/soilgrids_ic_process.Rd
index 5a8cd74ff03..fbe9631c5e4 100644
--- a/modules/data.land/man/soilgrids_ic_process.Rd
+++ b/modules/data.land/man/soilgrids_ic_process.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/IC_SOILGRID_Utilities.R
\name{soilgrids_ic_process}
\alias{soilgrids_ic_process}
-\title{Process SoilGrids data for initial conditions}
+\title{SoilGrids Initial Conditions (IC) Utilities}
\usage{
soilgrids_ic_process(
settings,
@@ -13,13 +13,7 @@ soilgrids_ic_process(
)
}
\arguments{
-\item{settings}{PEcAn settings list containing site information. Should include:
-\itemize{
- \item settings$run$site - Site information with id, lat, lon
- \item settings$ensemble$size - (Optional) Number of ensemble members to create
- \item settings$soil$default_soilC - (Optional) Default soil carbon value in kg/m²
- \item settings$soil$default_uncertainty - (Optional) Default uncertainty as fraction
-}}
+\item{settings}{PEcAn settings list containing site information}
\item{csv_path}{Path to a CSV file containing site information (optional)}
@@ -27,26 +21,30 @@ soilgrids_ic_process(
\item{overwrite}{Overwrite existing files? (Default: FALSE)}
-\item{verbose}{Print detailed progress information to the terminal? TRUE/FALSE}
+\item{verbose}{Print detailed progress information? (Default: FALSE)}
}
\value{
-List of paths to generated IC files
+List of paths to generated IC files, organized by site ID
}
\description{
-Process SoilGrids data for initial conditions
+Functions for generating soil carbon IC files from SoilGrids250m data
}
\details{
-This function processes SoilGrids data to create carbon initial condition
- files. It extracts soil carbon data for all sites, handles missing values,
- generates ensemble members, and writes NetCDF files.
+This module provides functions for extracting, processing, and generating
+ ensemble members for soil carbon initial conditions using SoilGrids data.
+ All soil carbon values are in kg/m2.
+Process SoilGrids data for initial conditions
}
\examples{
\dontrun{
# From settings object
settings <- PEcAn.settings::read.settings("pecan.xml")
-ic_files <- soilgrids_ic_process(settings, dir = "output/IC/")
+ic_files <- soilgrids_ic_process(settings, dir = "~/output/IC")
# From CSV file
-ic_files <- soilgrids_ic_process(csv_path = "sites.csv", dir = "output/IC/")
+ic_files <- soilgrids_ic_process(csv_path = "sites.csv", dir = "~/output/IC")
+}
}
+\author{
+Akash
}
From c646fd1589bb826f7e2b11a98459559bb4646a58 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 5 Jun 2025 08:57:40 +0000
Subject: [PATCH 0140/1193] simplified general function for site information
extraction and validation logic
---
base/settings/R/get.site.info.R | 272 ++++++++++----------------------
1 file changed, 83 insertions(+), 189 deletions(-)
diff --git a/base/settings/R/get.site.info.R b/base/settings/R/get.site.info.R
index d252c54dace..df6304ca8f1 100644
--- a/base/settings/R/get.site.info.R
+++ b/base/settings/R/get.site.info.R
@@ -1,205 +1,99 @@
#' Extract and validate site information from settings or CSV file
#'
-#' @param settings PEcAn settings list containing site information (optional)
-#' @param csv_path Path to a CSV file containing site information (optional)
-#' @param strict_checking Logical. If TRUE, will validate coordinates more strictly
+#' @param input Settings object, path to settings XML, or path to CSV file.
+#' For settings objects, both single sites and MultiSettings are supported.
+#' @param validate Logical. If TRUE (default), performs strict validation of coordinates.
+#' When FALSE, skips coordinate validation checks.
+#' @param verbose Logical. If TRUE, prints progress messages (default: FALSE).
#'
-#' @return A data frame with site_id, site_name, lat, lon, and str_id
-#' @export get.site.info
-#'
-#' @details This function extracts and validates site information from either a PEcAn settings
-#' object or a CSV file. At least one input must be provided. If both are provided,
-#' the settings object takes precedence.
-#'
-#' If using a CSV file, it must contain at minimum the columns: site_id, lat, and lon.
-#' The column site_name is optional and will default to site_id if not provided.
-#'
-#' @examples
-#' \dontrun{
-#' # From settings object
-#' settings <- PEcAn.settings::read.settings("pecan.xml")
-#' site_info <- PEcAn.settings::get.site.info(settings)
-#'
-#' # From CSV file
-#' site_info <- PEcAn.settings::get.site.info(csv_path = "sites.csv")
+#' @return A data frame containing site information with columns:
+#' \describe{
+#' \item{site_id}{Numeric site identifier}
+#' \item{site_name}{Character site name (defaults to site_id if not provided)}
+#' \item{lat}{Numeric latitude in decimal degrees}
+#' \item{lon}{Numeric longitude in decimal degrees}
+#' \item{str_id}{Character version of site_id for display purposes}
#' }
-get.site.info <- function(settings = NULL, csv_path = NULL, strict_checking = TRUE) {
+#'
+#' @export
+get.site.info <- function(input, validate = TRUE, verbose = FALSE) {
- # Check if at least one input is provided
- if (is.null(settings) && is.null(csv_path)) {
- PEcAn.logger::logger.severe("No site information provided. Please provide either settings or csv_path.")
+ # Process input and collect sites in a list
+ if (inherits(input, "MultiSettings")) {
+ sites_list <- purrr::map(input, ~.x$run$site)
+ } else if (is.list(input) && !is.null(input$run)) {
+ if (verbose) PEcAn.logger::logger.debug("Processing settings object")
+ sites_list <- if (is.list(input$run) && length(input$run) > 1) {
+ # Vectorized runs in single settings
+ purrr::map(input$run, ~.x$site)
+ } else {
+ # Single run
+ list(input$run$site)
+ }
+ } else if (is.character(input)) {
+ if (!file.exists(input)) PEcAn.logger::logger.severe("File not found:", input)
+
+ if (grepl("\\.xml$", input, ignore.case = TRUE)) {
+ if (verbose) PEcAn.logger::logger.debug("Processing XML file:", input)
+ settings <- PEcAn.settings::read.settings(input)
+ # Recursive call to handle the loaded settings
+ return(get.site.info(settings, validate = validate, verbose = verbose))
+ } else if (grepl("\\.csv$", input, ignore.case = TRUE)) {
+ if (verbose) PEcAn.logger::logger.debug("Processing CSV file:", input)
+ csv_data <- utils::read.csv(input, stringsAsFactors = FALSE)
+ required_cols <- c("site_id", "lat", "lon")
+ if (!all(required_cols %in% colnames(csv_data))) {
+ PEcAn.logger::logger.severe("Missing required columns:", setdiff(required_cols, colnames(csv_data)))
+ }
+
+ sites_list <- lapply(seq_len(nrow(csv_data)), function(i) {
+ list(
+ id = csv_data$site_id[i],
+ name = if("site_name" %in% colnames(csv_data)) csv_data$site_name[i] else NULL,
+ lat = csv_data$lat[i],
+ lon = csv_data$lon[i]
+ )
+ })
+ } else {
+ PEcAn.logger::logger.severe("File must be XML or CSV:", input)
+ }
+ } else {
+ PEcAn.logger::logger.severe("Input must be a settings object or file path")
}
- # Process settings object (highest precedence when both are provided)
- if (!is.null(settings)) {
- PEcAn.logger::logger.debug("Extracting site information from settings object")
-
- # Check if this is a MultiSettings object
- if (inherits(settings, "MultiSettings")) {
- PEcAn.logger::logger.info("Detected MultiSettings object")
+ # Unified processing for all site types
+ site_info <- sites_list %>%
+ purrr::map_dfr(function(site) {
+ site_id <- as.numeric(site$id)
+ lat <- as.numeric(site$lat)
+ lon <- as.numeric(site$lon)
- # Process sites from MultiSettings
- site_list <- lapply(settings, function(s) {
- if (is.null(s$run) || is.null(s$run$site)) {
- PEcAn.logger::logger.severe("Site information missing from one of the settings in MultiSettings")
+ # Validation
+ if (validate) {
+ if (!is.numeric(lat) || lat < -90 || lat > 90) {
+ PEcAn.logger::logger.severe(sprintf("Invalid latitude (%s) for site: %s", lat, site_id))
+ }
+ if (!is.numeric(lon) || lon < -180 || lon > 180) {
+ PEcAn.logger::logger.severe(sprintf("Invalid longitude (%s) for site: %s", lon, site_id))
}
- return(s$run$site)
- })
- } else {
- # Process single settings object
- if (is.null(settings$run) || is.null(settings$run$site)) {
- PEcAn.logger::logger.severe("Site information missing from settings (settings$run$site)")
}
- # Check if we have vectorized site information
- site_fields <- c("id", "name", "lat", "lon")
- field_lengths <- sapply(site_fields, function(f) {
- if (is.null(settings$run$site[[f]])) 0 else length(settings$run$site[[f]])
- })
-
- max_length <- max(field_lengths)
- is_vectorized <- max_length > 1
-
- if (is_vectorized) {
- PEcAn.logger::logger.info("Detected vectorized site information in settings")
-
- # Create a list of site information from vectorized input
- site_list <- list()
- for (i in 1:max_length) {
- site <- list()
- for (field in site_fields) {
- if (!is.null(settings$run$site[[field]]) && i <= length(settings$run$site[[field]])) {
- site[[field]] <- settings$run$site[[field]][i]
- }
- }
- site_list[[i]] <- site
- }
+ str_id <- if (isTRUE(site_id > 1e9)) {
+ paste0(site_id %/% 1e+09, "-", site_id %% 1e+09)
} else {
- # Just a single non-vectorized site
- site_list <- list(settings$run$site)
+ as.character(site_id)
}
- }
- } else {
- # Process CSV file input
- PEcAn.logger::logger.debug("Reading site information from CSV file:", csv_path)
-
- # Check if file exists
- if (!file.exists(csv_path)) {
- PEcAn.logger::logger.severe("CSV file not found:", csv_path)
- }
-
- # Read CSV file
- csv_data <- utils::read.csv(csv_path, stringsAsFactors = FALSE)
-
- # Check for required columns
- required_cols <- c("site_id", "lat", "lon")
- missing_cols <- setdiff(required_cols, colnames(csv_data))
- if (length(missing_cols) > 0) {
- PEcAn.logger::logger.severe("Missing required columns in CSV file: ",
- paste(missing_cols, collapse = ", "))
- }
-
- # Add site_name if missing (use site_id as default)
- if (!"site_name" %in% colnames(csv_data)) {
- csv_data$site_name <- as.character(csv_data$site_id)
- PEcAn.logger::logger.debug("Added site_name column using site_id values")
- }
-
- # Convert CSV data to the site_list format for consistent processing
- site_list <- lapply(1:nrow(csv_data), function(i) {
- row <- csv_data[i, ]
- list(
- id = row$site_id,
- name = row$site_name,
- lat = row$lat,
- lon = row$lon
+
+ # Return standardized data frame row
+ data.frame(
+ site_id = as.integer(site_id),
+ site_name = if(is.null(site$name)) as.character(site_id) else site$name,
+ lat = lat,
+ lon = lon,
+ str_id = str_id,
+ stringsAsFactors = FALSE
)
})
- }
-
- # Process each site from the site_list
- result <- lapply(seq_along(site_list), function(i) {
- site <- site_list[[i]]
-
- # Check for required site ID
- if (is.null(site$id)) {
- PEcAn.logger::logger.severe(sprintf("Site ID is required but missing for site %d", i))
- }
-
- # Extract and validate site ID
- site_id <- as.numeric(site$id)
- if (is.na(site_id)) {
- PEcAn.logger::logger.severe(sprintf("Site ID must be numeric for site %d", i))
- }
-
- # Check if the site name exists, use ID as name if missing
- site_name <- ifelse(!is.null(site$name), site$name, as.character(site_id))
-
- # Check for required coordinates
- if (is.null(site$lat) || is.null(site$lon)) {
- PEcAn.logger::logger.severe(sprintf("Site coordinates are required but missing for site %d", i))
- }
-
- # Extract and validate coordinates
- lat <- as.numeric(site$lat)
- lon <- as.numeric(site$lon)
-
- if (is.na(lat) || is.na(lon)) {
- PEcAn.logger::logger.severe(sprintf("Site coordinates must be numeric for site %d", i))
- }
-
- # site ID for display and file naming
- str_id <- as.character(site$id)
-
- # Return a standardized site info list
- return(list(
- site_id = site_id,
- site_name = site_name,
- lat = lat,
- lon = lon,
- str_id = str_id
- ))
- })
-
- # Create the data frame using vapply to maintain types
- site_df <- data.frame(
- site_id = vapply(result, function(x) x$site_id, numeric(1)),
- site_name = vapply(result, function(x) x$site_name, character(1)),
- lat = vapply(result, function(x) x$lat, numeric(1)),
- lon = vapply(result, function(x) x$lon, numeric(1)),
- str_id = vapply(result, function(x) x$str_id, character(1)),
- stringsAsFactors = FALSE
- )
-
- # Validate coordinates based on strictness settings
- if (strict_checking) {
- # Check for valid latitude range
- invalid_lats <- site_df$lat < -90 | site_df$lat > 90
- if (any(invalid_lats)) {
- invalid_sites <- paste(site_df$site_id[invalid_lats], collapse = ", ")
- PEcAn.logger::logger.severe(sprintf("Invalid latitude values (outside -90 to 90) found for sites: %s", invalid_sites))
- }
-
- # Check for valid longitude range
- invalid_lons <- site_df$lon < -180 | site_df$lon > 180
- if (any(invalid_lons)) {
- invalid_sites <- paste(site_df$site_id[invalid_lons], collapse = ", ")
- PEcAn.logger::logger.severe(sprintf("Invalid longitude values (outside -180 to 180) found for sites: %s", invalid_sites))
- }
- } else {
- # Just warn if coordinates are suspicious
- suspicious_lats <- site_df$lat < -90 | site_df$lat > 90
- if (any(suspicious_lats)) {
- suspicious_sites <- paste(site_df$site_id[suspicious_lats], collapse = ", ")
- PEcAn.logger::logger.warn(sprintf("Suspicious latitude values (outside -90 to 90) found for sites: %s", suspicious_sites))
- }
-
- suspicious_lons <- site_df$lon < -180 | site_df$lon > 180
- if (any(suspicious_lons)) {
- suspicious_sites <- paste(site_df$site_id[suspicious_lons], collapse = ", ")
- PEcAn.logger::logger.warn(sprintf("Suspicious longitude values (outside -180 to 180) found for sites: %s", suspicious_sites))
- }
- }
- return(site_df)
-}
\ No newline at end of file
+ return(site_info)
+}
From 2f389baa87f8464057064556e6360936c940f99b Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 5 Jun 2025 09:00:17 +0000
Subject: [PATCH 0141/1193] updated .Rd and test for get.site.info
---
base/settings/man/get.site.info.Rd | 38 +++-----
.../tests/testthat/test.get.site.info.R | 89 +++----------------
2 files changed, 25 insertions(+), 102 deletions(-)
diff --git a/base/settings/man/get.site.info.Rd b/base/settings/man/get.site.info.Rd
index fa8bc90ca22..c6338275ea3 100644
--- a/base/settings/man/get.site.info.Rd
+++ b/base/settings/man/get.site.info.Rd
@@ -4,37 +4,27 @@
\alias{get.site.info}
\title{Extract and validate site information from settings or CSV file}
\usage{
-get.site.info(settings = NULL, csv_path = NULL, strict_checking = TRUE)
+get.site.info(input, validate = TRUE, verbose = FALSE)
}
\arguments{
-\item{settings}{PEcAn settings list containing site information (optional)}
+\item{input}{Settings object, path to settings XML, or path to CSV file.
+For settings objects, both single sites and MultiSettings are supported.}
-\item{csv_path}{Path to a CSV file containing site information (optional)}
+\item{validate}{Logical. If TRUE (default), performs strict validation of coordinates.
+When FALSE, skips coordinate validation checks.}
-\item{strict_checking}{Logical. If TRUE, will validate coordinates more strictly}
+\item{verbose}{Logical. If TRUE, prints progress messages (default: FALSE).}
}
\value{
-A data frame with site_id, site_name, lat, lon, and str_id
+A data frame containing site information with columns:
+\describe{
+\item{site_id}{Numeric site identifier}
+\item{site_name}{Character site name (defaults to site_id if not provided)}
+\item{lat}{Numeric latitude in decimal degrees}
+\item{lon}{Numeric longitude in decimal degrees}
+\item{str_id}{Character version of site_id for display purposes}
+}
}
\description{
Extract and validate site information from settings or CSV file
}
-\details{
-This function extracts and validates site information from either a PEcAn settings
-object or a CSV file. At least one input must be provided. If both are provided,
-the settings object takes precedence.
-
-\if{html}{\out{
}}\preformatted{ If using a CSV file, it must contain at minimum the columns: site_id, lat, and lon.
- The column site_name is optional and will default to site_id if not provided.
-}\if{html}{\out{
}}
-}
-\examples{
-\dontrun{
-# From settings object
-settings <- PEcAn.settings::read.settings("pecan.xml")
-site_info <- PEcAn.settings::get.site.info(settings)
-
-# From CSV file
-site_info <- PEcAn.settings::get.site.info(csv_path = "sites.csv")
-}
-}
diff --git a/base/settings/tests/testthat/test.get.site.info.R b/base/settings/tests/testthat/test.get.site.info.R
index d2ec7e4b821..eea21bcec72 100644
--- a/base/settings/tests/testthat/test.get.site.info.R
+++ b/base/settings/tests/testthat/test.get.site.info.R
@@ -13,7 +13,7 @@ test_that("get.site.info works with settings object", {
)
)
- # Call get.site.info
+
site_info <- get.site.info(settings)
# Check the result
@@ -23,14 +23,13 @@ test_that("get.site.info works with settings object", {
expect_equal(site_info$site_name, "Test Site")
expect_equal(site_info$lat, 45.0)
expect_equal(site_info$lon, -90.0)
- expect_equal(site_info$str_id, as.character(settings$run$site$id))
})
test_that("get.site.info works with CSV file", {
# Create a temporary CSV file
csv_file <- tempfile(fileext = ".csv")
csv_data <- data.frame(
- site_id = c(1000000002, 1000000003),
+ site_id = c(123, 456),
site_name = c("Site 1", "Site 2"),
lat = c(40.0, 50.0),
lon = c(-80.0, -100.0)
@@ -38,16 +37,15 @@ test_that("get.site.info works with CSV file", {
write.csv(csv_data, csv_file, row.names = FALSE)
# Call get.site.info
- site_info <- get.site.info(csv_path = csv_file)
+ site_info <- get.site.info(csv_file)
# Check the result
expect_is(site_info, "data.frame")
expect_equal(nrow(site_info), 2)
- expect_equal(site_info$site_id, c(1000000002, 1000000003))
+ expect_equal(site_info$site_id, c(123, 456))
expect_equal(site_info$site_name, c("Site 1", "Site 2"))
expect_equal(site_info$lat, c(40.0, 50.0))
expect_equal(site_info$lon, c(-80.0, -100.0))
- expect_equal(site_info$str_id, as.character(csv_data$site_id))
# Clean up
unlink(csv_file)
@@ -92,91 +90,26 @@ test_that("get.site.info works with MultiSettings object", {
expect_equal(site_info$site_name, c("Multi Site 1", "Multi Site 2"))
expect_equal(site_info$lat, c(35.0, 55.0))
expect_equal(site_info$lon, c(-85.0, -95.0))
- expect_equal(site_info$str_id, as.character(c(1000000004, 1000000005)))
})
-test_that("get.site.info works with vectorized site information", {
- # Create a settings object with vectorized site information
- settings <- list(
- run = list(
- site = list(
- id = c(1000000006, 1000000007),
- name = c("Vector Site 1", "Vector Site 2"),
- lat = c(30.0, 60.0),
- lon = c(-75.0, -105.0)
- )
- )
- )
-
- # Call get.site.info
- site_info <- get.site.info(settings)
-
- # Check the result
- expect_is(site_info, "data.frame")
- expect_equal(nrow(site_info), 2)
- expect_equal(site_info$site_id, c(1000000006, 1000000007))
- expect_equal(site_info$site_name, c("Vector Site 1", "Vector Site 2"))
- expect_equal(site_info$lat, c(30.0, 60.0))
- expect_equal(site_info$lon, c(-75.0, -105.0))
- expect_equal(site_info$str_id, as.character(c(1000000006, 1000000007)))
-})
-
-test_that("get.site.info validates coordinates with strict_checking", {
+test_that("get.site.info validates coordinates", {
# Create a settings object with invalid coordinates
settings <- list(
run = list(
site = list(
- id = 1000000008,
+ id = 999,
name = "Invalid Site",
lat = 100.0, # Invalid latitude
- lon = -180.0
+ lon = -90.0
)
)
)
- # Call get.site.info with strict_checking = TRUE
- expect_error(get.site.info(settings, strict_checking = TRUE),
- "Invalid latitude values")
-
- # Call get.site.info with strict_checking = FALSE
- site_info <- get.site.info(settings, strict_checking = FALSE)
+ # Should throw error with validation
+ expect_error(get.site.info(settings, validate = TRUE))
- # Check the result
+ # Should work without validation
+ site_info <- get.site.info(settings, validate = FALSE)
expect_is(site_info, "data.frame")
- expect_equal(nrow(site_info), 1)
- expect_equal(site_info$site_id, 1000000008)
- expect_equal(site_info$site_name, "Invalid Site")
expect_equal(site_info$lat, 100.0)
- expect_equal(site_info$lon, -180.0)
- expect_equal(site_info$str_id, as.character(settings$run$site$id))
})
-
-test_that("str_id is correctly generated as a character string", {
- settings <- list(
- run = list(
- site = list(
- id = 1000000001,
- name = "Test Site",
- lat = 45.0,
- lon = -90.0
- )
- )
- )
- site_info <- get.site.info(settings)
- expect_type(site_info$str_id, "character")
- expect_equal(site_info$str_id, as.character(settings$run$site$id))
-
- # Test with CSV input
- csv_file <- tempfile(fileext = ".csv")
- csv_data <- data.frame(
- site_id = c(1000000002, 1000000003),
- site_name = c("Site 1", "Site 2"),
- lat = c(40.0, 50.0),
- lon = c(-80.0, -100.0)
- )
- write.csv(csv_data, csv_file, row.names = FALSE)
- site_info_csv <- get.site.info(csv_path = csv_file)
- expect_type(site_info_csv$str_id, "character")
- expect_equal(site_info_csv$str_id, as.character(csv_data$site_id))
- unlink(csv_file)
-})
\ No newline at end of file
From f90b881de94f104369f44398a63b9a4c1493eef4 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 5 Jun 2025 09:02:59 +0000
Subject: [PATCH 0142/1193] add the support of soilgrid source to ic_process
---
modules/data.land/R/ic_process.R | 54 +++++++++++++++++++++++---------
1 file changed, 40 insertions(+), 14 deletions(-)
diff --git a/modules/data.land/R/ic_process.R b/modules/data.land/R/ic_process.R
index 6d07bb10656..c3b50152d2a 100644
--- a/modules/data.land/R/ic_process.R
+++ b/modules/data.land/R/ic_process.R
@@ -95,20 +95,46 @@ ic_process <- function(settings, input, dir, overwrite = FALSE){
settings$run$inputs[['poolinitcond']]$path <- newfile
return(settings)
- }else if (input$source == "NEON_veg"){
- #For debugging purposes I am hard coding in the start and end dates, will revisit and adjust once extract_NEON_veg is working within ic_process
- start_date = as.Date(input$startdate)
- end_date = as.Date(input$enddate)
- # start_date = as.Date("2020-01-01")
- # end_date = as.Date("2021-09-01")
- #Note the start and end dates for ICs are not the same as those for the forecast runs
- #please check out NEON products DP1.10098.001 for your desired site to check data availability before setting start and end dates
- }else{
-
- query <- paste0("SELECT * FROM inputs where id = ", input$id)
- input_file <- PEcAn.DB::db.query(query, con = con)
- start_date <- input_file$start_date
- end_date <- input_file$end_date
+ } else if (input$source == "SoilGrids"){
+
+ outfolder <- file.path(dir, paste0(input$source, "_site_", str_ns))
+ if(!dir.exists(outfolder)) dir.create(outfolder)
+
+ #see if there are already files generated there
+ newfile <- list.files(outfolder, "*.nc$", full.names = TRUE) %>%
+ as.list()
+ names(newfile) <- rep("path", length(newfile))
+
+ if (length(newfile) == 0 || overwrite$getveg) {
+ newfile <- PEcAn.data.land::soilgrids_ic_process(
+ settings = settings,
+ dir = outfolder,
+ overwrite = overwrite$getveg,
+ verbose = TRUE
+ )
+ }
+
+ settings$run$inputs[['poolinitcond']]$path <- newfile
+
+ return(settings)
+ } else if (input$source == "NEON_veg"){
+ #For debugging purposes I am hard coding in the start and end dates, will revisit and adjust once extract_NEON_veg is working within ic_process
+ start_date = as.Date(input$startdate)
+ end_date = as.Date(input$enddate)
+ # start_date = as.Date("2020-01-01")
+ # end_date = as.Date("2021-09-01")
+ #Note the start and end dates for ICs are not the same as those for the forecast runs
+ #please check out NEON products DP1.10098.001 for your desired site to check data availability before setting start and end dates
+ } else{
+ if(!is.null(input$id)){
+ query <- paste0("SELECT * FROM inputs where id = ", input$id)
+ input_file <- PEcAn.DB::db.query(query, con = con)
+ start_date <- input_file$start_date
+ end_date <- input_file$end_date
+ }
+ else{
+ PEcAn.logger::logger.severe(sprintf("Unsupported source: %s", input$source))
+ }
}
# set up host information
From 29bdbe737ba350d4f4be207058f2107fa9d35f91 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 5 Jun 2025 10:54:20 +0000
Subject: [PATCH 0143/1193] gSSURGO Enhancements: SOC Integration, enhanced XML
Parsing, and Ensemble improvements
---
modules/data.land/R/extract_soil_nc.R | 132 ++++++++++++++++++--------
1 file changed, 91 insertions(+), 41 deletions(-)
diff --git a/modules/data.land/R/extract_soil_nc.R b/modules/data.land/R/extract_soil_nc.R
index 974173370cc..e0c3c836e59 100644
--- a/modules/data.land/R/extract_soil_nc.R
+++ b/modules/data.land/R/extract_soil_nc.R
@@ -46,29 +46,44 @@ extract_soil_gssurgo<-function(outdir, lat, lon, size=1, radius=500, depths=c(0.
"&OUTPUTFORMAT=XMLMukeyList"
)
+ # XML handling with temp file
+ temp_file <- tempfile(fileext = ".xml")
xmll <- curl::curl_download(
mu.Path,
- ssl.verifyhost = FALSE,
- ssl.verifypeer = FALSE)
-
- mukey_str <- XML::xpathApply(
- doc = XML::xmlParse(xmll),
- path = "//MapUnitKeyList",
- fun = XML::xmlValue)
+ destfile = temp_file,
+ handle = curl::new_handle(ssl_verifypeer = FALSE, ssl_verifyhost = FALSE)
+ )
+
+ # mukey extraction with error recovery
+ mukey_str <- tryCatch({
+ result <- XML::xpathApply(
+ doc = XML::xmlParse(temp_file),
+ path = "//MapUnitKeyList",
+ fun = XML::xmlValue)
+ if (is.list(result)) result[[1]] else result
+ }, error = function(e) {
+ xml_doc <- XML::xmlParse(temp_file)
+ all_text <- XML::xpathSApply(xml_doc, "//text()", XML::xmlValue)
+ mukey_candidates <- all_text[grepl("^[0-9,]+$", all_text)]
+ if (length(mukey_candidates) > 0) mukey_candidates[1] else NULL
+ })
+ if (file.exists(temp_file)) unlink(temp_file)
+
mukeys <- strsplit(mukey_str, ",")[[1]]
-
if (length(mukeys) == 0) {
- PEcAn.logger::logger.error("No mapunit keys were found for this site.")
+ PEcAn.logger::logger.severe("No mapunit keys were found for this site.")
+ return(NULL)
}
-
# calling the query function sending the mapunit keys
soilprop <- gSSURGO.Query(
mukeys,
c("chorizon.sandtotal_r",
"chorizon.silttotal_r",
"chorizon.claytotal_r",
- "chorizon.hzdept_r"))
-
+ "chorizon.hzdept_r",
+ "chorizon.hzdepb_r",
+ "chorizon.om_r",
+ "chorizon.dbthirdbar_r"))
soilprop.new <- soilprop %>%
dplyr::arrange(.data$hzdept_r) %>%
dplyr::select(
@@ -76,19 +91,33 @@ extract_soil_gssurgo<-function(outdir, lat, lon, size=1, radius=500, depths=c(0.
fraction_of_silt_in_soil = "silttotal_r",
fraction_of_clay_in_soil = "claytotal_r",
soil_depth = "hzdept_r",
+ soil_depth_bottom = "hzdepb_r",
+ organic_matter_pct = "om_r",
+ bulk_density = "dbthirdbar_r",
mukey = "mukey") %>%
- dplyr::mutate(dplyr::across(
- c(dplyr::starts_with("fraction_of"),
- "soil_depth"),
- function(x) x / 100))
-
- soilprop.new <- soilprop.new[ stats::complete.cases(soilprop.new) , ]
+ dplyr::mutate(
+ dplyr::across(c(dplyr::starts_with("fraction_of"), "soil_depth", "soil_depth_bottom"),
+ ~ as.numeric(.) / 100),
+ horizon_thickness_m = soil_depth_bottom - soil_depth,
+ # Van Bemmelen factor conversion: OM to SOC
+ soc_percent = organic_matter_pct / 1.724,
+ soil_organic_carbon_stock = horizon_thickness_m * (soc_percent / 100) * bulk_density * 10
+ ) %>%
+ dplyr::filter(stats::complete.cases(.))
+ if(nrow(soilprop.new) == 0) {
+ PEcAn.logger::logger.error("No valid soil properties after filtering")
+ return(NULL)
+ }
+ if(!dir.exists(outdir)) dir.create(outdir, recursive = TRUE)
+
#converting it to list
- soil.data.gssurgo <- names(soilprop.new)[1:4] %>%
- purrr::map(function(var) {
- soilprop.new[, var]
- }) %>%
- stats::setNames(names(soilprop.new)[1:4])
+ soil.data.gssurgo <- list(
+ fraction_of_sand_in_soil = as.numeric(soilprop.new$fraction_of_sand_in_soil),
+ fraction_of_silt_in_soil = as.numeric(soilprop.new$fraction_of_silt_in_soil),
+ fraction_of_clay_in_soil = as.numeric(soilprop.new$fraction_of_clay_in_soil),
+ soil_depth = as.numeric(soilprop.new$soil_depth),
+ soil_organic_carbon_stock = as.numeric(soilprop.new$soil_organic_carbon_stock)
+ )
#This ensures that I have at least one soil ensemble in case the modeling part failed
all.soil.ens <-c(all.soil.ens, list(soil.data.gssurgo))
@@ -97,14 +126,17 @@ extract_soil_gssurgo<-function(outdir, lat, lon, size=1, radius=500, depths=c(0.
#- see if we need to generate soil ensemble and add that to the list of all
tryCatch({
# find the soil depth levels based on the depth argument
- # if soil profile is deeper than what is specified in the argument then I go as deep as the soil profile.
- if (max(soilprop.new$soil_depth) > max(depths)) depths <- sort (c(depths, max(max(soilprop.new$soil_depth))))
+ # if soil profile is deeper than what is specified in the argument then i go as deep as the soil profile.
+ current_max_depth <- max(soilprop.new$soil_depth, na.rm = TRUE)
+ if (!is.na(current_max_depth) && current_max_depth > max(depths)) {
+ depths <- sort(c(depths, current_max_depth))
+ }
depth.levs<-findInterval(soilprop.new$soil_depth, depths)
depth.levs[depth.levs==0] <-1
depth.levs[depth.levs>length(depths)] <-length(depths)
- soilprop.new.grouped<-soilprop.new %>%
+ soilprop.new.grouped<-soilprop.new %>%
dplyr::mutate(DepthL=depths[depth.levs])
# let's fit dirichlet for each depth level separately
@@ -120,45 +152,60 @@ extract_soil_gssurgo<-function(outdir, lat, lon, size=1, radius=500, depths=c(0.
alpha <- dir.model$alpha
alpha <- matrix(alpha, nrow= size, ncol=length(alpha), byrow=TRUE )
simulated.soil <- sirt::dirichlet.simul(alpha)
- # # using the simulated sand/silt/clay to generate soil ensemble
+
+ # Simulate SOC uncertainty using Gamma distribution to ensure positive values
+ soc_mean <- mean(DepthL.Data$soil_organic_carbon_stock, na.rm = TRUE)
+ soc_sd <- stats::sd(DepthL.Data$soil_organic_carbon_stock, na.rm = TRUE)
+ if (!is.na(soc_sd) && soc_sd > 0) {
+ shape <- (soc_mean^2) / (soc_sd^2)
+ rate <- soc_mean / (soc_sd^2)
+ simulated_soc <- pmax(stats::rgamma(size, shape=shape, rate=rate), 0)
+ } else {
+ simulated_soc <- rep(soc_mean, size)
+ }
+
simulated.soil<-simulated.soil %>%
as.data.frame %>%
dplyr::mutate(DepthL=rep(DepthL.Data[1,6], size),
- mukey=rep(DepthL.Data[1,5], size)) %>%
+ mukey=rep(DepthL.Data[1,5], size),
+ soil_organic_carbon_stock = simulated_soc) %>%
`colnames<-`(c("fraction_of_sand_in_soil",
"fraction_of_silt_in_soil",
"fraction_of_clay_in_soil",
"soil_depth",
- "mukey"))
+ "mukey",
+ "soil_organic_carbon_stock"))
simulated.soil
},
error = function(e) {
PEcAn.logger::logger.warn(conditionMessage(e))
return(NULL)
})
-
})
# estimating the proportion of areas for those mukeys which are modeled
+ mukey_area <- data.frame(
+ mukeys = unique(simulated.soil.props$mukey),
+ Area = rep(1/length(unique(simulated.soil.props$mukey)),
+ length(unique(simulated.soil.props$mukey)))
+ )
mukey_area <- mukey_area %>%
- dplyr::filter(mukeys %in% simulated.soil.props$mukey) %>%
- dplyr::mutate(Area=.data$Area/sum(.data$Area))
-
+ dplyr::filter(mukeys %in% simulated.soil.props$mukey) %>%
+ dplyr::mutate(Area = Area/sum(Area))
#--- Mixing the depths
soil.profiles<-simulated.soil.props %>%
split(.$mukey)%>%
purrr::map(function(soiltype.sim){
- sizein <- (mukey_area$Area[ mukey_area$mukey == soiltype.sim$mukey %>% unique()])*size
+ sizein <- (mukey_area$Area[mukey_area$mukey == unique(soiltype.sim$mukey)])*size
1:ceiling(sizein) %>%
purrr::map(function(x){
soiltype.sim %>%
split(.$soil_depth)%>%
- purrr::map_dfr(~.x[x,])
+ purrr::map_dfr(~.x[min(x, nrow(.x)),])
})
}) %>%
purrr::flatten()
-
#- add them to the list of all the ensembles ready to be converted to .nc file
all.soil.ens<-soil.profiles %>%
purrr::map(function(SEns){
@@ -187,16 +234,14 @@ extract_soil_gssurgo<-function(outdir, lat, lon, size=1, radius=500, depths=c(0.
new.file <- file.path(outdir, paste0(prefix, ".nc"))
#sending it to the func where some new params will be added and then it will be written down as nc file.
suppressWarnings({
- soil2netcdf(all.soil.ens[[i]][1:4], new.file)
+ soil2netcdf(all.soil.ens[[i]], new.file)
})
-
new.file
},
error = function(e) {
PEcAn.logger::logger.warn(conditionMessage(e))
return(NULL)
})
-
})
# removing the nulls or the ones that throw exception in the above trycatch
out.ense<- out.ense %>%
@@ -382,8 +427,13 @@ soil.units <- function(varname = NA){
"soil_thermal_conductivity","W m-1 K-1",
"soil_thermal_conductivity_at_saturation","W m-1 K-1",
"soil_thermal_capacity","J kg-1 K-1",
- "soil_albedo","1"
- ),
+ "soil_albedo","1",
+ "slpotwp","m",
+ "slpotcp","m",
+ "slcpd","J m-3 K-1",
+ "slden","kg m-3",
+ "soil_organic_carbon_stock","kg m-2"
+ ),
ncol=2,byrow = TRUE))
colnames(variables) <- c('var','unit')
From becb02f47ccd2188a9ac7fb895aef1030a926ab0 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 5 Jun 2025 11:31:52 +0000
Subject: [PATCH 0144/1193] Fix SQL field list construction in gSSURGO.Query to
prevent malformed queries and HTTP 500 errors
---
modules/data.land/R/gSSURGO_Query.R | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/modules/data.land/R/gSSURGO_Query.R b/modules/data.land/R/gSSURGO_Query.R
index eaf78b94c39..cd6e3da9d17 100644
--- a/modules/data.land/R/gSSURGO_Query.R
+++ b/modules/data.land/R/gSSURGO_Query.R
@@ -43,8 +43,7 @@ gSSURGO.Query <- function(mukeys,
SELECT ',
- paste(fixed_fields, collapse = ", "),
- paste(qry_fields, collapse = ", "),
+ paste(c(fixed_fields, qry_fields), collapse = ", "),
' from mapunit
join muaggatt on mapunit.mukey=muaggatt.mukey
join component on mapunit.mukey=component.mukey
From d50b43b347d3c99c849c28e46cc218a3ce4478b3 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Thu, 5 Jun 2025 11:55:08 +0000
Subject: [PATCH 0145/1193] fixed the bug in soil2netcdf
---
modules/data.land/R/soil2netcdf.R | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/data.land/R/soil2netcdf.R b/modules/data.land/R/soil2netcdf.R
index 8f3afb1affc..f8078e9c7b6 100644
--- a/modules/data.land/R/soil2netcdf.R
+++ b/modules/data.land/R/soil2netcdf.R
@@ -56,7 +56,7 @@ soil2netcdf <- function(soil.data, new.file){
ncvar <- list()
good_vars <- 0
for(n in seq_along(soil.data)){
- if(all(is.null(soil.data[[n]])) | all(is.na(soil.data[[n]]))) next
+ if(all(is.null(soil.data[[n]])) || all(is.na(soil.data[[n]]))) next
varname <- names(soil.data)[n]
if(length(soil.data[[n]])>1){
## if vector, save by depth
@@ -81,7 +81,7 @@ soil2netcdf <- function(soil.data, new.file){
## add data
for (i in seq_along(ncvar)) {
- if(is.null(soil.data[[i]])|is.na(soil.data[[i]])) next
+ if(is.null(soil.data[[i]]) || (length(soil.data[[i]]) == 1 && is.na(soil.data[[i]]))) next
ncdf4::ncvar_put(nc, ncvar[[i]], soil.data[[i]])
}
From 19e9ffcde5121a070ac97cf42d34828c78d429bf Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 7 Jun 2025 05:35:07 +0000
Subject: [PATCH 0146/1193] refactored code and fixed some bugs
---
modules/data.land/R/extract_soil_nc.R | 30 +++++++++++++--------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/modules/data.land/R/extract_soil_nc.R b/modules/data.land/R/extract_soil_nc.R
index e0c3c836e59..42280ae65bf 100644
--- a/modules/data.land/R/extract_soil_nc.R
+++ b/modules/data.land/R/extract_soil_nc.R
@@ -126,11 +126,8 @@ extract_soil_gssurgo<-function(outdir, lat, lon, size=1, radius=500, depths=c(0.
#- see if we need to generate soil ensemble and add that to the list of all
tryCatch({
# find the soil depth levels based on the depth argument
- # if soil profile is deeper than what is specified in the argument then i go as deep as the soil profile.
- current_max_depth <- max(soilprop.new$soil_depth, na.rm = TRUE)
- if (!is.na(current_max_depth) && current_max_depth > max(depths)) {
- depths <- sort(c(depths, current_max_depth))
- }
+ # if soil profile is deeper than what is specified in the argument then I go as deep as the soil profile.
+ if (max(soilprop.new$soil_depth) > max(depths)) depths <- sort (c(depths, max(soilprop.new$soil_depth)))
depth.levs<-findInterval(soilprop.new$soil_depth, depths)
depth.levs[depth.levs==0] <-1
@@ -166,8 +163,8 @@ extract_soil_gssurgo<-function(outdir, lat, lon, size=1, radius=500, depths=c(0.
simulated.soil<-simulated.soil %>%
as.data.frame %>%
- dplyr::mutate(DepthL=rep(DepthL.Data[1,6], size),
- mukey=rep(DepthL.Data[1,5], size),
+ dplyr::mutate(DepthL=rep(DepthL.Data[1,12], size),
+ mukey=rep(DepthL.Data[1,8], size),
soil_organic_carbon_stock = simulated_soc) %>%
`colnames<-`(c("fraction_of_sand_in_soil",
"fraction_of_silt_in_soil",
@@ -184,34 +181,37 @@ extract_soil_gssurgo<-function(outdir, lat, lon, size=1, radius=500, depths=c(0.
})
# estimating the proportion of areas for those mukeys which are modeled
+
+ # defining mukey_area
+ unique_mukeys <- unique(soilprop.new$mukey)
mukey_area <- data.frame(
- mukeys = unique(simulated.soil.props$mukey),
- Area = rep(1/length(unique(simulated.soil.props$mukey)),
- length(unique(simulated.soil.props$mukey)))
+ mukey = unique_mukeys,
+ Area = rep(1/length(unique_mukeys), length(unique_mukeys))
)
mukey_area <- mukey_area %>%
- dplyr::filter(mukeys %in% simulated.soil.props$mukey) %>%
- dplyr::mutate(Area = Area/sum(Area))
+ dplyr::filter(mukey %in% unique(simulated.soil.props$mukey)) %>%
+ dplyr::mutate(Area=.data$Area/sum(.data$Area, na.rm = TRUE))
#--- Mixing the depths
soil.profiles<-simulated.soil.props %>%
split(.$mukey)%>%
purrr::map(function(soiltype.sim){
- sizein <- (mukey_area$Area[mukey_area$mukey == unique(soiltype.sim$mukey)])*size
+ sizein <- (mukey_area$Area[ mukey_area$mukey == unique(soiltype.sim$mukey)[1]])*size
1:ceiling(sizein) %>%
purrr::map(function(x){
soiltype.sim %>%
split(.$soil_depth)%>%
- purrr::map_dfr(~.x[min(x, nrow(.x)),])
+ purrr::map_dfr(~.x[x,])
})
}) %>%
purrr::flatten()
#- add them to the list of all the ensembles ready to be converted to .nc file
all.soil.ens<-soil.profiles %>%
purrr::map(function(SEns){
+ SEns <- SEns[, setdiff(names(SEns), "mukey")]
names(SEns) %>%
purrr::map(function(var){
- SEns[,var]
+ as.numeric(unlist(SEns[, var]))
})%>%
stats::setNames(names(SEns))
})%>%
From 04f3cddb18b86b4a4a1c17394857fede86cae32f Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 7 Jun 2025 05:49:25 +0000
Subject: [PATCH 0147/1193] update soil.units doc with supported variables
---
modules/data.land/R/extract_soil_nc.R | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/modules/data.land/R/extract_soil_nc.R b/modules/data.land/R/extract_soil_nc.R
index 42280ae65bf..b1d760ae700 100644
--- a/modules/data.land/R/extract_soil_nc.R
+++ b/modules/data.land/R/extract_soil_nc.R
@@ -393,6 +393,11 @@ extract_soil_nc <- function(in.file,outdir,lat,lon){
#' * `soil_thermal_conductivity_at_saturation`
#' * `soil_thermal_capacity`
#' * `soil_albedo`
+#' * `slpotwp`
+#' * `slpotcp`
+#' * `slcpd`
+#' * `slden`
+#' * `soil_organic_carbon_stock`
#'
#' @param varname character vector. See details
#'
From ec957571ab7b33fa0ea726ed3d7a04d8d9676a4c Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Sat, 7 Jun 2025 22:01:57 +0530
Subject: [PATCH 0148/1193] refractor: one pft for setting config
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/pecan.xml | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
index bf661acd23d..9491b0f8b06 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
@@ -23,15 +23,6 @@
temperate.broadleaf.deciduous
-
- temperate.coniferous
-
-
- temperate.deciduous
-
-
- temperate.deciduous.ALL
- 3000
From 35311adcd19d8396869669e414c75edd2a491a07 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Sun, 8 Jun 2025 20:55:15 +0530
Subject: [PATCH 0149/1193] refractor: read and prepare settings in same block
Signed-off-by: Aritra Dey
---
.../quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index a66f3fdb4c4..8eb1002f8cb 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -18,7 +18,6 @@ library("PEcAn.all")
```{r load-settings}
settings_path <- "~/pecan.xml"
-settings <- PEcAn.settings::read.settings(settings_path)
```
# Prepare and Validate Settings
@@ -26,6 +25,7 @@ settings <- PEcAn.settings::read.settings(settings_path)
PEcAn provides utilities to process and validate settings before execution, ensuring that all required fields are correctly configured.
```{r prepare-settings}
+settings <- PEcAn.settings::read.settings(settings_path)
settings <- PEcAn.settings::prepare.settings(settings)
```
From d5697ef0b7e99f83f811feb611aec48e088bd5d7 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Sun, 8 Jun 2025 20:59:04 +0530
Subject: [PATCH 0150/1193] removed trait and meta analysis block
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 8eb1002f8cb..2c39d5c19db 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -24,11 +24,16 @@ settings_path <- "~/pecan.xml"
PEcAn provides utilities to process and validate settings before execution, ensuring that all required fields are correctly configured.
-```{r prepare-settings}
+```{r read-prepare-settings}
settings <- PEcAn.settings::read.settings(settings_path)
settings <- PEcAn.settings::prepare.settings(settings)
```
+Exploring the settings objects
+```{r explore-settings}
+str(settings, max.level = 2)
+```
+
# Convert Settings for Ecosystem Model
Converts settings into the format required by the selected ecosystem model
@@ -37,18 +42,6 @@ Converts settings into the format required by the selected ecosystem model
settings <- PEcAn.workflow::do_conversions(settings)
```
-# Trait and Meta Analysis
-
-Retrieve trait data and generate probabilistic model parameter distributions.
-
-```{r meta-analysis}
-# Retrieve trait data and prior distributions for the specified Plant Functional Types (PFTs)
-settings <- PEcAn.workflow::runModule.get.trait.data(settings)
-# Perform meta-analysis to derive probabilistic distributions for model parameters
-PEcAn.MA::runModule.run.meta.analysis(settings)
-# Save the updated settings, including the retrieved trait data, to an XML file
-PEcAn.settings::write.settings(settings, outputfile = "pecan.TRAIT.xml")
-```
# Write Model Configuration Files
From 9feb7619466f1e7d12b63d3df59afff25b40461b Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Sun, 8 Jun 2025 21:00:46 +0530
Subject: [PATCH 0151/1193] removed uncertainty and sensivity analysis
Signed-off-by: Aritra Dey
---
.../quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd | 1 -
1 file changed, 1 deletion(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 2c39d5c19db..75c6144356a 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -58,7 +58,6 @@ Start model simulations and retrieve output for analysis.
```{r run-model}
PEcAn.workflow::start_model_runs(settings)
-runModule.get.results(settings)
```
```{r get-plot-vars}
From 488318c2d9d63ad52da20a1f9a6c5b5bd9072bfa Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sun, 8 Jun 2025 20:55:35 +0000
Subject: [PATCH 0152/1193] refactor Read.IC.info.BADM
---
modules/data.land/R/IC_BADM_Utilities.R | 32 ++++++++++---------------
1 file changed, 13 insertions(+), 19 deletions(-)
diff --git a/modules/data.land/R/IC_BADM_Utilities.R b/modules/data.land/R/IC_BADM_Utilities.R
index 7c9ec829d1d..7492b98b36a 100644
--- a/modules/data.land/R/IC_BADM_Utilities.R
+++ b/modules/data.land/R/IC_BADM_Utilities.R
@@ -31,7 +31,7 @@ Read.IC.info.BADM <-function(lat, long){
biomass.df <- U.S.SB %>%
dplyr::filter(
.data$NA_L2CODE == Code_Level,
- grepl("ROOT_|AG_BIOMASS|SOIL_STOCK|SOIL_CHEM", .data$VARIABLE)
+ grepl("ROOT_|AG_BIOMASS|SOIL_STOCK|LIT_BIOMASS", .data$VARIABLE)
) %>%
dplyr::select("SITE_ID", "GROUP_ID", "VARIABLE_GROUP", "VARIABLE", "DATAVALUE")
@@ -43,7 +43,7 @@ Read.IC.info.BADM <-function(lat, long){
biomass.df <- U.S.SB %>%
dplyr::filter(
.data$NA_L1CODE == Code_Level,
- grepl("ROOT_|AG_BIOMASS|SOIL_STOCK|SOIL_CHEM", .data$VARIABLE)
+ grepl("ROOT_|AG_BIOMASS|SOIL_STOCK|LIT_BIOMASS", .data$VARIABLE)
) %>%
dplyr::select("SITE_ID", "GROUP_ID", "VARIABLE_GROUP", "VARIABLE", "DATAVALUE")
}
@@ -53,7 +53,7 @@ Read.IC.info.BADM <-function(lat, long){
if (nrow(biomass.df) < 3) {
Code_Level <- "ALL"
biomass.df <- U.S.SB %>%
- dplyr::filter(grepl("ROOT_|AG_BIOMASS|SOIL_STOCK|SOIL_CHEM", .data$VARIABLE)) %>%
+ dplyr::filter(grepl("ROOT_|AG_BIOMASS|SOIL_STOCK|LIT_BIOMASS", .data$VARIABLE)) %>%
dplyr::select("SITE_ID", "GROUP_ID", "VARIABLE_GROUP", "VARIABLE", "DATAVALUE")
}
@@ -69,18 +69,16 @@ Read.IC.info.BADM <-function(lat, long){
SoilIni <- NA
litterIni <- NA
Rootini <- NA
- litterIni <- NA
Date.in <- NA
Organ.in <- NA
# find what type of entry it is - biomass/soil or litter
if (nrow(Gdf) > 0) {
type <-
sapply(c(
- "*LIT",
"*SOIL",
- "*_BIOMASS",
+ "*_LIT_BIOMASS",
"*_ROOT_BIOMASS",
- "*_LIT_BIOMASS"
+ "*_BIOMASS"
),
grepl,
Gdf[1, 3])
@@ -100,9 +98,8 @@ Read.IC.info.BADM <-function(lat, long){
#Converting DM to C content
#Variations and determinants of carbon content in plants:a global synthesis - https://www.biogeosciences.net/15/693/2018/bg-15-693-2018.pdf
- if (length(unit.in) > 0)
- if (unit.in =="kgDM m-2") cov.factor <- cov.factor *0.48
-
+ if (length(unit.in) > 0 && unit.in == "kgDM m-2") cov.factor <- cov.factor *0.48
+
unit.ready <- ifelse(unit.in == "gC m-2",
"g/m^2",
ifelse(unit.in == "kgDM m-2", "kg/m^2",
@@ -132,7 +129,7 @@ Read.IC.info.BADM <-function(lat, long){
as.numeric()*cov.factor, unit.ready, "kg/m^2")#"AG_BIOMASS_CROP","AG_BIOMASS_SHRUB","AG_BIOMASS_TREE","AG_BIOMASS_OTHER"
} else if (type == "*SOIL") {
- val <- Gdf %>%
+ val <- Gdf %>%
dplyr::filter(grepl("SOIL_STOCK_C_ORG", .data$VARIABLE)) %>%
dplyr::pull(.data$DATAVALUE) %>%
as.numeric()
@@ -142,8 +139,7 @@ Read.IC.info.BADM <-function(lat, long){
} else if (type == "*_LIT_BIOMASS") {
litterIni <-
- PEcAn.utils::ud_convert(Gdf$DATAVALUE[1] %>%
- as.numeric()*cov.factor, unit.ready, "kg/m^2")
+ PEcAn.utils::ud_convert(suppressWarnings(as.numeric(Gdf$DATAVALUE[1]))*cov.factor, unit.ready, "kg/m^2")
} else if (type == "*_ROOT_BIOMASS") {
Rootini <-
@@ -156,21 +152,19 @@ Read.IC.info.BADM <-function(lat, long){
Site = Gdf$SITE_ID %>% unique(),
Var = Gdf$VARIABLE[1],
Date = Date.in,
- # Organ = Organ.in,
+ Organ = Organ.in,
AGB = PlantWoodIni,
soil_organic_carbon_content = SoilIni,
- litter_carbon_content = litterIni
+ litter_carbon_content = litterIni,
+ root_carbon_content = Rootini
)
)
})
-
#cleaning
-ind <- apply(entries[,5:7], 1, function(x) all(is.na(x)))
+ind <- apply(entries[,5:8], 1, function(x) all(is.na(x)))
entries <- entries[-which(ind),]
-
-
return(entries)
}
From 6940be08b746b4595e98419afed0fb7360d1eba4 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sun, 8 Jun 2025 21:03:09 +0000
Subject: [PATCH 0153/1193] updated BADM_IC_process
---
modules/data.land/R/IC_BADM_Utilities.R | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/data.land/R/IC_BADM_Utilities.R b/modules/data.land/R/IC_BADM_Utilities.R
index 7492b98b36a..c4c76464869 100644
--- a/modules/data.land/R/IC_BADM_Utilities.R
+++ b/modules/data.land/R/IC_BADM_Utilities.R
@@ -213,7 +213,7 @@ netcdf.writer.BADM <- function(lat, long, siteid, outdir, ens){
input$dims <- dims
input$vals <- variables
-
+
return(pool_ic_list2netcdf(
input = input,
outdir = outdir,
@@ -249,9 +249,9 @@ BADM_IC_process <- function(settings, dir, overwrite=TRUE){
as.list()
out.ense <- seq_len(settings$ensemble$size) %>%
- purrr::map(~ netcdf.writer.BADM(new.site$lat,
- new.site$lon,
- new.site$id,
+ purrr::map(~ netcdf.writer.BADM(new.site$lat[.x],
+ new.site$lon[.x],
+ new.site$id[.x],
outdir=dir,
ens=.x))
From 30c5ff0d57da0b3d0d40c43f06b7fcf26168484a Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sun, 8 Jun 2025 21:40:50 +0000
Subject: [PATCH 0154/1193] Ensure output directory exists before writing
NetCDF IC file and avoid namespace conflicts
---
modules/data.land/R/IC_BADM_Utilities.R | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/data.land/R/IC_BADM_Utilities.R b/modules/data.land/R/IC_BADM_Utilities.R
index c4c76464869..cf3e8d7c7c9 100644
--- a/modules/data.land/R/IC_BADM_Utilities.R
+++ b/modules/data.land/R/IC_BADM_Utilities.R
@@ -213,8 +213,8 @@ netcdf.writer.BADM <- function(lat, long, siteid, outdir, ens){
input$dims <- dims
input$vals <- variables
-
- return(pool_ic_list2netcdf(
+ if(!dir.exists(outdir)) dir.create(outdir, recursive = TRUE)
+ return(PEcAn.data.land::pool_ic_list2netcdf(
input = input,
outdir = outdir,
siteid = siteid,
From 3c1bdccbcc333c55160224c47e348a9a54308333 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Tue, 10 Jun 2025 01:22:09 +0530
Subject: [PATCH 0155/1193] exploring settings object
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 75c6144356a..3a04a02a388 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -29,9 +29,10 @@ settings <- PEcAn.settings::read.settings(settings_path)
settings <- PEcAn.settings::prepare.settings(settings)
```
-Exploring the settings objects
+
+# Exploring the settings objects
```{r explore-settings}
-str(settings, max.level = 2)
+str(settings)
```
# Convert Settings for Ecosystem Model
From cabd51a80d34be8410db0246a837ed0df309d96f Mon Sep 17 00:00:00 2001
From: David LeBauer
Date: Mon, 9 Jun 2025 16:56:47 -0400
Subject: [PATCH 0156/1193] added clip_and_move_raster function
---
CHANGELOG.md | 6 +-
modules/data.land/NAMESPACE | 1 +
modules/data.land/NEWS.md | 4 ++
.../data.land/R/clip_and_move_raster_file.R | 47 +++++++++++++++
.../testthat/test-clip_and_move_raster_file.R | 57 +++++++++++++++++++
5 files changed, 114 insertions(+), 1 deletion(-)
create mode 100644 modules/data.land/R/clip_and_move_raster_file.R
create mode 100644 modules/data.land/tests/testthat/test-clip_and_move_raster_file.R
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f9f4a00b64..481be5b71bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,11 @@ All notable changes are kept in this file. All changes made should be added to t
`Unreleased`. Once a new release is made this file will be updated to create a new `Unreleased`
section for the next release.
-For more information about this file see also [Keep a Changelog](http://keepachangelog.com/) .
+ For more information about this file see also [Keep a Changelog](http://keepachangelog.com/) .
+
+## Unreleased
+
+* Add `clip_and_move_raster()` function to PEcAn.data.land
## [1.9.0] - 2025-05-25
diff --git a/modules/data.land/NAMESPACE b/modules/data.land/NAMESPACE
index 9f1343e0d1c..8e67df1e744 100644
--- a/modules/data.land/NAMESPACE
+++ b/modules/data.land/NAMESPACE
@@ -10,6 +10,7 @@ export(Read.IC.info.BADM)
export(Read_Tucson)
export(Soilgrids_SoilC_prep)
export(buildJAGSdata_InventoryRings)
+export(clip_and_move_raster_file)
export(cohort2pool)
export(dataone_download)
export(download.SM_CDS)
diff --git a/modules/data.land/NEWS.md b/modules/data.land/NEWS.md
index 629f275f22b..d17dee9a186 100644
--- a/modules/data.land/NEWS.md
+++ b/modules/data.land/NEWS.md
@@ -1,3 +1,7 @@
+# Unreleased
+
+* Add function `clip_and_move_raster()`.
+
# PEcAn.data.land 1.8.1
* Dependency `datapack` is now optional. It is only used by `dataone_download()` (#3373).
diff --git a/modules/data.land/R/clip_and_move_raster_file.R b/modules/data.land/R/clip_and_move_raster_file.R
new file mode 100644
index 00000000000..4cd9782b995
--- /dev/null
+++ b/modules/data.land/R/clip_and_move_raster_file.R
@@ -0,0 +1,47 @@
+#' Clip and Move a Raster File
+#'
+#' Clips a raster to a polygon bounding box, optionally masks to polygon, and saves the
+#' output in the same format as the input.
+#'
+#' @param input_path Character. Path to the input raster file.
+#' @param polygon An `sf` or `SpatVector` object to be used for clipping and masking.
+#' @param out_path Character. Path to save the processed raster.
+#' @return Character. The path to the saved output raster.
+#' @export
+#' @author David LeBauer
+clip_and_move_raster_files <- function(input_path, polygon, out_path, mask = TRUE, overwrite = TRUE) {
+ rast_in <- terra::rast(input_path)
+ # check that input file exists
+ if (!file.exists(input_path)) {
+ PEcAn.logger::logger.error("Input raster file does not exist: ", input_path)
+ }
+ # check that polygon is valid
+ if (!inherits(polygon, c("sf", "SpatVector"))) {
+ PEcAn.logger::logger.error("Polygon must be an sf object or SpatVector")
+ }
+ if (inherits(polygon, "sf")) {
+ # Convert sf object to SpatVector
+ polygon <- terra::vect(polygon)
+ }
+ # Reproject polygon to raster CRS, convert to SpatVector
+ polygon_proj <- sf::st_transform(polygon, crs = terra::crs(rast_in))
+ polygon_vect <- terra::vect(polygon_proj)
+ rast_crop <- terra::crop(rast_in, polygon_vect)
+
+ if (mask) {
+ rast_to_write <- terra::mask(rast_crop, polygon_vect)
+ } else {
+ rast_to_write <- rast_crop
+ }
+ filetype <- terra::filetype(rast_in)
+ gdal_opts <- terra::gdal(rast_in)
+
+ terra::writeRaster(
+ rast_to_write,
+ filename = out_path,
+ overwrite = overwrite,
+ filetype = filetype,
+ gdal = gdal_opts
+ )
+ return(out_path)
+}
\ No newline at end of file
diff --git a/modules/data.land/tests/testthat/test-clip_and_move_raster_file.R b/modules/data.land/tests/testthat/test-clip_and_move_raster_file.R
new file mode 100644
index 00000000000..5fb1879ea05
--- /dev/null
+++ b/modules/data.land/tests/testthat/test-clip_and_move_raster_file.R
@@ -0,0 +1,57 @@
+library(testthat)
+library(terra)
+library(sf)
+
+# load function under test
+source("../../clip_and_move_rasters.R") # adjust relative path as needed
+
+# helper to create a small test raster
+make_raster <- function(crs = "EPSG:4326") {
+ r <- terra::rast(matrix(1:16, 4, 4),
+ extent = terra::ext(0, 4, 0, 4), crs = crs)
+ f <- tempfile(fileext = ".tif")
+ terra::writeRaster(r, f, filetype = "GTiff", overwrite = TRUE)
+ f
+}
+
+test_that("clip & mask works: output clipped to polygon bbox and masked", {
+ in_r <- make_raster()
+ on.exit(unlink(in_r), add = TRUE)
+ # box polygon (1,1)-(3,3)
+ poly <- sf::st_as_sfc(sf::st_bbox(c(xmin=1, ymin=1, xmax=3, ymax=3)), crs = "EPSG:4326")
+ out_f <- tempfile(fileext = ".tif")
+ clip_and_move_raster_files(in_r, poly, out_f, mask = TRUE)
+ expect_true(file.exists(out_f))
+ r_out <- terra::rast(out_f)
+ # extent == polygon bbox
+ expect_equal(terra::ext(r_out), terra::ext(sf::st_bbox(poly)))
+ # some values NA (corners) and some not (center)
+ vals <- terra::values(r_out)
+ expect_true(any(is.na(vals)))
+ expect_true(any(!is.na(vals)))
+ unlink(out_f)
+})
+
+test_that("clip without mask retains all values within bbox", {
+ in_r <- make_raster()
+ on.exit(unlink(in_r), add = TRUE)
+ poly <- sf::st_as_sfc(sf::st_bbox(c(xmin=1, ymin=1, xmax=3, ymax=3)), crs = "EPSG:4326")
+ out_f <- tempfile(fileext = ".tif")
+ clip_and_move_raster_files(in_r, poly, out_f, mask = FALSE)
+ r_out <- terra::rast(out_f)
+ expect_false(any(is.na(terra::values(r_out))))
+ unlink(in_r); unlink(out_f)
+})
+
+test_that("preserves CRS and filetype", {
+ in_r <- make_raster(crs = "EPSG:3857")
+ on.exit(unlink(in_r), add = TRUE)
+ poly <- sf::st_as_sfc(sf::st_bbox(c(xmin=0, ymin=0, xmax=2, ymax=2)), crs = "EPSG:3857")
+ out_f <- tempfile(fileext = ".tif")
+ clip_and_move_raster_files(in_r, poly, out_f)
+ r_out <- terra::rast(out_f)
+ expect_true(terra::same.crs(r_out, terra::rast(in_r)))
+ # file extension implies GTiff
+ expect_true(grepl("\\.tif$", out_f))
+ unlink(in_r); unlink(out_f)
+})
From 3ee41819c80d4725384ef2b83c70ab6f50d5e1b6 Mon Sep 17 00:00:00 2001
From: David LeBauer
Date: Mon, 9 Jun 2025 17:06:26 -0400
Subject: [PATCH 0157/1193] add added section to unreleased changelog
---
CHANGELOG.md | 2 ++
1 file changed, 2 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 481be5b71bb..50d5b0dce4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ section for the next release.
## Unreleased
+### Added
+
* Add `clip_and_move_raster()` function to PEcAn.data.land
## [1.9.0] - 2025-05-25
From 8ebdec51477f25cdecef068b748b7a5a78bc6e42 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 10 Jun 2025 08:13:54 +0000
Subject: [PATCH 0158/1193] Bump requests from 2.32.0 to 2.32.4 in
/docker/monitor
Bumps [requests](https://github.com/psf/requests) from 2.32.0 to 2.32.4.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.32.0...v2.32.4)
---
updated-dependencies:
- dependency-name: requests
dependency-version: 2.32.4
dependency-type: direct:production
...
Signed-off-by: dependabot[bot]
---
docker/monitor/requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker/monitor/requirements.txt b/docker/monitor/requirements.txt
index d5537687d4d..8bf47055ece 100644
--- a/docker/monitor/requirements.txt
+++ b/docker/monitor/requirements.txt
@@ -1,4 +1,4 @@
pika==1.3.2
-requests==2.32.0
+requests==2.32.4
psycopg2-binary==2.9.9
python-dateutil==2.8.2
From 0b7ae8232ac2c6033afeb1899f2b1491ad8e8840 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 10 Jun 2025 10:32:08 -0400
Subject: [PATCH 0159/1193] Update the function to write the setup credentials.
---
modules/data.remote/R/NASA_DAAC_download.R | 88 ++++++++++---------
.../data.remote/man/DAAC_Set_Credential.Rd | 19 ----
modules/data.remote/man/NASA_DAAC_download.Rd | 4 +-
modules/data.remote/man/getnetrc.Rd | 17 ++++
4 files changed, 65 insertions(+), 63 deletions(-)
delete mode 100644 modules/data.remote/man/DAAC_Set_Credential.Rd
create mode 100644 modules/data.remote/man/getnetrc.Rd
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index d193e2d86bd..0ba0efd358e 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -13,7 +13,7 @@
#' downloaded files. Default is the current work directory(getwd()).
#' @param band Character: the band name of data to be requested.
#' @param credential.folder Character: physical path to the folder that contains
-#' the credential file. The default is NULL.
+#' the credential file. The default is the current working directory.
#' @param doi Character: data DOI on the NASA DAAC server, it can be obtained
#' directly from the NASA ORNL DAAC data portal (e.g., GEDI L4A through
#' https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=2056).
@@ -52,16 +52,16 @@ NASA_DAAC_download <- function(ul_lat,
to,
outdir = getwd(),
band = NULL,
- credential.folder = NULL,
+ credential.folder = getwd(),
doi,
just_path = FALSE) {
# Determine if we have enough inputs.
if (is.null(outdir) & !just_path) {
- PEcAn.logger::logger.info("Please provide outdir if you want to download the file.")
+ message("Please provide outdir if you want to download the file.")
return(0)
}
# setup DAAC Credentials.
- DAAC_Set_Credential(folder.path = credential.folder)
+ netrc <- getnetrc(credential.folder)
# setup arguments for URL.
daterange <- c(from, to)
# grab provider and concept id from CMR based on DOI.
@@ -133,7 +133,7 @@ NASA_DAAC_download <- function(ul_lat,
if (!just_path) {
# check if the doSNOW package is available.
if ("try-error" %in% class(try(find.package("doSNOW")))) {
- PEcAn.logger::logger.info("The doSNOW package is not installed.")
+ message("The doSNOW package is not installed.")
return(NA)
}
# printing out parallel environment.
@@ -158,19 +158,19 @@ NASA_DAAC_download <- function(ul_lat,
# if there is a problem in downloading file.
while ("try-error" %in% class(try(
response <-
- httr::GET(
- granules_href[i],
- httr::write_disk(file.path(outdir, basename(granules_href)[i]), overwrite = T),
- httr::authenticate(user = Sys.getenv("ed_un"),
- password = Sys.getenv("ed_pw"))
- )
+ httr::GET(
+ granules_href[i],
+ httr::write_disk(file.path(outdir, basename(granules_href)[i]), overwrite = T),
+ httr::config(netrc = TRUE, netrc_file = netrc),
+ httr::set_cookies("LC" = "cookies")
+ )
))){
response <-
httr::GET(
granules_href[i],
httr::write_disk(file.path(outdir, basename(granules_href)[i]), overwrite = T),
- httr::authenticate(user = Sys.getenv("ed_un"),
- password = Sys.getenv("ed_pw"))
+ httr::config(netrc = TRUE, netrc_file = netrc),
+ httr::set_cookies("LC" = "cookies")
)
}
# Check if we can successfully open the downloaded file.
@@ -178,7 +178,7 @@ NASA_DAAC_download <- function(ul_lat,
if (grepl(pattern = ".h5", x = basename(granules_href)[i], fixed = T)) {
# check if the hdf5r package exists.
if ("try-error" %in% class(try(find.package("hdf5r")))) {
- PEcAn.logger::logger.info("The hdf5r package is not installed.")
+ message("The hdf5r package is not installed.")
return(NA)
}
while ("try-error" %in% class(try(hdf5r::H5File$new(file.path(outdir, basename(granules_href)[i]), mode = "r"), silent = T))) {
@@ -186,8 +186,8 @@ NASA_DAAC_download <- function(ul_lat,
httr::GET(
granules_href[i],
httr::write_disk(file.path(outdir, basename(granules_href)[i]), overwrite = T),
- httr::authenticate(user = Sys.getenv("ed_un"),
- password = Sys.getenv("ed_pw"))
+ httr::config(netrc = TRUE, netrc_file = netrc),
+ httr::set_cookies("LC" = "cookies")
)
}
# if it's HDF4 or regular GeoTIFF file.
@@ -199,8 +199,8 @@ NASA_DAAC_download <- function(ul_lat,
httr::GET(
granules_href[i],
httr::write_disk(file.path(outdir, basename(granules_href)[i]), overwrite = T),
- httr::authenticate(user = Sys.getenv("ed_un"),
- password = Sys.getenv("ed_pw"))
+ httr::config(netrc = TRUE, netrc_file = netrc),
+ httr::set_cookies("LC" = "cookies")
)
}
}
@@ -215,8 +215,8 @@ NASA_DAAC_download <- function(ul_lat,
httr::GET(
granules_href[i],
httr::write_disk(file.path(outdir, basename(granules_href)[i]), overwrite = T),
- httr::authenticate(user = Sys.getenv("ed_un"),
- password = Sys.getenv("ed_pw"))
+ httr::config(netrc = TRUE, netrc_file = netrc),
+ httr::set_cookies("LC" = "cookies")
)
}
}
@@ -298,30 +298,34 @@ NASA_CMR_finder <- function(doi) {
return(as.list(data.frame(cbind(provider, concept_id))))
}
-#' Set NASA DAAC credentials to the current environment.
+#' Set NASA DAAC credentials to the .netrc file.
#'
-#' @param replace Boolean: determine if we want to replace the current credentials from the environment. The default is FALSE.
-#' @param folder.path Character: physical path to the folder that contains the credential file. The default is NULL.
+#' @param dl_dir Character: physical path to the folder that the .netrc file will be generated.
#'
#' @author Dongchen Zhang
-DAAC_Set_Credential <- function(replace = FALSE, folder.path = NULL) {
- if (replace) {
- PEcAn.logger::logger.info("Replace previous stored NASA DAAC credentials.")
- }
- # if we have the credential file.
- if (!is.null(folder.path)) {
- if (file.exists(file.path(folder.path, ".nasadaacapirc"))) {
- key <- readLines(file.path(folder.path, ".nasadaacapirc"))
- Sys.setenv(ed_un = key[1], ed_pw = key[2])
- }
- }
- # otherwise we will type the credentials manually.
- if (replace | nchar(Sys.getenv("ed_un")) == 0 | nchar(Sys.getenv("ed_un")) == 0) {
- Sys.setenv(ed_un = sprintf(
- getPass::getPass(msg = "Enter NASA Earthdata Login Username \n (or create an account at urs.earthdata.nasa.gov) :")
- ),
- ed_pw = sprintf(
- getPass::getPass(msg = "Enter NASA Earthdata Login Password:")
- ))
+getnetrc <- function (dl_dir) {
+ netrc <- file.path(dl_dir, ".netrc")
+ netrc <- gsub("~", Sys.getenv("HOME"), netrc)
+ if (file.exists(netrc) == FALSE ||
+ any(grepl("urs.earthdata.nasa.gov",
+ readLines(netrc))) == FALSE) {
+ netrc_conn <- file(netrc)
+ writeLines(c(
+ "machine urs.earthdata.nasa.gov",
+ sprintf(
+ "login %s",
+ getPass::getPass(msg = "Enter NASA Earthdata Login Username \n (or create an account at urs.earthdata.nasa.gov) :")
+ ),
+ sprintf(
+ "password %s",
+ getPass::getPass(msg = "Enter NASA Earthdata Login Password:")
+ )
+ ),
+ netrc_conn)
+ close(netrc_conn)
+ message(
+ "A netrc file with your Earthdata Login credentials was stored in the output directory "
+ )
}
+ return(netrc)
}
\ No newline at end of file
diff --git a/modules/data.remote/man/DAAC_Set_Credential.Rd b/modules/data.remote/man/DAAC_Set_Credential.Rd
deleted file mode 100644
index fd4f566c2cf..00000000000
--- a/modules/data.remote/man/DAAC_Set_Credential.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/NASA_DAAC_download.R
-\name{DAAC_Set_Credential}
-\alias{DAAC_Set_Credential}
-\title{Set NASA DAAC credentials to the current environment.}
-\usage{
-DAAC_Set_Credential(replace = FALSE, folder.path = NULL)
-}
-\arguments{
-\item{replace}{Boolean: determine if we want to replace the current credentials from the environment. The default is FALSE.}
-
-\item{folder.path}{Character: physical path to the folder that contains the credential file. The default is NULL.}
-}
-\description{
-Set NASA DAAC credentials to the current environment.
-}
-\author{
-Dongchen Zhang
-}
diff --git a/modules/data.remote/man/NASA_DAAC_download.Rd b/modules/data.remote/man/NASA_DAAC_download.Rd
index a097ecbcc7f..23e0de9a0e8 100644
--- a/modules/data.remote/man/NASA_DAAC_download.Rd
+++ b/modules/data.remote/man/NASA_DAAC_download.Rd
@@ -14,7 +14,7 @@ NASA_DAAC_download(
to,
outdir = getwd(),
band = NULL,
- credential.folder = NULL,
+ credential.folder = getwd(),
doi,
just_path = FALSE
)
@@ -42,7 +42,7 @@ downloaded files. Default is the current work directory(getwd()).}
\item{band}{Character: the band name of data to be requested.}
\item{credential.folder}{Character: physical path to the folder that contains
-the credential file. The default is NULL.}
+the credential file. The default is the current working directory.}
\item{doi}{Character: data DOI on the NASA DAAC server, it can be obtained
directly from the NASA ORNL DAAC data portal (e.g., GEDI L4A through
diff --git a/modules/data.remote/man/getnetrc.Rd b/modules/data.remote/man/getnetrc.Rd
new file mode 100644
index 00000000000..d78ca3be0db
--- /dev/null
+++ b/modules/data.remote/man/getnetrc.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/NASA_DAAC_download.R
+\name{getnetrc}
+\alias{getnetrc}
+\title{Set NASA DAAC credentials to the .netrc file.}
+\usage{
+getnetrc(dl_dir)
+}
+\arguments{
+\item{dl_dir}{Character: physical path to the folder that the .netrc file will be generated.}
+}
+\description{
+Set NASA DAAC credentials to the .netrc file.
+}
+\author{
+Dongchen Zhang
+}
From a19355fe42668f3e3b7706b558710ccbcb2b97b7 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 10 Jun 2025 10:47:37 -0400
Subject: [PATCH 0160/1193] Add error-catch control.
---
modules/data.remote/R/NASA_DAAC_download.R | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index 0ba0efd358e..1a98010917e 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -109,6 +109,11 @@ NASA_DAAC_download <- function(ul_lat,
page <- page + 1
}
}
+ # if no files are found.
+ if (is.null(granules_href)) {
+ PEcAn.logger::logger.info("No files found. Please check the spatial and temporal search window.")
+ return(NA)
+ }
# remove duplicated files.
inds <- which(duplicated(basename(granules_href)))
if (length(inds) > 0) {
From f3df67962ece8dec24079197373b2556fed7667d Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 10 Jun 2025 11:01:49 -0400
Subject: [PATCH 0161/1193] Add nc file download to the function.
---
modules/data.remote/R/NASA_DAAC_download.R | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index 1a98010917e..846c01a7f0d 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -122,7 +122,8 @@ NASA_DAAC_download <- function(ul_lat,
# remove non-image files.
inds <- which(grepl(".h5", basename(granules_href)) |
grepl(".tif", basename(granules_href)) |
- grepl(".hdf", basename(granules_href)))
+ grepl(".hdf", basename(granules_href)) |
+ grepl(".nc", basename(granules_href)))
granules_href <- granules_href[inds]
# detect existing files if we want to download the files.
if (!just_path) {
@@ -198,7 +199,8 @@ NASA_DAAC_download <- function(ul_lat,
# if it's HDF4 or regular GeoTIFF file.
} else if (grepl(pattern = ".tif", x = basename(granules_href)[i], fixed = T) |
grepl(pattern = ".tiff", x = basename(granules_href)[i], fixed = T) |
- grepl(pattern = ".hdf", x = basename(granules_href)[i], fixed = T)) {
+ grepl(pattern = ".hdf", x = basename(granules_href)[i], fixed = T) |
+ grepl(pattern = ".nc", x = basename(granules_href)[i], fixed = T)) {
while ("try-error" %in% class(try(terra::rast(file.path(outdir, basename(granules_href)[i])), silent = T))) {
response <-
httr::GET(
From 2c2b774ee63c041b8b397c7e6cacf13c5365ddc7 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 10 Jun 2025 11:06:44 -0400
Subject: [PATCH 0162/1193] Allow for multiple bands input.
---
modules/data.remote/R/NASA_DAAC_download.R | 4 ++--
modules/data.remote/man/NASA_DAAC_download.Rd | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index 846c01a7f0d..6dd71bf6968 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -11,7 +11,7 @@
#' "yyyy-mm-dd".
#' @param outdir Character: path of the directory in which to save the
#' downloaded files. Default is the current work directory(getwd()).
-#' @param band Character: the band name of data to be requested.
+#' @param band Character: the band name (or vector of band names) of data to be requested.
#' @param credential.folder Character: physical path to the folder that contains
#' the credential file. The default is the current working directory.
#' @param doi Character: data DOI on the NASA DAAC server, it can be obtained
@@ -104,7 +104,7 @@ NASA_DAAC_download <- function(ul_lat,
}
# grab specific band.
if (!is.null(band)) {
- granules_href <- granules_href[which(grepl(band, granules_href, fixed = T))]
+ granules_href <- granules_href[which(grepl(paste(band, collapse = "|"), granules_href))]
}
page <- page + 1
}
diff --git a/modules/data.remote/man/NASA_DAAC_download.Rd b/modules/data.remote/man/NASA_DAAC_download.Rd
index 23e0de9a0e8..6fd2e922409 100644
--- a/modules/data.remote/man/NASA_DAAC_download.Rd
+++ b/modules/data.remote/man/NASA_DAAC_download.Rd
@@ -39,7 +39,7 @@ NASA_DAAC_download(
\item{outdir}{Character: path of the directory in which to save the
downloaded files. Default is the current work directory(getwd()).}
-\item{band}{Character: the band name of data to be requested.}
+\item{band}{Character: the band name (or vector of band names) of data to be requested.}
\item{credential.folder}{Character: physical path to the folder that contains
the credential file. The default is the current working directory.}
From 7f073c3b424486b9894af20f3a619beffbb32f65 Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Tue, 10 Jun 2025 12:21:27 -0700
Subject: [PATCH 0163/1193] write MultiSettings with only one site correctly
---
base/settings/DESCRIPTION | 2 +-
base/settings/NEWS.md | 6 ++++++
base/settings/R/MultiSettings.R | 2 +-
base/settings/tests/testthat/test.MultiSettings.class.R | 8 ++++++++
4 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/base/settings/DESCRIPTION b/base/settings/DESCRIPTION
index a1efa5acca4..44c0f263eb3 100644
--- a/base/settings/DESCRIPTION
+++ b/base/settings/DESCRIPTION
@@ -5,7 +5,7 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"),
person("Rob", "Kooper", role = c("aut"),
email = "kooper@illinois.edu"),
person("University of Illinois, NCSA", role = c("cph")))
-Version: 1.9.0
+Version: 1.9.1
License: BSD_3_clause + file LICENSE
Copyright: Authors
LazyLoad: yes
diff --git a/base/settings/NEWS.md b/base/settings/NEWS.md
index 1416a6f632f..9f2bad1dfd0 100644
--- a/base/settings/NEWS.md
+++ b/base/settings/NEWS.md
@@ -1,3 +1,9 @@
+# PEcAn.settings 1.9.1
+
+## Fixed
+
+* listToXml.MultiSettings now produces valid XML from a MultiSettings with length 1.
+
# PEcAn.settings 1.9.0
## Changed
diff --git a/base/settings/R/MultiSettings.R b/base/settings/R/MultiSettings.R
index 2bc8dfae088..6c6b50709fa 100644
--- a/base/settings/R/MultiSettings.R
+++ b/base/settings/R/MultiSettings.R
@@ -177,7 +177,7 @@ printAll.MultiSettings <- function(x) {
#' @export
listToXml.MultiSettings <- function(item, tag, collapse = TRUE) {
- if (collapse && length(item) > 1) {
+ if (collapse) {
if (.expandableItemsTag %in% names(item)) {
stop("Settings can't contain reserved tag 'multisettings'.")
}
diff --git a/base/settings/tests/testthat/test.MultiSettings.class.R b/base/settings/tests/testthat/test.MultiSettings.class.R
index 28d2114b2a9..b1469febe8c 100644
--- a/base/settings/tests/testthat/test.MultiSettings.class.R
+++ b/base/settings/tests/testthat/test.MultiSettings.class.R
@@ -14,8 +14,10 @@ test_that("MultiSettings constructor works as expected", {
expect_error(MultiSettings(settings, l))
multiSettings <- MultiSettings(settings, settings, settings)
+ multiSettings1 <- MultiSettings(settings)
multiSettings2 <- MultiSettings(list(settings, settings, settings))
multiSettings3 <- MultiSettings(multiSettings)
+ expect_identical(multiSettings1[[1]], settings)
expect_identical(multiSettings2, multiSettings)
expect_identical(multiSettings3, multiSettings)
@@ -291,6 +293,12 @@ test_that("multiSettings write to and read from xml as expcted (i.e., with colla
expect_true(are.equal.possiblyNumericToCharacter(msNew, msOrig))
})
+test_that("length one MultiSettings is collapsed same as longer ones", {
+ ms1 <- MultiSettings(settings)
+ ms1XML <- listToXml(ms1, "pecan")
+ expect_length(XML::getNodeSet(ms1XML, "/pecan/multisettings"), 1)
+})
+
test_that("expandMultiSettings does nothing to a non-MultiSettings list", {
expect_identical(settings, expandMultiSettings(settings))
From 7739ed06fd1bc0b7fa15080eb72766b8e5c24d4c Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 10 Jun 2025 18:52:51 -0400
Subject: [PATCH 0164/1193] Update the logic when creating the credential file.
---
modules/data.remote/R/NASA_DAAC_download.R | 23 +++++++++++--------
modules/data.remote/man/NASA_DAAC_download.Rd | 5 ++--
modules/data.remote/man/getnetrc.Rd | 4 ++--
3 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index 6dd71bf6968..7f0cc7289c0 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -12,8 +12,7 @@
#' @param outdir Character: path of the directory in which to save the
#' downloaded files. Default is the current work directory(getwd()).
#' @param band Character: the band name (or vector of band names) of data to be requested.
-#' @param credential.folder Character: physical path to the folder that contains
-#' the credential file. The default is the current working directory.
+#' @param credential_path Character: physical path to the credential file. The default NULL.
#' @param doi Character: data DOI on the NASA DAAC server, it can be obtained
#' directly from the NASA ORNL DAAC data portal (e.g., GEDI L4A through
#' https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=2056).
@@ -52,16 +51,21 @@ NASA_DAAC_download <- function(ul_lat,
to,
outdir = getwd(),
band = NULL,
- credential.folder = getwd(),
+ credential_path = NULL,
doi,
just_path = FALSE) {
# Determine if we have enough inputs.
if (is.null(outdir) & !just_path) {
message("Please provide outdir if you want to download the file.")
- return(0)
+ return(NA)
}
# setup DAAC Credentials.
- netrc <- getnetrc(credential.folder)
+ # detect if we need the credential or not.
+ if (!just_path & is.null(credential_path)) {
+ PEcAn.logger::logger.info("Please provide the physical path to the credential file!")
+ return(NA)
+ }
+ netrc <- getnetrc(credential_path)
# setup arguments for URL.
daterange <- c(from, to)
# grab provider and concept id from CMR based on DOI.
@@ -307,16 +311,15 @@ NASA_CMR_finder <- function(doi) {
#' Set NASA DAAC credentials to the .netrc file.
#'
-#' @param dl_dir Character: physical path to the folder that the .netrc file will be generated.
+#' @param dl_path Character: physical path to the .netrc credential file.
#'
#' @author Dongchen Zhang
-getnetrc <- function (dl_dir) {
- netrc <- file.path(dl_dir, ".netrc")
- netrc <- gsub("~", Sys.getenv("HOME"), netrc)
+getnetrc <- function (dl_path) {
+ netrc <- path.expand(dl_path)
if (file.exists(netrc) == FALSE ||
any(grepl("urs.earthdata.nasa.gov",
readLines(netrc))) == FALSE) {
- netrc_conn <- file(netrc)
+ netrc_conn <- file(netrc, open = "at")
writeLines(c(
"machine urs.earthdata.nasa.gov",
sprintf(
diff --git a/modules/data.remote/man/NASA_DAAC_download.Rd b/modules/data.remote/man/NASA_DAAC_download.Rd
index 6fd2e922409..66edf36d73a 100644
--- a/modules/data.remote/man/NASA_DAAC_download.Rd
+++ b/modules/data.remote/man/NASA_DAAC_download.Rd
@@ -14,7 +14,7 @@ NASA_DAAC_download(
to,
outdir = getwd(),
band = NULL,
- credential.folder = getwd(),
+ credential_path = NULL,
doi,
just_path = FALSE
)
@@ -41,8 +41,7 @@ downloaded files. Default is the current work directory(getwd()).}
\item{band}{Character: the band name (or vector of band names) of data to be requested.}
-\item{credential.folder}{Character: physical path to the folder that contains
-the credential file. The default is the current working directory.}
+\item{credential_path}{Character: physical path to the credential file. The default NULL.}
\item{doi}{Character: data DOI on the NASA DAAC server, it can be obtained
directly from the NASA ORNL DAAC data portal (e.g., GEDI L4A through
diff --git a/modules/data.remote/man/getnetrc.Rd b/modules/data.remote/man/getnetrc.Rd
index d78ca3be0db..420cdf42f7b 100644
--- a/modules/data.remote/man/getnetrc.Rd
+++ b/modules/data.remote/man/getnetrc.Rd
@@ -4,10 +4,10 @@
\alias{getnetrc}
\title{Set NASA DAAC credentials to the .netrc file.}
\usage{
-getnetrc(dl_dir)
+getnetrc(dl_path)
}
\arguments{
-\item{dl_dir}{Character: physical path to the folder that the .netrc file will be generated.}
+\item{dl_path}{Character: physical path to the .netrc credential file.}
}
\description{
Set NASA DAAC credentials to the .netrc file.
From f3d139315b7f34e896bdbf7fb24fffcf2b8dad46 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Wed, 11 Jun 2025 20:06:25 +0530
Subject: [PATCH 0165/1193] remove conversion of settings
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 3a04a02a388..1290b4d2ccd 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -35,15 +35,6 @@ settings <- PEcAn.settings::prepare.settings(settings)
str(settings)
```
-# Convert Settings for Ecosystem Model
-
-Converts settings into the format required by the selected ecosystem model
-
-```{r convert-settings}
-settings <- PEcAn.workflow::do_conversions(settings)
-```
-
-
# Write Model Configuration Files
Generate model configuration files before simulation runs.
From 78531b9ddf80d7e17ecd0670d6701c4abb5824ef Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Wed, 11 Jun 2025 20:56:45 +0530
Subject: [PATCH 0166/1193] added some basic intro for the notebook
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 99 +++++++++++++++++--
1 file changed, 90 insertions(+), 9 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 1290b4d2ccd..5ecd730acf4 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -4,51 +4,132 @@ author: "PEcAn"
format: pdf
---
+# Introduction {#introduction}
+
+Welcome to this PEcAn workflow notebook! This notebook will guide you through running an ecosystem model using PEcAn's programmatic interface.
+
+## What is PEcAn?
+
+PEcAn (Predictive Ecosystem Analyzer) is a scientific workflow system designed to make ecosystem modeling more transparent, repeatable, and accessible. It helps researchers:
+
+- Run ecosystem models with standardized inputs and outputs
+- Perform uncertainty analysis on model parameters
+- Compare model predictions with observations
+- Share and reproduce scientific workflows
+
+## What This Notebook Does
+
+This notebook demonstrates how to:
+1. Set up and configure a PEcAn workflow
+2. Run an ecosystem model simulation
+3. Analyze and visualize the results
+
+## Prerequisites
+
+Before running this notebook, make sure you have:
+- The PEcAn R packages installed
+- A valid `pecan.xml` configuration file (or use the example provided)
+
+## How to Use This Notebook
+
+1. Each section is clearly marked with a heading
+2. Code chunks are provided with explanations
+3. You can run the code chunks sequentially
+4. Feel free to modify parameters to suit your needs
+
+Let's get started!
+
+#### Objective
+
+This demo illustrates how to run a basic PEcAn workflow using an R-based Quarto notebook. We will cover loading settings, performing trait and meta-analysis, writing model configuration files, and running model simulations. This approach provides a programmatic alternative to the web-based PEcAn interface for executing ecosystem models.
+
# Load PEcAn packages
+First, we need to load the PEcAn R packages. These packages provide all the functions we'll use to run the workflow.
+
```{r libraries}
+# Load the PEcAn.all package, which includes all necessary PEcAn functionality
library("PEcAn.all")
```
-# Load PEcAn settings files.
+# Load PEcAn settings files
+
+This section handles the crucial step of loading the PEcAn settings.
-- If you have pecan.xml generated from web,place the `pecan.xml` file in your directory and copy its file path. Assign this path to the **settings_path** variable.
-- Alternatively, for a sample or initial run, you can use the example pecan.xml file available at:
-`/pecan/base/all/inst/quarto_notebooks/_extensions/demo1/run-model`.
+- If you have a `pecan.xml` file generated from the web interface, place it in your working directory and copy its file path. Assign this path to the **settings_path** variable.
+- Alternatively, for a sample or initial run, you can use the example `pecan.xml` file available at: `/pecan/base/all/inst/quarto_notebooks/_extensions/demo1/run-model`.
```{r load-settings}
+# Specify the path to your pecan.xml file
+# Replace this with your actual pecan.xml file path
settings_path <- "~/pecan.xml"
```
# Prepare and Validate Settings
-PEcAn provides utilities to process and validate settings before execution, ensuring that all required fields are correctly configured.
+After specifying the path to your `pecan.xml` file, the next step involves reading and preparing these settings. PEcAn provides robust utilities to process and validate your configurations before any execution begins.
+
+* `PEcAn.settings::read.settings(settings_path)`: This function parses the `pecan.xml` file, converting its contents into an R list object that PEcAn can work with. This step ensures that the XML structure is correctly interpreted.
+* `PEcAn.settings::prepare.settings(settings)`: After reading, the settings are passed to this function for further preparation and validation. This involves checking for missing required fields, setting up default values where necessary, and ensuring that all paths and configurations are consistent with PEcAn's operational requirements. This step is crucial for preventing errors during subsequent workflow stages.
```{r read-prepare-settings}
+# Read the settings from the pecan.xml file
settings <- PEcAn.settings::read.settings(settings_path)
+
+# Prepare and validate the settings
settings <- PEcAn.settings::prepare.settings(settings)
```
-
# Exploring the settings objects
+
+Once the settings have been read and prepared, it is often useful to inspect the structure of the `settings` object. This object is a comprehensive R list that contains all the parameters and configurations for your PEcAn workflow, derived directly from the `pecan.xml` file.
+
+* `str(settings)`: This function provides a concise, human-readable summary of the structure of the `settings` object. It displays the internal structure of an R object, showing its type, length, and the first few elements for each component. This helps in understanding how your XML configurations are represented in R and can be invaluable for debugging or verifying settings during the workflow.
+
```{r explore-settings}
+# Display the structure of the settings object
+# This helps you understand what configurations are available
str(settings)
```
+# Trait and Meta Analysis
+
+This section performs two critical steps in the PEcAn workflow: trait data retrieval and meta-analysis. These steps are essential for parameterizing your model with site-specific information.
+
+* `runModule.get.trait.data(settings)`: This function retrieves trait data for the specified PFTs (Plant Functional Types) from the BETY database. It gathers observational data that will be used to parameterize your model.
+* `runModule.run.meta.analysis(settings)`: After collecting trait data, this function performs a meta-analysis to synthesize the trait data and generate probability distributions for model parameters.
+* `write.settings(settings, outputfile = "pecan_checked.xml")`: This function saves the updated settings to a new XML file, preserving all the changes made during the trait and meta-analysis process.
+
+```{r trait-meta-analysis}
+# Retrieve trait data for the specified PFTs
+settings <- PEcAn.workflow::runModule.get.trait.data(settings)
+# Perform meta-analysis on the trait data
+PEcAn.MA::runModule.run.meta.analysis(settings)
+# Save the updated settings to a new XML file
+PEcAn.settings::write.settings(settings, outputfile = "pecan.TRAIT.xml")```
+
# Write Model Configuration Files
-Generate model configuration files before simulation runs.
+This section generates the model-specific configuration files needed to run your simulations. The process involves converting the PEcAn settings into the format required by your chosen ecosystem model.
+
+* `runModule.run.write.configs(settings)`: This function takes your PEcAn settings and generates the necessary configuration files for your selected model. It handles the translation of PEcAn parameters into model-specific formats.
+* `write.settings(settings, outputfile = "pecan.CONFIGS.xml")`: After generating the configuration files, this function saves the updated settings to a new XML file, which now includes paths to the generated model configuration files.
-```{r run.write.configs}
+```{r write-configs}
+# Generate model-specific configuration files
settings <-PEcAn.workflow::runModule.run.write.configs(settings)
+# Save the updated settings with configuration file paths
PEcAn.settings::write.settings(settings, outputfile = "pecan.CONFIGS.xml")
```
# Run Model Simulations and Fetch Results
-Start model simulations and retrieve output for analysis.
+This section executes the actual model simulations and retrieves the results. The process is managed by PEcAn's workflow system, which handles the execution of your chosen ecosystem model.
+
+* `start_model_runs(settings)`: This function initiates the model runs based on your configuration. It manages the execution of your chosen ecosystem model, using the configuration files generated in the previous step.
```{r run-model}
+# Start the model simulations
PEcAn.workflow::start_model_runs(settings)
```
From a9bec253b2a1bb55990ac7f62b08ee150607637d Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Wed, 11 Jun 2025 21:45:57 +0530
Subject: [PATCH 0167/1193] remove unused write configs step
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 5ecd730acf4..62f0180b312 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -106,22 +106,10 @@ settings <- PEcAn.workflow::runModule.get.trait.data(settings)
# Perform meta-analysis on the trait data
PEcAn.MA::runModule.run.meta.analysis(settings)
# Save the updated settings to a new XML file
-PEcAn.settings::write.settings(settings, outputfile = "pecan.TRAIT.xml")```
-
-# Write Model Configuration Files
-
-This section generates the model-specific configuration files needed to run your simulations. The process involves converting the PEcAn settings into the format required by your chosen ecosystem model.
-
-* `runModule.run.write.configs(settings)`: This function takes your PEcAn settings and generates the necessary configuration files for your selected model. It handles the translation of PEcAn parameters into model-specific formats.
-* `write.settings(settings, outputfile = "pecan.CONFIGS.xml")`: After generating the configuration files, this function saves the updated settings to a new XML file, which now includes paths to the generated model configuration files.
-
-```{r write-configs}
-# Generate model-specific configuration files
-settings <-PEcAn.workflow::runModule.run.write.configs(settings)
-# Save the updated settings with configuration file paths
-PEcAn.settings::write.settings(settings, outputfile = "pecan.CONFIGS.xml")
+PEcAn.settings::write.settings(settings, outputfile = "pecan.TRAIT.xml")
```
+
# Run Model Simulations and Fetch Results
This section executes the actual model simulations and retrieves the results. The process is managed by PEcAn's workflow system, which handles the execution of your chosen ecosystem model.
From f3fdbc1b8bbd4b673746b13c02205548e6131431 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Wed, 11 Jun 2025 21:49:16 +0530
Subject: [PATCH 0168/1193] fixed model runs
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 62f0180b312..5a80fc4da66 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -103,10 +103,10 @@ This section performs two critical steps in the PEcAn workflow: trait data retri
```{r trait-meta-analysis}
# Retrieve trait data for the specified PFTs
settings <- PEcAn.workflow::runModule.get.trait.data(settings)
-# Perform meta-analysis on the trait data
-PEcAn.MA::runModule.run.meta.analysis(settings)
# Save the updated settings to a new XML file
PEcAn.settings::write.settings(settings, outputfile = "pecan.TRAIT.xml")
+# Perform meta-analysis on the trait data
+PEcAn.MA::runModule.run.meta.analysis(settings)
```
@@ -118,7 +118,7 @@ This section executes the actual model simulations and retrieves the results. Th
```{r run-model}
# Start the model simulations
-PEcAn.workflow::start_model_runs(settings)
+PEcAn.workflow::runModule_start_model_runs(settings)
```
```{r get-plot-vars}
From f799cef9fe65dfbe028d31028e78e0eebac965ca Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Wed, 11 Jun 2025 21:50:16 +0530
Subject: [PATCH 0169/1193] fixed model runs
Signed-off-by: Aritra Dey
---
.../quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 5a80fc4da66..f41be2249cb 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -114,7 +114,7 @@ PEcAn.MA::runModule.run.meta.analysis(settings)
This section executes the actual model simulations and retrieves the results. The process is managed by PEcAn's workflow system, which handles the execution of your chosen ecosystem model.
-* `start_model_runs(settings)`: This function initiates the model runs based on your configuration. It manages the execution of your chosen ecosystem model, using the configuration files generated in the previous step.
+* `runModulr_start_model_runs(settings)`: This function initiates the model runs based on your configuration. It manages the execution of your chosen ecosystem model, using the configuration files generated in the previous step.
```{r run-model}
# Start the model simulations
From be9f79df89abd0b6e98ddae4e3886192d1edca5d Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Wed, 11 Jun 2025 23:47:51 +0530
Subject: [PATCH 0170/1193] fix: function name in workflow
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index f41be2249cb..895573e942b 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -114,11 +114,11 @@ PEcAn.MA::runModule.run.meta.analysis(settings)
This section executes the actual model simulations and retrieves the results. The process is managed by PEcAn's workflow system, which handles the execution of your chosen ecosystem model.
-* `runModulr_start_model_runs(settings)`: This function initiates the model runs based on your configuration. It manages the execution of your chosen ecosystem model, using the configuration files generated in the previous step.
+* `start_model_runs(settings)`: This function initiates the model runs based on your configuration. It manages the execution of your chosen ecosystem model, using the configuration files generated in the previous step.
```{r run-model}
# Start the model simulations
-PEcAn.workflow::runModule_start_model_runs(settings)
+PEcAn.workflow::start_model_runs(settings)
```
```{r get-plot-vars}
From a948363d4bd2b68ea9bf169b4b500460ad0ba72f Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Wed, 11 Jun 2025 23:56:53 +0530
Subject: [PATCH 0171/1193] added multisite-workflow-example.qmd
---
.../multisite-workflow-example.qmd | 69 +++++++++++++++++++
1 file changed, 69 insertions(+)
create mode 100644 documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
diff --git a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
new file mode 100644
index 00000000000..415d7615616
--- /dev/null
+++ b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
@@ -0,0 +1,69 @@
+---
+title: "PEcAn Multisite Flat-File Workflow Example"
+format: html
+editor: visual
+---
+
+## Introduction
+
+This notebook demonstrates how to use a flat-file (`site_info.csv`) to set up a multisite workflow in PEcAn, without querying the database.
+You will learn how to read site metadata, filter by group, and create a MultiSettings object for your runs.
+
+---
+
+## Example `site_info.csv`
+
+Suppose you have a CSV file like this:
+
+| id | lat | lon | site_group |
+|-----|-------|--------|------------|
+| 101 | 40.1 | -88.2 | NEON |
+| 102 | 41.2 | -87.9 | NEON |
+| 201 | 39.9 | -90.0 | Ameriflux |
+| 202 | 38.5 | -89.5 | Ameriflux |
+
+Save this as `site_info.csv` in your working directory.
+
+---
+
+## Load Required Libraries
+
+```{r}
+library(dplyr)
+# library(PEcAn.settings) # Uncomment if needed
+# library(PEcAn.all) # Uncomment if needed
+```
+
+---
+
+## Add the following lines to your workflow code .
+
+```{r}
+site_info <- read.csv("site_info.csv")
+# Filter for NEON sites (change "NEON" to your desired group)
+selected_sites <- site_info %>% filter(site_group == "NEON")
+print(selected_sites) ## prints demo output (not required in main code)
+settings <- createMultiSiteSettings(settings, selected_sites) ## populates settings with metadata for each site in the sitegroup
+
+```
+
+---
+
+## (Optional) Integrate with workflow
+
+```{r}
+settings$site_info <- selected_sites
+```
+Now, any downstream function that receives settings can access the filtered site info.
+
+---
+
+
+## Summary
+- You can use a flat-file (site_info.csv) to manage multisite workflows in PEcAn.
+- Filter your sites in R, then pass the filtered data frame to createMultiSiteSettings().
+- No database queries are needed for site metadata.
+
+
+---
+
From cc0d556592bd93a6491b3fe89e17fb171517d101 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Thu, 12 Jun 2025 01:09:35 +0530
Subject: [PATCH 0172/1193] removed trait and meta analysis
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 895573e942b..59572dbf959 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -92,24 +92,6 @@ Once the settings have been read and prepared, it is often useful to inspect the
str(settings)
```
-# Trait and Meta Analysis
-
-This section performs two critical steps in the PEcAn workflow: trait data retrieval and meta-analysis. These steps are essential for parameterizing your model with site-specific information.
-
-* `runModule.get.trait.data(settings)`: This function retrieves trait data for the specified PFTs (Plant Functional Types) from the BETY database. It gathers observational data that will be used to parameterize your model.
-* `runModule.run.meta.analysis(settings)`: After collecting trait data, this function performs a meta-analysis to synthesize the trait data and generate probability distributions for model parameters.
-* `write.settings(settings, outputfile = "pecan_checked.xml")`: This function saves the updated settings to a new XML file, preserving all the changes made during the trait and meta-analysis process.
-
-```{r trait-meta-analysis}
-# Retrieve trait data for the specified PFTs
-settings <- PEcAn.workflow::runModule.get.trait.data(settings)
-# Save the updated settings to a new XML file
-PEcAn.settings::write.settings(settings, outputfile = "pecan.TRAIT.xml")
-# Perform meta-analysis on the trait data
-PEcAn.MA::runModule.run.meta.analysis(settings)
-```
-
-
# Run Model Simulations and Fetch Results
This section executes the actual model simulations and retrieves the results. The process is managed by PEcAn's workflow system, which handles the execution of your chosen ecosystem model.
From 7fc94650ad011236778fcf02ffa8b616bbb9443d Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 12 Jun 2025 13:25:30 -0400
Subject: [PATCH 0173/1193] Update the function argument.
---
modules/data.remote/R/GEDI_AGB_prep.R | 8 ++++----
modules/data.remote/R/NASA_DAAC_download.R | 2 +-
modules/data.remote/man/GEDI_AGB_prep.Rd | 4 ++--
modules/data.remote/man/NASA_DAAC_download.Rd | 2 +-
4 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/modules/data.remote/R/GEDI_AGB_prep.R b/modules/data.remote/R/GEDI_AGB_prep.R
index f7cb2c1400f..25a7e50dad7 100644
--- a/modules/data.remote/R/GEDI_AGB_prep.R
+++ b/modules/data.remote/R/GEDI_AGB_prep.R
@@ -14,7 +14,7 @@
#' @param prerun Character: series of pre-launch shell command before running the shell job (default is NULL).
#' @param num.folder Numeric: the number of batch folders to be created when submitting jobs to the queue.
#' @param cores Numeric: numbers of core to be used for the parallel computation. The default is the maximum current CPU number.
-#' @param credential.folder Character: the physical path to the folder that contains the credential file (.nasadaacapirc).
+#' @param credential_path Character: the physical path to the credential file. (.netrc).
#'
#' @return A data frame containing AGB and sd for each site and each time step.
#' @export
@@ -50,7 +50,7 @@ GEDI_AGB_prep <- function(site_info,
prerun = NULL,
num.folder = NULL,
cores = parallel::detectCores(),
- credential.folder = "~") {
+ credential_path = "~/.netrc") {
# convert list to vector.
if (is.list(bbox)) {
bbox <- as.numeric(unlist(bbox))
@@ -70,7 +70,7 @@ GEDI_AGB_prep <- function(site_info,
dir.create(outdir)
}
# detect if we generate the NASA DAAC credential file.
- if (!file.exists(file.path(credential.folder, ".nasadaacapirc"))) {
+ if (!file.exists(file.path(credential_path, ".nasadaacapirc"))) {
PEcAn.logger::logger.info("There is no credential file for NASA DAAC server.")
PEcAn.logger::logger.info("Please create the .nasadaacapirc file within the credential folder.")
PEcAn.logger::logger.info("The first and second lines of the file are the username and password.")
@@ -112,7 +112,7 @@ GEDI_AGB_prep <- function(site_info,
outdir = download.path,
doi = "10.3334/ORNLDAAC/2056",
just_path = F,
- credential.folder = credential.folder)
+ credential_path = credential_path)
# if we want to submit jobs to the queue.
if (batch) {
if (is.null(num.folder)) {
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index 7f0cc7289c0..e2c402f66dc 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -12,7 +12,7 @@
#' @param outdir Character: path of the directory in which to save the
#' downloaded files. Default is the current work directory(getwd()).
#' @param band Character: the band name (or vector of band names) of data to be requested.
-#' @param credential_path Character: physical path to the credential file. The default NULL.
+#' @param credential_path Character: physical path to the credential file (.netrc file). The default NULL.
#' @param doi Character: data DOI on the NASA DAAC server, it can be obtained
#' directly from the NASA ORNL DAAC data portal (e.g., GEDI L4A through
#' https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=2056).
diff --git a/modules/data.remote/man/GEDI_AGB_prep.Rd b/modules/data.remote/man/GEDI_AGB_prep.Rd
index dac18b276fa..195c8353ef5 100644
--- a/modules/data.remote/man/GEDI_AGB_prep.Rd
+++ b/modules/data.remote/man/GEDI_AGB_prep.Rd
@@ -15,7 +15,7 @@ GEDI_AGB_prep(
prerun = NULL,
num.folder = NULL,
cores = parallel::detectCores(),
- credential.folder = "~"
+ credential_path = "~/.netrc"
)
}
\arguments{
@@ -39,7 +39,7 @@ GEDI_AGB_prep(
\item{cores}{Numeric: numbers of core to be used for the parallel computation. The default is the maximum current CPU number.}
-\item{credential.folder}{Character: the physical path to the folder that contains the credential file (.nasadaacapirc).}
+\item{credential_path}{Character: the physical path to the credential file. (.netrc).}
}
\value{
A data frame containing AGB and sd for each site and each time step.
diff --git a/modules/data.remote/man/NASA_DAAC_download.Rd b/modules/data.remote/man/NASA_DAAC_download.Rd
index 66edf36d73a..a748e4dd93d 100644
--- a/modules/data.remote/man/NASA_DAAC_download.Rd
+++ b/modules/data.remote/man/NASA_DAAC_download.Rd
@@ -41,7 +41,7 @@ downloaded files. Default is the current work directory(getwd()).}
\item{band}{Character: the band name (or vector of band names) of data to be requested.}
-\item{credential_path}{Character: physical path to the credential file. The default NULL.}
+\item{credential_path}{Character: physical path to the credential file (.netrc file). The default NULL.}
\item{doi}{Character: data DOI on the NASA DAAC server, it can be obtained
directly from the NASA ORNL DAAC data portal (e.g., GEDI L4A through
From d76eba1881fcb6e007644d96783a032b5e32ebc0 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 12 Jun 2025 13:35:34 -0400
Subject: [PATCH 0174/1193] Adding URL checks for the http pattern.
---
modules/data.remote/R/NASA_DAAC_download.R | 2 ++
1 file changed, 2 insertions(+)
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index e2c402f66dc..df4ddb827c6 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -118,6 +118,8 @@ NASA_DAAC_download <- function(ul_lat,
PEcAn.logger::logger.info("No files found. Please check the spatial and temporal search window.")
return(NA)
}
+ # remove any urls that are not starting with https.
+ granules_href = granules_href[which(grepl("http*",granules_href))]
# remove duplicated files.
inds <- which(duplicated(basename(granules_href)))
if (length(inds) > 0) {
From 6867bd89a4b81584600cd77a4a8235b043cd085d Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 12 Jun 2025 15:05:54 -0400
Subject: [PATCH 0175/1193] Add parallel SDA workflow.
---
modules/assim.sequential/NAMESPACE | 3 +
.../assim.sequential/R/sda.enkf_parallel.R | 608 ++++++++++++++++++
modules/assim.sequential/man/qsub_sda.Rd | 51 ++
.../assim.sequential/man/qsub_sda_batch.Rd | 17 +
.../assim.sequential/man/sda.enkf_local.Rd | 51 ++
5 files changed, 730 insertions(+)
create mode 100644 modules/assim.sequential/R/sda.enkf_parallel.R
create mode 100644 modules/assim.sequential/man/qsub_sda.Rd
create mode 100644 modules/assim.sequential/man/qsub_sda_batch.Rd
create mode 100644 modules/assim.sequential/man/sda.enkf_local.Rd
diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index db21f07876e..46dfb8c1041 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -46,6 +46,8 @@ export(post.analysis.multisite.ggplot)
export(postana.bias.plotting.sda)
export(postana.bias.plotting.sda.corr)
export(postana.timeser.plotting.sda)
+export(qsub_sda)
+export(qsub_sda_batch)
export(rescaling_stateVars)
export(rwtmnorm)
export(sample_met)
@@ -53,6 +55,7 @@ export(sampler_toggle)
export(sda.enkf)
export(sda.enkf.multisite)
export(sda.enkf.original)
+export(sda.enkf_local)
export(sda_weights_site)
export(simple.local)
export(tobit.model)
diff --git a/modules/assim.sequential/R/sda.enkf_parallel.R b/modules/assim.sequential/R/sda.enkf_parallel.R
new file mode 100644
index 00000000000..54866aaa8c5
--- /dev/null
+++ b/modules/assim.sequential/R/sda.enkf_parallel.R
@@ -0,0 +1,608 @@
+#' @description This function provides complete support for the multi-core and multi-node computation on the general HPC system.
+#' Thus, this script will be more computationally efficient, making it possible to run SDA over thousands of locations.
+#' @title sda.enkf_local
+#' @name sda.enkf_local
+#' @author Dongchen Zhang \email{zhangdc@@bu.edu}
+#'
+#' @param settings PEcAn settings object
+#' @param obs.mean Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point.
+#' @param obs.cov Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point.
+#' @param Q Process covariance matrix given if there is no data to estimate it.
+#' @param pre_enkf_params Used for passing pre-existing time-series of process error into the current SDA runs to ignore the impact by the differences between process errors.
+#' @param ensemble.samples Pass ensemble.samples from outside to avoid GitHub check issues.
+#' @param control List of flags controlling the behavior of the SDA.
+#' `TimeseriesPlot` for post analysis examination;
+#' `OutlierDetection` decide if we want to execute the outlier detection each time after the model forecasting;
+#' `send_email` contains lists for sending email to report the SDA progress;
+#' `keepNC` decide if we want to keep the NetCDF files inside the out directory;
+#' `forceRun` decide if we want to proceed the Bayesian MCMC sampling without observations;
+#' `MCMC.args` include lists for controling the MCMC sampling process (iteration, nchains, burnin, and nthin.).
+#'
+#' @return NONE
+#' @export
+#'
+sda.enkf_local <- function(settings,
+ obs.mean,
+ obs.cov,
+ Q = NULL,
+ pre_enkf_params = NULL,
+ ensemble.samples = NULL,
+ outdir = NULL,
+ job.folder = NULL,
+ cores = NULL,
+ control=list(TimeseriesPlot = FALSE,
+ OutlierDetection = FALSE,
+ send_email = NULL,
+ keepNC = TRUE,
+ forceRun = TRUE,
+ MCMC.args = NULL)) {
+ # initialize parallel.
+ if (future::supportsMulticore()) {
+ future::plan(future::multicore)
+ } else {
+ future::plan(future::multisession)
+ }
+ # Tweak outdir if it's specified from outside.
+ if (!is.null(outdir)) {
+ settings$outdir <- outdir
+ settings$rundir <- file.path(outdir, "run")
+ settings$modeloutdir <- file.path(outdir, "out")
+ settings$host$folder <- file.path(outdir, "out")
+ settings$host$outdir <- file.path(outdir, "out")
+ settings$host$rundir <- file.path(outdir, "run")
+ }
+ ###-------------------------------------------------------------------###
+ ### read settings ###
+ ###-------------------------------------------------------------------###
+ adjustment <- settings$state.data.assimilation$adjustment
+ model <- settings$model$type
+ defaults <- settings$pfts
+ outdir <- settings$modeloutdir # currently model runs locally, this will change if remote is enabled
+ rundir <- settings$host$rundir
+ nens <- as.numeric(settings$ensemble$size)
+ var.names <- sapply(settings$state.data.assimilation$state.variable, '[[', "variable.name")
+ names(var.names) <- NULL
+ #--------Initialization
+ restart.list <- NULL
+ #create SDA folder to store output
+ if(!dir.exists(settings$outdir)) dir.create(settings$outdir, showWarnings = FALSE)
+
+ ##### Creating matrices that describe the bounds of the state variables
+ ##### interval is remade everytime depending on the data at time t
+ ##### state.interval stays constant and converts new.analysis to be within the correct bounds
+ interval <- NULL
+ state.interval <- cbind(as.numeric(lapply(settings$state.data.assimilation$state.variables,'[[','min_value')),
+ as.numeric(lapply(settings$state.data.assimilation$state.variables,'[[','max_value')))
+ rownames(state.interval) <- var.names
+ #------------------------------Multi - site specific - settings
+ #Here I'm trying to make a temp config list name and put it into map to iterate
+ conf.settings <- settings
+ site.ids <- conf.settings %>% purrr::map(~.x[['run']] ) %>% purrr::map('site') %>% purrr::map('id') %>% base::unlist() %>% base::as.character()
+ # a matrix ready to be sent to spDistsN1 in sp package - first col is the long second is the lat and row names are the site ids
+ site.locs <- conf.settings %>% purrr::map(~.x[['run']] ) %>%
+ purrr::map('site') %>% purrr::map(function(s){
+ temp <- as.numeric(c(s$lon, s$lat))
+ names(temp) <- c("Lon", "Lat")
+ temp
+ }) %>%
+ dplyr::bind_rows() %>%
+ as.data.frame() %>%
+ `rownames<-`(site.ids)
+ ###-------------------------------------------------------------------###
+ ### check dates before data assimilation ###
+ ###-------------------------------------------------------------------###----
+ #filtering obs data based on years specifited in setting > state.data.assimilation
+ start.cut <- lubridate::ymd_hms(settings$state.data.assimilation$start.date, truncated = 3)
+ Start.year <- (lubridate::year(settings$state.data.assimilation$start.date))
+ End.year <- lubridate::year(settings$state.data.assimilation$end.date) # dates that assimilations will be done for - obs will be subsetted based on this
+ assim.sda <- Start.year:End.year
+ obs.mean <- obs.mean[sapply(lubridate::year(names(obs.mean)), function(obs.year) obs.year %in% (assim.sda))] #checks obs.mean dates against assimyear dates
+ obs.cov <- obs.cov[sapply(lubridate::year(names(obs.cov)), function(obs.year) obs.year %in% (assim.sda))] #checks obs.cov dates against assimyear dates
+ #checking that there are dates in obs.mean and adding midnight as the time
+ obs.times <- names(obs.mean)
+ obs.times.POSIX <- lubridate::ymd_hms(obs.times)
+ for (i in seq_along(obs.times)) {
+ if (is.na(obs.times.POSIX[i])) {
+ if (is.na(lubridate::ymd(obs.times[i]))) {
+ PEcAn.logger::logger.warn("Error: no dates associated with observations")
+ } else {
+ ### Data does not have time associated with dates
+ ### Adding 12:59:59PM assuming next time step starts one second later
+ # PEcAn.logger::logger.warn("Pumpkin Warning: adding one minute before midnight time assumption to dates associated with data")
+ obs.times.POSIX[i] <- lubridate::ymd_hms(paste(obs.times[i], "23:59:59"))
+ }
+ }
+ }
+ obs.times <- obs.times.POSIX
+ read_restart_times <- c(lubridate::ymd_hms(start.cut, truncated = 3), obs.times)
+ nt <- length(obs.times) #sets length of for loop for Forecast/Analysis
+ if (nt==0) PEcAn.logger::logger.severe('There has to be at least one Obs.')
+
+ # Model Specific Setup ----------------------------------------------------
+ #--get model specific functions
+ do.call("library", list(paste0("PEcAn.", model)))
+ my.write_restart <- paste0("write_restart.", model)
+ my.read_restart <- paste0("read_restart.", model)
+ my.split_inputs <- paste0("split_inputs.", model)
+ #- Double checking some of the inputs
+ if (is.null(adjustment)) adjustment <- TRUE
+ # models that don't need split_inputs, check register file for that
+ register.xml <- system.file(paste0("register.", model, ".xml"), package = paste0("PEcAn.", model))
+ register <- XML::xmlToList(XML::xmlParse(register.xml))
+ no_split <- !as.logical(register$exact.dates)
+
+ if (!exists(my.split_inputs) & !no_split) {
+ PEcAn.logger::logger.warn(my.split_inputs, "does not exist")
+ PEcAn.logger::logger.severe("please make sure that the PEcAn interface is loaded for", model)
+ PEcAn.logger::logger.warn(my.split_inputs, "If your model does not need the split function you can specify that in register.Model.xml in model's inst folder by adding FALSE tag.")
+
+ }
+ #split met if model calls for it
+ #create a folder to store extracted met files
+ if(!file.exists(paste0(settings$outdir, "/Extracted_met/"))){
+ dir.create(paste0(settings$outdir, "/Extracted_met/"))
+ }
+ PEcAn.logger::logger.info("Splitting mets!")
+ conf.settings <-conf.settings %>%
+ `class<-`(c("list")) %>% #until here, it separates all the settings for all sites that listed in the xml file
+ furrr::future_map(function(settings) {
+ library(paste0("PEcAn.",settings$model$type), character.only = TRUE)#solved by including the model in the settings
+ inputs.split <- list()
+ if (!no_split) {
+ for (i in 1:length(settings$run$inputs$met$path)) {
+ #---------------- model specific split inputs
+ ### model specific split inputs
+ settings$run$inputs$met$path[[i]] <- do.call(
+ my.split_inputs,
+ args = list(
+ settings = settings,
+ start.time = lubridate::ymd_hms(settings$run$site$met.start, truncated = 3), # This depends if we are restart or not
+ stop.time = lubridate::ymd_hms(settings$run$site$met.end, truncated = 3),
+ inputs = settings$run$inputs$met$path[[i]],
+ outpath = paste0(paste0(settings$outdir, "/Extracted_met/"), settings$run$site$id),
+ overwrite =F
+ )
+ )
+ # changing the start and end date which will be used for model2netcdf.model
+ settings$run$start.date <- lubridate::ymd_hms(settings$state.data.assimilation$start.date, truncated = 3)
+ settings$run$end.date <- lubridate::ymd_hms(settings$state.data.assimilation$end.date, truncated = 3)
+ }
+ } else{
+ inputs.split <- inputs
+ }
+ settings
+ }, .progress = F)
+ conf.settings<- PEcAn.settings::as.MultiSettings(conf.settings)
+ ###-------------------------------------------------------------------###
+ ### set up for data assimilation ###
+ ###-------------------------------------------------------------------###----
+ # Reading param samples-------------------------------
+ #create params object using samples generated from TRAITS functions
+ if (is.null(ensemble.samples)) {
+ load(file.path(settings$outdir, "samples.Rdata"))
+ }
+ #reformatting params
+ new.params <- PEcAnAssimSequential:::sda_matchparam(settings, ensemble.samples, site.ids, nens)
+ #sample met ensemble members
+ #sample all inputs specified in the settings$ensemble
+ #now looking into the xml
+ samp <- conf.settings$ensemble$samplingspace
+ #finding who has a parent
+ parents <- lapply(samp,'[[', 'parent')
+ #order parents based on the need of who has to be first
+ order <- names(samp)[lapply(parents, function(tr) which(names(samp) %in% tr)) %>% unlist()]
+ #new ordered sampling space
+ samp.ordered <- samp[c(order, names(samp)[!(names(samp) %in% order)])]
+ #performing the sampling
+ inputs <- vector("list", length(conf.settings))
+ # For the tags specified in the xml I do the sampling
+ for (s in seq_along(conf.settings)){
+ if (is.null(inputs[[s]])) {
+ inputs[[s]] <- list()
+ }
+ for (i in seq_along(samp.ordered)){
+ #call the function responsible for generating the ensemble
+ inputs[[s]][[names(samp.ordered)[i]]] <- input.ens.gen(settings=conf.settings[[s]],
+ input=names(samp.ordered)[i],
+ method=samp.ordered[[i]]$method,
+ parent_ids=NULL)
+ }
+ }
+ ###------------------------------------------------------------------------------------------------###
+ ### loop over time ###
+ ###------------------------------------------------------------------------------------------------###
+ for(t in 1:nt){
+ # initialize dat for saving memory usage.
+ sda.outputs <- FORECAST <- enkf.params <- ANALYSIS <- ens_weights <- list()
+ obs.t <- as.character(lubridate::date(obs.times[t]))
+ obs.year <- lubridate::year(obs.t)
+ PEcAn.logger::logger.info(paste("Processing Year:", obs.year))
+ ###-------------------------------------------------------------------------###
+ ### Taking care of Forecast. Splitting / Writting / running / reading back###
+ ###-------------------------------------------------------------------------###-----
+ #- Check to see if this is the first run or not and what inputs needs to be sent to write.ensemble configs
+ if (t>1){
+ #for next time step split the met if model requires
+ #-Splitting the input for the models that they don't care about the start and end time of simulations and they run as long as their met file.
+ PEcAn.logger::logger.info("Splitting mets!")
+ inputs.split <-
+ furrr::future_pmap(list(conf.settings %>% `class<-`(c("list")), inputs, model), function(settings, inputs, model) {
+ # Loading the model package - this is required bc of the furrr
+ library(paste0("PEcAn.",model), character.only = TRUE)
+ inputs.split <- inputs
+ if (!no_split) {
+ for (i in seq_len(nens)) {
+ #---------------- model specific split inputs
+ inputs.split$met$samples[i] <- do.call(
+ my.split_inputs,
+ args = list(
+ settings = settings,
+ start.time = (lubridate::ymd_hms(obs.times[t - 1], truncated = 3) + lubridate::second(lubridate::hms("00:00:01"))),
+ stop.time = lubridate::ymd_hms(obs.times[t], truncated = 3),
+ inputs = inputs$met$samples[[i]])
+ )
+ }
+ } else{
+ inputs.split <- inputs
+ }
+ inputs.split
+ })
+ #---------------- setting up the restart argument for each site separately and keeping them in a list
+ PEcAn.logger::logger.info("Collecting restart info!")
+ restart.list <-
+ furrr::future_pmap(list(out.configs, conf.settings %>% `class<-`(c("list")), params.list, inputs.split),
+ function(configs, settings, new.params, inputs) {
+ #if the new state for each site only has one row/col.
+ #then we need to convert it to matrix to solve the indexing issue.
+ new_state_site <- new.state[, which(attr(X, "Site") %in% settings$run$site$id)]
+ if(is.vector(new_state_site)){
+ new_state_site <- matrix(new_state_site)
+ }
+ list(
+ runid = configs$runs$id,
+ start.time = strptime(obs.times[t -1], format = "%Y-%m-%d %H:%M:%S") + lubridate::second(lubridate::hms("00:00:01")),
+ stop.time = strptime(obs.times[t], format ="%Y-%m-%d %H:%M:%S"),
+ settings = settings,
+ new.state = new_state_site,
+ new.params = new.params,
+ inputs = inputs,
+ RENAME = TRUE,
+ ensemble.id = settings$ensemble$ensemble.id
+ )
+ })
+ } else { ## t == 1
+ restart.list <- vector("list", length(conf.settings))
+ }
+ # release memory.
+ gc()
+ # submit jobs for writing configs.
+ PEcAn.logger::logger.info("Writting configs!")
+ out.configs <-furrr::future_pmap(list(conf.settings %>% `class<-`(c("list")),restart.list, inputs), function(settings, restart.arg, inputs) {
+ # Loading the model package - this is required bc of the furrr
+ library(paste0("PEcAn.",settings$model$type), character.only = TRUE)
+ # wrtting configs for each settings - this does not make a difference with the old code
+ PEcAn.uncertainty::write.ensemble.configs(
+ defaults = settings$pfts,
+ ensemble.samples = ensemble.samples,
+ settings = settings,
+ model = settings$model$type,
+ write.to.db = settings$database$bety$write,
+ restart = restart.arg,
+ samples=inputs,
+ rename = F
+ )
+ }) %>% stats::setNames(site.ids)
+ # collect run info.
+ # get ensemble ids for each site.
+ ensemble.ids <- site.ids %>% furrr::future_map(function(i){
+ run.list <- c()
+ for (j in 1:nens) {
+ run.list <- c(run.list, paste0("ENS-", sprintf("%05d", j), "-", i))
+ }
+ return(run.list)}, .progress = F) %>% unlist
+ runs.tmp <- file.path(rundir, ensemble.ids)
+ # local model executions.
+ PEcAn.logger::logger.info("Running models!")
+ job.files <- file.path(runs.tmp, "job.sh")
+ temp <- job.files %>% furrr::future_map(function(f){
+ cmd <- paste0("cd ", dirname(f), ";./job.sh")
+ system(cmd, intern = F, ignore.stdout = T, ignore.stderr = T)
+ }, .progress = F)
+ # submit jobs for reading sda outputs.
+ PEcAn.logger::logger.info("Reading forecast outputs!")
+ reads <- PEcAnAssimSequential:::build_X(out.configs = out.configs,
+ settings = settings,
+ new.params = new.params,
+ nens = nens,
+ read_restart_times = read_restart_times,
+ outdir = outdir,
+ t = t,
+ var.names = var.names,
+ my.read_restart = my.read_restart,
+ restart_flag = FALSE)
+ #let's read the parameters of each site/ens
+ params.list <- reads %>% purrr::map(~.x %>% purrr::map("params"))
+ # add namespace for variables inside the foreach.
+ X <- reads %>% furrr::future_map(function(r){
+ r %>% purrr::map_df(~.x[["X"]] %>% t %>% as.data.frame)
+ })
+ #replacing crazy outliers before it's too late
+ if (control$OutlierDetection){
+ X <- outlier.detector.boxplot(X)
+ PEcAn.logger::logger.info("Outlier Detection.")
+ }
+ # convert from forecast list to data frame.
+ X <- seq_along(X) %>% furrr::future_map(function(i){
+ temp <- do.call(cbind, X[i])
+ colnames(temp) <- paste0(var.names, ".", i)
+ return(temp)
+ }) %>%
+ dplyr::bind_cols() %>%
+ `colnames<-`(c(rep(var.names, length(X)))) %>%
+ `attr<-`('Site',c(rep(site.ids, each=length(var.names))))
+ FORECAST[[obs.t]] <- X
+ gc()
+ ###-------------------------------------------------------------------###
+ ### preparing OBS ###
+ ###-------------------------------------------------------------------###----
+ #To trigger the analysis function with free run, you need to first specify the control$forceRun as TRUE,
+ #Then specify the settings$state.data.assimilation$scalef as 0, and settings$state.data.assimilation$free.run as TRUE.
+ if (!is.null(obs.mean[[t]][[1]]) | (as.logical(settings$state.data.assimilation$free.run) & control$forceRun)) {
+ #decide if we want the block analysis function or multi-site analysis function.
+ #initialize block.list.all.
+ if (t == 1 | !exists("block.list.all")) {
+ block.list.all <- obs.mean %>% purrr::map(function(l){NULL})
+ }
+ #initialize MCMC arguments.
+ if (is.null(control$MCMC.args)) {
+ MCMC.args <- list(niter = 1e5,
+ nthin = 10,
+ nchain = 1,
+ nburnin = 5e4)
+ } else {
+ MCMC.args <- control$MCMC.args
+ }
+ #running analysis function.
+ # forbid submitting jobs to remote.
+ settings$state.data.assimilation$batch.settings$analysis <- NULL
+ enkf.params[[obs.t]] <- PEcAnAssimSequential:::analysis_sda_block(settings, block.list.all, X, obs.mean, obs.cov, t, nt, MCMC.args, pre_enkf_params, cores)
+ enkf.params[[obs.t]] <- c(enkf.params[[obs.t]], RestartList = list(restart.list %>% stats::setNames(site.ids)))
+ block.list.all <- enkf.params[[obs.t]]$block.list.all
+ #Forecast
+ mu.f <- enkf.params[[obs.t]]$mu.f
+ Pf <- enkf.params[[obs.t]]$Pf
+ #Analysis
+ Pa <- enkf.params[[obs.t]]$Pa
+ mu.a <- enkf.params[[obs.t]]$mu.a
+ }
+ ###-------------------------------------------------------------------###
+ ### adjust/update state matrix ###
+ ###-------------------------------------------------------------------###----
+ analysis <- enkf.params[[obs.t]]$analysis
+ enkf.params[[obs.t]]$analysis <- NULL
+ ##### Mapping analysis vectors to be in bounds of state variables
+ for(i in 1:ncol(analysis)){
+ int.save <- state.interval[which(startsWith(colnames(analysis)[i], var.names)),]
+ analysis[analysis[,i] < int.save[1],i] <- int.save[1]
+ analysis[analysis[,i] > int.save[2],i] <- int.save[2]
+ }
+ ## in the future will have to be separated from analysis
+ new.state <- as.data.frame(analysis)
+ ANALYSIS[[obs.t]] <- analysis
+ ens_weights[[obs.t]] <- PEcAnAssimSequential::sda_weights_site(FORECAST, ANALYSIS, 1, nens)
+ ###-------------------------------------------------------------------###
+ ### save outputs ###
+ ###-------------------------------------------------------------------###----
+ sda.outputs <- list(obs.mean = obs.mean[[t]],
+ obs.cov = obs.cov[[t]],
+ forecast = FORECAST[[obs.t]],
+ analysis = ANALYSIS[[obs.t]],
+ enkf.params = enkf.params[[obs.t]],
+ ens_weights[[obs.t]],
+ params.list = params.list,
+ restart.list = restart.list)
+ # save file to the job folder if it's specified.
+ if (!is.null(job.folder)) {
+ # create the job folder if it doesn't exist.
+ if (!file.exists(job.folder)) {
+ dir.create(job.folder)
+ }
+ save(sda.outputs, file = file.path(job.folder, paste0("sda.output", t, ".Rdata")))
+ } else {
+ save(sda.outputs, file = file.path(settings$outdir, paste0("sda.output", t, ".Rdata")))
+ }
+ # remove files as SDA runs
+ if (!(control$keepNC) && t == 1){
+ PEcAn.logger::logger.info("Deleting NC files!")
+ outs.tmp <- file.path(outdir, ensemble.ids)
+ temp <- outs.tmp %>% furrr::future_map(function(f){
+ temp <- list.files(f, "*.nc", full.names = T)
+ unlink(temp)
+ }, .progress = F)
+ }
+ if(!is.null(control$send_email)){
+ sendmail <- Sys.which("sendmail")
+ mailfile <- tempfile("mail")
+ cat(paste0("From: ", control$send_email$from, "\n", "Subject: ", "SDA progress report", "\n", "To: ", control$send_email$to, "\n", "\n", paste("Time point:", obs.times[t], "has been completed!")), file = mailfile)
+ system2(sendmail, c("-f", paste0("\"", control$send_email$from, "\""), paste0("\"", control$send_email$to, "\""), "<", mailfile))
+ unlink(mailfile)
+ }
+ }
+ # assemble results.
+ sda.out.files <- file.path(settings$outdir, paste0("sda.output", 1:nt, ".Rdata"))
+ analysis.all <- forecast.all <- vector("list", nt)
+ for (file in seq_along(sda.out.files)) {
+ res_env <- new.env()
+ load(sda.out.files[file], envir = res_env)
+ analysis.all[[file]] <- res_env$sda.outputs$analysis
+ forecast.all[[file]] <- res_env$sda.outputs$forecast
+ }
+ names(analysis.all) <- as.character(lubridate::date(obs.times))
+ names(forecast.all) <- as.character(lubridate::date(obs.times))
+ if (!is.null(job.folder)) {
+ save(list = c("analysis.all", "forecast.all"), file = file.path(job.folder, "sda.all.forecast.analysis.Rdata"))
+ } else {
+ save(list = c("analysis.all", "forecast.all"), file = file.path(settings$outdir, "sda.all.forecast.analysis.Rdata"))
+ }
+ gc()
+} # sda.enkf
+
+
+##' This function provides means to split large SDA runs into separate `qsub` jobs.
+##' Including job creation, submission, and assemble.
+##' @title qsub_sda
+#' @param settings PEcAn settings object
+#' @param obs.mean Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point.
+#' @param obs.cov Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point.
+#' @param Q Process covariance matrix given if there is no data to estimate it.
+#' @param pre_enkf_params Used for passing pre-existing time-series of process error into the current SDA runs to ignore the impact by the differences between process errors.
+#' @param ensemble.samples Pass ensemble.samples from outside to avoid GitHub check issues.
+#' @param outdir Physical path to the folder where the SDA outputs will be stored.
+#' The default is NULL, where we will be using outdir from the settings object.
+#' @param control List of flags controlling the behavior of the SDA.
+#' `TimeseriesPlot` for post analysis examination;
+#' `OutlierDetection` decide if we want to execute the outlier detection each time after the model forecasting;
+#' `send_email` contains lists for sending email to report the SDA progress;
+#' `keepNC` decide if we want to keep the NetCDF files inside the out directory;
+#' `forceRun` decide if we want to proceed the Bayesian MCMC sampling without observations;
+#' `MCMC.args` include lists for controling the MCMC sampling process (iteration, nchains, burnin, and nthin.).
+#' @author Dongchen Zhang
+#' @return NONE
+#' @export
+qsub_sda <- function(settings, obs.mean, obs.cov, Q, pre_enkf_params, ensemble.samples, outdir = NULL, control) {
+ if (future::supportsMulticore()) {
+ future::plan(future::multicore)
+ } else {
+ future::plan(future::multisession)
+ }
+ L <- length(settings)
+ # grab info from settings for the parallel job submissions.
+ num.folder <- as.numeric(settings$state.data.assimilation$batch.settings$general.job$folder.num)
+ cores <- as.numeric(settings$state.data.assimilation$batch.settings$general.job$cores)
+ num.per.folder <- ceiling(L/num.folder)
+ if (is.null(outdir)) {
+ outdir <- settings$outdir
+ }
+ # create folder for storing job outputs.
+ batch.folder <- file.path(outdir, "batch")
+ # delete the whole folder if it's not empty.
+ if (file.exists(batch.folder)){
+ PEcAn.logger::logger.info("Deleting batch folder!")
+ list.files(batch.folder, full.names = T) %>% furrr::future_map(function(f){
+ temp <- system(paste0("rm -rf ", f))
+ }, .progress = T)
+ unlink(batch.folder, recursive = T)
+ }
+ dir.create(batch.folder)
+ # loop over sub-folders.
+ folder.paths <- job.ids <- rep(NA, num.folder)
+ PEcAn.logger::logger.info(paste("Submitting", num.folder, "jobs."))
+ temp <- 1:num.folder %>% furrr::future_map(function(i){
+ # create folder for each set of job runs.
+ # calculate start and end index for the current folder.
+ head.num <- (i-1)*num.per.folder + 1
+ if (i*num.per.folder > L) {
+ tail.num <- L
+ } else {
+ tail.num <- i*num.per.folder
+ }
+ if (!is.null(block.index)) {
+ block.site.inds <- block.index[[i]]
+ } else {
+ block.site.inds <- head.num:tail.num
+ }
+ # naming and creating folder by the folder index.
+ folder.name <- paste0("Job_", i)
+ folder.path <- file.path(batch.folder, folder.name)
+ folder.paths[i] <- folder.path
+ dir.create(folder.path)
+ # save corresponding block list to the folder.
+ temp.settings <- PEcAn.settings::write.settings(settings[block.site.inds], outputfile = "pecan.xml", outputdir = folder.path)
+ temp.obs.mean <- obs.mean %>% purrr::map(function(obs){
+ obs[block.site.inds]
+ })
+ temp.obs.cov <- obs.cov %>% purrr::map(function(obs){
+ obs[block.site.inds]
+ })
+ configs <- list(setting = temp.settings,
+ obs.mean = temp.obs.mean,
+ obs.cov = temp.obs.cov,
+ Q = Q,
+ pre_enkf_params = pre_enkf_params,
+ ensemble.samples = ensemble.samples,
+ outdir = folder.path, # outdir
+ job.folder = folder.path,
+ cores = cores,
+ control = control,
+ site.ids = block.site.inds)
+ saveRDS(configs, file = file.path(folder.path, "configs.rds"))
+ # create job file.
+ jobsh <- c("#!/bin/bash -l",
+ "module load R/4.1.2",
+ "echo \"require (PEcAnAssimSequential)",
+ " require (PEcAn.uncertainty)",
+ " require (foreach)",
+ " qsub_sda_batch('@FOLDER_PATH@')",
+ " \" | R --no-save")
+ jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh)
+ writeLines(jobsh, con = file.path(folder.path, "job.sh"))
+ # qsub command.
+ qsub <- "qsub -l h_rt=10:00:00 -l mem_per_core=4G -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
+ qsub <- gsub("@NAME@", paste0("Job-", i), qsub)
+ qsub <- gsub("@STDOUT@", file.path(folder.path, "stdout.log"), qsub)
+ qsub <- gsub("@STDERR@", file.path(folder.path, "stderr.log"), qsub)
+ qsub <- gsub("@CORES@", cores, qsub)
+ qsub <- strsplit(qsub, " (?=([^\"']*\"[^\"']*\")*[^\"']*$)", perl = TRUE)
+ cmd <- qsub[[1]]
+ out <- system2(cmd, file.path(folder.path, "job.sh"), stdout = TRUE, stderr = TRUE)
+ }, .progress = T)
+ # check job completion
+ completed.folder.num <- sum(file.exists(file.path(folder.paths, "sda.all.forecast.analysis.Rdata")))
+ while (completed.folder.num < length(folder.paths)) {
+ Sys.sleep(60)
+ completed.folder.num <- sum(file.exists(file.path(folder.paths, "sda.all.forecast.analysis.Rdata")))
+ }
+ # order folder names.
+ folder.inds <- folder.paths %>% purrr::map(function(f){
+ as.numeric(strsplit(basename(f), "_")[[1]][2])
+ }) %>% unlist
+ order.folders <- folders[order(folder.inds)]
+ # stack analysis and forecast results.
+ load(file.path(order.folders[1], "sda.all.forecast.analysis.Rdata"))
+ times <- names(analysis.all)
+ Analysis.all <- analysis.all
+ Forecast.all <- forecast.all
+ for (f in order.folders[2:length(order.folders)]) {
+ load(file.path(f, "sda.all.forecast.analysis.Rdata"))
+ for (t in seq_along(analysis.all)) {
+ Analysis.all[[t]] <- cbind(Analysis.all[[t]], analysis.all[[t]])
+ Forecast.all[[t]] <- cbind(Forecast.all[[t]], forecast.all[[t]])
+ }
+ }
+ analysis.all <- Analysis.all
+ forecast.all <- Forecast.all
+ names(analysis.all) <- times
+ names(forecast.all) <- times
+ # save results.
+ save(list = c("analysis.all", "forecast.all"), file = file.path(outdir, "sda.all.forecast.analysis.Rdata"))
+}
+
+##' This function can help to execute sda function.
+##' @title qsub_sda_batch
+##' @param folder.path character: path where the `configs.rds` file is stored.
+##' @author Dongchen Zhang.
+##' @export
+qsub_sda_batch <- function(folder.path) {
+ configs <- readRDS(file.path(folder.path, "configs.rds"))
+ setting <- PEcAn.settings::read.settings(configs$setting)
+ sda.enkf_local(setting,
+ configs$obs.mean,
+ configs$obs.cov,
+ configs$Q,
+ configs$pre_enkf_params,
+ configs$ensemble.samples,
+ configs$outdir,
+ configs$job.folder,
+ as.numeric(configs$cores),
+ configs$control)
+}
\ No newline at end of file
diff --git a/modules/assim.sequential/man/qsub_sda.Rd b/modules/assim.sequential/man/qsub_sda.Rd
new file mode 100644
index 00000000000..e4fce8d8261
--- /dev/null
+++ b/modules/assim.sequential/man/qsub_sda.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sda.enkf_parallel.R
+\name{qsub_sda}
+\alias{qsub_sda}
+\title{qsub_sda}
+\usage{
+qsub_sda(
+ settings,
+ obs.mean,
+ obs.cov,
+ Q,
+ pre_enkf_params,
+ ensemble.samples,
+ outdir = NULL,
+ control
+)
+}
+\arguments{
+\item{settings}{PEcAn settings object}
+
+\item{obs.mean}{Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point.}
+
+\item{obs.cov}{Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point.}
+
+\item{Q}{Process covariance matrix given if there is no data to estimate it.}
+
+\item{pre_enkf_params}{Used for passing pre-existing time-series of process error into the current SDA runs to ignore the impact by the differences between process errors.}
+
+\item{ensemble.samples}{Pass ensemble.samples from outside to avoid GitHub check issues.}
+
+\item{outdir}{Physical path to the folder where the SDA outputs will be stored.
+The default is NULL, where we will be using outdir from the settings object.}
+
+\item{control}{List of flags controlling the behavior of the SDA.
+`TimeseriesPlot` for post analysis examination;
+`OutlierDetection` decide if we want to execute the outlier detection each time after the model forecasting;
+`send_email` contains lists for sending email to report the SDA progress;
+`keepNC` decide if we want to keep the NetCDF files inside the out directory;
+`forceRun` decide if we want to proceed the Bayesian MCMC sampling without observations;
+`MCMC.args` include lists for controling the MCMC sampling process (iteration, nchains, burnin, and nthin.).}
+}
+\value{
+NONE
+}
+\description{
+This function provides means to split large SDA runs into separate `qsub` jobs.
+Including job creation, submission, and assemble.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/qsub_sda_batch.Rd b/modules/assim.sequential/man/qsub_sda_batch.Rd
new file mode 100644
index 00000000000..abac7861614
--- /dev/null
+++ b/modules/assim.sequential/man/qsub_sda_batch.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sda.enkf_parallel.R
+\name{qsub_sda_batch}
+\alias{qsub_sda_batch}
+\title{qsub_sda_batch}
+\usage{
+qsub_sda_batch(folder.path)
+}
+\arguments{
+\item{folder.path}{character: path where the `configs.rds` file is stored.}
+}
+\description{
+This function can help to execute sda function.
+}
+\author{
+Dongchen Zhang.
+}
diff --git a/modules/assim.sequential/man/sda.enkf_local.Rd b/modules/assim.sequential/man/sda.enkf_local.Rd
new file mode 100644
index 00000000000..3c9c4befe7f
--- /dev/null
+++ b/modules/assim.sequential/man/sda.enkf_local.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sda.enkf_parallel.R
+\name{sda.enkf_local}
+\alias{sda.enkf_local}
+\title{sda.enkf_local}
+\usage{
+sda.enkf_local(
+ settings,
+ obs.mean,
+ obs.cov,
+ Q = NULL,
+ pre_enkf_params = NULL,
+ ensemble.samples = NULL,
+ outdir = NULL,
+ job.folder = NULL,
+ cores = NULL,
+ control = list(TimeseriesPlot = FALSE, OutlierDetection = FALSE, send_email = NULL,
+ keepNC = TRUE, forceRun = TRUE, MCMC.args = NULL)
+)
+}
+\arguments{
+\item{settings}{PEcAn settings object}
+
+\item{obs.mean}{Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation means for each state variables of each site for each time point.}
+
+\item{obs.cov}{Lists of date times named by time points, which contains lists of sites named by site ids, which contains observation covariances for all state variables of each site for each time point.}
+
+\item{Q}{Process covariance matrix given if there is no data to estimate it.}
+
+\item{pre_enkf_params}{Used for passing pre-existing time-series of process error into the current SDA runs to ignore the impact by the differences between process errors.}
+
+\item{ensemble.samples}{Pass ensemble.samples from outside to avoid GitHub check issues.}
+
+\item{control}{List of flags controlling the behavior of the SDA.
+`TimeseriesPlot` for post analysis examination;
+`OutlierDetection` decide if we want to execute the outlier detection each time after the model forecasting;
+`send_email` contains lists for sending email to report the SDA progress;
+`keepNC` decide if we want to keep the NetCDF files inside the out directory;
+`forceRun` decide if we want to proceed the Bayesian MCMC sampling without observations;
+`MCMC.args` include lists for controling the MCMC sampling process (iteration, nchains, burnin, and nthin.).}
+}
+\value{
+NONE
+}
+\description{
+This function provides complete support for the multi-core and multi-node computation on the general HPC system.
+Thus, this script will be more computationally efficient, making it possible to run SDA over thousands of locations.
+}
+\author{
+Dongchen Zhang \email{zhangdc@bu.edu}
+}
From ebdd631e82ecd8f54d3a8b3f09c31ab0d7948175 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 12 Jun 2025 15:06:40 -0400
Subject: [PATCH 0176/1193] Update change log.
---
CHANGELOG.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f9f4a00b64..07820163a64 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
- Added automated pkgdown documentation for all PEcAn packages (@divine7022, #3482):
- Compiled pages are live at https://pecanproject.github.io/package-documentation and inside Docker at `pecan.localhost/pkgdocs/`, and these are automatically updated each time a PR to the source packages is merged.
- You can compile all pkgdown pages locally at any time with `make pkgdocs`.
+- Added parallel mode for the entire SDA workflow.
### Fixed
- api to correctly use x_var from request in plotResults #3528
From 9426603969311c26c0062666eb0a83671975600a Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Fri, 13 Jun 2025 09:35:53 -0700
Subject: [PATCH 0177/1193] check if variables exist in IC file before trying
to read them Should reduce noise from ncdf4 "cant find var (or dimvar) in
file" messages Also: Added missing NEWS entries
---
models/sipnet/DESCRIPTION | 2 +-
models/sipnet/NEWS.md | 12 +++
models/sipnet/R/write.configs.SIPNET.R | 110 ++++++++++++++++---------
3 files changed, 85 insertions(+), 39 deletions(-)
diff --git a/models/sipnet/DESCRIPTION b/models/sipnet/DESCRIPTION
index d1d84b3fa20..3ead97b81e0 100644
--- a/models/sipnet/DESCRIPTION
+++ b/models/sipnet/DESCRIPTION
@@ -1,7 +1,7 @@
Package: PEcAn.SIPNET
Type: Package
Title: PEcAn Functions Used for Ecological Forecasts and Reanalysis
-Version: 1.9.0
+Version: 1.9.1
Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"),
email = "dietze@bu.edu"),
person("University of Illinois, NCSA", role = c("cph")))
diff --git a/models/sipnet/NEWS.md b/models/sipnet/NEWS.md
index a90e5d0d1fb..09ddcf2f4b0 100644
--- a/models/sipnet/NEWS.md
+++ b/models/sipnet/NEWS.md
@@ -1,3 +1,15 @@
+# PEcAn.SIPNET 1.9.1
+
+## Changed
+
+* Breaking: Renamed the setting used to pass soil and hydrology parameters. `write.config.SIPNET` previously read these from `settings$run$inputs$soilinitcond`, now `settings$run$inputs$soil_physics` to better reflect that these are state factors applicable to the whole run rather than initial conditions. (Quianyu Xuan, #3406)
+
+## Fixed
+
+* `write.config.SIPNET` now checks more carefully whether an optional variable exists in an initial condition file before trying to read it, therefore printing fewer messages about (expectedly) missing variables.
+* When passed a vector of input paths, `write.config.SIPNET` was choosing one at random; it now uses only the first (Blesson Thomas, #3298). Note that a single input path per call has always been the intended usage; being passed many was a second bug in PEcAn.uncertainty that is also now fixed.
+
+
# PEcAn.SIPNET 1.9.0
## License change
diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R
index 76a9500737d..d3cfb245eca 100755
--- a/models/sipnet/R/write.configs.SIPNET.R
+++ b/models/sipnet/R/write.configs.SIPNET.R
@@ -598,24 +598,34 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs
if ("microbe" %in% ic.names) {
param[which(param[, 1] == "microbeInit"), 2] <- IC$microbe
}
- }
- else if (length(settings$run$inputs$poolinitcond$path)>0) {
+ } else if (length(settings$run$inputs$poolinitcond$path) > 0) {
ICs_num <- length(settings$run$inputs$poolinitcond$path)
IC.path <- settings$run$inputs$poolinitcond$path[[1]]
IC.pools <- PEcAn.data.land::prepare_pools(IC.path, constants = list(sla = SLA))
- if(!is.null(IC.pools)){
+ if (!is.null(IC.pools)) {
IC.nc <- ncdf4::nc_open(IC.path) #for additional variables specific to SIPNET
+ ic_ncvars_wanted <- c(
+ "nee",
+ "SoilMoistFrac",
+ "SWE",
+ "date_of_budburst",
+ "date_of_senescence",
+ "Microbial Biomass C"
+ )
+ ic_has_ncvars <- ic_ncvars_wanted %in% names(IC.nc$var)
+ names(ic_has_ncvars) <- ic_ncvars_wanted
+
## plantWoodInit gC/m2
if ("wood" %in% names(IC.pools)) {
- param[which(param[, 1] == "plantWoodInit"), 2] <- PEcAn.utils::ud_convert(IC.pools$wood, "kg m-2", "g m-2")
+ param[param[, 1] == "plantWoodInit", 2] <- PEcAn.utils::ud_convert(IC.pools$wood, "kg m-2", "g m-2")
}
## laiInit m2/m2
lai <- IC.pools$LAI
if (!is.na(lai) && is.numeric(lai)) {
- param[which(param[, 1] == "laiInit"), 2] <- lai
+ param[param[, 1] == "laiInit", 2] <- lai
}
# Sipnet always starts from initial LAI whether day 0 is in or out of the
@@ -642,72 +652,96 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs
}
## neeInit gC/m2
- nee <- try(ncdf4::ncvar_get(IC.nc,"nee"),silent = TRUE)
- if (!is.na(nee) && is.numeric(nee)) {
- param[which(param[, 1] == "neeInit"), 2] <- nee
+ if (ic_has_ncvars[["nee"]]) {
+ nee <- ncdf4::ncvar_get(IC.nc, "nee")
+ if (!is.na(nee) && is.numeric(nee)) {
+ param[param[, 1] == "neeInit", 2] <- nee
+ }
}
## litterInit gC/m2
if ("litter" %in% names(IC.pools)) {
- param[which(param[, 1] == "litterInit"), 2] <- PEcAn.utils::ud_convert(IC.pools$litter, 'g m-2', 'g m-2') # BETY: kgC m-2
+ param[param[, 1] == "litterInit", 2] <- PEcAn.utils::ud_convert(IC.pools$litter, "g m-2", "g m-2") # BETY: kgC m-2
}
## soilInit gC/m2
if ("soil" %in% names(IC.pools)) {
- param[which(param[, 1] == "soilInit"), 2] <- PEcAn.utils::ud_convert(sum(IC.pools$soil), 'kg m-2', 'g m-2') # BETY: kgC m-2
+ param[param[, 1] == "soilInit", 2] <- PEcAn.utils::ud_convert(sum(IC.pools$soil), "kg m-2", "g m-2") # BETY: kgC m-2
}
## soilWFracInit fraction
- soilWFrac <- try(ncdf4::ncvar_get(IC.nc,"SoilMoistFrac"),silent = TRUE)
- if (!"try-error" %in% class(soilWFrac)) {
+ if (ic_has_ncvars[["SoilMoistFrac"]]) {
+ soilWFrac <- ncdf4::ncvar_get(IC.nc, "SoilMoistFrac")
if (!is.na(soilWFrac) && is.numeric(soilWFrac)) {
- param[which(param[, 1] == "soilWFracInit"), 2] <- sum(soilWFrac)/100
+ param[param[, 1] == "soilWFracInit", 2] <- sum(soilWFrac) / 100
}
}
## litterWFracInit fraction
litterWFrac <- soilWFrac
-
+
## snowInit cm water equivalent (cm = g / cm2 because 1 g water = 1 cm3 water)
- snow = try(ncdf4::ncvar_get(IC.nc,"SWE"),silent = TRUE)
- if (!is.na(snow) && is.numeric(snow)) {
- param[which(param[, 1] == "snowInit"), 2] <- PEcAn.utils::ud_convert(snow, "kg m-2", "g cm-2") # BETY: kg m-2
+ if (ic_has_ncvars[["SWE"]]) {
+ snow <- ncdf4::ncvar_get(IC.nc, "SWE")
+ if (!is.na(snow) && is.numeric(snow)) {
+ param[param[, 1] == "snowInit", 2] <- PEcAn.utils::ud_convert(snow, "kg m-2", "g cm-2") # BETY: kg m-2
+ }
}
## leafOnDay
- leafOnDay <- try(ncdf4::ncvar_get(IC.nc,"date_of_budburst"),silent = TRUE)
- if (!is.na(leafOnDay) && is.numeric(leafOnDay)) {
- param[which(param[, 1] == "leafOnDay"), 2] <- leafOnDay
+ if (ic_has_ncvars[["date_of_budburst"]]) {
+ leafOnDay <- ncdf4::ncvar_get(IC.nc, "date_of_budburst")
+ if (!is.na(leafOnDay) && is.numeric(leafOnDay)) {
+ param[param[, 1] == "leafOnDay", 2] <- leafOnDay
+ }
}
## leafOffDay
- leafOffDay <- try(ncdf4::ncvar_get(IC.nc,"date_of_senescence"),silent = TRUE)
- if (!is.na(leafOffDay) && is.numeric(leafOffDay)) {
- param[which(param[, 1] == "leafOffDay"), 2] <- leafOffDay
+ if (ic_has_ncvars[["date_of_senescence"]]) {
+ leafOffDay <- ncdf4::ncvar_get(IC.nc, "date_of_senescence")
+ if (!is.na(leafOffDay) && is.numeric(leafOffDay)) {
+ param[param[, 1] == "leafOffDay", 2] <- leafOffDay
+ }
}
- microbe <- try(ncdf4::ncvar_get(IC.nc,"Microbial Biomass C"),silent = TRUE)
- if (!is.na(microbe) && is.numeric(microbe)) {
- param[which(param[, 1] == "microbeInit"), 2] <- PEcAn.utils::ud_convert(microbe, "mg kg-1", "mg g-1") #BETY: mg microbial C kg-1 soil
+ if (ic_has_ncvars[["Microbial Biomass C"]]) {
+ microbe <- ncdf4::ncvar_get(IC.nc, "Microbial Biomass C")
+ if (!is.na(microbe) && is.numeric(microbe)) {
+ param[param[, 1] == "microbeInit", 2] <- PEcAn.utils::ud_convert(microbe, "mg kg-1", "mg g-1") #BETY: mg microbial C kg-1 soil
+ }
}
-
+
ncdf4::nc_close(IC.nc)
- }else{
+ } else {
PEcAn.logger::logger.error("Bad initial conditions filepath; keeping defaults")
}
- }else{
+ } else {
#some stuff about IC file that we can give in lieu of actual ICs
}
-
-
+
+
if (!is.null(settings$run$inputs$soilmoisture)) {
#read soil moisture netcdf file, grab closet date to start_date, set equal to soilWFrac
- if(!is.null(settings$run$inputs$soilmoisture$path)){
+ if (!is.null(settings$run$inputs$soilmoisture$path)) {
soil.path <- settings$run$inputs$soilmoisture$path
soilWFrac <- ncdf4::ncvar_get(ncdf4::nc_open(soil.path), varid = "mass_fraction_of_unfrozen_water_in_soil_moisture")
-
+
param[which(param[, 1] == "soilWFracInit"), 2] <- soilWFrac
}
-
+
}
- if(file.exists(file.path(settings$rundir, run.id, "sipnet.param"))) file.rename(file.path(settings$rundir, run.id, "sipnet.param"),file.path(settings$rundir, run.id, paste0("sipnet_",lubridate::year(settings$run$start.date),"_",lubridate::year(settings$run$end.date),".param")))
-
+ if (file.exists(file.path(settings$rundir, run.id, "sipnet.param"))) {
+ file.rename(
+ file.path(settings$rundir, run.id, "sipnet.param"),
+ file.path(
+ settings$rundir,
+ run.id,
+ paste0("sipnet_", lubridate::year(settings$run$start.date), "_", lubridate::year(settings$run$end.date), ".param")
+ )
+ )
+ }
+
- utils::write.table(param, file.path(settings$rundir, run.id, "sipnet.param"), row.names = FALSE, col.names = FALSE,
- quote = FALSE)
+ utils::write.table(
+ param,
+ file.path(settings$rundir, run.id, "sipnet.param"),
+ row.names = FALSE,
+ col.names = FALSE,
+ quote = FALSE
+ )
} # write.config.SIPNET
From ba7c0e1963a292f27e95a411d680a445f9c03dae Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Fri, 13 Jun 2025 09:45:20 -0700
Subject: [PATCH 0178/1193] add pr num
---
models/sipnet/NEWS.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/models/sipnet/NEWS.md b/models/sipnet/NEWS.md
index 09ddcf2f4b0..73f279c9eb6 100644
--- a/models/sipnet/NEWS.md
+++ b/models/sipnet/NEWS.md
@@ -6,7 +6,7 @@
## Fixed
-* `write.config.SIPNET` now checks more carefully whether an optional variable exists in an initial condition file before trying to read it, therefore printing fewer messages about (expectedly) missing variables.
+* `write.config.SIPNET` now checks more carefully whether an optional variable exists in an initial condition file before trying to read it, therefore printing fewer messages about (expectedly) missing variables. (#3545)
* When passed a vector of input paths, `write.config.SIPNET` was choosing one at random; it now uses only the first (Blesson Thomas, #3298). Note that a single input path per call has always been the intended usage; being passed many was a second bug in PEcAn.uncertainty that is also now fixed.
From c3bfd1f241deb12ca5df10c031c76cd3095f0890 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 14 Jun 2025 01:50:06 +0000
Subject: [PATCH 0179/1193] Enhance ERA5 Download Function: Direct NetCDF,
Flexible Parameters, and Improved Docs
---
modules/data.atmosphere/R/ERA5_download.R | 85 ++++++++++++++++-------
1 file changed, 58 insertions(+), 27 deletions(-)
diff --git a/modules/data.atmosphere/R/ERA5_download.R b/modules/data.atmosphere/R/ERA5_download.R
index 9cbb990de71..f339a351a3c 100644
--- a/modules/data.atmosphere/R/ERA5_download.R
+++ b/modules/data.atmosphere/R/ERA5_download.R
@@ -1,26 +1,55 @@
+#' Download ERA5 Climate Data from the Copernicus CDS API
+#'
#' @description
-#' This function helps to download the yearly ERA5 data based on the prescribed features using the CDS API.
-#' @title ERA5_cds_annual_download
-#'
-#' @param outfolder Character: physical path where the ERA5 data are stored.
+#' Download ERA5 climate data from the Copernicus Climate Data Store (CDS) API as NetCDF files, year by year, according to user-specified parameters.
+#' The function saves one NetCDF file per year in the specified output directory.
+#'
+#' @details
+#' This function requires a valid CDS API key and the Python `cdsapi` package installed and accessible via the `reticulate` package in R.
+#' If you do not have a `.cdsapirc` file with your API credentials, set `auto.create.key = TRUE` to be prompted for your CDS API URL and key.
+#' To get a Copernicus CDS API key, register at \url{https://cds.climate.copernicus.eu/profile}.
+#' The API URL is \url{https://cds.climate.copernicus.eu/api/v2}.
+#'
+#' @param outfolder Character. Directory where downloaded NetCDF files will be saved.
#' @param start_date character: the start date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)
#' @param end_date character: the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)
-#' @param extent numeric: a vector of numbers contains the bounding box (formatted as xmin, xmax, ymin, ymax) to be downloaded.
+#' @param extent numeric: a vector of numbers contains the bounding box (formatted as xmin, xmax, ymin, ymax) (longitude and latitude in degrees).
#' @param variables character: a vector contains variables to be downloaded (e.g., c("2m_temperature","surface_pressure")).
+#' @param time Character vector or NULL. Hours of the day to download (e.g., c("00:00", "12:00")). Default to NULL to download all hours.
+#' @param dataset Character. Name of the CDS dataset to use (default: "reanalysis-era5-single-levels").
+#' @param product_type Character. Product type to request from CDS (default: "ensemble_members").
#' @param auto.create.key Boolean: decide if we want to generate the CDS RC file if it doesn't exist, the default is TRUE.
#' @param timeout numeric: the maximum time (in seconds) allowed to download the data. The default is 36000 seconds.
#'
-#' @return A vector containing file paths to the downloaded files.
+#' @return
+#' A list where each element is a list containing:
+#' \item{file}{File path to the downloaded NetCDF file.}
+#' \item{host}{Host name where the file was downloaded.}
+#' \item{startdate}{Start date and time of the data in the file.}
+#' \item{enddate}{End date and time of the data in the file.}
+#' \item{mimetype}{MIME type of the file ("application/x-netcdf").}
+#' \item{formatname}{Format name ("ERA5_year.nc").}
+#'
+#' @examples
+#' \dontrun{
+#' era5_files <- download.ERA5_cds(
+#' outfolder = "D:/working/era5_func_test",
+#' start_date = "2020-01-01",
+#' end_date = "2022-12-31",
+#' extent = c(-72.2215, -72.1215, 42.4878, 42.5878),
+#' variables = c("2m_temperature","surface_pressure"),
+#' time = NULL,
+#' product_type = "reanalysis"
+#' )
+#' }
#' @export
#'
#' @importFrom purrr %>%
#' @author Dongchen Zhang
-download.ERA5_cds <- function(outfolder, start_date, end_date, extent, variables, auto.create.key = T, timeout = 36000) {
- # check shell environments.
- if ("try-error" %in% class(try(system("grib_to_netcdf"), silent = T))) {
- PEcAn.logger::logger.info("The grib_to_netcdf function is not detected in shell command.")
- return(NA)
- }
+download.ERA5_cds <- function(outfolder, start_date, end_date,
+ extent, variables, time = NULL, dataset = "reanalysis-era5-single-levels",
+ product_type = "ensemble_members", auto.create.key = T, timeout = 36000) {
+
# setup timeout for download.
options(timeout=timeout)
# convert arguments to CDS API specific arguments.
@@ -29,10 +58,16 @@ download.ERA5_cds <- function(outfolder, start_date, end_date, extent, variables
purrr::map(function(d)sprintf("%02d", d))
days <- sort(unique(lubridate::day(seq(lubridate::date(start_date), lubridate::date(end_date), "1 day")))) %>%
purrr::map(function(d)sprintf("%02d", d))
- times <- list('00:00','03:00','06:00',
- '09:00','12:00','15:00',
- '18:00','21:00')
- area <- paste(c(extent[4], extent[1], extent[3], extent[2]), collapse = "/")
+
+ # handle time argument: all hours if Null
+ if (is.null(time)) {
+ times <- sprintf("%02d:00", 0:23)
+ } else {
+ times <- time
+ }
+
+ # Format area for CDS API (North, West, South, East)
+ area <- round(c(extent[4], extent[1], extent[3], extent[2]), 2)
variables <- as.list(variables)
#load cdsapi from python environment.
tryCatch({
@@ -99,7 +134,7 @@ download.ERA5_cds <- function(outfolder, start_date, end_date, extent, variables
# loop over years.
nc.paths <- c()
for (y in years) {
- fname <- file.path(outfolder, paste0("ERA5_", y, ".grib"))
+ fname <- file.path(outfolder, paste0("ERA5_", y, ".nc"))
# start retrieving data.
# you need to have an account for downloaing the files
# Read the documantion for how to setup your account and settings before trying this
@@ -107,26 +142,22 @@ download.ERA5_cds <- function(outfolder, start_date, end_date, extent, variables
c$retrieve(
'reanalysis-era5-single-levels',
list(
- 'product_type' = 'ensemble_members',
- 'data_format' = 'grib',
+ 'product_type' = list(product_type),
+ 'data_format' = 'netcdf',
"download_format" = "unarchived",
'day' = days,
'time' = times,
'month' = months,
- 'year' = as.character(y),
+ 'year' = list(as.character(y)),
"area" = area,
'variable' = variables
),
fname
)
- # convert grib to nc file.
- nc.path <- gsub(".grib", ".nc", fname, fixed = T)
- cmd <- paste("grib_to_netcdf", fname, "-o", nc.path)
- out <- system(cmd, intern = F, ignore.stdout = T, ignore.stderr = T)
+
# store the path.
- nc.paths <- c(nc.paths, nc.path)
- # remove previous grib file.
- unlink(fname)
+ nc.paths <- c(nc.paths, fname)
+
}
# construct results to meet the requirements of pecan.met workflow.
results <- vector("list", length = length(years))
From 39863c76e0bdd15d09d8d6513335a5fe47ba459c Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 14 Jun 2025 02:38:01 +0000
Subject: [PATCH 0180/1193] updated .Rd file
---
.../data.atmosphere/man/download.ERA5_cds.Rd | 45 ++++++++++++++++---
1 file changed, 40 insertions(+), 5 deletions(-)
diff --git a/modules/data.atmosphere/man/download.ERA5_cds.Rd b/modules/data.atmosphere/man/download.ERA5_cds.Rd
index 5c8d1d460e7..49368edd7f7 100644
--- a/modules/data.atmosphere/man/download.ERA5_cds.Rd
+++ b/modules/data.atmosphere/man/download.ERA5_cds.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/ERA5_download.R
\name{download.ERA5_cds}
\alias{download.ERA5_cds}
-\title{ERA5_cds_annual_download}
+\title{Download ERA5 Climate Data from the Copernicus CDS API}
\usage{
download.ERA5_cds(
outfolder,
@@ -10,30 +10,65 @@ download.ERA5_cds(
end_date,
extent,
variables,
+ time = NULL,
+ dataset = "reanalysis-era5-single-levels",
+ product_type = "ensemble_members",
auto.create.key = T,
timeout = 36000
)
}
\arguments{
-\item{outfolder}{Character: physical path where the ERA5 data are stored.}
+\item{outfolder}{Character. Directory where downloaded NetCDF files will be saved.}
\item{start_date}{character: the start date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)}
\item{end_date}{character: the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)}
-\item{extent}{numeric: a vector of numbers contains the bounding box (formatted as xmin, xmax, ymin, ymax) to be downloaded.}
+\item{extent}{numeric: a vector of numbers contains the bounding box (formatted as xmin, xmax, ymin, ymax) (longitude and latitude in degrees).}
\item{variables}{character: a vector contains variables to be downloaded (e.g., c("2m_temperature","surface_pressure")).}
+\item{time}{Character vector or NULL. Hours of the day to download (e.g., c("00:00", "12:00")). Default to NULL to download all hours.}
+
+\item{dataset}{Character. Name of the CDS dataset to use (default: "reanalysis-era5-single-levels").}
+
+\item{product_type}{Character. Product type to request from CDS (default: "ensemble_members").}
+
\item{auto.create.key}{Boolean: decide if we want to generate the CDS RC file if it doesn't exist, the default is TRUE.}
\item{timeout}{numeric: the maximum time (in seconds) allowed to download the data. The default is 36000 seconds.}
}
\value{
-A vector containing file paths to the downloaded files.
+A list where each element is a list containing:
+ \item{file}{File path to the downloaded NetCDF file.}
+ \item{host}{Host name where the file was downloaded.}
+ \item{startdate}{Start date and time of the data in the file.}
+ \item{enddate}{End date and time of the data in the file.}
+ \item{mimetype}{MIME type of the file ("application/x-netcdf").}
+ \item{formatname}{Format name ("ERA5_year.nc").}
}
\description{
-This function helps to download the yearly ERA5 data based on the prescribed features using the CDS API.
+Download ERA5 climate data from the Copernicus Climate Data Store (CDS) API as NetCDF files, year by year, according to user-specified parameters.
+The function saves one NetCDF file per year in the specified output directory.
+}
+\details{
+This function requires a valid CDS API key and the Python `cdsapi` package installed and accessible via the `reticulate` package in R.
+If you do not have a `.cdsapirc` file with your API credentials, set `auto.create.key = TRUE` to be prompted for your CDS API URL and key.
+To get a Copernicus CDS API key, register at \url{https://cds.climate.copernicus.eu/profile}.
+The API URL is \url{https://cds.climate.copernicus.eu/api/v2}.
+}
+\examples{
+\dontrun{
+era5_files <- download.ERA5_cds(
+ outfolder = "D:/working/era5_func_test",
+ start_date = "2020-01-01",
+ end_date = "2022-12-31",
+ extent = c(-72.2215, -72.1215, 42.4878, 42.5878),
+ variables = c("2m_temperature","surface_pressure"),
+ time = NULL,
+ product_type = "reanalysis"
+)
+}
}
\author{
Dongchen Zhang
From 81afebe56888f0dd3635eeae5f8d8669fe04c006 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Sat, 14 Jun 2025 16:23:49 +0530
Subject: [PATCH 0181/1193] feat: timeseries plot and reading the output using
read.output
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 153 ++++++++++++------
1 file changed, 103 insertions(+), 50 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 59572dbf959..cf3ba49c4f1 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -2,6 +2,12 @@
title: "PEcAn Workflow"
author: "PEcAn"
format: pdf
+params:
+ year: 2004 # Default to start year from settings
+ xvar: "coarse_root_carbon_content"
+ yvar: "LAI"
+ width: 600
+ height: 400
---
# Introduction {#introduction}
@@ -104,55 +110,102 @@ PEcAn.workflow::start_model_runs(settings)
```
```{r get-plot-vars}
-library(ncdf4)
-# ⚙️ Use settings$outdir to build base path
-workflow_outdir <- settings$outdir # Correct path from pecan.xml
-
-# Default run_id for demo — you can replace this with your actual run ID
-# Tip: You can find your run ID from the "run-model" chunk output during execution
-run_id <- "99000000195"
-
-#Select year to inspect
-year <- 2006
-#Build NetCDF file path
-netcdf_file <- file.path(workflow_outdir, "out", run_id, paste0(year, ".nc"))
-
-#Open NetCDF file and show variable and dimension names
-nc <- nc_open(netcdf_file)
-cat("Variables:\n")
-print(names(nc$var))
-cat("\n Dimensions:\n")
-print(names(nc$dim))
-nc_close(nc)
+run_file <- file.path(settings$rundir, "runs.txt")
+run_id <- as.numeric(readLines(run_file)[1]) # Get first run ID
+
+# Load PEcAn output using read.output()
+model_output <- PEcAn.utils::read.output(
+ runid = settings$workflow$id,
+ outdir = file.path(settings$outdir, "out", run_id),
+ start.year = as.numeric(format(as.Date(settings$run$start.date), "%Y")),
+ end.year = as.numeric(format(as.Date(settings$run$end.date), "%Y")),
+ variables = NULL,
+ dataframe = TRUE,
+ verbose = TRUE
+)
+
+# Display available variables in model output
+cat("Available variables in model output:\n")
+print(names(model_output)[names(model_output) != "posix"])
+
+```{r plot-timeseries}
+# To plot different variables, simply change the variable names in the plot commands
+# You can see all available variables in the output from the previous block above
+# They are also listed here for reference:
+
+# Carbon fluxes:
+# - GPP, NPP, NEE, TotalResp, AutoResp, HeteroResp, SoilResp
+# Carbon pools:
+# - AbvGrndWood, TotLivBiom, TotSoilCarb, leaf_carbon_content
+# - litter_carbon_content, fine_root_carbon_content, coarse_root_carbon_content
+# - GWBI, AGB
+# Water variables:
+# - Qle, Transp, SoilMoist, SoilMoistFrac, SWE
+# - litter_mass_content_of_water
+# Other:
+# - LAI
+
+# Example 1: Carbon fluxes
+plot(model_output$posix, model_output$GPP,
+ type = 'l',
+ col = 'green',
+ xlab = 'Date',
+ ylab = 'Carbon Flux (kg C m-2 s-1)',
+ main = 'Carbon Fluxes Over Time')
+
+# Add NPP line
+lines(model_output$posix, model_output$NPP,
+ col = 'blue')
+
+# Add legend
+legend('topright',
+ legend = c('GPP', 'NPP'),
+ col = c('green', 'blue'),
+ lty = 1)
+
+# Example 2: Carbon pools
+# To plot carbon pools instead, uncomment and modify these lines:
+# plot(model_output$posix, model_output$TotLivBiom,
+# type = 'l',
+# col = 'darkgreen',
+# xlab = 'Date',
+# ylab = 'Carbon Pool (kg C m-2)',
+# main = 'Carbon Pools Over Time')
+# lines(model_output$posix, model_output$TotSoilCarb,
+# col = 'brown')
+# legend('topright',
+# legend = c('Total Live Biomass', 'Total Soil Carbon'),
+# col = c('darkgreen', 'brown'),
+# lty = 1)
+
+# Example 3: Water variables
+# To plot water variables instead, uncomment and modify these lines:
+# plot(model_output$posix, model_output$SoilMoist,
+# type = 'l',
+# col = 'blue',
+# xlab = 'Date',
+# ylab = 'Soil Moisture (kg m-2)',
+# main = 'Soil Moisture Over Time')
+# lines(model_output$posix, model_output$SWE,
+# col = 'lightblue')
+# legend('topright',
+# legend = c('Soil Moisture', 'Snow Water Equivalent'),
+# col = c('blue', 'lightblue'),
+# lty = 1)
+
+# Example 4: LAI and Biomass
+# To plot LAI and biomass, uncomment and modify these lines:
+# plot(model_output$posix, model_output$LAI,
+# type = 'l',
+# col = 'darkgreen',
+# xlab = 'Date',
+# ylab = 'LAI (m2 m-2)',
+# main = 'Leaf Area Index Over Time')
+# lines(model_output$posix, model_output$AbvGrndWood,
+# col = 'brown')
+# legend('topright',
+# legend = c('LAI', 'Above Ground Wood'),
+# col = c('darkgreen', 'brown'),
+# lty = 1)
```
-# Plot NetCDF Variables Using Selected Data and Custom Parameters
-
-```{r plot-graph, eval=TRUE}
-# This chunk generates a NetCDF variable plot dynamically
-# ℹ Make sure `settings` object is loaded from pecan.xml before running this
-# Required user inputs (customize these):
-# - year: choose a year to visualize
-# - xvar, yvar: choose variables from the **Variables** printed in the previous chunk (`get-plot-vars`)
-year <- 2006
-xvar <- 'coarse_root_carbon_content' # Select valid variable from previous chunk output
-yvar <- 'LAI' # Select valid variable from previous chunk output
-width <- 800
-height <- 600
-filename <- "plot.png"
-
-workflow_outdir <- settings$outdir
-run_id <- "99000000195" # Update this from "run-model" output
-netcdf_file <- file.path(workflow_outdir, "out", run_id, paste0(year, ".nc"))
-
-PEcAn.visualization::plot_netcdf(
- datafile = netcdf_file,
- yvar = yvar,
- xvar = xvar,
- width = width,
- height = height,
- filename = filename,
- year = year
-)
-knitr::include_graphics(filename)
-```
\ No newline at end of file
From 67a57e3face12e8e7fe3f25abf4bbca8ba93d740 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Sat, 14 Jun 2025 16:28:06 +0530
Subject: [PATCH 0182/1193] remove unneecesary parama from top
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 20 +++++++++----------
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index cf3ba49c4f1..99549d5238d 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -2,13 +2,6 @@
title: "PEcAn Workflow"
author: "PEcAn"
format: pdf
-params:
- year: 2004 # Default to start year from settings
- xvar: "coarse_root_carbon_content"
- yvar: "LAI"
- width: 600
- height: 400
----
# Introduction {#introduction}
@@ -121,15 +114,20 @@ model_output <- PEcAn.utils::read.output(
end.year = as.numeric(format(as.Date(settings$run$end.date), "%Y")),
variables = NULL,
dataframe = TRUE,
- verbose = TRUE
+ verbose = FALSE # Set to FALSE to avoid warning messages
)
-# Display available variables in model output
+# Store variable names for later use
+available_vars <- names(model_output)[!names(model_output) %in% c("posix", "time_bounds")]
+```
+
+```{r show-variables, echo=FALSE}
+# Display available variables in a clean format
cat("Available variables in model output:\n")
-print(names(model_output)[names(model_output) != "posix"])
+cat(paste(available_vars, collapse = "\n"))
+```
```{r plot-timeseries}
-# To plot different variables, simply change the variable names in the plot commands
# You can see all available variables in the output from the previous block above
# They are also listed here for reference:
From 470ffbdce02119ebfe957c126446dbb545ee2eb0 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Sat, 14 Jun 2025 19:29:18 +0530
Subject: [PATCH 0183/1193] fix: correction in commnets
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 99549d5238d..9fccce66b7e 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -40,7 +40,7 @@ Let's get started!
#### Objective
-This demo illustrates how to run a basic PEcAn workflow using an R-based Quarto notebook. We will cover loading settings, performing trait and meta-analysis, writing model configuration files, and running model simulations. This approach provides a programmatic alternative to the web-based PEcAn interface for executing ecosystem models.
+This demo illustrates how to run a basic PEcAn workflow using an R-based Quarto notebook. It will cover loading settings, writing model configuration files, and running model simulations. This approach provides a programmatic alternative to the web-based PEcAn interface for executing ecosystem models.
# Load PEcAn packages
@@ -55,7 +55,7 @@ library("PEcAn.all")
This section handles the crucial step of loading the PEcAn settings.
-- If you have a `pecan.xml` file generated from the web interface, place it in your working directory and copy its file path. Assign this path to the **settings_path** variable.
+- If you have a `pecan.xml` file precofigured, place it in your working directory and copy its file path. Assign this path to the **settings_path** variable.
- Alternatively, for a sample or initial run, you can use the example `pecan.xml` file available at: `/pecan/base/all/inst/quarto_notebooks/_extensions/demo1/run-model`.
```{r load-settings}
From 630cd0a7944b220d00ab56f2f094e48873f92bb6 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Sat, 14 Jun 2025 19:29:44 +0530
Subject: [PATCH 0184/1193] added lat and lon in pecan.xml
Signed-off-by: Aritra Dey
---
.../quarto_notebooks/_extensions/demo1/run-model/pecan.xml | 3 +++
1 file changed, 3 insertions(+)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
index 9491b0f8b06..30333c509a3 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
@@ -54,6 +54,9 @@
7722004/01/012006/12/31
+ Niwot Ridge Forest/LTER NWT1 (US-NR1)
+ 40.0329
+ -105.546
From 59304273a90c8579125e94a5aea4e4651a92e52e Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 16 Jun 2025 08:48:46 +0530
Subject: [PATCH 0185/1193] model output refactor
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/run_pecan.qmd | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
index 9fccce66b7e..c700d339221 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
@@ -103,18 +103,20 @@ PEcAn.workflow::start_model_runs(settings)
```
```{r get-plot-vars}
-run_file <- file.path(settings$rundir, "runs.txt")
-run_id <- as.numeric(readLines(run_file)[1]) # Get first run ID
+runid<-as.character(read.table(paste(settings$outdir, "/run/","runs.txt", sep=""))[1,1]) # Note: if you are using an xml from a run with multiple ensembles this line will provide only the first run id
+outdir<- paste(settings$outdir,"/out/",runid,sep= "")
+start.year<-as.numeric(lubridate::year(settings$run$start.date))
+end.year<-as.numeric(lubridate::year(settings$run$end.date))
# Load PEcAn output using read.output()
model_output <- PEcAn.utils::read.output(
- runid = settings$workflow$id,
- outdir = file.path(settings$outdir, "out", run_id),
- start.year = as.numeric(format(as.Date(settings$run$start.date), "%Y")),
- end.year = as.numeric(format(as.Date(settings$run$end.date), "%Y")),
+ runid,
+ outdir,
+ start.year,
+ end.year,
variables = NULL,
dataframe = TRUE,
- verbose = FALSE # Set to FALSE to avoid warning messages
+ verbose = FALSE
)
# Store variable names for later use
From e7d60c8fe03aa671379e7445f2a7bb5cb6520142 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 16 Jun 2025 19:43:41 +0530
Subject: [PATCH 0186/1193] refactor pecan.xml
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/pecan.xml | 29 +++++++------------
1 file changed, 11 insertions(+), 18 deletions(-)
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
index 30333c509a3..34ced299212 100644
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
+++ b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
@@ -6,22 +6,10 @@
2025/05/26 08:25:18 +0000
- /data/workflows/PEcAn_99000000049
-
-
- bety
- bety
- postgres
- 5432
- bety
- PostgreSQL
- true
-
- /data/dbfiles
-
+ /data/workflows/PEcAn_99000000001
- temperate.broadleaf.deciduous
+ temperate.broadleaf.deciduous
@@ -30,24 +18,29 @@
FALSETRUE
+ FALSE
+ 1.21NPP
- uniform
+ none
- sampling
+ none99000000003
+ SIPNET
+ FALSE
+ sipnet
- 99000000046
+ 99000000001
@@ -62,7 +55,7 @@
AmerifluxLBL
- Aritra_2004
+ Aritra_20042004/01/01
From 8b18f066e9753e1439f5d03f707a521b2853d695 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 16 Jun 2025 19:49:15 +0530
Subject: [PATCH 0187/1193] moved the directory to documrntation/tutorials
folder
Signed-off-by: Aritra Dey
---
.../quarto/01_Demo_Basic_Run/pecan.xml | 71 ++++++
.../quarto/01_Demo_Basic_Run/run_pecan.qmd | 211 ++++++++++++++++++
2 files changed, 282 insertions(+)
create mode 100644 documentation/quarto/01_Demo_Basic_Run/pecan.xml
create mode 100644 documentation/quarto/01_Demo_Basic_Run/run_pecan.qmd
diff --git a/documentation/quarto/01_Demo_Basic_Run/pecan.xml b/documentation/quarto/01_Demo_Basic_Run/pecan.xml
new file mode 100644
index 00000000000..34ced299212
--- /dev/null
+++ b/documentation/quarto/01_Demo_Basic_Run/pecan.xml
@@ -0,0 +1,71 @@
+
+
+
+
+ -1
+
+ 2025/05/26 08:25:18 +0000
+
+ /data/workflows/PEcAn_99000000001
+
+
+ temperate.broadleaf.deciduous
+
+
+
+ 3000
+
+ FALSE
+ TRUE
+
+ FALSE
+ 1.2
+
+
+ 1
+ NPP
+
+
+ none
+
+
+ none
+
+
+
+
+ 99000000003
+ SIPNET
+ FALSE
+ sipnet
+
+
+ 99000000001
+
+
+
+ 772
+ 2004/01/01
+ 2006/12/31
+ Niwot Ridge Forest/LTER NWT1 (US-NR1)
+ 40.0329
+ -105.546
+
+
+
+ AmerifluxLBL
+
+ Aritra_2004
+
+
+ 2004/01/01
+ 2006/12/31
+
+
+ localhost
+
+ amqp://guest:guest@rabbitmq/%2F
+ SIPNET_git
+
+
+
\ No newline at end of file
diff --git a/documentation/quarto/01_Demo_Basic_Run/run_pecan.qmd b/documentation/quarto/01_Demo_Basic_Run/run_pecan.qmd
new file mode 100644
index 00000000000..3a9a88644ae
--- /dev/null
+++ b/documentation/quarto/01_Demo_Basic_Run/run_pecan.qmd
@@ -0,0 +1,211 @@
+---
+title: "PEcAn Workflow"
+author: "PEcAn"
+format: pdf
+
+# Introduction {#introduction}
+
+Welcome to this PEcAn workflow notebook! This notebook will guide you through running an ecosystem model using PEcAn's programmatic interface.
+
+## What is PEcAn?
+
+PEcAn (Predictive Ecosystem Analyzer) is a scientific workflow system designed to make ecosystem modeling more transparent, repeatable, and accessible. It helps researchers:
+
+- Run ecosystem models with standardized inputs and outputs
+- Perform uncertainty analysis on model parameters
+- Compare model predictions with observations
+- Share and reproduce scientific workflows
+
+## What This Notebook Does
+
+This notebook demonstrates how to:
+1. Set up and configure a PEcAn workflow
+2. Run an ecosystem model simulation
+3. Analyze and visualize the results
+
+## Prerequisites
+
+Before running this notebook, make sure you have:
+- The PEcAn R packages installed
+- A valid `pecan.xml` configuration file (or use the example provided)
+
+## How to Use This Notebook
+
+1. Each section is clearly marked with a heading
+2. Code chunks are provided with explanations
+3. You can run the code chunks sequentially
+4. Feel free to modify parameters to suit your needs
+
+Let's get started!
+
+#### Objective
+
+This demo illustrates how to run a basic PEcAn workflow using an R-based Quarto notebook. It will cover loading settings, writing model configuration files, and running model simulations. This approach provides a programmatic alternative to the web-based PEcAn interface for executing ecosystem models.
+
+# Load PEcAn packages
+
+First, we need to load the PEcAn R packages. These packages provide all the functions we'll use to run the workflow.
+
+```{r libraries}
+# Load the PEcAn.all package, which includes all necessary PEcAn functionality
+library("PEcAn.all")
+```
+
+# Load PEcAn settings files
+
+This section handles the crucial step of loading the PEcAn settings.
+
+- If you have a `pecan.xml` file precofigured, place it in your working directory and copy its file path. Assign this path to the **settings_path** variable.
+- Alternatively, for a sample or initial run, you can use the example `pecan.xml` file available at: `pecan/documentation/quarto/01_Demo_Basic_Run`.
+
+```{r load-settings}
+# Specify the path to your pecan.xml file
+# Replace this with your actual pecan.xml file path
+settings_path <- "~/pecan.xml"
+```
+
+# Prepare and Validate Settings
+
+After specifying the path to your `pecan.xml` file, the next step involves reading and preparing these settings. PEcAn provides robust utilities to process and validate your configurations before any execution begins.
+
+* `PEcAn.settings::read.settings(settings_path)`: This function parses the `pecan.xml` file, converting its contents into an R list object that PEcAn can work with. This step ensures that the XML structure is correctly interpreted.
+* `PEcAn.settings::prepare.settings(settings)`: After reading, the settings are passed to this function for further preparation and validation. This involves checking for missing required fields, setting up default values where necessary, and ensuring that all paths and configurations are consistent with PEcAn's operational requirements. This step is crucial for preventing errors during subsequent workflow stages.
+
+```{r read-prepare-settings}
+# Read the settings from the pecan.xml file
+settings <- PEcAn.settings::read.settings(settings_path)
+
+# Prepare and validate the settings
+settings <- PEcAn.settings::prepare.settings(settings)
+```
+
+# Exploring the settings objects
+
+Once the settings have been read and prepared, it is often useful to inspect the structure of the `settings` object. This object is a comprehensive R list that contains all the parameters and configurations for your PEcAn workflow, derived directly from the `pecan.xml` file.
+
+* `str(settings)`: This function provides a concise, human-readable summary of the structure of the `settings` object. It displays the internal structure of an R object, showing its type, length, and the first few elements for each component. This helps in understanding how your XML configurations are represented in R and can be invaluable for debugging or verifying settings during the workflow.
+
+```{r explore-settings}
+# Display the structure of the settings object
+# This helps you understand what configurations are available
+str(settings)
+```
+
+# Run Model Simulations and Fetch Results
+
+This section executes the actual model simulations and retrieves the results. The process is managed by PEcAn's workflow system, which handles the execution of your chosen ecosystem model.
+
+* `start_model_runs(settings)`: This function initiates the model runs based on your configuration. It manages the execution of your chosen ecosystem model, using the configuration files generated in the previous step.
+
+```{r run-model}
+# Start the model simulations
+PEcAn.workflow::start_model_runs(settings)
+```
+
+```{r get-plot-vars}
+runid<-as.character(read.table(paste(settings$outdir, "/run/","runs.txt", sep=""))[1,1]) # Note: if you are using an xml from a run with multiple ensembles this line will provide only the first run id
+outdir<- paste(settings$outdir,"/out/",runid,sep= "")
+start.year<-as.numeric(lubridate::year(settings$run$start.date))
+end.year<-as.numeric(lubridate::year(settings$run$end.date))
+
+# Load PEcAn output using read.output()
+model_output <- PEcAn.utils::read.output(
+ runid,
+ outdir,
+ start.year,
+ end.year,
+ variables = NULL,
+ dataframe = TRUE,
+ verbose = FALSE
+)
+
+# Store variable names for later use
+available_vars <- names(model_output)[!names(model_output) %in% c("posix", "time_bounds")]
+```
+
+```{r show-variables, echo=FALSE}
+# Display available variables in a clean format
+cat("Available variables in model output:\n")
+cat(paste(available_vars, collapse = "\n"))
+```
+
+```{r plot-timeseries}
+# You can see all available variables in the output from the previous block above
+# They are also listed here for reference:
+
+# Carbon fluxes:
+# - GPP, NPP, NEE, TotalResp, AutoResp, HeteroResp, SoilResp
+# Carbon pools:
+# - AbvGrndWood, TotLivBiom, TotSoilCarb, leaf_carbon_content
+# - litter_carbon_content, fine_root_carbon_content, coarse_root_carbon_content
+# - GWBI, AGB
+# Water variables:
+# - Qle, Transp, SoilMoist, SoilMoistFrac, SWE
+# - litter_mass_content_of_water
+# Other:
+# - LAI
+
+# Example 1: Carbon fluxes
+plot(model_output$posix, model_output$GPP,
+ type = 'l',
+ col = 'green',
+ xlab = 'Date',
+ ylab = 'Carbon Flux (kg C m-2 s-1)',
+ main = 'Carbon Fluxes Over Time')
+
+# Add NPP line
+lines(model_output$posix, model_output$NPP,
+ col = 'blue')
+
+# Add legend
+legend('topright',
+ legend = c('GPP', 'NPP'),
+ col = c('green', 'blue'),
+ lty = 1)
+
+# Example 2: Carbon pools
+# To plot carbon pools instead, uncomment and modify these lines:
+# plot(model_output$posix, model_output$TotLivBiom,
+# type = 'l',
+# col = 'darkgreen',
+# xlab = 'Date',
+# ylab = 'Carbon Pool (kg C m-2)',
+# main = 'Carbon Pools Over Time')
+# lines(model_output$posix, model_output$TotSoilCarb,
+# col = 'brown')
+# legend('topright',
+# legend = c('Total Live Biomass', 'Total Soil Carbon'),
+# col = c('darkgreen', 'brown'),
+# lty = 1)
+
+# Example 3: Water variables
+# To plot water variables instead, uncomment and modify these lines:
+# plot(model_output$posix, model_output$SoilMoist,
+# type = 'l',
+# col = 'blue',
+# xlab = 'Date',
+# ylab = 'Soil Moisture (kg m-2)',
+# main = 'Soil Moisture Over Time')
+# lines(model_output$posix, model_output$SWE,
+# col = 'lightblue')
+# legend('topright',
+# legend = c('Soil Moisture', 'Snow Water Equivalent'),
+# col = c('blue', 'lightblue'),
+# lty = 1)
+
+# Example 4: LAI and Biomass
+# To plot LAI and biomass, uncomment and modify these lines:
+# plot(model_output$posix, model_output$LAI,
+# type = 'l',
+# col = 'darkgreen',
+# xlab = 'Date',
+# ylab = 'LAI (m2 m-2)',
+# main = 'Leaf Area Index Over Time')
+# lines(model_output$posix, model_output$AbvGrndWood,
+# col = 'brown')
+# legend('topright',
+# legend = c('LAI', 'Above Ground Wood'),
+# col = c('darkgreen', 'brown'),
+# lty = 1)
+```
+
From 6480abbf099c0dc754f0010c036e2eece7a61047 Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Mon, 16 Jun 2025 19:50:39 +0530
Subject: [PATCH 0188/1193] removed quarto from base dir
Signed-off-by: Aritra Dey
---
.../_extensions/demo1/run-model/pecan.xml | 71 ------
.../_extensions/demo1/run-model/run_pecan.qmd | 211 ------------------
2 files changed, 282 deletions(-)
delete mode 100644 base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
delete mode 100644 base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
deleted file mode 100644
index 34ced299212..00000000000
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/pecan.xml
+++ /dev/null
@@ -1,71 +0,0 @@
-
-
-
-
- -1
-
- 2025/05/26 08:25:18 +0000
-
- /data/workflows/PEcAn_99000000001
-
-
- temperate.broadleaf.deciduous
-
-
-
- 3000
-
- FALSE
- TRUE
-
- FALSE
- 1.2
-
-
- 1
- NPP
-
-
- none
-
-
- none
-
-
-
-
- 99000000003
- SIPNET
- FALSE
- sipnet
-
-
- 99000000001
-
-
-
- 772
- 2004/01/01
- 2006/12/31
- Niwot Ridge Forest/LTER NWT1 (US-NR1)
- 40.0329
- -105.546
-
-
-
- AmerifluxLBL
-
- Aritra_2004
-
-
- 2004/01/01
- 2006/12/31
-
-
- localhost
-
- amqp://guest:guest@rabbitmq/%2F
- SIPNET_git
-
-
-
\ No newline at end of file
diff --git a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd b/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
deleted file mode 100644
index c700d339221..00000000000
--- a/base/all/inst/quarto_notebooks/_extensions/demo1/run-model/run_pecan.qmd
+++ /dev/null
@@ -1,211 +0,0 @@
----
-title: "PEcAn Workflow"
-author: "PEcAn"
-format: pdf
-
-# Introduction {#introduction}
-
-Welcome to this PEcAn workflow notebook! This notebook will guide you through running an ecosystem model using PEcAn's programmatic interface.
-
-## What is PEcAn?
-
-PEcAn (Predictive Ecosystem Analyzer) is a scientific workflow system designed to make ecosystem modeling more transparent, repeatable, and accessible. It helps researchers:
-
-- Run ecosystem models with standardized inputs and outputs
-- Perform uncertainty analysis on model parameters
-- Compare model predictions with observations
-- Share and reproduce scientific workflows
-
-## What This Notebook Does
-
-This notebook demonstrates how to:
-1. Set up and configure a PEcAn workflow
-2. Run an ecosystem model simulation
-3. Analyze and visualize the results
-
-## Prerequisites
-
-Before running this notebook, make sure you have:
-- The PEcAn R packages installed
-- A valid `pecan.xml` configuration file (or use the example provided)
-
-## How to Use This Notebook
-
-1. Each section is clearly marked with a heading
-2. Code chunks are provided with explanations
-3. You can run the code chunks sequentially
-4. Feel free to modify parameters to suit your needs
-
-Let's get started!
-
-#### Objective
-
-This demo illustrates how to run a basic PEcAn workflow using an R-based Quarto notebook. It will cover loading settings, writing model configuration files, and running model simulations. This approach provides a programmatic alternative to the web-based PEcAn interface for executing ecosystem models.
-
-# Load PEcAn packages
-
-First, we need to load the PEcAn R packages. These packages provide all the functions we'll use to run the workflow.
-
-```{r libraries}
-# Load the PEcAn.all package, which includes all necessary PEcAn functionality
-library("PEcAn.all")
-```
-
-# Load PEcAn settings files
-
-This section handles the crucial step of loading the PEcAn settings.
-
-- If you have a `pecan.xml` file precofigured, place it in your working directory and copy its file path. Assign this path to the **settings_path** variable.
-- Alternatively, for a sample or initial run, you can use the example `pecan.xml` file available at: `/pecan/base/all/inst/quarto_notebooks/_extensions/demo1/run-model`.
-
-```{r load-settings}
-# Specify the path to your pecan.xml file
-# Replace this with your actual pecan.xml file path
-settings_path <- "~/pecan.xml"
-```
-
-# Prepare and Validate Settings
-
-After specifying the path to your `pecan.xml` file, the next step involves reading and preparing these settings. PEcAn provides robust utilities to process and validate your configurations before any execution begins.
-
-* `PEcAn.settings::read.settings(settings_path)`: This function parses the `pecan.xml` file, converting its contents into an R list object that PEcAn can work with. This step ensures that the XML structure is correctly interpreted.
-* `PEcAn.settings::prepare.settings(settings)`: After reading, the settings are passed to this function for further preparation and validation. This involves checking for missing required fields, setting up default values where necessary, and ensuring that all paths and configurations are consistent with PEcAn's operational requirements. This step is crucial for preventing errors during subsequent workflow stages.
-
-```{r read-prepare-settings}
-# Read the settings from the pecan.xml file
-settings <- PEcAn.settings::read.settings(settings_path)
-
-# Prepare and validate the settings
-settings <- PEcAn.settings::prepare.settings(settings)
-```
-
-# Exploring the settings objects
-
-Once the settings have been read and prepared, it is often useful to inspect the structure of the `settings` object. This object is a comprehensive R list that contains all the parameters and configurations for your PEcAn workflow, derived directly from the `pecan.xml` file.
-
-* `str(settings)`: This function provides a concise, human-readable summary of the structure of the `settings` object. It displays the internal structure of an R object, showing its type, length, and the first few elements for each component. This helps in understanding how your XML configurations are represented in R and can be invaluable for debugging or verifying settings during the workflow.
-
-```{r explore-settings}
-# Display the structure of the settings object
-# This helps you understand what configurations are available
-str(settings)
-```
-
-# Run Model Simulations and Fetch Results
-
-This section executes the actual model simulations and retrieves the results. The process is managed by PEcAn's workflow system, which handles the execution of your chosen ecosystem model.
-
-* `start_model_runs(settings)`: This function initiates the model runs based on your configuration. It manages the execution of your chosen ecosystem model, using the configuration files generated in the previous step.
-
-```{r run-model}
-# Start the model simulations
-PEcAn.workflow::start_model_runs(settings)
-```
-
-```{r get-plot-vars}
-runid<-as.character(read.table(paste(settings$outdir, "/run/","runs.txt", sep=""))[1,1]) # Note: if you are using an xml from a run with multiple ensembles this line will provide only the first run id
-outdir<- paste(settings$outdir,"/out/",runid,sep= "")
-start.year<-as.numeric(lubridate::year(settings$run$start.date))
-end.year<-as.numeric(lubridate::year(settings$run$end.date))
-
-# Load PEcAn output using read.output()
-model_output <- PEcAn.utils::read.output(
- runid,
- outdir,
- start.year,
- end.year,
- variables = NULL,
- dataframe = TRUE,
- verbose = FALSE
-)
-
-# Store variable names for later use
-available_vars <- names(model_output)[!names(model_output) %in% c("posix", "time_bounds")]
-```
-
-```{r show-variables, echo=FALSE}
-# Display available variables in a clean format
-cat("Available variables in model output:\n")
-cat(paste(available_vars, collapse = "\n"))
-```
-
-```{r plot-timeseries}
-# You can see all available variables in the output from the previous block above
-# They are also listed here for reference:
-
-# Carbon fluxes:
-# - GPP, NPP, NEE, TotalResp, AutoResp, HeteroResp, SoilResp
-# Carbon pools:
-# - AbvGrndWood, TotLivBiom, TotSoilCarb, leaf_carbon_content
-# - litter_carbon_content, fine_root_carbon_content, coarse_root_carbon_content
-# - GWBI, AGB
-# Water variables:
-# - Qle, Transp, SoilMoist, SoilMoistFrac, SWE
-# - litter_mass_content_of_water
-# Other:
-# - LAI
-
-# Example 1: Carbon fluxes
-plot(model_output$posix, model_output$GPP,
- type = 'l',
- col = 'green',
- xlab = 'Date',
- ylab = 'Carbon Flux (kg C m-2 s-1)',
- main = 'Carbon Fluxes Over Time')
-
-# Add NPP line
-lines(model_output$posix, model_output$NPP,
- col = 'blue')
-
-# Add legend
-legend('topright',
- legend = c('GPP', 'NPP'),
- col = c('green', 'blue'),
- lty = 1)
-
-# Example 2: Carbon pools
-# To plot carbon pools instead, uncomment and modify these lines:
-# plot(model_output$posix, model_output$TotLivBiom,
-# type = 'l',
-# col = 'darkgreen',
-# xlab = 'Date',
-# ylab = 'Carbon Pool (kg C m-2)',
-# main = 'Carbon Pools Over Time')
-# lines(model_output$posix, model_output$TotSoilCarb,
-# col = 'brown')
-# legend('topright',
-# legend = c('Total Live Biomass', 'Total Soil Carbon'),
-# col = c('darkgreen', 'brown'),
-# lty = 1)
-
-# Example 3: Water variables
-# To plot water variables instead, uncomment and modify these lines:
-# plot(model_output$posix, model_output$SoilMoist,
-# type = 'l',
-# col = 'blue',
-# xlab = 'Date',
-# ylab = 'Soil Moisture (kg m-2)',
-# main = 'Soil Moisture Over Time')
-# lines(model_output$posix, model_output$SWE,
-# col = 'lightblue')
-# legend('topright',
-# legend = c('Soil Moisture', 'Snow Water Equivalent'),
-# col = c('blue', 'lightblue'),
-# lty = 1)
-
-# Example 4: LAI and Biomass
-# To plot LAI and biomass, uncomment and modify these lines:
-# plot(model_output$posix, model_output$LAI,
-# type = 'l',
-# col = 'darkgreen',
-# xlab = 'Date',
-# ylab = 'LAI (m2 m-2)',
-# main = 'Leaf Area Index Over Time')
-# lines(model_output$posix, model_output$AbvGrndWood,
-# col = 'brown')
-# legend('topright',
-# legend = c('LAI', 'Above Ground Wood'),
-# col = c('darkgreen', 'brown'),
-# lty = 1)
-```
-
From 3690daa4fe5c335dfc7a55e21b3195d432f44d7d Mon Sep 17 00:00:00 2001
From: David LeBauer
Date: Tue, 17 Jun 2025 14:38:21 -0400
Subject: [PATCH 0189/1193] fix and update tests for clip and move raster
function
---
.../data.land/R/clip_and_move_raster_file.R | 57 ++++++-----
.../man/clip_and_move_raster_file.Rd | 33 +++++++
.../testthat/test-clip_and_move_raster_file.R | 94 ++++++++++++-------
3 files changed, 123 insertions(+), 61 deletions(-)
create mode 100644 modules/data.land/man/clip_and_move_raster_file.Rd
diff --git a/modules/data.land/R/clip_and_move_raster_file.R b/modules/data.land/R/clip_and_move_raster_file.R
index 4cd9782b995..882940d21e9 100644
--- a/modules/data.land/R/clip_and_move_raster_file.R
+++ b/modules/data.land/R/clip_and_move_raster_file.R
@@ -4,44 +4,51 @@
#' output in the same format as the input.
#'
#' @param input_path Character. Path to the input raster file.
-#' @param polygon An `sf` or `SpatVector` object to be used for clipping and masking.
+#' @param polygon An object or file coercible to a `SpatVector` by `terra::vect()`
+#' (e.g., an `sf` object, a `SpatVector`, or a file path to a vector dataset).
+#' used for clipping and masking. Must have a valid CRS.
#' @param out_path Character. Path to save the processed raster.
-#' @return Character. The path to the saved output raster.
+#' @return Invisibly, the clipped `SpatRaster` object. The raster is also saved to `out_path`.
#' @export
#' @author David LeBauer
-clip_and_move_raster_files <- function(input_path, polygon, out_path, mask = TRUE, overwrite = TRUE) {
+clip_and_move_raster_file <- function(input_path, polygon, out_path, mask = TRUE, overwrite = TRUE) {
+
+ # Check that input and output files have same extension
+ # This function is not designed to convert between raster formats
+ if (tools::file_ext(input_path) != tools::file_ext(out_path)) {
+ PEcAn.logger::logger.error("Input and output files must have the same extension.")
+ }
+
rast_in <- terra::rast(input_path)
- # check that input file exists
- if (!file.exists(input_path)) {
- PEcAn.logger::logger.error("Input raster file does not exist: ", input_path)
+
+ # Coerce to SpatVector if not already
+ if (inherits(polygon, "SpatVector")) { # NB passing a SpatVector to terra::vect() fails
+ poly_sv <- polygon
+ } else {
+ poly_sv <- terra::vect(polygon)
}
- # check that polygon is valid
- if (!inherits(polygon, c("sf", "SpatVector"))) {
- PEcAn.logger::logger.error("Polygon must be an sf object or SpatVector")
+
+ if (terra::crs(poly_sv) == "") {
+ PEcAn.logger::logger.error("Input polygon must have CRS defined.")
}
- if (inherits(polygon, "sf")) {
- # Convert sf object to SpatVector
- polygon <- terra::vect(polygon)
+
+ # Reproject polygon to raster CRS if different
+ if (!terra::same.crs(poly_sv, rast_in)) {
+ poly_sv <- terra::project(poly_sv, terra::crs(rast_in))
}
- # Reproject polygon to raster CRS, convert to SpatVector
- polygon_proj <- sf::st_transform(polygon, crs = terra::crs(rast_in))
- polygon_vect <- terra::vect(polygon_proj)
- rast_crop <- terra::crop(rast_in, polygon_vect)
-
+
+ rast_crop <- terra::crop(rast_in, poly_sv)
+
if (mask) {
- rast_to_write <- terra::mask(rast_crop, polygon_vect)
+ rast_to_write <- terra::mask(rast_crop, poly_sv)
} else {
rast_to_write <- rast_crop
}
- filetype <- terra::filetype(rast_in)
- gdal_opts <- terra::gdal(rast_in)
-
+
terra::writeRaster(
rast_to_write,
filename = out_path,
- overwrite = overwrite,
- filetype = filetype,
- gdal = gdal_opts
+ overwrite = overwrite
)
- return(out_path)
+ invisible(rast_to_write)
}
\ No newline at end of file
diff --git a/modules/data.land/man/clip_and_move_raster_file.Rd b/modules/data.land/man/clip_and_move_raster_file.Rd
new file mode 100644
index 00000000000..c8ce532226b
--- /dev/null
+++ b/modules/data.land/man/clip_and_move_raster_file.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clip_and_move_raster_file.R
+\name{clip_and_move_raster_file}
+\alias{clip_and_move_raster_file}
+\title{Clip and Move a Raster File}
+\usage{
+clip_and_move_raster_file(
+ input_path,
+ polygon,
+ out_path,
+ mask = TRUE,
+ overwrite = TRUE
+)
+}
+\arguments{
+\item{input_path}{Character. Path to the input raster file.}
+
+\item{polygon}{An object or file coercible to a `SpatVector` by `terra::vect()`
+(e.g., an `sf` object, a `SpatVector`, or a file path to a vector dataset).
+used for clipping and masking. Must have a valid CRS.}
+
+\item{out_path}{Character. Path to save the processed raster.}
+}
+\value{
+Invisibly, the clipped `SpatRaster` object. The raster is also saved to `out_path`.
+}
+\description{
+Clips a raster to a polygon bounding box, optionally masks to polygon, and saves the
+output in the same format as the input.
+}
+\author{
+David LeBauer
+}
diff --git a/modules/data.land/tests/testthat/test-clip_and_move_raster_file.R b/modules/data.land/tests/testthat/test-clip_and_move_raster_file.R
index 5fb1879ea05..9fd6506ad04 100644
--- a/modules/data.land/tests/testthat/test-clip_and_move_raster_file.R
+++ b/modules/data.land/tests/testthat/test-clip_and_move_raster_file.R
@@ -1,57 +1,79 @@
library(testthat)
library(terra)
library(sf)
+library(withr) # Add withr
-# load function under test
-source("../../clip_and_move_rasters.R") # adjust relative path as needed
# helper to create a small test raster
-make_raster <- function(crs = "EPSG:4326") {
+make_raster <- function(outfile, crs = "EPSG:4326") {
r <- terra::rast(matrix(1:16, 4, 4),
- extent = terra::ext(0, 4, 0, 4), crs = crs)
- f <- tempfile(fileext = ".tif")
- terra::writeRaster(r, f, filetype = "GTiff", overwrite = TRUE)
- f
+ extent = terra::ext(0, 4, 0, 4),
+ crs = crs
+ )
+ terra::writeRaster(r, outfile, filetype = "GTiff", overwrite = TRUE)
+ return(outfile)
}
test_that("clip & mask works: output clipped to polygon bbox and masked", {
- in_r <- make_raster()
- on.exit(unlink(in_r), add = TRUE)
- # box polygon (1,1)-(3,3)
- poly <- sf::st_as_sfc(sf::st_bbox(c(xmin=1, ymin=1, xmax=3, ymax=3)), crs = "EPSG:4326")
- out_f <- tempfile(fileext = ".tif")
- clip_and_move_raster_files(in_r, poly, out_f, mask = TRUE)
+ in_r <- local_tempfile(fileext = ".tif")
+ out_f <- local_tempfile(fileext = ".tif")
+
+ make_raster(outfile = in_r)
+
+ poly <- terra::as.polygons(
+ terra::ext(1, 3, 1, 3),
+ crs = "EPSG:4326"
+ )
+
+ clip_and_move_raster_file(input_path = in_r, polygon = poly, out_path = out_f, mask = TRUE)
+
expect_true(file.exists(out_f))
+
r_out <- terra::rast(out_f)
- # extent == polygon bbox
expect_equal(terra::ext(r_out), terra::ext(sf::st_bbox(poly)))
- # some values NA (corners) and some not (center)
- vals <- terra::values(r_out)
- expect_true(any(is.na(vals)))
- expect_true(any(!is.na(vals)))
- unlink(out_f)
+
+ inside_vals <- terra::values(terra::mask(r_out, poly, inverse = FALSE))
+ expect_true(all(!is.na(inside_vals)))
+
+ outside_vals <- terra::values(terra::mask(r_out, poly, inverse = TRUE))
+ expect_true(all(is.na(outside_vals)))
})
test_that("clip without mask retains all values within bbox", {
- in_r <- make_raster()
- on.exit(unlink(in_r), add = TRUE)
- poly <- sf::st_as_sfc(sf::st_bbox(c(xmin=1, ymin=1, xmax=3, ymax=3)), crs = "EPSG:4326")
- out_f <- tempfile(fileext = ".tif")
- clip_and_move_raster_files(in_r, poly, out_f, mask = FALSE)
+ in_r <- local_tempfile(fileext = ".tif")
+ make_raster(outfile = in_r)
+
+ poly <- sf::st_as_sf(
+ sf::st_as_sfc(
+ sf::st_bbox(c(xmin = 1, ymin = 1, xmax = 3, ymax = 3), crs = sf::st_crs(4326))
+ )
+ )
+ out_f <- local_tempfile(fileext = ".tif")
+
+ clip_and_move_raster_file(in_r, poly, out_f, mask = FALSE)
r_out <- terra::rast(out_f)
expect_false(any(is.na(terra::values(r_out))))
- unlink(in_r); unlink(out_f)
})
test_that("preserves CRS and filetype", {
- in_r <- make_raster(crs = "EPSG:3857")
- on.exit(unlink(in_r), add = TRUE)
- poly <- sf::st_as_sfc(sf::st_bbox(c(xmin=0, ymin=0, xmax=2, ymax=2)), crs = "EPSG:3857")
- out_f <- tempfile(fileext = ".tif")
- clip_and_move_raster_files(in_r, poly, out_f)
- r_out <- terra::rast(out_f)
- expect_true(terra::same.crs(r_out, terra::rast(in_r)))
- # file extension implies GTiff
- expect_true(grepl("\\.tif$", out_f))
- unlink(in_r); unlink(out_f)
-})
+ in_r_path <- local_tempfile(fileext = ".tif")
+ make_raster(outfile = in_r_path, crs = "EPSG:3857")
+
+ spatvect_raster <- terra::rast(in_r_path)
+
+ poly <- sf::st_as_sf(
+ sf::st_as_sfc(
+ sf::st_bbox(c(xmin = 1, ymin = 1, xmax = 3, ymax = 3), crs = sf::st_crs(3857))
+ )
+ )
+ out_f_path <- local_tempfile(fileext = ".tif")
+
+ clip_and_move_raster_file(input_path = in_r_path, polygon = poly, out_path = out_f_path)
+ r_out <- terra::rast(out_f_path)
+
+ expect_equal(
+ tools::file_ext(terra::sources(r_out)[1]),
+ tools::file_ext(terra::sources(spatvect_raster)[1])
+ )
+ expect_true(terra::same.crs(r_out, spatvect_raster))
+})
\ No newline at end of file
From ca19ebabcd9dedac0afe52b6ffba9ba9130d57ad Mon Sep 17 00:00:00 2001
From: David LeBauer
Date: Tue, 17 Jun 2025 17:18:56 -0400
Subject: [PATCH 0190/1193] only run test-match_species_id if db con is
available
---
modules/data.land/DESCRIPTION | 3 ++-
.../tests/testthat/test-match_species_id.R | 16 ++++++++++++----
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index 700be8b56a7..e681e1ddab9 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -68,7 +68,8 @@ Suggests:
redland,
raster,
reticulate,
- testthat (>= 1.0.2)
+ testthat (>= 1.0.2),
+ withr
Remotes:
github::ropensci/traits
License: BSD_3_clause + file LICENSE
diff --git a/modules/data.land/tests/testthat/test-match_species_id.R b/modules/data.land/tests/testthat/test-match_species_id.R
index ae3ba6dafb3..53285ff6763 100644
--- a/modules/data.land/tests/testthat/test-match_species_id.R
+++ b/modules/data.land/tests/testthat/test-match_species_id.R
@@ -22,8 +22,14 @@ test_that("Species matching works", {
user = "bety",
password = "bety",
host = "localhost",
- driver = "Postgres")
- con <- PEcAn.DB::db.open(db_params)
+ driver = "Postgres"
+ )
+ con <- tryCatch(
+ PEcAn.DB::db.open(db_params),
+ error = function(e) NULL
+ )
+
+ skip_if(is.null(con), "No database connection available for species matching tests.")
test_merge(c('ACRU', 'TSCA'), 'usda', con)
test_merge(c(316L, 261L), 'fia', con)
@@ -31,11 +37,13 @@ test_that("Species matching works", {
test_table <- data.frame(
bety_species_id = c(30L, 1419L),
- input_code = c('AceRub', 'TsuCan'))
+ input_code = c('AceRub', 'TsuCan')
+ )
test_merge(
input_codes = test_table$input_code,
format_name = 'custom',
bety = con,
- translation_table = test_table)
+ translation_table = test_table
+ )
})
From 1dfd8247e6f24fb86e3c8ecbf50ec6ae93cca2aa Mon Sep 17 00:00:00 2001
From: David LeBauer
Date: Tue, 17 Jun 2025 17:23:56 -0400
Subject: [PATCH 0191/1193] update dependencies
---
docker/depends/pecan_package_dependencies.csv | 1 +
1 file changed, 1 insertion(+)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 01e80a9433e..0cc67d24cab 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -676,6 +676,7 @@
"withr","*","models/sipnet","Suggests",FALSE
"withr","*","modules/allometry","Suggests",FALSE
"withr","*","modules/data.atmosphere","Suggests",FALSE
+"withr","*","modules/data.land","Suggests",FALSE
"XML","*","base/workflow","Imports",FALSE
"XML","*","models/biocro","Imports",FALSE
"XML","*","models/maat","Imports",FALSE
From c9969b402dae8f95dada33a26c8f0e56509ca38a Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Wed, 18 Jun 2025 11:28:54 -0700
Subject: [PATCH 0192/1193] MODIS_LAI_prep: rename skip.download to
skip_download, change default to FALSE
---
modules/data.remote/DESCRIPTION | 2 +-
modules/data.remote/NEWS.md | 7 +++++++
modules/data.remote/R/MODIS_LAI_prep.R | 16 +++++++++-------
modules/data.remote/man/MODIS_LAI_prep.Rd | 4 ++--
4 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION
index bff565d6969..293468f2b8a 100644
--- a/modules/data.remote/DESCRIPTION
+++ b/modules/data.remote/DESCRIPTION
@@ -1,7 +1,7 @@
Package: PEcAn.data.remote
Type: Package
Title: PEcAn Functions Used for Extracting Remote Sensing Data
-Version: 1.9.0
+Version: 1.9.1
Authors@R: c(person("Mike", "Dietze", role = c("aut"),
email = "dietze@bu.edu"),
person("Bailey", "Morrison", role = c("aut", "cre"),
diff --git a/modules/data.remote/NEWS.md b/modules/data.remote/NEWS.md
index 0a1602b0147..f81b9b3d3eb 100644
--- a/modules/data.remote/NEWS.md
+++ b/modules/data.remote/NEWS.md
@@ -1,3 +1,10 @@
+# PEcAn.data.remote 1.9.1
+
+* `MODIS_LAI_prep` gains two arguments:
+ - `skip_download` (with default FALSE) to work offline from an existing file named "LAI.csv".
+ - `boundary` (with default NULL, ie no effect) to set upper and lower quantiles for trimming LAI data
+
+
# PEcAn.data.remote 1.9.0
* Refactored GEDI, LAI, and SMAP workflows for more efficient parallel processing
diff --git a/modules/data.remote/R/MODIS_LAI_prep.R b/modules/data.remote/R/MODIS_LAI_prep.R
index 9f18b5ffb60..ed116add237 100644
--- a/modules/data.remote/R/MODIS_LAI_prep.R
+++ b/modules/data.remote/R/MODIS_LAI_prep.R
@@ -6,7 +6,7 @@
#' @param search_window numeric: search window for locate available LAI values.
#' @param export_csv boolean: decide if we want to export the CSV file.
#' @param sd_threshold numeric or character: for filtering out any estimations with unrealistic high standard error, default is 20. The QC check will be skipped if it's set as NULL.
-#' @param skip.download boolean: determine if we want to use existing LAI.csv file and skip the MODIS LAI download part.
+#' @param skip_download boolean: determine if we want to use existing LAI.csv file and skip the MODIS LAI download part.
#' @param boundary numeric vector or list: the upper and lower quantiles for filtering out noisy LAI values (e.g., c(0.05, 0.95) or list(0.05, 0.95)). The default is NULL.
#'
#' @return A data frame containing LAI and sd for each site and each time step.
@@ -14,7 +14,7 @@
#'
#' @author Dongchen Zhang
#' @importFrom magrittr %>%
-MODIS_LAI_prep <- function(site_info, time_points, outdir = NULL, search_window = 30, export_csv = FALSE, sd_threshold = 20, skip.download = TRUE, boundary = NULL){
+MODIS_LAI_prep <- function(site_info, time_points, outdir = NULL, search_window = 30, export_csv = FALSE, sd_threshold = 20, skip_download = FALSE, boundary = NULL){
# unlist boundary if it's passing from the assembler function.
if (is.list(boundary)) {
boundary <- as.numeric(unlist(boundary))
@@ -56,8 +56,9 @@ MODIS_LAI_prep <- function(site_info, time_points, outdir = NULL, search_window
if (!is.null(boundary)) {
Previous_CSV <- MODIS_LAI_ts_filter(Previous_CSV, boundary = boundary)
}
- LAI_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>%
- `colnames<-`(c("site_id", paste0(time_points, "_LAI"), paste0(time_points, "_SD"))) %>% as.data.frame()#we need: site_id, LAI, std, target time point.
+ LAI_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>%
+ `colnames<-`(c("site_id", paste0(time_points, "_LAI"), paste0(time_points, "_SD"))) %>%
+ as.data.frame()#we need: site_id, LAI, std, target time point.
LAI_Output$site_id <- site_info$site_id
#Calculate LAI for each time step and site.
#loop over time and site
@@ -81,14 +82,15 @@ MODIS_LAI_prep <- function(site_info, time_points, outdir = NULL, search_window
LAI_Output[, paste0(t, "_SD")] <- LAI.list[[i]][,paste0(t, "_SD")]
}
}else{#we don't have any previous downloaded CSV file.
- LAI_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>%
- `colnames<-`(c("site_id", paste0(time_points, "_LAI"), paste0(time_points, "_SD"))) %>% as.data.frame()#we need: site_id, LAI, std, target time point.
+ LAI_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>%
+ `colnames<-`(c("site_id", paste0(time_points, "_LAI"), paste0(time_points, "_SD"))) %>%
+ as.data.frame()#we need: site_id, LAI, std, target time point.
LAI_Output$site_id <- site_info$site_id
}
#only Site that has NA for any time points need to be downloaded.
new_site_info <- site_info %>% purrr::map(function(x)x[!stats::complete.cases(LAI_Output)])
#TODO: only download data for specific date when we have missing data.
- if(length(new_site_info$site_id) != 0 && !skip.download){
+ if(length(new_site_info$site_id) != 0 && !skip_download){
product <- "MCD15A3H"
PEcAn.logger::logger.info("Extracting LAI mean products!")
lai_mean <- split(as.data.frame(new_site_info), seq(nrow(as.data.frame(new_site_info)))) %>%
diff --git a/modules/data.remote/man/MODIS_LAI_prep.Rd b/modules/data.remote/man/MODIS_LAI_prep.Rd
index 25140470b08..1f9f8e4869a 100644
--- a/modules/data.remote/man/MODIS_LAI_prep.Rd
+++ b/modules/data.remote/man/MODIS_LAI_prep.Rd
@@ -11,7 +11,7 @@ MODIS_LAI_prep(
search_window = 30,
export_csv = FALSE,
sd_threshold = 20,
- skip.download = TRUE,
+ skip_download = FALSE,
boundary = NULL
)
}
@@ -28,7 +28,7 @@ MODIS_LAI_prep(
\item{sd_threshold}{numeric or character: for filtering out any estimations with unrealistic high standard error, default is 20. The QC check will be skipped if it's set as NULL.}
-\item{skip.download}{boolean: determine if we want to use existing LAI.csv file and skip the MODIS LAI download part.}
+\item{skip_download}{boolean: determine if we want to use existing LAI.csv file and skip the MODIS LAI download part.}
\item{boundary}{numeric vector or list: the upper and lower quantiles for filtering out noisy LAI values (e.g., c(0.05, 0.95) or list(0.05, 0.95)). The default is NULL.}
}
From 3b677a32ad670270860dfccc2dd9c18be26ab13f Mon Sep 17 00:00:00 2001
From: Aritra Dey
Date: Thu, 19 Jun 2025 21:44:11 +0530
Subject: [PATCH 0193/1193] added posterior data
Signed-off-by: Aritra Dey
---
.../quarto/01_Demo_Basic_Run/pecan.xml | 130 +++++++++---------
.../temperate.coniferous/prior.distns.Rdata | Bin 0 -> 559 bytes
.../quarto/01_Demo_Basic_Run/run_pecan.qmd | 9 ++
3 files changed, 72 insertions(+), 67 deletions(-)
create mode 100644 documentation/quarto/01_Demo_Basic_Run/pft/temperate.coniferous/prior.distns.Rdata
diff --git a/documentation/quarto/01_Demo_Basic_Run/pecan.xml b/documentation/quarto/01_Demo_Basic_Run/pecan.xml
index 34ced299212..87c55893632 100644
--- a/documentation/quarto/01_Demo_Basic_Run/pecan.xml
+++ b/documentation/quarto/01_Demo_Basic_Run/pecan.xml
@@ -1,71 +1,67 @@
-
-
- -1
-
- 2025/05/26 08:25:18 +0000
-
- /data/workflows/PEcAn_99000000001
-
-
- temperate.broadleaf.deciduous
-
-
-
- 3000
-
- FALSE
- TRUE
-
- FALSE
- 1.2
-
-
- 1
- NPP
-
-
- none
-
+
+
+ -1
+
+ 2025-06-19-15-34-01
+
+ /data/workflows/
+
+
+ temperate.coniferous
+ /pecan/documentation/quarto/01_Demo_Basic_Run/pft/temperate.coniferous
+
+
+
+ 99000000003
+ SIPNET
+ git
+ FALSE
+ /usr/local/bin/sipnet.git
+
+
+ 2025-06-19-15-34-01
+
+
+
+ 772
+ 2004/01/01
+ 2004/12/31
+ Niwot Ridge Forest/LTER NWT1 (US-NR1)
+ 40.0329
+ -105.546
+
+
- none
-
-
-
-
- 99000000003
- SIPNET
- FALSE
- sipnet
-
-
- 99000000001
-
-
-
- 772
- 2004/01/01
- 2006/12/31
- Niwot Ridge Forest/LTER NWT1 (US-NR1)
- 40.0329
- -105.546
-
-
-
- AmerifluxLBL
-
- Aritra_2004
-
-
- 2004/01/01
- 2006/12/31
-
-
- localhost
-
- amqp://guest:guest@rabbitmq/%2F
- SIPNET_git
-
-
+ AmerifluxLBL
+
+ Aritra_2004
+
+ 99000000006
+
+
+ /data/dbfiles/AmerifluxLBL_SIPNET_site_0-772/AMF_US-NR1_BASE_HH_23-5.2004-01-01.2004-12-31.clim
+
+
+
+ 2004/01/01
+ 2004/12/31
+
+
+ localhost
+
+ amqp://guest:guest@rabbitmq/%2F
+ SIPNET_git
+
+ /data/workflows/2025-06-19-15-34-01/run
+ /data/workflows/2025-06-19-15-34-01/out
+
+
+ TRUE
+ TRUE
+ TRUE
+
+ /data/workflows/2025-06-19-15-34-01/run
+ /data/workflows/2025-06-19-15-34-01/out
\ No newline at end of file
diff --git a/documentation/quarto/01_Demo_Basic_Run/pft/temperate.coniferous/prior.distns.Rdata b/documentation/quarto/01_Demo_Basic_Run/pft/temperate.coniferous/prior.distns.Rdata
new file mode 100644
index 0000000000000000000000000000000000000000..1648cc1dfa819d040016ca0436b57459da2cce23
GIT binary patch
literal 559
zcmV+~0?_>*iwFP!000001FcidPt-sZpLJV-`Vqpe1~t)xUfn2MNC;^+t{4&$*#$Rn
zYG&IhOlW7C=`6d!iwF5jynFUP@PdgqxDt5JF{SS!GnVm5J;(8_Q1@5VEc*r-UptrS#2IP%dgA%_PedzoKH6l4&Is;8-0Z
zG1wD5LOb0!Hc*$*=r495#f}*liQ$d2|Do>xwGq0VkL=mbQu^V@anDZ=9)4<`p)tn;
z?Otm7lDD#YsLmIIyWi&Z@v5VF?r}3O-14Tuf4_h7>SOh-`yKp?NS~q}^mo6i6WojZ
zxqsqqK|fDGH$cytKJqb658QLD9(j`xKReX@4)|M!f21$KTLk|r;E?B8kOy-O&37yB
z@5(IgzEjU(QL)cT9v9<(lESLm3@1eGsNPM>1oh^Eb3MfZCwRo`fLCILa#Mo$YQrVX
zy3cGYMDF;4WT_wFKyqP_nBd5lUBUPs5x&5Z0CGogE_DV~QOemcIHSSerCId^_radk
z#fv5UWDfB1`f7df3n}UgaBj2{KjuAOk|c%DWL|bz;K2NWX~s5Qr_wZ=Dekyt?~=Zx
zEYRVAlPIDACCsE&c5oc|8CJtwWv
Date: Tue, 24 Jun 2025 22:38:16 +0530
Subject: [PATCH 0194/1193] fix: target link
---
.../05_developer_workflows/01_update_pecan_code.Rmd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/book_source/02_demos_tutorials_workflows/05_developer_workflows/01_update_pecan_code.Rmd b/book_source/02_demos_tutorials_workflows/05_developer_workflows/01_update_pecan_code.Rmd
index 32b84955aab..161a55e9b4a 100644
--- a/book_source/02_demos_tutorials_workflows/05_developer_workflows/01_update_pecan_code.Rmd
+++ b/book_source/02_demos_tutorials_workflows/05_developer_workflows/01_update_pecan_code.Rmd
@@ -2,7 +2,7 @@
Release notes for all releases can be found [here](https://github.com/PecanProject/pecan/releases).
-This page will only list any steps you have to do to upgrade an existing system. When updating PEcAn it is highly encouraged to update BETY. You can find instructions on how to do this, as well on how to update the database in the [Updating BETYdb](https://pecan.gitbooks.io/betydb-documentation/content/updating_betydb_when_new_versions_are_released.html) gitbook page.
+This page will only list any steps you have to do to upgrade an existing system. When updating PEcAn it is highly encouraged to update BETY. You can find instructions on how to do this, as well on how to update the database in the [Updating BETYdb](https://pecan.gitbook.io/betydbdoc-dataentry) gitbook page.
### Updating PEcAn {#pecan-make}
From 3cd8748eae45ca82206cb49da365ee41c351d22f Mon Sep 17 00:00:00 2001
From: harsh agrawal <73224301+harshagr70@users.noreply.github.com>
Date: Wed, 25 Jun 2025 12:40:25 +0530
Subject: [PATCH 0195/1193] Update multisite-workflow-example.qmd
Co-authored-by: Chris Black
---
.../multisite-workflow/multisite-workflow-example.qmd | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
index 415d7615616..29192f63994 100644
--- a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
+++ b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
@@ -6,9 +6,11 @@ editor: visual
## Introduction
-This notebook demonstrates how to use a flat-file (`site_info.csv`) to set up a multisite workflow in PEcAn, without querying the database.
+This notebook demonstrates how to set up a multisite workflow in PEcAn that uses a subset of the sites defined in a flat-file (`site_info.csv`), without querying the database.
You will learn how to read site metadata, filter by group, and create a MultiSettings object for your runs.
+Sites stored in the BETY database can be queried by `sitegroup` to retrieve predefined sets of sites. Inside the database a sitegroup is nothing but a name given to a list of siteIDs, so in a flat file context we can do the same thing by adding one or more grouping columns.
+
---
## Example `site_info.csv`
From f585a611127747a010ff7b625afc95e56ded2b79 Mon Sep 17 00:00:00 2001
From: harsh agrawal <73224301+harshagr70@users.noreply.github.com>
Date: Wed, 25 Jun 2025 17:10:10 +0530
Subject: [PATCH 0196/1193] added-example-as-tribble
Co-authored-by: Chris Black
---
.../multisite-workflow-example.qmd | 20 +++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
index 29192f63994..8f099f4affe 100644
--- a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
+++ b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
@@ -17,14 +17,18 @@ Sites stored in the BETY database can be queried by `sitegroup` to retrieve pred
Suppose you have a CSV file like this:
-| id | lat | lon | site_group |
-|-----|-------|--------|------------|
-| 101 | 40.1 | -88.2 | NEON |
-| 102 | 41.2 | -87.9 | NEON |
-| 201 | 39.9 | -90.0 | Ameriflux |
-| 202 | 38.5 | -89.5 | Ameriflux |
-
-Save this as `site_info.csv` in your working directory.
+```{r}
+# For this demo:
+site_info <- tibble::tribble(
+ ~id, ~lat, ~lon, ~elev, ~site_group,
+ 101, 40.1, -88.2, 10, "NEON",
+ 102, 41.2, -87.9, 1980, "NEON",
+ 201, 39.9, -90.0, 925, "Ameriflux",
+ 202, 38.5, -89.5, 1108, "Ameriflux"
+)
+# In a real workflow:
+# site_info <- read.csv("site_info.csv")
+```
---
From 5f1403a4ff7ce8147a2a435d07af2db0908022a6 Mon Sep 17 00:00:00 2001
From: harsh agrawal <73224301+harshagr70@users.noreply.github.com>
Date: Wed, 25 Jun 2025 17:12:04 +0530
Subject: [PATCH 0197/1193] removed redundant lines
Co-authored-by: Chris Black
---
.../multisite-workflow/multisite-workflow-example.qmd | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
index 8f099f4affe..e85393a4a00 100644
--- a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
+++ b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
@@ -42,10 +42,9 @@ library(dplyr)
---
-## Add the following lines to your workflow code .
+## Filter by one group
```{r}
-site_info <- read.csv("site_info.csv")
# Filter for NEON sites (change "NEON" to your desired group)
selected_sites <- site_info %>% filter(site_group == "NEON")
print(selected_sites) ## prints demo output (not required in main code)
From 28e9caa70ca761760d7164fd7f3495bbb50d1549 Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Wed, 25 Jun 2025 18:47:26 +0530
Subject: [PATCH 0198/1193] added example section
---
.../multisite-workflow-example.qmd | 87 ++++++++++++++++---
1 file changed, 77 insertions(+), 10 deletions(-)
diff --git a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
index e85393a4a00..80f95bc3709 100644
--- a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
+++ b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
@@ -12,9 +12,7 @@ You will learn how to read site metadata, filter by group, and create a MultiSet
Sites stored in the BETY database can be queried by `sitegroup` to retrieve predefined sets of sites. Inside the database a sitegroup is nothing but a name given to a list of siteIDs, so in a flat file context we can do the same thing by adding one or more grouping columns.
---
-
-## Example `site_info.csv`
-
+Example `site_info.csv`
Suppose you have a CSV file like this:
```{r}
@@ -32,7 +30,7 @@ site_info <- tibble::tribble(
---
-## Load Required Libraries
+Load Required Libraries
```{r}
library(dplyr)
@@ -42,7 +40,7 @@ library(dplyr)
---
-## Filter by one group
+Filter by one group
```{r}
# Filter for NEON sites (change "NEON" to your desired group)
@@ -53,22 +51,91 @@ settings <- createMultiSiteSettings(settings, selected_sites) ## populates setti
```
---
+Example of how settings will look like before and after the execution of "createMultiSiteSettings()"
-## (Optional) Integrate with workflow
+Settings object before the execution of 'createMultiSiteSettings()'
```{r}
-settings$site_info <- selected_sites
+# Example: settings object with no site info yet
+
+settings <- list(
+ run = list(
+ start.date = "2025-01-01",
+ end.date = "2025-12-31",
+ inputs = list()
+ # No site info yet!
+ )
+)
+
+str(settings)
+
```
-Now, any downstream function that receives settings can access the filtered site info.
----
+Here this shows the settings object after the execution of "createMultiSiteSettings()"
+```{r}
+# Example: settings object with site info
+
+settings <- list(
+ run = list(
+ list(
+ site = list(id = 101, lat = 40.1, lon = -88.2, elev = 10, site_group = "NEON", met.start = "2025-01-01", met.end = "2025-12-31"),
+ start.date = "2025-01-01",
+ end.date = "2025-12-31",
+ inputs = list()
+ ),
+ list(
+ site = list(id = 102, lat = 41.2, lon = -87.9, elev = 1980, site_group = "NEON", met.start = "2025-01-01", met.end = "2025-12-31"),
+ start.date = "2025-01-01",
+ end.date = "2025-12-31",
+ inputs = list()
+ ),
+ list(
+ site = list(id = 201, lat = 39.9, lon = -90.0, elev = 925, site_group = "Ameriflux", met.start = "2025-01-01", met.end = "2025-12-31"),
+ start.date = "2025-01-01",
+ end.date = "2025-12-31",
+ inputs = list()
+ ),
+ list(
+ site = list(id = 202, lat = 38.5, lon = -89.5, elev = 1108, site_group = "Ameriflux", met.start = "2025-01-01", met.end = "2025-12-31"),
+ start.date = "2025-01-01",
+ end.date = "2025-12-31",
+ inputs = list()
+ )
+ )
+)
+
+str(settings)
+
+```
+
+---
+
+## Group by multiple characterstics
+
+Say you're developing a new parameterization for the conifer PFT and you want to test it specifically at high elevation. By filtering and reassigning, this can be done without any edits to the saved `site_info.csv`:
+
+```{r}
+settings$pfts <- settings$pfts |>
+ append(list(
+ name="new_conifer",
+ posterior.files = "/path/to/new_conifer/post.distns.Rdata"))
+neon_hi_elev <- site_info |>
+ filter(
+ site_group == "NEON",
+ elev > 1000) |>
+ mutate(site.pft = "new_conifer")
+settings <- createMultiSiteSettings(settings, neon_hi_elev)
+```
+
+
+---
## Summary
- You can use a flat-file (site_info.csv) to manage multisite workflows in PEcAn.
- Filter your sites in R, then pass the filtered data frame to createMultiSiteSettings().
+- Group your data by multiple characterstics.
- No database queries are needed for site metadata.
-
---
From b19582540254a5bb8513580356698280c8ee3167 Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Wed, 25 Jun 2025 21:26:35 +0530
Subject: [PATCH 0199/1193] updated-multiple-grouping-part
---
.../multisite-workflow-example.qmd | 41 ++++++++++---------
1 file changed, 21 insertions(+), 20 deletions(-)
diff --git a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
index 80f95bc3709..0d77bde6139 100644
--- a/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
+++ b/documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
@@ -50,7 +50,28 @@ settings <- createMultiSiteSettings(settings, selected_sites) ## populates setti
```
+
---
+
+## Group by multiple characterstics
+
+Say you're developing a new parameterization for the conifer PFT and you want to test it specifically at high elevation. By filtering and reassigning, this can be done without any edits to the saved `site_info.csv`:
+
+```{r}
+settings$pfts <- settings$pfts |>
+ append(list(
+ name="new_conifer",
+ posterior.files = "/path/to/new_conifer/post.distns.Rdata"))
+neon_hi_elev <- site_info |>
+ filter(
+ site_group == "NEON",
+ elev > 1000) |>
+ mutate(site.pft = "new_conifer")
+settings <- createMultiSiteSettings(settings, neon_hi_elev)
+```
+
+---
+
Example of how settings will look like before and after the execution of "createMultiSiteSettings()"
Settings object before the execution of 'createMultiSiteSettings()'
@@ -109,26 +130,6 @@ str(settings)
```
----
-
-## Group by multiple characterstics
-
-Say you're developing a new parameterization for the conifer PFT and you want to test it specifically at high elevation. By filtering and reassigning, this can be done without any edits to the saved `site_info.csv`:
-
-```{r}
-settings$pfts <- settings$pfts |>
- append(list(
- name="new_conifer",
- posterior.files = "/path/to/new_conifer/post.distns.Rdata"))
-neon_hi_elev <- site_info |>
- filter(
- site_group == "NEON",
- elev > 1000) |>
- mutate(site.pft = "new_conifer")
-settings <- createMultiSiteSettings(settings, neon_hi_elev)
-```
-
-
---
## Summary
From b866799baacee70f674b38e25f40a3e39ef54410 Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Wed, 25 Jun 2025 23:14:14 +0530
Subject: [PATCH 0200/1193] replaced swfscMisc with terra
---
modules/data.land/R/extract_NEON_veg.R | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/modules/data.land/R/extract_NEON_veg.R b/modules/data.land/R/extract_NEON_veg.R
index c66f1b6ddfd..68ff3b02257 100644
--- a/modules/data.land/R/extract_NEON_veg.R
+++ b/modules/data.land/R/extract_NEON_veg.R
@@ -50,7 +50,9 @@ extract_NEON_veg <- function(lon, lat, start_date, end_date, store_dir, neonsite
neonsites <- neonstore::neon_sites(api = "https://data.neonscience.org/api/v0", .token = Sys.getenv("NEON_TOKEN"))
}
neonsites <- dplyr::select(neonsites, "siteCode", "siteLatitude", "siteLongitude") #select for relevant columns
- betyneondist <- swfscMisc::distance(lat1 = lat, lon1 = lon, lat2 = neonsites$siteLatitude, lon2 = neonsites$siteLongitude)
+ pt1 <- terra::vect(matrix(c(lon1 = lon, lat1 = lat) , ncol = 2), type = "points", crs = "EPSG:4326")
+ pt2 <- terra::vect(matrix(c(lon2 = neonsites$siteLongitude, lat2 = neonsites$siteLatitude) , ncol = 2), type = "points", crs = "EPSG:4326")
+ betyneondist <- distance(p1, p2)
mindist <- min(betyneondist)
distloc <- match(mindist, betyneondist)
lat <- neonsites$siteLatitude[distloc]
From 30c5356f3e3889cdd8131b52c5ef0a6482ec1e47 Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Wed, 25 Jun 2025 23:21:39 +0530
Subject: [PATCH 0201/1193] added changes
---
modules/data.land/R/extract_NEON_veg.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/data.land/R/extract_NEON_veg.R b/modules/data.land/R/extract_NEON_veg.R
index 68ff3b02257..fc00eed871e 100644
--- a/modules/data.land/R/extract_NEON_veg.R
+++ b/modules/data.land/R/extract_NEON_veg.R
@@ -52,7 +52,7 @@ extract_NEON_veg <- function(lon, lat, start_date, end_date, store_dir, neonsite
neonsites <- dplyr::select(neonsites, "siteCode", "siteLatitude", "siteLongitude") #select for relevant columns
pt1 <- terra::vect(matrix(c(lon1 = lon, lat1 = lat) , ncol = 2), type = "points", crs = "EPSG:4326")
pt2 <- terra::vect(matrix(c(lon2 = neonsites$siteLongitude, lat2 = neonsites$siteLatitude) , ncol = 2), type = "points", crs = "EPSG:4326")
- betyneondist <- distance(p1, p2)
+ betyneondist <- terra::distance(p1, p2)
mindist <- min(betyneondist)
distloc <- match(mindist, betyneondist)
lat <- neonsites$siteLatitude[distloc]
From f50a319db65e011946d9416d3513daf192517ce9 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 11:59:23 -0400
Subject: [PATCH 0202/1193] Add xgboost to the description.
---
docker/depends/pecan_package_dependencies.csv | 1 +
modules/assim.sequential/DESCRIPTION | 1 +
2 files changed, 2 insertions(+)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 7b7ad3a8b4e..04f132622c6 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -672,6 +672,7 @@
"withr","*","models/sipnet","Suggests",FALSE
"withr","*","modules/allometry","Suggests",FALSE
"withr","*","modules/data.atmosphere","Suggests",FALSE
+"xgboost","*","modules/assim.sequential","Suggests",FALSE
"XML","*","base/workflow","Imports",FALSE
"XML","*","models/biocro","Imports",FALSE
"XML","*","models/maat","Imports",FALSE
diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index dfa454384cd..0a118670bf6 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -61,6 +61,7 @@ Suggests:
tidyr,
sp,
utils,
+ xgboost,
XML
License: BSD_3_clause + file LICENSE
Copyright: Authors
From 9d02fbff07dcac7c8d349618f11462e9c00c2c37 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 12:00:04 -0400
Subject: [PATCH 0203/1193] Add the xgboost method to the downscale function
and rename the previous RF functions to a more general name.
---
.../R/SDA_parallel_downscale.R | 59 ++++++++++++++-----
.../inst/anchor/NA_downscale_script.R | 6 +-
...downscale_rf_main.Rd => downscale_main.Rd} | 9 ++-
...parallel_rf_train.Rd => parallel_train.Rd} | 14 +++--
4 files changed, 65 insertions(+), 23 deletions(-)
rename modules/assim.sequential/man/{downscale_rf_main.Rd => downscale_main.Rd} (86%)
rename modules/assim.sequential/man/{parallel_rf_train.Rd => parallel_train.Rd} (66%)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index a11b209f099..1bd163bcf3a 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -270,15 +270,16 @@ prepare_train_dat <- function(pts, analysis, covariates.dir, variable) {
#' @description
#' This function helps to train the ML model across ensemble members in parallel.
-#' @title parallel_rf_train
+#' @title parallel_train
#'
#' @param full_data numeric: the matrix generated using the `prepare_train_dat` function.
+#' @param method: character: machine learning method (currently support randomForest and xgboost).
#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
#'
#' @return list of trained models across ensemble members.
#'
#' @author Dongchen Zhang
-parallel_rf_train <- function(full_data, cores = parallel::detectCores()) {
+parallel_train <- function(full_data, method = "randomForest", cores = parallel::detectCores()) {
# grab ensemble and predictor index.
col.names <- colnames(full_data)
ensemble.inds <- which(grepl("ensemble", col.names, fixed = TRUE))
@@ -296,16 +297,40 @@ parallel_rf_train <- function(full_data, cores = parallel::detectCores()) {
opts <- list(progress=progress)
# foreach loop.
models <- foreach::foreach(i = ensemble.inds,
- .packages=c("Kendall", "stats", "randomForest"),
+ .packages=c("Kendall", "stats", method),
.options.snow=opts) %dopar% {
ensemble_col <- col.names[ensemble.inds[i]]
- formula <- stats::as.formula(paste(ensemble_col, "~", paste(col.names[predictor.inds], collapse = " + ")))
- randomForest::randomForest(formula,
- data = full_data,
- ntree = 1000,
- na.action = stats::na.omit,
- keep.forest = TRUE,
- importance = TRUE)
+ predictor_col <- col.names[predictor.inds]
+ # if it's randomForest.
+ if (method == "randomForest") {
+ formula <- stats::as.formula(paste(ensemble_col, "~", paste(predictor_col, collapse = " + ")))
+ model <- randomForest::randomForest(formula,
+ data = full_data,
+ ntree = 1000,
+ na.action = stats::na.omit,
+ keep.forest = TRUE,
+ importance = TRUE)
+ }
+ # if it's xgboost.
+ if (method == "xgboost") {
+ formula <- stats::as.formula(paste0("~ ", paste(predictor_col, collapse = " + "), " - 1"))
+ train.df <- model.matrix(pred_formula, data = full_data)
+ train.df <- xgb.DMatrix(data = train.df, label = full_data[[ensemble_col]])
+ model <- xgb.train(
+ params = list(
+ objective = "reg:squarederror",
+ eta = 0.1,
+ max_depth = 6,
+ subsample = 0.8,
+ colsample_bytree = 0.8
+ ),
+ data = train.df,
+ nrounds = 1000,
+ nthread = 1,
+ verbose = 0
+ )
+ }
+ model
}
# stop parallel.
parallel::stopCluster(cl)
@@ -383,12 +408,18 @@ parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
#' @param variable: character: name of state variable. It should match up with the column names of the analysis data frame.
#' @param outdir character: the output directory where the downscaled maps will be stored.
#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
+#' @param method: character: machine learning method, default is randomForest (currently support randomForest and xgboost).
#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
#'
#' @return paths to the ensemble downscaled maps.
#'
#' @author Dongchen Zhang
-downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) {
+downscale_main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, method = "randomForest", cores = parallel::detectCores()) {
+ # check ML package.
+ if (!require(method, character.only = T)) {
+ PEcAn.logger::logger.info(paste("The package:", method, "is not installed."))
+ return(0)
+ }
# create folder specific for the time and carbon type.
folder.name <- file.path(outdir, paste0(c(variable, time), collapse = "_"))
if (!file.exists(folder.name)) {
@@ -408,10 +439,10 @@ downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable
}
# parallel train.
PEcAn.logger::logger.info("Parallel training.")
- models <- parallel_rf_train(full_data = full_data, cores = cores)
+ models <- parallel_rf_train(full_data = full_data, method = method, cores = cores)
# save trained models for future analysis.
# saveRDS(models, file.path(folder.name, "rf_models.rds"))
- save(models, file = file.path(folder.name, "rf_models.Rdata"))
+ save(models, file = file.path(folder.name, "ml_models.Rdata"))
# convert stacked covariates geotiff file into data frame.
PEcAn.logger::logger.info("Converting geotiff to df.")
cov.df <- stack_covariates_2_df(rast.dir = covariates.dir, cores = cores)
@@ -454,6 +485,6 @@ downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable
#' @author Dongchen Zhang
downscale_qsub_main <- function(folder.path) {
dat <- readRDS(file.path(folder.path, "dat.rds"))
- out <- downscale_rf_main(dat$settings, dat$analysis.yr, dat$covariates.dir, lubridate::year(dat$time), dat$variable, dat$outdir, dat$base.map.dir, dat$cores)
+ out <- downscale_main(dat$settings, dat$analysis.yr, dat$covariates.dir, lubridate::year(dat$time), dat$variable, dat$outdir, dat$base.map.dir, dat$method, dat$cores)
saveRDS(out, file.path(folder.path, "res.rds"))
}
\ No newline at end of file
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index a63c4f85b99..5add9112a91 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -223,7 +223,8 @@ for (y in 2012:2024) {
cores = parallel::detectCores())
}
-# setup.
+# setup parallel downscaling.
+method <- "randomForest"
base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/sda.all.forecast.analysis.Rdata")
variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
@@ -251,7 +252,8 @@ for (i in seq_along(date)) {
time = time,
variable = variable,
folder.path = folder.path,
- base.map.dir = base.map.dir,
+ base.map.dir = base.map.dir,
+ method = method,
cores = cores,
outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")),
file = file.path(folder.path, "dat.rds"))
diff --git a/modules/assim.sequential/man/downscale_rf_main.Rd b/modules/assim.sequential/man/downscale_main.Rd
similarity index 86%
rename from modules/assim.sequential/man/downscale_rf_main.Rd
rename to modules/assim.sequential/man/downscale_main.Rd
index 99962b4cc4e..306d2d1efe5 100644
--- a/modules/assim.sequential/man/downscale_rf_main.Rd
+++ b/modules/assim.sequential/man/downscale_main.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_parallel_downscale.R
-\name{downscale_rf_main}
-\alias{downscale_rf_main}
+\name{downscale_main}
+\alias{downscale_main}
\title{downscale_rf_main}
\usage{
-downscale_rf_main(
+downscale_main(
settings,
analysis,
covariates.dir,
@@ -12,6 +12,7 @@ downscale_rf_main(
variable,
outdir,
base.map.dir,
+ method = "randomForest",
cores = parallel::detectCores()
)
}
@@ -31,6 +32,8 @@ downscale_rf_main(
\item{time:}{character: the time tag used to differentiate the outputs from others.}
\item{variable:}{character: name of state variable. It should match up with the column names of the analysis data frame.}
+
+\item{method:}{character: machine learning method, default is randomForest (currently support randomForest and xgboost).}
}
\value{
paths to the ensemble downscaled maps.
diff --git a/modules/assim.sequential/man/parallel_rf_train.Rd b/modules/assim.sequential/man/parallel_train.Rd
similarity index 66%
rename from modules/assim.sequential/man/parallel_rf_train.Rd
rename to modules/assim.sequential/man/parallel_train.Rd
index a001a78d365..e80861e4680 100644
--- a/modules/assim.sequential/man/parallel_rf_train.Rd
+++ b/modules/assim.sequential/man/parallel_train.Rd
@@ -1,15 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SDA_parallel_downscale.R
-\name{parallel_rf_train}
-\alias{parallel_rf_train}
-\title{parallel_rf_train}
+\name{parallel_train}
+\alias{parallel_train}
+\title{parallel_train}
\usage{
-parallel_rf_train(full_data, cores = parallel::detectCores())
+parallel_train(
+ full_data,
+ method = "randomForest",
+ cores = parallel::detectCores()
+)
}
\arguments{
\item{full_data}{numeric: the matrix generated using the `prepare_train_dat` function.}
\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
+
+\item{method:}{character: machine learning method (currently support randomForest and xgboost).}
}
\value{
list of trained models across ensemble members.
From 4cecb88d61b19a15a82ffa4e771a925a5a0eedf8 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 12:23:15 -0400
Subject: [PATCH 0204/1193] Update documentation.
---
modules/assim.sequential/R/SDA_parallel_downscale.R | 6 +++---
modules/assim.sequential/man/downscale_main.Rd | 12 ++++++------
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index 1bd163bcf3a..84c68ed9878 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -404,11 +404,11 @@ parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
#' @param settings character: physical path that points to the pecan settings XML file.
#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
#' @param covariates.dir character: path to the exported covariates GeoTIFF file.
-#' @param time: character: the time tag used to differentiate the outputs from others.
-#' @param variable: character: name of state variable. It should match up with the column names of the analysis data frame.
+#' @param time character: the time tag used to differentiate the outputs from others.
+#' @param variable character: name of state variable. It should match up with the column names of the analysis data frame.
#' @param outdir character: the output directory where the downscaled maps will be stored.
#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
-#' @param method: character: machine learning method, default is randomForest (currently support randomForest and xgboost).
+#' @param method character: machine learning method, default is randomForest (currently support randomForest and xgboost).
#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
#'
#' @return paths to the ensemble downscaled maps.
diff --git a/modules/assim.sequential/man/downscale_main.Rd b/modules/assim.sequential/man/downscale_main.Rd
index 306d2d1efe5..f782cee0013 100644
--- a/modules/assim.sequential/man/downscale_main.Rd
+++ b/modules/assim.sequential/man/downscale_main.Rd
@@ -23,17 +23,17 @@ downscale_main(
\item{covariates.dir}{character: path to the exported covariates GeoTIFF file.}
-\item{outdir}{character: the output directory where the downscaled maps will be stored.}
+\item{time}{character: the time tag used to differentiate the outputs from others.}
-\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.}
+\item{variable}{character: name of state variable. It should match up with the column names of the analysis data frame.}
-\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
+\item{outdir}{character: the output directory where the downscaled maps will be stored.}
-\item{time:}{character: the time tag used to differentiate the outputs from others.}
+\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.}
-\item{variable:}{character: name of state variable. It should match up with the column names of the analysis data frame.}
+\item{method}{character: machine learning method, default is randomForest (currently support randomForest and xgboost).}
-\item{method:}{character: machine learning method, default is randomForest (currently support randomForest and xgboost).}
+\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
}
\value{
paths to the ensemble downscaled maps.
From 401f0fb93b2a18a96f91ea8102b20437ef9b4beb Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Thu, 26 Jun 2025 21:55:04 +0530
Subject: [PATCH 0205/1193] updated data.land DESCRIPTION
---
modules/data.land/DESCRIPTION | 1 -
1 file changed, 1 deletion(-)
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index 700be8b56a7..21fe084eadc 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -40,7 +40,6 @@ Imports:
ncdf4 (>= 1.15),
neonUtilities,
neonstore,
- swfscMisc,
PEcAn.benchmark,
PEcAn.DB,
PEcAn.logger,
From 4ae6f880d5d13ce744655b0033b368eb1bfbcaf0 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 12:26:02 -0400
Subject: [PATCH 0206/1193] Update documentation.
---
modules/assim.sequential/R/SDA_parallel_downscale.R | 2 +-
modules/assim.sequential/man/parallel_train.Rd | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index 84c68ed9878..9795726b60a 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -273,7 +273,7 @@ prepare_train_dat <- function(pts, analysis, covariates.dir, variable) {
#' @title parallel_train
#'
#' @param full_data numeric: the matrix generated using the `prepare_train_dat` function.
-#' @param method: character: machine learning method (currently support randomForest and xgboost).
+#' @param method character: machine learning method (currently support randomForest and xgboost).
#' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.
#'
#' @return list of trained models across ensemble members.
diff --git a/modules/assim.sequential/man/parallel_train.Rd b/modules/assim.sequential/man/parallel_train.Rd
index e80861e4680..e4e833e99e0 100644
--- a/modules/assim.sequential/man/parallel_train.Rd
+++ b/modules/assim.sequential/man/parallel_train.Rd
@@ -13,9 +13,9 @@ parallel_train(
\arguments{
\item{full_data}{numeric: the matrix generated using the `prepare_train_dat` function.}
-\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
+\item{method}{character: machine learning method (currently support randomForest and xgboost).}
-\item{method:}{character: machine learning method (currently support randomForest and xgboost).}
+\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
}
\value{
list of trained models across ensemble members.
From 4cf94d59aa38d1c06a331c403a3c4465f0be6011 Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Thu, 26 Jun 2025 22:06:12 +0530
Subject: [PATCH 0207/1193] updated pecan_package_dependencies.csv
---
docker/depends/pecan_package_dependencies.csv | 1 -
1 file changed, 1 deletion(-)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 01e80a9433e..b36ca289b18 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -580,7 +580,6 @@
"stringr",">= 1.1.0","models/ed","Imports",FALSE
"stringr",">= 1.1.0","modules/data.atmosphere","Imports",FALSE
"suntools","*","modules/data.atmosphere","Imports",FALSE
-"swfscMisc","*","modules/data.land","Imports",FALSE
"terra","*","modules/assim.sequential","Suggests",FALSE
"terra","*","modules/data.atmosphere","Imports",FALSE
"terra","*","modules/data.land","Imports",FALSE
From 1e68153035389f591f85694544c6faf24b894815 Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Thu, 26 Jun 2025 22:31:01 +0530
Subject: [PATCH 0208/1193] added import-terra to DESCRIPTION
---
modules/data.land/DESCRIPTION | 1 +
1 file changed, 1 insertion(+)
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index 21fe084eadc..e5313520f5b 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -40,6 +40,7 @@ Imports:
ncdf4 (>= 1.15),
neonUtilities,
neonstore,
+ terra,
PEcAn.benchmark,
PEcAn.DB,
PEcAn.logger,
From fced4e859c537dc40166ea44bb9e6d393a484f93 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 13:03:26 -0400
Subject: [PATCH 0209/1193] Update dependencies.
---
docker/depends/pecan_package_dependencies.csv | 1 +
modules/assim.sequential/DESCRIPTION | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index e6a04f398cb..4bf7a1afd0e 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -123,6 +123,7 @@
"httr","*","modules/data.land","Imports",FALSE
"httr","*","modules/data.remote","Suggests",FALSE
"IDPmisc","*","modules/assim.batch","Imports",FALSE
+"itertools","*","modules/assim.sequential","Suggests",FALSE
"jsonlite","*","base/remote","Imports",FALSE
"jsonlite","*","models/stics","Imports",FALSE
"jsonlite","*","modules/data.atmosphere","Imports",FALSE
diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 021891430df..1c5405cb9c9 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -31,13 +31,17 @@ Imports:
rlang,
stringr
Suggests:
+ abind,
corrplot,
+ doSNOW,
exactextractr,
+ foreach,
ggrepel,
emdbook,
glue,
ggpubr,
gridExtra,
+ itertools,
magic (>= 1.5.0),
methods,
PEcAn.benchmark,
From ae66ecaafcf639ca746dc9c72ac43bebb247c8a0 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 13:04:00 -0400
Subject: [PATCH 0210/1193] Move the ERA5 averaging function to the atmosphere
package.
---
modules/assim.sequential/NAMESPACE | 1 -
modules/data.atmosphere/R/average_ERA5.R | 77 +++++++++++++++++++
.../man/Average_ERA5_2_GeoTIFF.Rd | 2 +-
3 files changed, 78 insertions(+), 2 deletions(-)
create mode 100644 modules/data.atmosphere/R/average_ERA5.R
rename modules/{assim.sequential => data.atmosphere}/man/Average_ERA5_2_GeoTIFF.Rd (93%)
diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index 90512107500..d0c32a470d6 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,7 +1,6 @@
# Generated by roxygen2: do not edit by hand
export(Analysis.sda)
-export(Average_ERA5_2_GeoTIFF)
export(Construct.H.multisite)
export(Construct.R)
export(Construct_H)
diff --git a/modules/data.atmosphere/R/average_ERA5.R b/modules/data.atmosphere/R/average_ERA5.R
new file mode 100644
index 00000000000..a6e32515420
--- /dev/null
+++ b/modules/data.atmosphere/R/average_ERA5.R
@@ -0,0 +1,77 @@
+#' @description
+#' This function helps to average the ERA5 data based on the start and end dates, and convert it to the GeoTIFF file.
+#' @title Average_ERA5_2_GeoTIFF
+#'
+#' @param start.date character: start point of when to average the data (e.g., 2012-01-01).
+#' @param end.date character: end point of when to average the data (e.g., 2021-12-31).
+#' @param in.path character: the directory where your ERA5 data stored (they should named as ERA5_YEAR.nc).
+#' @param outdir character: the output directory where the averaged GeoTIFF file will be generated.
+#'
+#' @return character: path to the exported GeoTIFF file.
+#'
+#' @export
+#' @author Dongchen Zhang
+Average_ERA5_2_GeoTIFF <- function (start.date, end.date, in.path, outdir) {
+ if (!require("abind")) {
+ PEcAn.logger::logger.info("The package: abind is not installed.")
+ return(0)
+ }
+ # create dates.
+ years <- sort(unique(lubridate::year(start.date):lubridate::year(end.date)))
+ # initialize final outcomes.
+ temp.all <- precip.all <- srd.all <- dewpoint.all <- c()
+ # loop over years.
+ for (i in seq_along(years)) {
+ # open ERA5 nc file as geotiff format for referencing crs and ext.
+ ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", years[i], ".nc")))
+ # open ERA5 nc file.
+ met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", years[i], ".nc")))
+ # find index for the date.
+ times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC")
+ time.inds <- which(lubridate::date(times) >= start.date & lubridate::date(times) <= end.date)
+ # extract temperature.
+ PEcAn.logger::logger.info("entering temperature.")
+ temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract precipitation.
+ PEcAn.logger::logger.info("entering precipitation.")
+ precip.all <- abind::abind(precip.all, apply(ncdf4::ncvar_get(met.nc, "tp")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract shortwave solar radiation.
+ PEcAn.logger::logger.info("entering solar radiation.")
+ srd.all <- abind::abind(srd.all, apply(ncdf4::ncvar_get(met.nc, "ssrd")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # extract dewpoint.
+ PEcAn.logger::logger.info("entering dewpoint.")
+ dewpoint.all <- abind::abind(dewpoint.all, apply(ncdf4::ncvar_get(met.nc, "d2m")[,,,time.inds], c(1,2,4), mean), along = 3)
+ # close the NC connection.
+ ncdf4::nc_close(met.nc)
+ }
+ # aggregate across time.
+ # temperature.
+ temp <- apply(temp.all, c(1, 2), mean)
+ temp <- PEcAn.utils::ud_convert(temp, "K", "degC")
+ # precipitation.
+ precip <- apply(precip.all, c(1, 2), mean)
+ # solar radiation.
+ srd <- apply(srd.all, c(1, 2), mean)
+ # dewpoint.
+ dewpoint <- apply(dewpoint.all, c(1, 2), mean)
+ dewpoint <- PEcAn.utils::ud_convert(dewpoint, "K", "degC")
+ # convert dew point to relative humidity.
+ beta <- (112 - (0.1 * temp) + dewpoint) / (112 + (0.9 * temp))
+ relative.humidity <- beta ^ 8
+ VPD <- PEcAn.data.atmosphere::get.vpd(100*relative.humidity, temp)
+ # combine together.
+ PEcAn.logger::logger.info("Aggregate maps.")
+ met.rast <- c(terra::rast(matrix(temp, nrow = dim(temp)[2], ncol = dim(temp)[1], byrow = T)),
+ terra::rast(matrix(precip, nrow = dim(precip)[2], ncol = dim(precip)[1], byrow = T)),
+ terra::rast(matrix(srd, nrow = dim(srd)[2], ncol = dim(srd)[1], byrow = T)),
+ terra::rast(matrix(VPD, nrow = dim(VPD)[2], ncol = dim(VPD)[1], byrow = T)))
+ # adjust crs and extents.
+ terra::crs(met.rast) <- terra::crs(ERA5.tiff)
+ terra::ext(met.rast) <- terra::ext(ERA5.tiff)
+ names(met.rast) <- c("temp", "prec", "srad", "vapr")
+ # write into geotiff file.
+ terra::writeRaster(met.rast, file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff")))
+ # end.
+ gc()
+ return(file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff")))
+}
\ No newline at end of file
diff --git a/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd b/modules/data.atmosphere/man/Average_ERA5_2_GeoTIFF.Rd
similarity index 93%
rename from modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd
rename to modules/data.atmosphere/man/Average_ERA5_2_GeoTIFF.Rd
index 0f2dca9761c..32f34b1f59d 100644
--- a/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd
+++ b/modules/data.atmosphere/man/Average_ERA5_2_GeoTIFF.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/SDA_parallel_downscale.R
+% Please edit documentation in R/average_ERA5.R
\name{Average_ERA5_2_GeoTIFF}
\alias{Average_ERA5_2_GeoTIFF}
\title{Average_ERA5_2_GeoTIFF}
From c50bfb296b7746e0c81a5c73a4db57846102e858 Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Thu, 26 Jun 2025 22:34:28 +0530
Subject: [PATCH 0211/1193] update pecan_package_dependencies.csv
---
docker/depends/pecan_package_dependencies.csv | 1 +
1 file changed, 1 insertion(+)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index b36ca289b18..ed66569569a 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -583,6 +583,7 @@
"terra","*","modules/assim.sequential","Suggests",FALSE
"terra","*","modules/data.atmosphere","Imports",FALSE
"terra","*","modules/data.land","Imports",FALSE
+"terra","*","modules/data.land","Imports",FALSE
"terra","*","modules/data.remote","Imports",FALSE
"testthat","*","base/all","Suggests",FALSE
"testthat","*","base/logger","Suggests",FALSE
From b9602f51af06ac1aaa4ad3cf55879f3c282ac27f Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 13:04:29 -0400
Subject: [PATCH 0212/1193] Update documentation and add package checking code.
---
.../R/SDA_parallel_downscale.R | 90 +++----------------
1 file changed, 14 insertions(+), 76 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index 9795726b60a..a6b69dc42b2 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -1,77 +1,3 @@
-#' @description
-#' This function helps to average the ERA5 data based on the start and end dates, and convert it to the GeoTIFF file.
-#' @title Average_ERA5_2_GeoTIFF
-#'
-#' @param start.date character: start point of when to average the data (e.g., 2012-01-01).
-#' @param end.date character: end point of when to average the data (e.g., 2021-12-31).
-#' @param in.path character: the directory where your ERA5 data stored (they should named as ERA5_YEAR.nc).
-#' @param outdir character: the output directory where the averaged GeoTIFF file will be generated.
-#'
-#' @return character: path to the exported GeoTIFF file.
-#'
-#' @export
-#' @author Dongchen Zhang
-Average_ERA5_2_GeoTIFF <- function (start.date, end.date, in.path, outdir) {
- # create dates.
- years <- sort(unique(lubridate::year(start.date):lubridate::year(end.date)))
- # initialize final outcomes.
- temp.all <- precip.all <- srd.all <- dewpoint.all <- c()
- # loop over years.
- for (i in seq_along(years)) {
- # open ERA5 nc file as geotiff format for referencing crs and ext.
- ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", years[i], ".nc")))
- # open ERA5 nc file.
- met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", years[i], ".nc")))
- # find index for the date.
- times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC")
- time.inds <- which(lubridate::date(times) >= start.date & lubridate::date(times) <= end.date)
- # extract temperature.
- PEcAn.logger::logger.info("entering temperature.")
- temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3)
- # extract precipitation.
- PEcAn.logger::logger.info("entering precipitation.")
- precip.all <- abind::abind(precip.all, apply(ncdf4::ncvar_get(met.nc, "tp")[,,,time.inds], c(1,2,4), mean), along = 3)
- # extract shortwave solar radiation.
- PEcAn.logger::logger.info("entering solar radiation.")
- srd.all <- abind::abind(srd.all, apply(ncdf4::ncvar_get(met.nc, "ssrd")[,,,time.inds], c(1,2,4), mean), along = 3)
- # extract dewpoint.
- PEcAn.logger::logger.info("entering dewpoint.")
- dewpoint.all <- abind::abind(dewpoint.all, apply(ncdf4::ncvar_get(met.nc, "d2m")[,,,time.inds], c(1,2,4), mean), along = 3)
- # close the NC connection.
- ncdf4::nc_close(met.nc)
- }
- # aggregate across time.
- # temperature.
- temp <- apply(temp.all, c(1, 2), mean)
- temp <- PEcAn.utils::ud_convert(temp, "K", "degC")
- # precipitation.
- precip <- apply(precip.all, c(1, 2), mean)
- # solar radiation.
- srd <- apply(srd.all, c(1, 2), mean)
- # dewpoint.
- dewpoint <- apply(dewpoint.all, c(1, 2), mean)
- dewpoint <- PEcAn.utils::ud_convert(dewpoint, "K", "degC")
- # convert dew point to relative humidity.
- beta <- (112 - (0.1 * temp) + dewpoint) / (112 + (0.9 * temp))
- relative.humidity <- beta ^ 8
- VPD <- PEcAn.data.atmosphere::get.vpd(100*relative.humidity, temp)
- # combine together.
- PEcAn.logger::logger.info("Aggregate maps.")
- met.rast <- c(terra::rast(matrix(temp, nrow = dim(temp)[2], ncol = dim(temp)[1], byrow = T)),
- terra::rast(matrix(precip, nrow = dim(precip)[2], ncol = dim(precip)[1], byrow = T)),
- terra::rast(matrix(srd, nrow = dim(srd)[2], ncol = dim(srd)[1], byrow = T)),
- terra::rast(matrix(VPD, nrow = dim(VPD)[2], ncol = dim(VPD)[1], byrow = T)))
- # adjust crs and extents.
- terra::crs(met.rast) <- terra::crs(ERA5.tiff)
- terra::ext(met.rast) <- terra::ext(ERA5.tiff)
- names(met.rast) <- c("temp", "prec", "srad", "vapr")
- # write into geotiff file.
- terra::writeRaster(met.rast, file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff")))
- # end.
- gc()
- return(file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff")))
-}
-
#' @description
#' This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map.
#' @title stack_covariates_2_geotiff
@@ -346,7 +272,7 @@ parallel_train <- function(full_data, method = "randomForest", cores = parallel:
#'
#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
#' @param models list: trained models across ensemble members generated by the `parallel_rf_train` function.
-#' @param cov.vecs: numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.
+#' @param cov.vecs numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.
#' @param non.na.inds numeric: the corresponding index of vegetated pixels generated from the `stack_covariates_2_df` function.
#' @param outdir character: the output directory where the downscaled maps will be stored.
#' @param name list: containing the time and variable name to create the final GeoTIFF file name.
@@ -415,11 +341,23 @@ parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
#'
#' @author Dongchen Zhang
downscale_main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, method = "randomForest", cores = parallel::detectCores()) {
- # check ML package.
+ # check packages.
if (!require(method, character.only = T)) {
PEcAn.logger::logger.info(paste("The package:", method, "is not installed."))
return(0)
}
+ if (!require("itertools")) {
+ PEcAn.logger::logger.info("The package: itertools is not installed.")
+ return(0)
+ }
+ if (!require("doSNOW")) {
+ PEcAn.logger::logger.info("The package: doSNOW is not installed.")
+ return(0)
+ }
+ if (!require("foreach")) {
+ PEcAn.logger::logger.info("The package: foreach is not installed.")
+ return(0)
+ }
# create folder specific for the time and carbon type.
folder.name <- file.path(outdir, paste0(c(variable, time), collapse = "_"))
if (!file.exists(folder.name)) {
From a8334d9304fa712a60ddd501bb7092492c3de3ad Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 13:04:49 -0400
Subject: [PATCH 0213/1193] move the ERA5 average function to the atmosphere
package.
---
modules/data.atmosphere/NAMESPACE | 1 +
1 file changed, 1 insertion(+)
diff --git a/modules/data.atmosphere/NAMESPACE b/modules/data.atmosphere/NAMESPACE
index ebe22618974..364536e2a14 100644
--- a/modules/data.atmosphere/NAMESPACE
+++ b/modules/data.atmosphere/NAMESPACE
@@ -4,6 +4,7 @@ export(.download.raw.met.module)
export(.extract.nc.module)
export(.met2model.module)
export(AirDens)
+export(Average_ERA5_2_GeoTIFF)
export(ERA5_met_process)
export(align.met)
export(build_cf_variables_table_url)
From 57ae4e863c0bc79508648a919f1ed08a54e5b508 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 13:05:11 -0400
Subject: [PATCH 0214/1193] Update ERA5 average function namespace.
---
modules/assim.sequential/inst/anchor/NA_downscale_script.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 5add9112a91..07ec21f02d3 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -187,7 +187,7 @@ end.dates <- c("2012-07-15", "2013-07-15", "2014-07-15",
# parallel average ERA5 into covariates.
future::plan(future::multisession, workers = 5, gc = T)
paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){
- Average_ERA5_2_GeoTIFF(d1, d2, in.path, outdir)
+ PEcAn.data.atmosphere::Average_ERA5_2_GeoTIFF(d1, d2, in.path, outdir)
}, .progress = T) %>% unlist
# create covariates time series.
for (y in 2012:2024) {
From 97efdec15b81bdab2e465436a846663399336b6e Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 13:05:19 -0400
Subject: [PATCH 0215/1193] Update documentation.
---
modules/assim.sequential/man/parallel_prediction.Rd | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/assim.sequential/man/parallel_prediction.Rd b/modules/assim.sequential/man/parallel_prediction.Rd
index 11edbf93cb5..1e9ac2e0ed8 100644
--- a/modules/assim.sequential/man/parallel_prediction.Rd
+++ b/modules/assim.sequential/man/parallel_prediction.Rd
@@ -19,6 +19,8 @@ parallel_prediction(
\item{models}{list: trained models across ensemble members generated by the `parallel_rf_train` function.}
+\item{cov.vecs}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.}
+
\item{non.na.inds}{numeric: the corresponding index of vegetated pixels generated from the `stack_covariates_2_df` function.}
\item{outdir}{character: the output directory where the downscaled maps will be stored.}
@@ -26,8 +28,6 @@ parallel_prediction(
\item{name}{list: containing the time and variable name to create the final GeoTIFF file name.}
\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.}
-
-\item{cov.vecs:}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.}
}
\value{
paths to the ensemble downscaled maps.
From 2b94d53281b942b52af60730d5d31208d8413bfb Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 13:08:24 -0400
Subject: [PATCH 0216/1193] Remove namespace.
---
modules/data.atmosphere/R/average_ERA5.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/data.atmosphere/R/average_ERA5.R b/modules/data.atmosphere/R/average_ERA5.R
index a6e32515420..21a88685dbe 100644
--- a/modules/data.atmosphere/R/average_ERA5.R
+++ b/modules/data.atmosphere/R/average_ERA5.R
@@ -58,7 +58,7 @@ Average_ERA5_2_GeoTIFF <- function (start.date, end.date, in.path, outdir) {
# convert dew point to relative humidity.
beta <- (112 - (0.1 * temp) + dewpoint) / (112 + (0.9 * temp))
relative.humidity <- beta ^ 8
- VPD <- PEcAn.data.atmosphere::get.vpd(100*relative.humidity, temp)
+ VPD <- get.vpd(100*relative.humidity, temp)
# combine together.
PEcAn.logger::logger.info("Aggregate maps.")
met.rast <- c(terra::rast(matrix(temp, nrow = dim(temp)[2], ncol = dim(temp)[1], byrow = T)),
From 2290d99f939155deaf55d265baef5a109d4293cf Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Thu, 26 Jun 2025 22:45:21 +0530
Subject: [PATCH 0217/1193] updated DESCRIPTION
---
modules/data.land/DESCRIPTION | 1 -
1 file changed, 1 deletion(-)
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index e5313520f5b..21fe084eadc 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -40,7 +40,6 @@ Imports:
ncdf4 (>= 1.15),
neonUtilities,
neonstore,
- terra,
PEcAn.benchmark,
PEcAn.DB,
PEcAn.logger,
From 9d9b01e6e93fd74c6546e9424167db5b57c5d633 Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Thu, 26 Jun 2025 22:48:00 +0530
Subject: [PATCH 0218/1193] rendered pecan_package_dependencies.csv
---
docker/depends/pecan_package_dependencies.csv | 1 -
1 file changed, 1 deletion(-)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index ed66569569a..b36ca289b18 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -583,7 +583,6 @@
"terra","*","modules/assim.sequential","Suggests",FALSE
"terra","*","modules/data.atmosphere","Imports",FALSE
"terra","*","modules/data.land","Imports",FALSE
-"terra","*","modules/data.land","Imports",FALSE
"terra","*","modules/data.remote","Imports",FALSE
"testthat","*","base/all","Suggests",FALSE
"testthat","*","base/logger","Suggests",FALSE
From ea7d426566d5d1158e449b0d4d7bc7e395d4b1fd Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 13:25:02 -0400
Subject: [PATCH 0219/1193] Update the document.
---
docker/depends/pecan_package_dependencies.csv | 3 +++
1 file changed, 3 insertions(+)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 4bf7a1afd0e..d3ab88dc247 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -1,5 +1,6 @@
"package","version","needed_by_dir","type","is_pecan"
"abind","*","modules/assim.batch","Imports",FALSE
+"abind","*","modules/assim.sequential","Suggests",FALSE
"abind",">= 1.4.5","base/utils","Imports",FALSE
"abind",">= 1.4.5","models/ed","Imports",FALSE
"abind",">= 1.4.5","modules/data.atmosphere","Imports",FALSE
@@ -41,6 +42,7 @@
"doParallel","*","modules/data.atmosphere","Suggests",FALSE
"doParallel","*","modules/data.remote","Imports",FALSE
"doSNOW","*","base/remote","Suggests",FALSE
+"doSNOW","*","modules/assim.sequential","Suggests",FALSE
"doSNOW","*","modules/data.land","Imports",FALSE
"doSNOW","*","modules/data.remote","Suggests",FALSE
"dplR","*","modules/data.land","Imports",FALSE
@@ -65,6 +67,7 @@
"emdbook","*","modules/assim.sequential","Suggests",FALSE
"exactextractr","*","modules/assim.sequential","Suggests",FALSE
"foreach","*","base/remote","Imports",FALSE
+"foreach","*","modules/assim.sequential","Suggests",FALSE
"foreach","*","modules/data.atmosphere","Suggests",FALSE
"foreach","*","modules/data.land","Imports",FALSE
"foreach","*","modules/data.remote","Imports",FALSE
From 23ab7ee46ec9fe77d2131e8e62634266b9d5c0ed Mon Sep 17 00:00:00 2001
From: Michael Dietze
Date: Thu, 26 Jun 2025 14:28:56 -0400
Subject: [PATCH 0220/1193] Update modules/data.land/R/extract_NEON_veg.R
---
modules/data.land/R/extract_NEON_veg.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/data.land/R/extract_NEON_veg.R b/modules/data.land/R/extract_NEON_veg.R
index fc00eed871e..3c1d9100754 100644
--- a/modules/data.land/R/extract_NEON_veg.R
+++ b/modules/data.land/R/extract_NEON_veg.R
@@ -52,7 +52,7 @@ extract_NEON_veg <- function(lon, lat, start_date, end_date, store_dir, neonsite
neonsites <- dplyr::select(neonsites, "siteCode", "siteLatitude", "siteLongitude") #select for relevant columns
pt1 <- terra::vect(matrix(c(lon1 = lon, lat1 = lat) , ncol = 2), type = "points", crs = "EPSG:4326")
pt2 <- terra::vect(matrix(c(lon2 = neonsites$siteLongitude, lat2 = neonsites$siteLatitude) , ncol = 2), type = "points", crs = "EPSG:4326")
- betyneondist <- terra::distance(p1, p2)
+ betyneondist <- terra::distance(pt1, pt2)
mindist <- min(betyneondist)
distloc <- match(mindist, betyneondist)
lat <- neonsites$siteLatitude[distloc]
From fe056062e3bc275d0ea213851fa4da2d5b47430b Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Fri, 27 Jun 2025 00:22:55 +0530
Subject: [PATCH 0221/1193] updated Rcheck_reference.log
---
modules/data.land/tests/Rcheck_reference.log | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/data.land/tests/Rcheck_reference.log b/modules/data.land/tests/Rcheck_reference.log
index 3974222b7e8..1430528137e 100644
--- a/modules/data.land/tests/Rcheck_reference.log
+++ b/modules/data.land/tests/Rcheck_reference.log
@@ -13,7 +13,7 @@
* package encoding: UTF-8
* checking package namespace information ... OK
* checking package dependencies ... NOTE
-Imports includes 36 non-default packages.
+Imports includes 35 non-default packages.
Importing from so many packages makes the package vulnerable to any of
them becoming unavailable. Move as many as possible to Suggests and
use conditionally.
From 6c00b7468ce81ffed86b1e6b3ee66740379f3faf Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 15:18:04 -0400
Subject: [PATCH 0222/1193] Change package detection method.
---
modules/assim.sequential/R/SDA_parallel_downscale.R | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index a6b69dc42b2..a75aade36a7 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -342,19 +342,19 @@ parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
#' @author Dongchen Zhang
downscale_main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, method = "randomForest", cores = parallel::detectCores()) {
# check packages.
- if (!require(method, character.only = T)) {
+ if (method %in% rownames(installed.packages())) {
PEcAn.logger::logger.info(paste("The package:", method, "is not installed."))
return(0)
}
- if (!require("itertools")) {
+ if ("itertools" %in% rownames(installed.packages())) {
PEcAn.logger::logger.info("The package: itertools is not installed.")
return(0)
}
- if (!require("doSNOW")) {
+ if ("doSNOW" %in% rownames(installed.packages())) {
PEcAn.logger::logger.info("The package: doSNOW is not installed.")
return(0)
}
- if (!require("foreach")) {
+ if ("foreach" %in% rownames(installed.packages())) {
PEcAn.logger::logger.info("The package: foreach is not installed.")
return(0)
}
From 212819e9573c1fc42f87e745e8bba0e5ef84ea8b Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 15:37:56 -0400
Subject: [PATCH 0223/1193] Update document.
---
.../R/SDA_parallel_downscale.R | 18 +++++++++---------
modules/assim.sequential/man/downscale_main.Rd | 4 ++--
.../man/downscale_qsub_main.Rd | 2 +-
.../man/parallel_prediction.Rd | 2 +-
4 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index a75aade36a7..875153e5ac1 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -271,7 +271,7 @@ parallel_train <- function(full_data, method = "randomForest", cores = parallel:
#' @title parallel_prediction
#'
#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
-#' @param models list: trained models across ensemble members generated by the `parallel_rf_train` function.
+#' @param models list: trained models across ensemble members generated by the `parallel_train` function.
#' @param cov.vecs numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.
#' @param non.na.inds numeric: the corresponding index of vegetated pixels generated from the `stack_covariates_2_df` function.
#' @param outdir character: the output directory where the downscaled maps will be stored.
@@ -323,9 +323,9 @@ parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
}
#' @description
-#' This is the main function to execute the RF training and prediction.
+#' This is the main function to execute the machine learning training and prediction.
#' Note it will be deployed by each node you requested if the qsub feature is enabled below.
-#' @title downscale_rf_main
+#' @title downscale_main
#'
#' @param settings character: physical path that points to the pecan settings XML file.
#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
@@ -342,19 +342,19 @@ parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
#' @author Dongchen Zhang
downscale_main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, method = "randomForest", cores = parallel::detectCores()) {
# check packages.
- if (method %in% rownames(installed.packages())) {
+ if (method %in% rownames(utils::installed.packages())) {
PEcAn.logger::logger.info(paste("The package:", method, "is not installed."))
return(0)
}
- if ("itertools" %in% rownames(installed.packages())) {
+ if ("itertools" %in% rownames(utils::installed.packages())) {
PEcAn.logger::logger.info("The package: itertools is not installed.")
return(0)
}
- if ("doSNOW" %in% rownames(installed.packages())) {
+ if ("doSNOW" %in% rownames(utils::installed.packages())) {
PEcAn.logger::logger.info("The package: doSNOW is not installed.")
return(0)
}
- if ("foreach" %in% rownames(installed.packages())) {
+ if ("foreach" %in% rownames(utils::installed.packages())) {
PEcAn.logger::logger.info("The package: foreach is not installed.")
return(0)
}
@@ -377,7 +377,7 @@ downscale_main <- function(settings, analysis, covariates.dir, time, variable, o
}
# parallel train.
PEcAn.logger::logger.info("Parallel training.")
- models <- parallel_rf_train(full_data = full_data, method = method, cores = cores)
+ models <- parallel_train(full_data = full_data, method = method, cores = cores)
# save trained models for future analysis.
# saveRDS(models, file.path(folder.name, "rf_models.rds"))
save(models, file = file.path(folder.name, "ml_models.Rdata"))
@@ -414,7 +414,7 @@ downscale_main <- function(settings, analysis, covariates.dir, time, variable, o
}
#' @description
-#' This qsub function helps to run the submitted qsub jobs for running the downscale_rf_main function.
+#' This qsub function helps to run the submitted qsub jobs for running the downscale_main function.
#' @title downscale_qsub_main
#'
#' @param folder.path Character: physical path to which the job file is located.
diff --git a/modules/assim.sequential/man/downscale_main.Rd b/modules/assim.sequential/man/downscale_main.Rd
index f782cee0013..3ac2ae3ae91 100644
--- a/modules/assim.sequential/man/downscale_main.Rd
+++ b/modules/assim.sequential/man/downscale_main.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/SDA_parallel_downscale.R
\name{downscale_main}
\alias{downscale_main}
-\title{downscale_rf_main}
+\title{downscale_main}
\usage{
downscale_main(
settings,
@@ -39,7 +39,7 @@ downscale_main(
paths to the ensemble downscaled maps.
}
\description{
-This is the main function to execute the RF training and prediction.
+This is the main function to execute the machine learning training and prediction.
Note it will be deployed by each node you requested if the qsub feature is enabled below.
}
\author{
diff --git a/modules/assim.sequential/man/downscale_qsub_main.Rd b/modules/assim.sequential/man/downscale_qsub_main.Rd
index 0aeb957983f..b968c38e71e 100644
--- a/modules/assim.sequential/man/downscale_qsub_main.Rd
+++ b/modules/assim.sequential/man/downscale_qsub_main.Rd
@@ -10,7 +10,7 @@ downscale_qsub_main(folder.path)
\item{folder.path}{Character: physical path to which the job file is located.}
}
\description{
-This qsub function helps to run the submitted qsub jobs for running the downscale_rf_main function.
+This qsub function helps to run the submitted qsub jobs for running the downscale_main function.
}
\author{
Dongchen Zhang
diff --git a/modules/assim.sequential/man/parallel_prediction.Rd b/modules/assim.sequential/man/parallel_prediction.Rd
index 1e9ac2e0ed8..51aeb9072b8 100644
--- a/modules/assim.sequential/man/parallel_prediction.Rd
+++ b/modules/assim.sequential/man/parallel_prediction.Rd
@@ -17,7 +17,7 @@ parallel_prediction(
\arguments{
\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.}
-\item{models}{list: trained models across ensemble members generated by the `parallel_rf_train` function.}
+\item{models}{list: trained models across ensemble members generated by the `parallel_train` function.}
\item{cov.vecs}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.}
From d44890786a6b18fe9738853f4bbff6ae6a66c860 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 15:39:11 -0400
Subject: [PATCH 0224/1193] Add %dopar% namespace.
---
modules/assim.sequential/NAMESPACE | 1 +
modules/assim.sequential/R/SDA_parallel_downscale.R | 3 +++
2 files changed, 4 insertions(+)
diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index d0c32a470d6..86a49cd6ff6 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -65,6 +65,7 @@ import(furrr)
import(lubridate)
import(nimble)
importFrom(dplyr,"%>%")
+importFrom(foreach,"%dopar%")
importFrom(lubridate,"%m+%")
importFrom(magrittr,"%>%")
importFrom(rlang,.data)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index 875153e5ac1..12044821015 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -14,6 +14,7 @@
#' @export
#'
#' @author Dongchen Zhang
+#' @importFrom foreach %dopar%
stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) {
# create the folder if it doesn't exist.
if (!file.exists(outdir)) {
@@ -205,6 +206,7 @@ prepare_train_dat <- function(pts, analysis, covariates.dir, variable) {
#' @return list of trained models across ensemble members.
#'
#' @author Dongchen Zhang
+#' @importFrom foreach %dopar%
parallel_train <- function(full_data, method = "randomForest", cores = parallel::detectCores()) {
# grab ensemble and predictor index.
col.names <- colnames(full_data)
@@ -281,6 +283,7 @@ parallel_train <- function(full_data, method = "randomForest", cores = parallel:
#' @return paths to the ensemble downscaled maps.
#'
#' @author Dongchen Zhang
+#' @importFrom foreach %dopar%
parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) {
# load base map.
base.map <- terra::rast(base.map.dir)
From 53cf78592cb755252eb24d9a99c1315e4d5bdd22 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 15:41:35 -0400
Subject: [PATCH 0225/1193] Add a namespace for the variable inside the
foreach.
---
modules/assim.sequential/R/SDA_parallel_downscale.R | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index 12044821015..d082d442f86 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -32,6 +32,7 @@ stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file.
progress <- function(n) utils::setTxtProgressBar(pb, n)
opts <- list(progress=progress)
# foreach loop.
+ f <- NULL
paths <- foreach::foreach(f = cov.tif.file.list,
.packages=c("Kendall", "terra"),
.options.snow=opts) %dopar% {
@@ -133,6 +134,7 @@ stack_covariates_2_df <- function(rast.dir, cores = parallel::detectCores()) {
progress <- function(n) utils::setTxtProgressBar(pb, n)
opts <- list(progress=progress)
# foreach loop.
+ r <- NULL
vecs <- foreach::foreach(r = seq_along(layer.names),
.packages=c("Kendall", "terra"),
.options.snow=opts) %dopar% {
@@ -224,6 +226,7 @@ parallel_train <- function(full_data, method = "randomForest", cores = parallel:
progress <- function(n) utils::setTxtProgressBar(pb, n)
opts <- list(progress=progress)
# foreach loop.
+ i <- NULL
models <- foreach::foreach(i = ensemble.inds,
.packages=c("Kendall", "stats", method),
.options.snow=opts) %dopar% {
@@ -305,6 +308,7 @@ parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out
doSNOW::registerDoSNOW(cl)
# foreach parallel.
model <- models[[i]]
+ d <- NULL
output <- foreach::foreach(d=itertools::isplitRows(cov.vecs, chunks=cores),
.packages=c("stats", "randomForest")) %dopar% {
stats::predict(model, d)
From d6a9b3ecff74e8c216187a2db1f23f4b27ed55ac Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 15:45:05 -0400
Subject: [PATCH 0226/1193] Update namespace.
---
modules/assim.sequential/R/SDA_parallel_downscale.R | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R
index d082d442f86..3510867a879 100644
--- a/modules/assim.sequential/R/SDA_parallel_downscale.R
+++ b/modules/assim.sequential/R/SDA_parallel_downscale.R
@@ -118,6 +118,7 @@ pecan_settings_2_pts <- function(settings) {
#' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels.
#'
#' @author Dongchen Zhang
+#' @importFrom foreach %dopar%
stack_covariates_2_df <- function(rast.dir, cores = parallel::detectCores()) {
# load maps.
all.rast <- terra::rast(rast.dir)
@@ -245,9 +246,9 @@ parallel_train <- function(full_data, method = "randomForest", cores = parallel:
# if it's xgboost.
if (method == "xgboost") {
formula <- stats::as.formula(paste0("~ ", paste(predictor_col, collapse = " + "), " - 1"))
- train.df <- model.matrix(pred_formula, data = full_data)
- train.df <- xgb.DMatrix(data = train.df, label = full_data[[ensemble_col]])
- model <- xgb.train(
+ train.df <- stats::model.matrix(formula, data = full_data)
+ train.df <- xgboost::xgb.DMatrix(data = train.df, label = full_data[[ensemble_col]])
+ model <- xgboost::xgb.train(
params = list(
objective = "reg:squarederror",
eta = 0.1,
From e5425d27788f9bfd4a7287a7ebe9d404befd7874 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 15:54:13 -0400
Subject: [PATCH 0227/1193] Update dependency.
---
docker/depends/pecan_package_dependencies.csv | 4 ++--
modules/assim.sequential/DESCRIPTION | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index d3ab88dc247..049172f19aa 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -67,7 +67,7 @@
"emdbook","*","modules/assim.sequential","Suggests",FALSE
"exactextractr","*","modules/assim.sequential","Suggests",FALSE
"foreach","*","base/remote","Imports",FALSE
-"foreach","*","modules/assim.sequential","Suggests",FALSE
+"foreach","*","modules/assim.sequential","Imports",FALSE
"foreach","*","modules/data.atmosphere","Suggests",FALSE
"foreach","*","modules/data.land","Imports",FALSE
"foreach","*","modules/data.remote","Imports",FALSE
@@ -220,7 +220,7 @@
"mvbutils","*","base/qaqc","Suggests",FALSE
"mvtnorm","*","modules/allometry","Imports",FALSE
"mvtnorm","*","modules/assim.batch","Imports",FALSE
-"mvtnorm","*","modules/assim.sequential","Imports",FALSE
+"mvtnorm","*","modules/assim.sequential","Suggests",FALSE
"mvtnorm","*","modules/data.land","Imports",FALSE
"mvtnorm","*","modules/emulator","Imports",FALSE
"ncdf4","*","base/db","Imports",FALSE
diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
index 1c5405cb9c9..257c21bb28a 100644
--- a/modules/assim.sequential/DESCRIPTION
+++ b/modules/assim.sequential/DESCRIPTION
@@ -12,13 +12,13 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
Imports:
coda,
dplyr,
+ foreach,
furrr,
future,
ggplot2,
lubridate (>= 1.6.0),
magrittr,
Matrix,
- mvtnorm,
ncdf4,
nimble,
PEcAn.DB,
@@ -35,7 +35,7 @@ Suggests:
corrplot,
doSNOW,
exactextractr,
- foreach,
+ mvtnorm,
ggrepel,
emdbook,
glue,
From c7f6120530cb5e118d076261d5d0ad3a33a85826 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 16:10:02 -0400
Subject: [PATCH 0228/1193] Remove package detection.
---
modules/data.atmosphere/R/average_ERA5.R | 4 ----
1 file changed, 4 deletions(-)
diff --git a/modules/data.atmosphere/R/average_ERA5.R b/modules/data.atmosphere/R/average_ERA5.R
index 21a88685dbe..2917078e86a 100644
--- a/modules/data.atmosphere/R/average_ERA5.R
+++ b/modules/data.atmosphere/R/average_ERA5.R
@@ -12,10 +12,6 @@
#' @export
#' @author Dongchen Zhang
Average_ERA5_2_GeoTIFF <- function (start.date, end.date, in.path, outdir) {
- if (!require("abind")) {
- PEcAn.logger::logger.info("The package: abind is not installed.")
- return(0)
- }
# create dates.
years <- sort(unique(lubridate::year(start.date):lubridate::year(end.date)))
# initialize final outcomes.
From 92c49f43c9a1a33fb29731bd3a9c2404dd034b26 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 26 Jun 2025 16:45:20 -0400
Subject: [PATCH 0229/1193] Add filtering methods to the URLs.
---
modules/data.remote/R/NASA_DAAC_download.R | 28 +++++++++-------------
1 file changed, 11 insertions(+), 17 deletions(-)
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index df4ddb827c6..95a7b1b4adb 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -94,37 +94,31 @@ NASA_DAAC_download <- function(ul_lat,
if (length(granules) == 0)
break
# if it's GLANCE product.
- # GLANCE product has special data archive.
- if (doi == "10.5067/MEaSUREs/GLanCE/GLanCE30.001") {
- granules_href <- c(granules_href, sapply(granules, function(x) {
- links <- c()
- for (j in seq_along(x$links)) {
- links <- c(links, x$links[[j]]$href)
- }
- return(links)
- }))
+ # GLANCE and HLS products have special data archive.
+ if (doi %in% c("10.5067/MEaSUREs/GLanCE/GLanCE30.001", "10.5067/HLS/HLSS30.002")) {
+ granules_href <- c(granules_href, sapply(granules, function(x) {sapply(x$links,function(y) y$href)}))
} else {
granules_href <- c(granules_href, sapply(granules, function(x) x$links[[1]]$href))
}
# grab specific band.
if (!is.null(band)) {
- granules_href <- granules_href[which(grepl(paste(band, collapse = "|"), granules_href))]
+ granules_href <- granules_href[which(grepl(band, granules_href, fixed = T))]
}
page <- page + 1
}
}
- # if no files are found.
- if (is.null(granules_href)) {
- PEcAn.logger::logger.info("No files found. Please check the spatial and temporal search window.")
- return(NA)
- }
- # remove any urls that are not starting with https.
- granules_href = granules_href[which(grepl("http*",granules_href))]
+ # remove non-target files (e.g. s3)
+ granules_href <- granules_href[which(grepl("https*", granules_href))]
# remove duplicated files.
inds <- which(duplicated(basename(granules_href)))
if (length(inds) > 0) {
granules_href <- granules_href[-inds]
}
+ # if no files are found.
+ if (is.null(granules_href)) {
+ PEcAn.logger::logger.info("No files found. Please check the spatial and temporal search window.")
+ return(NA)
+ }
# remove non-image files.
inds <- which(grepl(".h5", basename(granules_href)) |
grepl(".tif", basename(granules_href)) |
From 222d150f4b8805b8eec94b53f42d68ac4ac86475 Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Fri, 27 Jun 2025 02:38:09 -0700
Subject: [PATCH 0230/1193] comment optional param list, address review
comments
---
models/sipnet/NEWS.md | 2 +-
models/sipnet/R/write.configs.SIPNET.R | 21 ++++++++++++++++-----
2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/models/sipnet/NEWS.md b/models/sipnet/NEWS.md
index 73f279c9eb6..a38ac4fef44 100644
--- a/models/sipnet/NEWS.md
+++ b/models/sipnet/NEWS.md
@@ -7,7 +7,7 @@
## Fixed
* `write.config.SIPNET` now checks more carefully whether an optional variable exists in an initial condition file before trying to read it, therefore printing fewer messages about (expectedly) missing variables. (#3545)
-* When passed a vector of input paths, `write.config.SIPNET` was choosing one at random; it now uses only the first (Blesson Thomas, #3298). Note that a single input path per call has always been the intended usage; being passed many was a second bug in PEcAn.uncertainty that is also now fixed.
+* When passed a vector of multiple input paths, `write.config.SIPNET` was choosing one at random; it now throws an error (Blesson Thomas, #3298). Note that a single input path per call has always been the intended usage; being passed many was a second bug in PEcAn.uncertainty that is also now fixed.
# PEcAn.SIPNET 1.9.0
diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R
index d3cfb245eca..6619b3c11f4 100755
--- a/models/sipnet/R/write.configs.SIPNET.R
+++ b/models/sipnet/R/write.configs.SIPNET.R
@@ -600,14 +600,25 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs
}
} else if (length(settings$run$inputs$poolinitcond$path) > 0) {
- ICs_num <- length(settings$run$inputs$poolinitcond$path)
- IC.path <- settings$run$inputs$poolinitcond$path[[1]]
+ IC.path <- settings$run$inputs$poolinitcond$path
+ if (length(IC.path) > 1) {
+ PEcAn.logger::logger.error(
+ "write.config.SIPNET needs one poolinitcond path",
+ "got", length(IC.path)
+ )
+ }
IC.pools <- PEcAn.data.land::prepare_pools(IC.path, constants = list(sla = SLA))
if (!is.null(IC.pools)) {
IC.nc <- ncdf4::nc_open(IC.path) #for additional variables specific to SIPNET
- ic_ncvars_wanted <- c(
+
+ # Optional variables: Use these if present, but don't complain if missing
+ # TODO: Each variable here is used in a corresponding `if` block below,
+ # which are mixed in among the variables from prepare_pools.
+ # Should reorder to separate these, and consider making this an input
+ # to let user control at runtime what's optional and what's mandatory
+ ic_ncvars_to_try <- c(
"nee",
"SoilMoistFrac",
"SWE",
@@ -615,8 +626,8 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs
"date_of_senescence",
"Microbial Biomass C"
)
- ic_has_ncvars <- ic_ncvars_wanted %in% names(IC.nc$var)
- names(ic_has_ncvars) <- ic_ncvars_wanted
+ ic_has_ncvars <- ic_ncvars_to_try %in% names(IC.nc$var)
+ names(ic_has_ncvars) <- ic_ncvars_to_try
## plantWoodInit gC/m2
if ("wood" %in% names(IC.pools)) {
From ca74e12d119b7ab0de2f76106c2c1b5912219cfc Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 27 Jun 2025 10:29:42 -0400
Subject: [PATCH 0231/1193] Add examples and refine code.
---
modules/data.remote/R/NASA_DAAC_download.R | 122 ++++++++++++++++--
modules/data.remote/man/NASA_DAAC_download.Rd | 99 +++++++++++++-
2 files changed, 206 insertions(+), 15 deletions(-)
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index 95a7b1b4adb..26db3eb04bd 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -23,6 +23,7 @@
#'
#' @examples
#' \dontrun{
+#' # SHIFT Hyper-spectral data.
#' ul_lat <- 35
#' ul_lon <- -121
#' lr_lat <- 33
@@ -30,7 +31,6 @@
#' from <- "2022-02-23"
#' to <- "2022-05-30"
#' doi <- "10.3334/ORNLDAAC/2183"
-#' outdir <- "/projectnb/dietzelab/dongchen/SHIFT/test_download"
#' paths <- NASA_DAAC_download(ul_lat = ul_lat,
#' ul_lon = ul_lon,
#' lr_lat = lr_lat,
@@ -39,6 +39,103 @@
#' to = to,
#' doi = doi,
#' just_path = T)
+#' # GEDI level 4A data.
+#' ul_lat <- 85
+#' ul_lon <- -179
+#' lr_lat <- 7
+#' lr_lon <- -20
+#' from <- "2020-01-01"
+#' to <- "2020-12-31"
+#' doi <- "10.3334/ORNLDAAC/2056"
+#' paths <- NASA_DAAC_download(ul_lat = ul_lat,
+#' ul_lon = ul_lon,
+#' lr_lat = lr_lat,
+#' lr_lon = lr_lon,
+#' from = from,
+#' to = to,
+#' band = "V2_1",
+#' doi = doi,
+#' just_path = T)
+#' # MODIS LAI data.
+#' ul_lat <- 85
+#' ul_lon <- -179
+#' lr_lat <- 7
+#' lr_lon <- -20
+#' from <- "2020-01-01"
+#' to <- "2020-01-31"
+#' doi <- "10.5067/MODIS/MCD15A3H.061"
+#' paths <- NASA_DAAC_download(ul_lat = ul_lat,
+#' ul_lon = ul_lon,
+#' lr_lat = lr_lat,
+#' lr_lon = lr_lon,
+#' from = from,
+#' to = to,
+#' doi = doi,
+#' just_path = T)
+#' # SMAP Soil Moisture data.
+#' ul_lat <- 85
+#' ul_lon <- -179
+#' lr_lat <- 7
+#' lr_lon <- -20
+#' from <- "2020-01-01"
+#' to <- "2020-01-31"
+#' doi <- "10.5067/LWJ6TF5SZRG3"
+#' paths <- NASA_DAAC_download(ul_lat = ul_lat,
+#' ul_lon = ul_lon,
+#' lr_lat = lr_lat,
+#' lr_lon = lr_lon,
+#' from = from,
+#' to = to,
+#' doi = doi,
+#' just_path = T)
+#' # GLANCE Phenology and LC data.
+#' ul_lat <- 85
+#' ul_lon <- -179
+#' lr_lat <- 7
+#' lr_lon <- -20
+#' from <- "2019-01-01"
+#' to <- "2019-12-31"
+#' doi <- "10.5067/MEaSUREs/GLanCE/GLanCE30.001"
+#' paths <- NASA_DAAC_download(ul_lat = ul_lat,
+#' ul_lon = ul_lon,
+#' lr_lat = lr_lat,
+#' lr_lon = lr_lon,
+#' from = from,
+#' to = to,
+#' doi = doi,
+#' just_path = T)
+#' # HLS reflectance data.
+#' ul_lat <- 35
+#' ul_lon <- -121
+#' lr_lat <- 33
+#' lr_lon <- -117
+#' from <- "2022-02-23"
+#' to <- "2022-05-30"
+#' doi <- "10.5067/HLS/HLSS30.002"
+#' paths <- NASA_DAAC_download(ul_lat = ul_lat,
+#' ul_lon = ul_lon,
+#' lr_lat = lr_lat,
+#' lr_lon = lr_lon,
+#' from = from,
+#' to = to,
+#' doi = doi,
+#' just_path = T)
+#' ul_lat <- 35
+#' # HLS Phenology data.
+#' ul_lon <- -121
+#' lr_lat <- 33
+#' lr_lon <- -117
+#' from <- "2019-01-01"
+#' to <- "2019-12-31"
+#' doi <- "10.5067/Community/MuSLI/MSLSP30NA.011"
+#' paths <- NASA_DAAC_download(ul_lat = ul_lat,
+#' ul_lon = ul_lon,
+#' lr_lat = lr_lat,
+#' lr_lon = lr_lon,
+#' from = from,
+#' to = to,
+#' doi = doi,
+#' just_path = T)
#' }
#' @author Dongchen Zhang
#' @importFrom foreach %dopar%
@@ -65,7 +162,9 @@ NASA_DAAC_download <- function(ul_lat,
PEcAn.logger::logger.info("Please provide the physical path to the credential file!")
return(NA)
}
- netrc <- getnetrc(credential_path)
+ if (!just_path) {
+ netrc <- getnetrc(credential_path)
+ }
# setup arguments for URL.
daterange <- c(from, to)
# grab provider and concept id from CMR based on DOI.
@@ -93,16 +192,11 @@ NASA_DAAC_download <- function(ul_lat,
granules <- result$feed$entry
if (length(granules) == 0)
break
- # if it's GLANCE product.
- # GLANCE and HLS products have special data archive.
- if (doi %in% c("10.5067/MEaSUREs/GLanCE/GLanCE30.001", "10.5067/HLS/HLSS30.002")) {
- granules_href <- c(granules_href, sapply(granules, function(x) {sapply(x$links,function(y) y$href)}))
- } else {
- granules_href <- c(granules_href, sapply(granules, function(x) x$links[[1]]$href))
- }
+ # grab raw URLs from the records.
+ granules_href <- c(granules_href, sapply(granules, function(x) {sapply(x$links,function(y) y$href)}))
# grab specific band.
if (!is.null(band)) {
- granules_href <- granules_href[which(grepl(band, granules_href, fixed = T))]
+ granules_href <- granules_href[which(grepl(band, basename(granules_href), fixed = T))]
}
page <- page + 1
}
@@ -120,10 +214,10 @@ NASA_DAAC_download <- function(ul_lat,
return(NA)
}
# remove non-image files.
- inds <- which(grepl(".h5", basename(granules_href)) |
- grepl(".tif", basename(granules_href)) |
- grepl(".hdf", basename(granules_href)) |
- grepl(".nc", basename(granules_href)))
+ inds <- which(str_ends(basename(granules_href), ".h5") |
+ str_ends(basename(granules_href), ".tif") |
+ str_ends(basename(granules_href), ".hdf") |
+ str_ends(basename(granules_href), ".nc"))
granules_href <- granules_href[inds]
# detect existing files if we want to download the files.
if (!just_path) {
diff --git a/modules/data.remote/man/NASA_DAAC_download.Rd b/modules/data.remote/man/NASA_DAAC_download.Rd
index a748e4dd93d..78145edf73b 100644
--- a/modules/data.remote/man/NASA_DAAC_download.Rd
+++ b/modules/data.remote/man/NASA_DAAC_download.Rd
@@ -57,6 +57,7 @@ Parallel download data from the NASA ORNL DAAC server given period, spatial boun
}
\examples{
\dontrun{
+# SHIFT Hyper-spectral data.
ul_lat <- 35
ul_lon <- -121
lr_lat <- 33
@@ -64,7 +65,6 @@ lr_lon <- -117
from <- "2022-02-23"
to <- "2022-05-30"
doi <- "10.3334/ORNLDAAC/2183"
-outdir <- "/projectnb/dietzelab/dongchen/SHIFT/test_download"
paths <- NASA_DAAC_download(ul_lat = ul_lat,
ul_lon = ul_lon,
lr_lat = lr_lat,
@@ -73,6 +73,103 @@ paths <- NASA_DAAC_download(ul_lat = ul_lat,
to = to,
doi = doi,
just_path = T)
+# GEDI level 4A data.
+ul_lat <- 85
+ul_lon <- -179
+lr_lat <- 7
+lr_lon <- -20
+from <- "2020-01-01"
+to <- "2020-12-31"
+doi <- "10.3334/ORNLDAAC/2056"
+paths <- NASA_DAAC_download(ul_lat = ul_lat,
+ ul_lon = ul_lon,
+ lr_lat = lr_lat,
+ lr_lon = lr_lon,
+ from = from,
+ to = to,
+ band = "V2_1",
+ doi = doi,
+ just_path = T)
+# MODIS LAI data.
+ul_lat <- 85
+ul_lon <- -179
+lr_lat <- 7
+lr_lon <- -20
+from <- "2020-01-01"
+to <- "2020-01-31"
+doi <- "10.5067/MODIS/MCD15A3H.061"
+paths <- NASA_DAAC_download(ul_lat = ul_lat,
+ ul_lon = ul_lon,
+ lr_lat = lr_lat,
+ lr_lon = lr_lon,
+ from = from,
+ to = to,
+ doi = doi,
+ just_path = T)
+# SMAP Soil Moisture data.
+ul_lat <- 85
+ul_lon <- -179
+lr_lat <- 7
+lr_lon <- -20
+from <- "2020-01-01"
+to <- "2020-01-31"
+doi <- "10.5067/LWJ6TF5SZRG3"
+paths <- NASA_DAAC_download(ul_lat = ul_lat,
+ ul_lon = ul_lon,
+ lr_lat = lr_lat,
+ lr_lon = lr_lon,
+ from = from,
+ to = to,
+ doi = doi,
+ just_path = T)
+# GLANCE Phenology and LC data.
+ul_lat <- 85
+ul_lon <- -179
+lr_lat <- 7
+lr_lon <- -20
+from <- "2019-01-01"
+to <- "2019-12-31"
+doi <- "10.5067/MEaSUREs/GLanCE/GLanCE30.001"
+paths <- NASA_DAAC_download(ul_lat = ul_lat,
+ ul_lon = ul_lon,
+ lr_lat = lr_lat,
+ lr_lon = lr_lon,
+ from = from,
+ to = to,
+ doi = doi,
+ just_path = T)
+# HLS reflectance data.
+ul_lat <- 35
+ul_lon <- -121
+lr_lat <- 33
+lr_lon <- -117
+from <- "2022-02-23"
+to <- "2022-05-30"
+doi <- "10.5067/HLS/HLSS30.002"
+paths <- NASA_DAAC_download(ul_lat = ul_lat,
+ ul_lon = ul_lon,
+ lr_lat = lr_lat,
+ lr_lon = lr_lon,
+ from = from,
+ to = to,
+ doi = doi,
+ just_path = T)
+ ul_lat <- 35
+# HLS Phenology data.
+ul_lon <- -121
+lr_lat <- 33
+lr_lon <- -117
+from <- "2019-01-01"
+to <- "2019-12-31"
+doi <- "10.5067/Community/MuSLI/MSLSP30NA.011"
+paths <- NASA_DAAC_download(ul_lat = ul_lat,
+ ul_lon = ul_lon,
+ lr_lat = lr_lat,
+ lr_lon = lr_lon,
+ from = from,
+ to = to,
+ doi = doi,
+ just_path = T)
}
}
\author{
From 0c3ed4657922a52237bcd3baff7c88da02f98efc Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 27 Jun 2025 10:31:08 -0400
Subject: [PATCH 0232/1193] refine code.
---
modules/data.remote/R/NASA_DAAC_download.R | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index 26db3eb04bd..06b8fda7e2f 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -201,6 +201,11 @@ NASA_DAAC_download <- function(ul_lat,
page <- page + 1
}
}
+ # if no files are found.
+ if (is.null(granules_href)) {
+ PEcAn.logger::logger.info("No files found. Please check the spatial and temporal search window.")
+ return(NA)
+ }
# remove non-target files (e.g. s3)
granules_href <- granules_href[which(grepl("https*", granules_href))]
# remove duplicated files.
@@ -208,11 +213,6 @@ NASA_DAAC_download <- function(ul_lat,
if (length(inds) > 0) {
granules_href <- granules_href[-inds]
}
- # if no files are found.
- if (is.null(granules_href)) {
- PEcAn.logger::logger.info("No files found. Please check the spatial and temporal search window.")
- return(NA)
- }
# remove non-image files.
inds <- which(str_ends(basename(granules_href), ".h5") |
str_ends(basename(granules_href), ".tif") |
From 0e87eb455b7e91a0a5cf080989975156c470d949 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Fri, 27 Jun 2025 12:15:07 -0400
Subject: [PATCH 0233/1193] Update namespace and dependency.
---
docker/depends/pecan_package_dependencies.csv | 1 +
modules/data.remote/DESCRIPTION | 1 +
modules/data.remote/R/NASA_DAAC_download.R | 8 ++++----
3 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index b36ca289b18..9cbae935b43 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -576,6 +576,7 @@
"stringr","*","modules/assim.sequential","Imports",FALSE
"stringr","*","modules/benchmark","Imports",FALSE
"stringr","*","modules/data.land","Imports",FALSE
+"stringr","*","modules/data.remote","Suggests",FALSE
"stringr",">= 1.1.0","base/visualization","Imports",FALSE
"stringr",">= 1.1.0","models/ed","Imports",FALSE
"stringr",">= 1.1.0","modules/data.atmosphere","Imports",FALSE
diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION
index bff565d6969..f8cfe749fd2 100644
--- a/modules/data.remote/DESCRIPTION
+++ b/modules/data.remote/DESCRIPTION
@@ -45,6 +45,7 @@ Suggests:
raster,
reshape,
sf,
+ stringr,
testthat (>= 1.0.2),
tibble,
utils
diff --git a/modules/data.remote/R/NASA_DAAC_download.R b/modules/data.remote/R/NASA_DAAC_download.R
index 06b8fda7e2f..276bf399281 100644
--- a/modules/data.remote/R/NASA_DAAC_download.R
+++ b/modules/data.remote/R/NASA_DAAC_download.R
@@ -214,10 +214,10 @@ NASA_DAAC_download <- function(ul_lat,
granules_href <- granules_href[-inds]
}
# remove non-image files.
- inds <- which(str_ends(basename(granules_href), ".h5") |
- str_ends(basename(granules_href), ".tif") |
- str_ends(basename(granules_href), ".hdf") |
- str_ends(basename(granules_href), ".nc"))
+ inds <- which(stringr::str_ends(basename(granules_href), ".h5") |
+ stringr::str_ends(basename(granules_href), ".tif") |
+ stringr::str_ends(basename(granules_href), ".hdf") |
+ stringr::str_ends(basename(granules_href), ".nc"))
granules_href <- granules_href[inds]
# detect existing files if we want to download the files.
if (!just_path) {
From 83bdd5932e34f815439b04c4258962d86a83b03b Mon Sep 17 00:00:00 2001
From: Harsh Agrawal
Date: Sat, 28 Jun 2025 12:57:49 +0530
Subject: [PATCH 0234/1193] refactor: query.site use in ic_process.R
---
modules/data.land/R/ic_process.R | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/modules/data.land/R/ic_process.R b/modules/data.land/R/ic_process.R
index 099875bb15d..8c1ee171721 100644
--- a/modules/data.land/R/ic_process.R
+++ b/modules/data.land/R/ic_process.R
@@ -50,7 +50,16 @@ ic_process <- function(settings, input, dir, overwrite = FALSE){
on.exit(PEcAn.DB::db.close(con), add = TRUE)
#grab site lat and lon info
- latlon <- PEcAn.DB::query.site(site$id, con = con)[c("lat", "lon")]
+
+ # check if site metadata is available in the settings$run$site
+ if (!is.null(settings$run$site$lat) && !is.null(settings$run$site$lon)) {
+ # if lat and lon are available, use them directly
+ latlon <- data.frame(lat = settings$run$site$lat, lon = settings$run$site$lon)
+ } else {
+ # otherwise, query the site information from the database
+ latlon <- PEcAn.DB::query.site(site$id, con = con)[c("lat", "lon")]
+ }
+
# setup site database number, lat, lon and name and copy for format.vars if new input
new.site <- data.frame(id = as.numeric(site$id),
lat = latlon$lat,
From 0c38aab83e1458cb0fe8fa26c9321fa61a6cb49b Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Tue, 1 Jul 2025 09:35:07 -0700
Subject: [PATCH 0235/1193] make tag arg optional in listToXml
Seems safe just to default to "pecan"
---
base/settings/R/MultiSettings.R | 2 +-
base/settings/R/listToXml.R | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/base/settings/R/MultiSettings.R b/base/settings/R/MultiSettings.R
index 6c6b50709fa..964a4f36baf 100644
--- a/base/settings/R/MultiSettings.R
+++ b/base/settings/R/MultiSettings.R
@@ -176,7 +176,7 @@ printAll.MultiSettings <- function(x) {
.expandableItemsTag <- "multisettings"
#' @export
-listToXml.MultiSettings <- function(item, tag, collapse = TRUE) {
+listToXml.MultiSettings <- function(item, tag = "pecan", collapse = TRUE) {
if (collapse) {
if (.expandableItemsTag %in% names(item)) {
stop("Settings can't contain reserved tag 'multisettings'.")
diff --git a/base/settings/R/listToXml.R b/base/settings/R/listToXml.R
index e354a33e6c4..aa0bf455bd3 100644
--- a/base/settings/R/listToXml.R
+++ b/base/settings/R/listToXml.R
@@ -24,7 +24,7 @@ listToXml <- function(x, ...) {
listToXml.default <- function(x, ...) {
args <- list(...)
if (length(args) == 0) {
- PEcAn.logger::logger.error("no tag provided")
+ tag <- "pecan"
} else if ("tag" %in% names(args)) {
tag <- args$tag
} else {
From c5096d9a6eaf888b8d336003abd79a9e016dd36c Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Tue, 1 Jul 2025 10:38:45 -0700
Subject: [PATCH 0236/1193] TIL about hasArg
---
base/settings/NEWS.md | 5 +++++
base/settings/R/listToXml.R | 18 ++++++++----------
base/settings/man/listToXml.default.Rd | 9 +++++----
3 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/base/settings/NEWS.md b/base/settings/NEWS.md
index 9f2bad1dfd0..78d8876904d 100644
--- a/base/settings/NEWS.md
+++ b/base/settings/NEWS.md
@@ -4,6 +4,11 @@
* listToXml.MultiSettings now produces valid XML from a MultiSettings with length 1.
+## Changed
+
+* The `tag` argument to `listToXml()` is now optional and will default to "pecan" if not specified. This sets the name of of the root XML tag that contains the entire resulting settings object.
+
+
# PEcAn.settings 1.9.0
## Changed
diff --git a/base/settings/R/listToXml.R b/base/settings/R/listToXml.R
index aa0bf455bd3..5525831808e 100644
--- a/base/settings/R/listToXml.R
+++ b/base/settings/R/listToXml.R
@@ -14,21 +14,19 @@ listToXml <- function(x, ...) {
#' @title List to XML
#' @param x object to be converted.
#' Despite the function name, need not actually be a list
-#' @param ... further arguments.
-#' Used to set the element name of the created XML object,
-#' which is taken from an argument named `tag` if present,
-#' or otherwise from the first element of `...`
+#' @param ... further arguments, used to set the root element name
+#' of the created XML object.
+#' The name is taken from an argument named `tag` if present,
+#' or otherwise from the first unnamed element of `...`,
+#' or otherwise defaults to "pecan'
#' @return xmlNode
#' @export
#' @author David LeBauer, Carl Davidson, Rob Kooper
listToXml.default <- function(x, ...) {
- args <- list(...)
- if (length(args) == 0) {
- tag <- "pecan"
- } else if ("tag" %in% names(args)) {
- tag <- args$tag
+ if (methods::hasArg("tag")) {
+ tag <- list(...)$tag
} else {
- tag <- args[[1]]
+ tag <- "pecan"
}
# just a textnode, or empty node with attributes
if (typeof(x) != "list") {
diff --git a/base/settings/man/listToXml.default.Rd b/base/settings/man/listToXml.default.Rd
index 87cb9a568e9..047da3b8b46 100644
--- a/base/settings/man/listToXml.default.Rd
+++ b/base/settings/man/listToXml.default.Rd
@@ -10,10 +10,11 @@
\item{x}{object to be converted.
Despite the function name, need not actually be a list}
-\item{...}{further arguments.
-Used to set the element name of the created XML object,
-which is taken from an argument named \code{tag} if present,
-or otherwise from the first element of \code{...}}
+\item{...}{further arguments, used to set the root element name
+of the created XML object.
+The name is taken from an argument named \code{tag} if present,
+or otherwise from the first unnamed element of \code{...},
+or otherwise defaults to "pecan'}
}
\value{
xmlNode
From 380cd23df7aa7bd11c29a63db7e297095d2e3d26 Mon Sep 17 00:00:00 2001
From: Chris Black
Date: Tue, 1 Jul 2025 10:59:12 -0700
Subject: [PATCH 0237/1193] only use first _unnamed_ arg
---
base/settings/R/listToXml.R | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/base/settings/R/listToXml.R b/base/settings/R/listToXml.R
index 5525831808e..bf611a657ec 100644
--- a/base/settings/R/listToXml.R
+++ b/base/settings/R/listToXml.R
@@ -23,10 +23,16 @@ listToXml <- function(x, ...) {
#' @export
#' @author David LeBauer, Carl Davidson, Rob Kooper
listToXml.default <- function(x, ...) {
+ args <- list(...)
if (methods::hasArg("tag")) {
- tag <- list(...)$tag
+ tag <- args$tag
} else {
- tag <- "pecan"
+ args <- args[names(args) == ""]
+ if (length(args) > 0) {
+ tag <- args[[1]]
+ } else {
+ tag <- "pecan"
+ }
}
# just a textnode, or empty node with attributes
if (typeof(x) != "list") {
From e3a74a4300e8c85062636823053e28142351340f Mon Sep 17 00:00:00 2001
From: harsh agrawal <73224301+harshagr70@users.noreply.github.com>
Date: Wed, 2 Jul 2025 00:19:38 +0530
Subject: [PATCH 0238/1193] Update
documentation/tutorials/multisite-workflow/multisite-workflow-example.qmd
Co-authored-by: Chris Black