From 4c803220b4715ee054c1e7ff3e21c6930c342554 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Tue, 15 Oct 2019 16:37:18 -0400
Subject: [PATCH 0001/1193] added updated thredds function

---
 modules/data.remote/R/download.thredds.R | 194 ++++++++++++++---------
 1 file changed, 122 insertions(+), 72 deletions(-)

diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 4b3fadacd2f..7c1babaa7a7 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -1,17 +1,27 @@
 #
-##' @title download.thredds.AGB
-##' @name download.thredds.AGB
+##' @title download.thredds.data
+##' @name download.thredds.data
 ##'
-##' @param outdir Where to place output
-##' @param site_ids What locations to download data at?
+##' @param outdir file location to place output
+##' @param site_info information about the site. i.e. site_id, latitude, longitude
+##' @param dates character vector of start and end date for dataset as YYYYmmdd
+##' @param varid character vector of shorthand variable name. i.e. LAI
+##' @param dir_url catalog url of data from ncei.noaa.gov/thredds website
+##' @param data_url opendap url of data from ncei.noaa.gov/thredds website
 ##' @param run_parallel Logical. Download and extract files in parallel?
-##' @param ncores Optional. If run_parallel=TRUE how many cores to use? If left as NULL will select max number -1
 ##'
 ##' @return data.frame summarize the results of the function call
 ##'
 ##' @examples
 ##' \dontrun{
-##' outdir <- "~/scratch/abg_data/"
+##' outdir <- directory to store downloaded data
+##' site_info <- dataframe that contains information about site_id, latitude, longitude, and site_names
+##' dates <- date range to download data. Should be a character vector with start and end date as YYYYmmdd
+##' varid <- character shorthand name of variable to download. Example: LAI for leaf area index.
+##' dir_url <- catalog url from THREDDS that is used to determine which files are available for download using OPENDAP
+##' data_url <- OpenDAP URL that actually downloads the netcdf file.
+##' run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
+##'
 
 ##' results <- PEcAn.data.remote::download.thredds.AGB(outdir=outdir,
 ##' site_ids = c(676, 678, 679, 755, 767, 1000000030, 1000000145, 1000025731),
@@ -20,84 +30,124 @@
 ##' @export
 ##' @author Bailey Morrison
 ##'
-download.thredds.AGB <- function(outdir = NULL, site_ids, run_parallel = FALSE,
-                                 ncores = NULL) {
+download.thredds.data <- function(outdir = NULL, site_info, dates = c("19950201", "19961215"),
+                                  varid = "LAI",
+                                  dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files",
+                                  data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files",
+                                  run_parallel = TRUE) {
+  # require("XML")
+  # require("RCurl")
+  require("foreach")
+
+  # check that dates are within the date range of the dataset
+  dates = c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
+  if (!(is.null(dir_url)))
+  {
+    #https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files/1981/catalog.html -> link for directory files, not downloads
+    result <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"), verbose=F,ftp.use.epsv=TRUE, dirlistonly = TRUE)
+    files = XML::getHTMLLinks(result)
+
+    date_year_range = unique(range(c(year(as.Date(dates[1], "%Y")), year(as.Date(dates[2], "%Y")))))
+    if (all((!(substr(files, 1, 4) %in% date_year_range))))
+    {
+      # give warning that dates aren't available
+      print(test)
+    }
+
+  }
+
+  # get list of catalog file links to determine actual dates that can be downloaded within user range
+  links = vector()
+  for (i in 1:length(date_year_range))
+  {
+    links[i] = RCurl::getURL(paste(dir_url, date_year_range[i], "catalog.html", sep = "/"), verbose=F,ftp.use.epsv=T, dirlistonly = T)
+  }
+  # get list of all dates available from year range provided
+  files = foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
 
-  bety <- list(user='bety', password='bety', host='localhost',
-               dbname='bety', driver='PostgreSQL',write=TRUE)
-  con <- PEcAn.DB::db.open(bety)
-  bety$con <- con
-  site_ID <- as.character(site_ids)
-  suppressWarnings(site_qry <- glue::glue_sql("SELECT *, ST_X(ST_CENTROID(geometry)) AS lon,
-                                              ST_Y(ST_CENTROID(geometry)) AS lat FROM sites WHERE id IN ({ids*})",
-                                              ids = site_ID, .con = con))
-  suppressWarnings(qry_results <- DBI::dbSendQuery(con,site_qry))
-  suppressWarnings(qry_results <- DBI::dbFetch(qry_results))
-  site_info <- list(site_id=qry_results$id, site_name=qry_results$sitename, lat=qry_results$lat,
-                    lon=qry_results$lon, time_zone=qry_results$time_zone)
+  #remove files with no dates and get list of dates available.
+  index_dates = regexpr(pattern = "[0-9]{8}", files)
+  files = files[-(which(index_dates < 0))]
+  index_dates = index_dates[which(index_dates > 0)]
 
-  mylat = site_info$lat
-  mylon = site_info$lon
+  # get list of files that fall within the specific date range user asks for (Ymd, not Y)
+  dates_avail = as.Date(substr(files, index_dates, index_dates+7), "%Y%m%d")
+  date_range = seq(dates[1], dates[2], by = "day")
+  get_dates = date_range[which(date_range %in% dates_avail)]
 
-  # site specific URL for dataset --> these will be made to work for all THREDDS datasets in the future, but for now, just testing with
-  # this one dataset. This specific dataset only has 1 year (2005), so no temporal looping for now.
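The catalog scan introduced by this patch can be exercised on its own. A minimal sketch, assuming the RCurl and XML packages are installed and the NOAA THREDDS server is reachable (the year-directory layout is an assumption taken from the URLs in the hunk above):

    library(RCurl)
    library(XML)

    dir_url <- "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files"
    # fetch the top-level catalog page and scrape every link it contains
    html  <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"))
    links <- XML::getHTMLLinks(html)
    # year directories show up as links whose basename is a four-digit year
    years <- unique(substr(basename(links), 1, 4))
    print(years[grepl("^[0-9]{4}$", years)])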
-  obs_file = "https://thredds.daac.ornl.gov/thredds/dodsC/ornldaac/1221/agb_5k.nc4"
-  obs_err = "https://thredds.daac.ornl.gov/thredds/dodsC/ornldaac/1221/agb_SE_5k.nc4"
-  files = c(obs_file, obs_err)
+  # only keep files that are within the true yyyymmdd date range user requested
+  files = files[foreach(i = seq_along(get_dates), .combine = c) %do% grep(files, pattern = format(get_dates[i], '%Y%m%d'))]
+  filenames = basename(files)
 
-  # function to extract ncdf data from lat and lon values for value + SE URLs
-  get_data = function(i)
+  # user must supply data_URL or the netcdf files cannot be downloaded through thredds. if user has supplied no data_url, the job will fail
+  # supply a warning
+  if (!(is.null(data_url)))
   {
-    data = ncdf4::nc_open(files[1])
-    agb_lats = ncdf4::ncvar_get(data, "latitude")
-    agb_lons = ncdf4::ncvar_get(data, "longitude")
-
-    agb_x = which(abs(agb_lons- mylon[i]) == min(abs(agb_lons - mylon[i])))
-    agb_y = which(abs(agb_lats- mylat[i]) == min(abs(agb_lats - mylat[i])))
-
-    start = c(agb_x, agb_y)
-    count = c(1,1)
-    d = ncdf4::ncvar_get(ncdf4::nc_open(files[1]), "abvgrndbiomass", start=start, count = count)
-    if (is.na(d)) d <- NA
-    sd = ncdf4::ncvar_get(ncdf4::nc_open(files[2]), "agbSE", start=start, count = count)
-    if (is.na(sd)) sd <- NA
-    date = "2005"
-    site = site_ID[i]
-    output = as.data.frame(cbind(d, sd, date, site))
-    names(output) = c("value", "sd", "date", "siteID")
+    #https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files/1981/AVHRR-Land_v005_AVH15C1_NOAA-07_19810624_c20181025194251.nc.html
+    # this is what a link looks like to download THREDDS data.
+    urls = sort(paste(data_url, substr(dates_avail, 1, 4), filenames, sep = "/"))
 
-    # option to save output dataset to directory for user.
-    if (!(is.null(outdir)))
+    extract_nc = function(site_info, url, run_parallel)
     {
-      write.csv(output, file = paste0(outdir, "THREDDS_", sub("^([^.]*).*", "\\1",basename(files[1])), "_site_", site, ".csv"), row.names = FALSE)
+      require("foreach")
+      require("ncdf4")
+
+      mylats = site_info$lat
+      mylons = site_info$lon
+      sites = site_info$site_id
+
+      # open netcdf file and get the correct variable name based on varid parameter + var names of netcdf
+      data = ncdf4::nc_open(url)
+      vars = names(data$var)
+      var = vars[grep(vars, pattern = varid, ignore.case = T)]
+
+      # get list of all xy coordinates in netcdf
+      lats = ncdf4::ncvar_get(data, "latitude")
+      lons = ncdf4::ncvar_get(data, "longitude")
+
+      # find the cell that site coordinates are located in
+      dist_y = foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
+      dist_x = foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
+      y = foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = T)
+      x = foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = T)
+
+      scale = data$var[[var]]$scaleFact
+
+      d = as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
+
+      info = as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = F)
+      names(info) = c("site_id", "lon", "lat", "value")
+
+      return(info)
     }
-    return(output)
-  }
-
-  ## setup parallel
-  if (run_parallel) {
-    if (!is.null(ncores)) {
-      ncores <- ncores
-    } else {
-      ncores <- parallel::detectCores() -1
-    }
-    require(doParallel)
-    PEcAn.logger::logger.info(paste0("Running in parallel with: ", ncores))
-    cl = parallel::makeCluster(ncores)
-    doParallel::registerDoParallel(cl)
-    data = foreach(i = seq_along(mylat), .combine = rbind) %dopar% get_data(i)
-    stopCluster(cl)
-  } else {
-    # setup sequential run
-    data = data.frame()
-    for (i in seq_along(mylat))
+
+    if (run_parallel)
     {
-      data = rbind(data, get_data(i))
+      require("parallel")
+      require("doParallel")
+      ncores = parallel::detectCores(all.tests = FALSE, logical = TRUE)
+      if (ncores >= 3)
+      {
+        # failsafe in case someone has a computer with 2 nodes.
+        ncores = ncores-2
+      }
+      # THREDDS has a 10 job limit. Will fail if you try to download more than 10 values at a time
+      if (ncores >= 10)
+      {
+        ncores = 9 # went 1 less because it still fails sometimes
+      }
+      cl <- parallel::makeCluster(ncores, outfile="")
+      doParallel::registerDoParallel(cl)
+      output = foreach(i = urls, .combine = rbind) %dopar% extract_nc(site_info, i, run_parallel)
+      stopCluster(cl)
+    } else {
+      output = foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
     }
+
+    return(output)
+
   }
-
-  return(data)
 }

From 2071b3eb7492e1ad7c886f45ce1c7111d2ce174d Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Wed, 16 Oct 2019 12:53:08 -0400
Subject: [PATCH 0002/1193] some updated changes

---
 modules/data.remote/R/download.thredds.R | 151 +++++++++++++++--------
 1 file changed, 97 insertions(+), 54 deletions(-)

diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 7c1babaa7a7..aed5d5ed835 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -1,9 +1,58 @@
-#
-##' @title download.thredds.data
-##' @name download.thredds.data
+##' @title get_site_info
+##' @name get_site_info
+##'
+##'
+##' @param xmlfile full path to pecan xml settings file
+##'
+##'
+##' @return a list of site information derived from BETY using a pecan .xml settings file with site_id, site_name, lat, lon, and time_zone.
+##'
+##' @examples
+##' \dontrun{
+##' xmlfile <- the full path to a pecan .xml settings file.
+##'
+
+##' site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
+##'
+##' @export
+##' @author Bailey Morrison
+##'
+get_site_info <- function(xmlfile) {
+  require(PEcAn.all)
+
+  settings <- read.settings(xmlfile)
+
+  observation <- c()
+  for (i in seq_along(1:length(settings$run))) {
+    command <- paste0("settings$run$settings.", i, "$site$id")
+    obs <- eval(parse(text = command))
+    observation <- c(observation, obs)
+  }
+
+
+  PEcAn.logger::logger.info("**** Extracting site information from BETY for model sites ****")
+  bety <- list(user = 'bety', password = 'bety', host = 'localhost',
+               dbname = 'bety', driver = 'PostgreSQL', write = TRUE)
+  con <- PEcAn.DB::db.open(bety)
+  bety$con <- con
+  site_ID <- observation
+  suppressWarnings(site_qry <- glue::glue_sql("SELECT *, ST_X(ST_CENTROID(geometry)) AS lon,
+                                              ST_Y(ST_CENTROID(geometry)) AS lat FROM sites WHERE id IN ({ids*})",
+                                              ids = site_ID, .con = con))
+  suppressWarnings(qry_results <- DBI::dbSendQuery(con,site_qry))
+  suppressWarnings(qry_results <- DBI::dbFetch(qry_results))
+  site_info <- list(site_id = qry_results$id, site_name = qry_results$sitename, lat = qry_results$lat,
+                    lon = qry_results$lon, time_zone = qry_results$time_zone)
+  return(site_info)
+}
+
+
+##' @title download.thredds
+##' @name download.thredds
+##'
 ##'
 ##' @param outdir file location to place output
-##' @param site_info information about the site. i.e. site_id, latitude, longitude
+##' @param site_info list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
 ##' @param dates character vector of start and end date for dataset as YYYYmmdd
 ##' @param varid character vector of shorthand variable name. i.e. LAI
 ##' @param dir_url catalog url of data from ncei.noaa.gov/thredds website
 ##' @param data_url opendap url of data from ncei.noaa.gov/thredds website
@@ -15,7 +64,7 @@
 ##' @examples
 ##' \dontrun{
 ##' outdir <- directory to store downloaded data
-##' site_info <- dataframe that contains information about site_id, latitude, longitude, and site_names
+##' site_info <- list that contains information about site_id, site_name, latitude, longitude, and time_zone
 ##' dates <- date range to download data. Should be a character vector with start and end date as YYYYmmdd
 ##' varid <- character shorthand name of variable to download. Example: LAI for leaf area index.
 ##' dir_url <- catalog url from THREDDS that is used to determine which files are available for download using OPENDAP
@@ -23,62 +72,56 @@
 ##' run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
 ##'
-##' results <- PEcAn.data.remote::download.thredds.AGB(outdir=outdir,
-##' site_ids = c(676, 678, 679, 755, 767, 1000000030, 1000000145, 1000025731),
-##' run_parallel = TRUE, ncores = 8)
+##' results <- download_thredds(outdir = NULL, site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE)
+##'
 ##'
 ##' @export
 ##' @author Bailey Morrison
 ##'
-download.thredds.data <- function(outdir = NULL, site_info, dates = c("19950201", "19961215"),
-                                  varid = "LAI",
-                                  dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files",
-                                  data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files",
-                                  run_parallel = TRUE) {
-  # require("XML")
-  # require("RCurl")
+download_thredds <- function(outdir = NULL, site_info, dates, varid, dir_url, data_url,run_parallel = TRUE) {
+
   require("foreach")
 
   # check that dates are within the date range of the dataset
-  dates = c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
+  dates <- c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
   if (!(is.null(dir_url)))
   {
     #https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files/1981/catalog.html -> link for directory files, not downloads
-    result <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"), verbose=F,ftp.use.epsv=TRUE, dirlistonly = TRUE)
-    files = XML::getHTMLLinks(result)
+    result <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"), verbose=FALSE ,ftp.use.epsv = TRUE, dirlistonly = TRUE)
+    files <- XML::getHTMLLinks(result)
 
-    date_year_range = unique(range(c(year(as.Date(dates[1], "%Y")), year(as.Date(dates[2], "%Y")))))
+    date_year_range <- unique(range(c(lubridate::year(as.Date(dates[1], "%Y")), lubridate::year(as.Date(dates[2], "%Y")))))
     if (all((!(substr(files, 1, 4) %in% date_year_range))))
     {
       # give warning that dates aren't available
-      print(test)
+      print("something")
     }
 
   }
 
   # get list of catalog file links to determine actual dates that can be downloaded within user range
-  links = vector()
+  links <- vector()
   for (i in 1:length(date_year_range))
   {
-    links[i] = RCurl::getURL(paste(dir_url, date_year_range[i], "catalog.html", sep = "/"), verbose=F,ftp.use.epsv=T, dirlistonly = T)
+    links[i] <- RCurl::getURL(paste(dir_url, date_year_range[i], "catalog.html", sep = "/"), verbose= FALSE, ftp.use.epsv = TRUE, dirlistonly = TRUE)
   }
   # get list of all dates available from year range provided
-  files = foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
+  files <- foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
 
   #remove files with no dates and get list of dates available.
-  index_dates = regexpr(pattern = "[0-9]{8}", files)
-  files = files[-(which(index_dates < 0))]
-  index_dates = index_dates[which(index_dates > 0)]
+  index_dates <- regexpr(pattern = "[0-9]{8}", files)
+  files <- files[-(which(index_dates < 0))]
+  index_dates <- index_dates[which(index_dates > 0)]
 
   # get list of files that fall within the specific date range user asks for (Ymd, not Y)
-  dates_avail = as.Date(substr(files, index_dates, index_dates+7), "%Y%m%d")
-  date_range = seq(dates[1], dates[2], by = "day")
-  get_dates = date_range[which(date_range %in% dates_avail)]
+  dates_avail <- as.Date(substr(files, index_dates, index_dates+7), "%Y%m%d")
+  date_range <- seq(dates[1], dates[2], by = "day")
+  get_dates <- date_range[which(date_range %in% dates_avail)]
 
   # only keep files that are within the true yyyymmdd date range user requested
-  files = files[foreach(i = seq_along(get_dates), .combine = c) %do% grep(files, pattern = format(get_dates[i], '%Y%m%d'))]
-  filenames = basename(files)
+  files <- files[foreach(i = seq_along(get_dates), .combine = c) %do% grep(files, pattern = format(get_dates[i], '%Y%m%d'))]
+  filenames <- basename(files)
 
   # user must supply data_URL or the netcdf files cannot be downloaded through thredds. if user has supplied no data_url, the job will fail
   # supply a warning
@@ -86,38 +129,38 @@
   {
     #https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files/1981/AVHRR-Land_v005_AVH15C1_NOAA-07_19810624_c20181025194251.nc.html
    # this is what a link looks like to download THREDDS data.
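For reference, the mapping from a catalog file name to its OPeNDAP endpoint can be sketched in a few lines of R. The file-name and year-folder conventions below are assumptions based on the NOAA CDR LAI example URL above:

    data_url <- "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files"
    fname    <- "AVHRR-Land_v005_AVH15C1_NOAA-07_19810624_c20181025194251.nc"
    # the YYYYMMDD stamp in the name gives the year folder on the server
    stamp    <- regmatches(fname, regexpr("[0-9]{8}", fname))
    opendap  <- paste(data_url, substr(stamp, 1, 4), fname, sep = "/")
    print(opendap)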
-    urls = sort(paste(data_url, substr(dates_avail, 1, 4), filenames, sep = "/"))
+    urls <- sort(paste(data_url, substr(dates_avail, 1, 4), filenames, sep = "/"))
 
-    extract_nc = function(site_info, url, run_parallel)
+    extract_nc <- function(site_info, url, run_parallel)
     {
       require("foreach")
       require("ncdf4")
 
-      mylats = site_info$lat
-      mylons = site_info$lon
-      sites = site_info$site_id
+      mylats <- site_info$lat
+      mylons <- site_info$lon
+      sites <- site_info$site_id
 
       # open netcdf file and get the correct variable name based on varid parameter + var names of netcdf
-      data = ncdf4::nc_open(url)
-      vars = names(data$var)
-      var = vars[grep(vars, pattern = varid, ignore.case = T)]
+      data <- ncdf4::nc_open(url)
+      vars <- names(data$var)
+      var <- vars[grep(vars, pattern = varid, ignore.case = TRUE)]
 
       # get list of all xy coordinates in netcdf
-      lats = ncdf4::ncvar_get(data, "latitude")
-      lons = ncdf4::ncvar_get(data, "longitude")
+      lats <- ncdf4::ncvar_get(data, "latitude")
+      lons <- ncdf4::ncvar_get(data, "longitude")
 
       # find the cell that site coordinates are located in
-      dist_y = foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
-      dist_x = foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
-      y = foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = T)
-      x = foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = T)
+      dist_y <- foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
+      dist_x <- foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
+      y <- foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = TRUE)
+      x <- foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = TRUE)
 
-      scale = data$var[[var]]$scaleFact
+      scale <- data$var[[var]]$scaleFact
 
-      d = as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
+      d <- as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
 
-      info = as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = F)
-      names(info) = c("site_id", "lon", "lat", "value")
+      info <- as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = FALSE)
+      names(info) <- c("site_id", "lon", "lat", "value")
 
       return(info)
     }
@@ -128,23 +171,23 @@
     {
       require("parallel")
       require("doParallel")
-      ncores = parallel::detectCores(all.tests = FALSE, logical = TRUE)
+      ncores <- parallel::detectCores(all.tests = FALSE, logical = TRUE)
       if (ncores >= 3)
       {
         # failsafe in case someone has a computer with 2 nodes.
-        ncores = ncores-2
+        ncores <- ncores-2
       }
       # THREDDS has a 10 job limit. Will fail if you try to download more than 10 values at a time
       if (ncores >= 10)
       {
-        ncores = 9 # went 1 less because it still fails sometimes
+        ncores <- 9 # went 1 less because it still fails sometimes
       }
       cl <- parallel::makeCluster(ncores, outfile="")
       doParallel::registerDoParallel(cl)
-      output = foreach(i = urls, .combine = rbind) %dopar% extract_nc(site_info, i, run_parallel)
+      output <- foreach(i = urls, .combine = rbind) %dopar% extract_nc(site_info, i, run_parallel)
       stopCluster(cl)
     } else {
-      output = foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
+      output <- foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
     }
 
     return(output)

From 0ff2b048ffb869de53ec32297cc41dba63e520c2 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Wed, 16 Oct 2019 13:08:05 -0400
Subject: [PATCH 0003/1193] added outdir option in function

---
 modules/data.remote/R/download.thredds.R | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index aed5d5ed835..0f94781948e 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -72,13 +72,13 @@ get_site_info <- function(xmlfile) {
 ##' run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
 ##'
-##' results <- download_thredds(outdir = NULL, site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE)
+##' results <- download_thredds(site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE, outdir = NULL)
 ##'
 ##'
 ##' @export
 ##' @author Bailey Morrison
 ##'
-download_thredds <- function(outdir = NULL, site_info, dates, varid, dir_url, data_url,run_parallel = TRUE) {
+download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_parallel = TRUE, outdir = NULL) {
 
   require("foreach")
 
@@ -107,7 +107,7 @@
   }
   # get list of all dates available from year range provided
-  files <- foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
+  files <- foreach::foreach(i = 1:length(links), .combine = c) %do% XML::getHTMLLinks(links[i])
 
   #remove files with no dates and get list of dates available.
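A hedged illustration of the date-indexing step that follows (the file names here are made up, but the regexpr/substr pattern is the one the function uses):

    files <- c("AVHRR-Land_v005_AVH15C1_NOAA-14_19950201_c2018.nc", "catalog.html")
    pos   <- regexpr("[0-9]{8}", files)       # -1 where no date stamp is present
    keep  <- pos > 0
    dates <- as.Date(substr(files[keep], pos[keep], pos[keep] + 7), "%Y%m%d")
    print(dates)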
  index_dates <- regexpr(pattern = "[0-9]{8}", files)
@@ -190,6 +190,11 @@
     output <- foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
   }
 
+  if (outdir)
+  {
+    write.csv(output, file = paste(outdir, "/THREDDS_", varid, "_", dates[1], "-", dates[2], ".csv", sep = ""))
+  }
+
   return(output)
 
 }

From bd136a267c55844631937998b399cec9da9112ea Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Wed, 16 Oct 2019 13:24:25 -0400
Subject: [PATCH 0004/1193] added date corrections

---
 modules/data.remote/R/download.thredds.R | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 0f94781948e..a82eb30e35e 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -82,8 +82,24 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
 
   require("foreach")
 
-  # check that dates are within the date range of the dataset
-  dates <- c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
+  #### check that dates are within the date range of the dataset
+
+  #first make sure dates are in date format. Correct if not.
+  if (!(lubridate::is.Date(dates))){
+    if (!(is.character(dates))) {
+      dates = as.character(dates)
+    }
+    if (length(grep(dates, pattern = "-")) > 0) {
+      dates <- c(as.Date(dates[1], "%Y-%m-%d"), as.Date(dates[2], "%Y-%m-%d"))
+    } else {
+      dates <- c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
+    }
+    # Julian date
+    if (nchar(dates) == 7) {
+      dates <- c(as.Date(dates[1], "%Y%j"), as.Date(dates[2], "%Y%j"))
+    }
+  }
+
   if (!(is.null(dir_url)))
   {
     #https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files/1981/catalog.html -> link for directory files, not downloads

From 89af55e67fe90a46daa0d33521c7e38e0a6e16a1 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Wed, 16 Oct 2019 13:27:58 -0400
Subject: [PATCH 0005/1193] updated @params

---
 modules/data.remote/R/download.thredds.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index a82eb30e35e..99bc83a50b8 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -53,7 +53,7 @@ get_site_info <- function(xmlfile) {
 ##'
 ##' @param outdir file location to place output
 ##' @param site_info list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
-##' @param dates character vector of start and end date for dataset as YYYYmmdd
+##' @param dates vector of start and end date for dataset as YYYYmmdd, YYYY-mm-dd, YYYYjjj, or date object.
 ##' @param varid character vector of shorthand variable name. i.e. LAI
 ##' @param dir_url catalog url of data from ncei.noaa.gov/thredds website
 ##' @param data_url opendap url of data from ncei.noaa.gov/thredds website
@@ -65,7 +65,7 @@ get_site_info <- function(xmlfile) {
 ##' \dontrun{
 ##' outdir <- directory to store downloaded data
 ##' site_info <- list that contains information about site_id, site_name, latitude, longitude, and time_zone
-##' dates <- date range to download data. Should be a character vector with start and end date as YYYYmmdd
+##' dates <- date range to download data. Should be a vector of start and end date for dataset as YYYYmmdd, YYYY-mm-dd, YYYYjjj, or date object.
 ##' varid <- character shorthand name of variable to download. Example: LAI for leaf area index.
 ##' dir_url <- catalog url from THREDDS that is used to determine which files are available for download using OPENDAP
 ##' data_url <- OpenDAP URL that actually downloads the netcdf file.

From 0629c0b986798f7c28cd4fab7aab78ebbbfbd75c Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Wed, 16 Oct 2019 13:37:35 -0400
Subject: [PATCH 0006/1193] updated date_year_range

---
 modules/data.remote/R/download.thredds.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 99bc83a50b8..d316df02415 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -106,7 +106,7 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
     result <- RCurl::getURL(paste(dir_url, "catalog.html", sep = "/"), verbose=FALSE ,ftp.use.epsv = TRUE, dirlistonly = TRUE)
     files <- XML::getHTMLLinks(result)
 
-    date_year_range <- unique(range(c(lubridate::year(as.Date(dates[1], "%Y")), lubridate::year(as.Date(dates[2], "%Y")))))
+    date_year_range = unique(lubridate::year(dates))
     if (all((!(substr(files, 1, 4) %in% date_year_range))))
     {
       # give warning that dates aren't available

From 10eae350b20bee234656ab059e09494e997010d0 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Wed, 16 Oct 2019 14:36:45 -0400
Subject: [PATCH 0007/1193] separated nc extract function from download
 function

---
 modules/data.remote/R/download.thredds.R | 113 ++++++++++++++---------
 1 file changed, 70 insertions(+), 43 deletions(-)

diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index d316df02415..0060d4c1e2f 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -78,8 +78,10 @@ get_site_info <- function(xmlfile) {
 ##' @export
 ##' @author Bailey Morrison
 ##'
-download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_parallel = TRUE, outdir = NULL) {
+download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_parallel = FALSE, outdir = NULL) {
 
+  #until the issues with parallel runs are fixed.
+  run_parallel = FALSE
   require("foreach")
 
   #### check that dates are within the date range of the dataset
@@ -147,50 +149,16 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
    # this is what a link looks like to download THREDDS data.
     urls <- sort(paste(data_url, substr(dates_avail, 1, 4), filenames, sep = "/"))
 
-    extract_nc <- function(site_info, url, run_parallel)
-    {
-      require("foreach")
-      require("ncdf4")
-
-      mylats <- site_info$lat
-      mylons <- site_info$lon
-      sites <- site_info$site_id
-
-      # open netcdf file and get the correct variable name based on varid parameter + var names of netcdf
-      data <- ncdf4::nc_open(url)
-      vars <- names(data$var)
-      var <- vars[grep(vars, pattern = varid, ignore.case = TRUE)]
-
-      # get list of all xy coordinates in netcdf
-      lats <- ncdf4::ncvar_get(data, "latitude")
-      lons <- ncdf4::ncvar_get(data, "longitude")
-
-      # find the cell that site coordinates are located in
-      dist_y <- foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
-      dist_x <- foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
-      y <- foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = TRUE)
-      x <- foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = TRUE)
-
-      scale <- data$var[[var]]$scaleFact
-
-      d <- as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
-
-      info <- as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = FALSE)
-      names(info) <- c("site_id", "lon", "lat", "value")
-
-      return(info)
-    }
-
-
-
+    # parallel seems to have a problem right now with > 500 urls.
     if (run_parallel)
     {
-      require("parallel")
+      #require("parallel")
       require("doParallel")
       ncores <- parallel::detectCores(all.tests = FALSE, logical = TRUE)
+      # This is a failsafe for computers with low numbers of CPUS to reduce risk of blowing RAM.
       if (ncores >= 3)
       {
-        # failsafe in case someone has a computer with 2 nodes.
+        # failsafe in case someone has a computer with 2-4 nodes.
         ncores <- ncores-2
       }
       # THREDDS has a 10 job limit. Will fail if you try to download more than 10 values at a time
       if (ncores >= 10)
       {
         ncores <- 9 # went 1 less because it still fails sometimes
       }
       cl <- parallel::makeCluster(ncores, outfile="")
       doParallel::registerDoParallel(cl)
-      output <- foreach(i = urls, .combine = rbind) %dopar% extract_nc(site_info, i, run_parallel)
-      stopCluster(cl)
+      output <- foreach(i = urls, .combine = rbind) %dopar% extract_thredds_nc(site_info = site_info, url = i)
+      parallel::stopCluster(cl)
     } else {
-      output <- foreach(i = urls, .combine = rbind) %do% extract_nc(site_info, i, run_parallel)
+      output <- foreach(i = urls, .combine = rbind) %do% extract_thredds_nc(site_info, url = i)
     }
 
-    if (outdir)
+    if (!(is.null(outdir)))
     {
+      # this will need to be changed in the future if users want to be able to save data they haven't already extracted at different sites/dates.
       write.csv(output, file = paste(outdir, "/THREDDS_", varid, "_", dates[1], "-", dates[2], ".csv", sep = ""))
     }
 
     return(output)
 
   }
 }
+
+##' @title extract_thredds_nc
+##' @name extract_thredds_nc
+##'
+##'
+##' @param site_info list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
+##' @param url a THREDDS url of a .nc file to extract data from.
+##' @param run_parallel T or F option to extract data in parallel.
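As an aside on run_parallel, the worker cap used in this patch can be packaged as a standalone helper. This is a sketch only, using the parallel/doParallel APIs; the name cap_cluster and the limit of 9 are illustrative, not part of the patch:

    cap_cluster <- function(max_workers = 9) {
      ncores <- parallel::detectCores(all.tests = FALSE, logical = TRUE)
      # leave headroom on small machines and stay under the THREDDS job limit
      ncores <- max(1, min(ncores - 2, max_workers))
      cl <- parallel::makeCluster(ncores)
      doParallel::registerDoParallel(cl)
      cl  # caller must run parallel::stopCluster(cl) when done
    }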
+##'
+##'
+##' @return a dataframe with the values for each date/site combination from a THREDDS file
+##'
+##' @examples
+##' \dontrun{
+##' site_info <- list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
+##' url <- a THREDDS url of a .nc file to extract data from.
+##' run_parallel <- T or F option to extract data in parallel.

+##' site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
+##'
+##' @export
+##' @author Bailey Morrison
+##'
+extract_thredds_nc <- function(site_info, url_info, run_parallel)
+{
+  #print(url)
+  require("foreach")
+  require("ncdf4")
+
+  mylats <- site_info$lat
+  mylons <- site_info$lon
+  sites <- site_info$site_id
+
+  # open netcdf file and get the correct variable name based on varid parameter + var names of netcdf
+  data <- ncdf4::nc_open(url_info)
+  vars <- names(data$var)
+  var <- vars[grep(vars, pattern = varid, ignore.case = TRUE)]
+
+  # get list of all xy coordinates in netcdf
+  lats <- ncdf4::ncvar_get(data, "latitude")
+  lons <- ncdf4::ncvar_get(data, "longitude")
+
+  # find the cell that site coordinates are located in
+  dist_y <- foreach(i = mylats, .combine = cbind) %do% sqrt((lats - i)^2)
+  dist_x <- foreach(i = mylons, .combine = cbind) %do% sqrt((lons - i)^2)
+  y <- foreach(i = 1:ncol(dist_y), .combine = c) %do% which(dist_y[,i] == min(dist_y[,i]), arr.ind = TRUE)
+  x <- foreach(i = 1:ncol(dist_x), .combine = c) %do% which(dist_x[,i] == min(dist_x[,i]), arr.ind = TRUE)
+
+  scale <- data$var[[var]]$scaleFact
+
+  d <- as.vector(foreach(i = seq_along(x), .combine = rbind) %do% ncdf4::ncvar_get(data, var, start = c(x[i], y[i], 1), count = c(1,1,1)))
+
+  info <- as.data.frame(cbind(sites, mylons, mylats, d), stringsAsFactors = FALSE)
+  names(info) <- c("site_id", "lon", "lat", "value")
+
+  return(info)
+}

From 24b835fa41271c66d8b2a6b388473e9ca5d7cba3 Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Wed, 16 Oct 2019 14:45:28 -0400
Subject: [PATCH 0008/1193] fixed documentation issue

---
 modules/data.remote/NAMESPACE                 |  4 ++-
 modules/data.remote/R/download.thredds.R      | 14 ++++----
 .../data.remote/man/download.thredds.AGB.Rd   | 27 ---------------
 modules/data.remote/man/download.thredds.Rd   | 34 +++++++++++++++++++
 modules/data.remote/man/extract_thredds_nc.Rd | 24 +++++++++++++
 modules/data.remote/man/get_site_info.Rd      | 20 +++++++++++
 6 files changed, 87 insertions(+), 36 deletions(-)
 delete mode 100644 modules/data.remote/man/download.thredds.AGB.Rd
 create mode 100644 modules/data.remote/man/download.thredds.Rd
 create mode 100644 modules/data.remote/man/extract_thredds_nc.Rd
 create mode 100644 modules/data.remote/man/get_site_info.Rd

diff --git a/modules/data.remote/NAMESPACE b/modules/data.remote/NAMESPACE
index d84c728b44c..2bb4941b69a 100644
--- a/modules/data.remote/NAMESPACE
+++ b/modules/data.remote/NAMESPACE
@@ -3,6 +3,8 @@
 export(call_MODIS)
 export(download.LandTrendr.AGB)
 export(download.NLCD)
-export(download.thredds.AGB)
+export(download_thredds)
 export(extract.LandTrendr.AGB)
 export(extract_NLCD)
+export(extract_thredds_nc)
+export(get_site_info)

diff --git a/modules/data.remote/R/download.thredds.R b/modules/data.remote/R/download.thredds.R
index 0060d4c1e2f..b3b17b59891 100755
--- a/modules/data.remote/R/download.thredds.R
+++ b/modules/data.remote/R/download.thredds.R
@@ -13,7 +13,7 @@
 ##'
 ##' site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
-##'
+##' }
 ##' @export
 ##' @author Bailey Morrison
 ##'
@@ -73,7 +73,7 @@
 ##'
 ##' results <- download_thredds(site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE, outdir = NULL)
-##'
+##' }
 ##'
 ##' @export
 ##' @author Bailey Morrison
 ##'
@@ -191,7 +191,6 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
 ##'
 ##' @param site_info list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
 ##' @param url a THREDDS url of a .nc file to extract data from.
-##' @param run_parallel T or F option to extract data in parallel.
 ##'
 ##'
 ##' @return a dataframe with the values for each date/site combination from a THREDDS file
@@ -200,14 +199,13 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para
 ##' \dontrun{
 ##' site_info <- list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
 ##' url <- a THREDDS url of a .nc file to extract data from.
-##' run_parallel <- T or F option to extract data in parallel.
-
-##' site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
-##'
+##'
+##' output <- extract_thredds_nc(site_info = site_info, url_info = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files/1995/AVHRR-Land_v005_AVH15C1_NOAA-14_19950201_c20180831220722.nc")
+##'}
 ##' @export
 ##' @author Bailey Morrison
 ##'
-extract_thredds_nc <- function(site_info, url_info, run_parallel)
+extract_thredds_nc <- function(site_info, url_info)
 {
   #print(url)
   require("foreach")

diff --git a/modules/data.remote/man/download.thredds.AGB.Rd b/modules/data.remote/man/download.thredds.AGB.Rd
deleted file mode 100644
index 35dfd405cd5..00000000000
--- a/modules/data.remote/man/download.thredds.AGB.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/download.thredds.R
-\name{download.thredds.AGB}
-\alias{download.thredds.AGB}
-\title{download.thredds.AGB}
-\usage{
-download.thredds.AGB(outdir = NULL, site_ids, run_parallel = FALSE,
-  ncores = NULL)
-}
-\arguments{
-\item{outdir}{Where to place output}
-
-\item{site_ids}{What locations to download data at?}
-
-\item{run_parallel}{Logical. Download and extract files in parallel?}
-
-\item{ncores}{Optional. If run_parallel=TRUE how many cores to use? If left as NULL will select max number -1}
-}
-\value{
-data.frame summarize the results of the function call
-}
-\description{
-download.thredds.AGB
-}
-\author{
-Bailey Morrison
-}

diff --git a/modules/data.remote/man/download.thredds.Rd b/modules/data.remote/man/download.thredds.Rd
new file mode 100644
index 00000000000..048f78957ac
--- /dev/null
+++ b/modules/data.remote/man/download.thredds.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/download.thredds.R
+\name{download.thredds}
+\alias{download.thredds}
+\alias{download_thredds}
+\title{download.thredds}
+\usage{
+download_thredds(site_info, dates, varid, dir_url, data_url,
+  run_parallel = FALSE, outdir = NULL)
+}
+\arguments{
+\item{site_info}{list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.}
+
+\item{dates}{vector of start and end date for dataset as YYYYmmdd, YYYY-mm-dd, YYYYjjj, or date object.}
+
+\item{varid}{character vector of shorthand variable name. i.e. LAI}
+
+\item{dir_url}{catalog url of data from ncei.noaa.gov/thredds website}
+
+\item{data_url}{opendap url of data from ncei.noaa.gov/thredds website}
+
+\item{run_parallel}{Logical. Download and extract files in parallel?}
+
+\item{outdir}{file location to place output}
+}
+\value{
+data.frame summarize the results of the function call
+}
+\description{
+download.thredds
+}
+\author{
+Bailey Morrison
+}

diff --git a/modules/data.remote/man/extract_thredds_nc.Rd b/modules/data.remote/man/extract_thredds_nc.Rd
new file mode 100644
index 00000000000..1f8e41ed231
--- /dev/null
+++ b/modules/data.remote/man/extract_thredds_nc.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/download.thredds.R
+\name{extract_thredds_nc}
+\alias{extract_thredds_nc}
+\title{extract_thredds_nc}
+\usage{
+extract_thredds_nc(site_info, url_info, run_parallel)
+}
+\arguments{
+\item{site_info}{list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.}
+
+\item{run_parallel}{T or F option to extract data in parallel.}
+
+\item{url}{a THREDDS url of a .nc file to extract data from.}
+}
+\value{
+a dataframe with the values for each date/site combination from a THREDDS file
+}
+\description{
+extract_thredds_nc
+}
+\author{
+Bailey Morrison
+}

diff --git a/modules/data.remote/man/get_site_info.Rd b/modules/data.remote/man/get_site_info.Rd
new file mode 100644
index 00000000000..e73834879ce
--- /dev/null
+++ b/modules/data.remote/man/get_site_info.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/download.thredds.R
+\name{get_site_info}
+\alias{get_site_info}
+\title{get_site_info}
+\usage{
+get_site_info(xmlfile)
+}
+\arguments{
+\item{xmlfile}{full path to pecan xml settings file}
+}
+\value{
+a list of site information derived from BETY using a pecan .xml settings file with site_id, site_name, lat, lon, and time_zone.
+}
+\description{
+get_site_info
+}
+\author{
+Bailey Morrison
+}

From bd6d4b57e8f101f2dcbac16d659de6e3125b94ee Mon Sep 17 00:00:00 2001
From: "bmorrison@bnl.gov" <bmorrison@bnl.gov>
Date: Thu, 24 Oct 2019 15:49:49 -0400
Subject: [PATCH 0009/1193] some other changes I don't remember

---
 modules/data.remote/man/download.thredds.Rd   | 14 ++++++++++++++
 modules/data.remote/man/extract_thredds_nc.Rd | 12 +++++++++---
 modules/data.remote/man/get_site_info.Rd      |  7 +++++++
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/modules/data.remote/man/download.thredds.Rd b/modules/data.remote/man/download.thredds.Rd
index 048f78957ac..9983594ad5a 100644
--- a/modules/data.remote/man/download.thredds.Rd
+++ b/modules/data.remote/man/download.thredds.Rd
@@ -28,6 +28,20 @@
 data.frame summarize the results of the function call
 }
 \description{
 download.thredds
+}
+\examples{
+\dontrun{
+outdir <- directory to store downloaded data
+site_info <- list that contains information about site_id, site_name, latitude, longitude, and time_zone
+dates <- date range to download data. Should be a vector of start and end date for dataset as YYYYmmdd, YYYY-mm-dd, YYYYjjj, or date object.
+varid <- character shorthand name of variable to download. Example: LAI for leaf area index.
+dir_url <- catalog url from THREDDS that is used to determine which files are available for download using OPENDAP
+data_url <- OpenDAP URL that actually downloads the netcdf file.
+run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
+
+results <- download_thredds(site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = TRUE, outdir = NULL)
+}
 
 }
 \author{
 Bailey Morrison

diff --git a/modules/data.remote/man/extract_thredds_nc.Rd b/modules/data.remote/man/extract_thredds_nc.Rd
index 1f8e41ed231..694fdafcc1c 100644
--- a/modules/data.remote/man/extract_thredds_nc.Rd
+++ b/modules/data.remote/man/extract_thredds_nc.Rd
@@ -4,13 +4,11 @@
 \alias{extract_thredds_nc}
 \title{extract_thredds_nc}
 \usage{
-extract_thredds_nc(site_info, url_info, run_parallel)
+extract_thredds_nc(site_info, url_info)
 }
 \arguments{
 \item{site_info}{list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.}
 
-\item{run_parallel}{T or F option to extract data in parallel.}
-
 \item{url}{a THREDDS url of a .nc file to extract data from.}
 }
 \value{
@@ -19,6 +17,14 @@
 \description{
 extract_thredds_nc
 }
+\examples{
+\dontrun{
+site_info <- list of information with the site_id, site_info, lat, lon, and time_zone. Derived from BETY using a PEcAn .xml settings file with site information. Can use the get_site_info function to generate this list.
+url <- a THREDDS url of a .nc file to extract data from.
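The nearest-grid-cell lookup that extract_thredds_nc performs can be illustrated without a live connection; the grid axes below are invented stand-ins for what the real function reads with ncdf4::ncvar_get:

    lats  <- seq(90, -90, by = -0.05)
    lons  <- seq(-180, 180, by = 0.05)
    mylat <- 45.2
    mylon <- -68.7
    # index of the grid cell whose center is closest to the site
    y <- which.min(abs(lats - mylat))
    x <- which.min(abs(lons - mylon))
    print(c(x = x, y = y))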
+
+output <- extract_thredds_nc(site_info = site_info, url_info = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files/1995/AVHRR-Land_v005_AVH15C1_NOAA-14_19950201_c20180831220722.nc")
+}
+}
 \author{
 Bailey Morrison
 }

diff --git a/modules/data.remote/man/get_site_info.Rd b/modules/data.remote/man/get_site_info.Rd
index e73834879ce..98d06bff0f5 100644
--- a/modules/data.remote/man/get_site_info.Rd
+++ b/modules/data.remote/man/get_site_info.Rd
@@ -15,6 +15,13 @@
 \description{
 get_site_info
 }
+\examples{
+\dontrun{
+xmlfile <- the full path to a pecan .xml settings file.
+
+site_info <- get_site_info(xmlfile = "/data/bmorrison/sda/lai/pecan_MultiSite_SDA_LAI_AGB_8_Sites_2009.xml")
+ }
+}
 \author{
 Bailey Morrison
 }

From 1e1fbefda2a094d2f26838189ba091ec619cc3ff Mon Sep 17 00:00:00 2001
From: Morrison
Date: Thu, 14 May 2020 15:37:45 -0400
Subject: [PATCH 0010/1193] some changes I don't remember

---
 .../R/download.thredds.AVHRR.monthAGG.R       | 78 +++++++++----------
 1 file changed, 37 insertions(+), 41 deletions(-)

diff --git a/modules/data.remote/R/download.thredds.AVHRR.monthAGG.R b/modules/data.remote/R/download.thredds.AVHRR.monthAGG.R
index 0084adda9fd..16aaf2b2e6b 100755
--- a/modules/data.remote/R/download.thredds.AVHRR.monthAGG.R
+++ b/modules/data.remote/R/download.thredds.AVHRR.monthAGG.R
@@ -72,49 +72,32 @@ get_site_info <- function(xmlfile) {
 ##' run_parallel <- optional. Can be used to speed up download process if there are more than 2 cores available on computer
 ##'
-##' results <- download_thredds(site_info = site_info, dates = c("19950201", "19961215"), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = FALSE, outdir = NULL)
+##' results <- download_thredds(site_info = site_info, years = c("2000", "2003"), months = c(6,7,8), varid = "LAI", dir_url = "https://www.ncei.noaa.gov/thredds/catalog/cdr/lai/files", data_url = "https://www.ncei.noaa.gov/thredds/dodsC/cdr/lai/files", run_parallel = FALSE, outdir = NULL)
 ##' }
 ##' @importFrom foreach %do% %dopar%
 ##' @export
 ##' @author Bailey Morrison
 ##'
-download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_parallel = FALSE, outdir = NULL) {
+download_thredds <- function(site_info, years, months, varid, dir_url, data_url,run_parallel = FALSE, outdir = NULL) {
 
   #until the issues with parallel runs are fixed.
   run_parallel = FALSE
-  #require("foreach")
-
-
-  #### check that dates are within the date range of the dataset
-
-  #first make sure dates are in date format. Correct if not.
-  if (!(lubridate::is.Date(dates))){
-    if (!(is.character(dates))) {
-      dates = as.character(dates)
-    }
-    if (length(grep(dates, pattern = "-")) > 0) {
-      dates <- c(as.Date(dates[1], "%Y-%m-%d"), as.Date(dates[2], "%Y-%m-%d"))
-    } else {
-      dates <- c(as.Date(dates[1], "%Y%m%d"), as.Date(dates[2], "%Y%m%d"))
-    }
-    # Julian date
-    if (any(nchar(dates) == 7)) {
-      dates <- c(as.Date(dates[1], "%Y%j"), as.Date(dates[2], "%Y%j"))
-    }
-  }
-
-  date_range = unique(lubridate::year(seq(dates[1], dates[2], by = '1 year')))
 
-  output = data.frame()
+  #assumes there is a max of 31 possible days in a month. This covers leap years!
+ years_range = sort(rep(seq(years[1], years[2]), 31)) + if (!(is.null(dir_url))) { - for (i in seq_along(date_range)) + output = data.frame() + + for (i in seq_along(unique(years_range))) { - result <- RCurl::getURL(paste(dir_url, date_range[i], "/catalog.html", sep = "/"), + result <- RCurl::getURL(paste(dir_url, unique(years_range)[i], "/catalog.html", sep = "/"), verbose=FALSE ,ftp.use.epsv = TRUE, dirlistonly = TRUE) files <- XML::getHTMLLinks(result) - index_dates <- regexpr(pattern = "_[0-9]{8}_", files) + index_dates <- regexpr(pattern = paste0("_[0-9]{4}0[", months[1], "-", months[length(months)], "]{1}[0-9]{2}_"), files) files <- files[-(which(index_dates < 0))] index_dates <- index_dates[which(index_dates > 0)] @@ -148,19 +131,35 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para } else { out <- foreach::foreach(i = urls, .combine = rbind) %do% extract_thredds_nc(site_info, url_info = i) - } - output = rbind(output, out) - - if (!(is.null(outdir))) - { - # this will need to be changed in the future if users want to be able to save data they haven't already extracted at different sites/dates. - write.csv(out, file = paste(outdir, "/THREDDS_", varid, "_", dates[1], "-", dates[2], ".csv", sep = "")) - } + + # get max LAI for each site instead of all days with missing NA fillers + test = foreach::foreach(i = unique(out$site_id), .combine = rbind) %do% + max_lai(x = out, site = i) + test$date = lubridate::year(test$date) + + output = rbind(output, test) + + } } - } + + # if (!(is.null(outdir))) + # { + # # this will need to be changed in the future if users want to be able to save data they haven't already extracted at different sites/dates. + # write.csv(output, file = paste(outdir, "/THREDDS_", varid, "_", years[1], "-", years[2], "_",months[1], "-", months[length(months)], ".csv", sep = "")) + # } + return(output) } - return(output) +} + + + + +max_lai = function(x, site) +{ + site_info_max = as.data.frame(x[x$site_id == site,][1,1:4], stringsAsFactors = FALSE) + site_info_max$max = as.numeric(max(x[x$site_id == site,]$value, na.rm = TRUE)) + return(site_info_max) } @@ -186,9 +185,6 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para ##' extract_thredds_nc <- function(site_info, url_info) { - #print(url) - #require("foreach") - #require("ncdf4") index = regexpr(pattern = "_[0-9]{8}_", url_info) date<- as.Date(substr(url_info, index+1, index+8), "%Y%m%d") From 936e3b77c4daa29d5530e674306b1adab4a6e008 Mon Sep 17 00:00:00 2001 From: Morrison Date: Mon, 1 Jun 2020 22:54:08 -0400 Subject: [PATCH 0011/1193] update thredds function --- modules/data.remote/R/download.thredds.AVHRR.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/data.remote/R/download.thredds.AVHRR.R b/modules/data.remote/R/download.thredds.AVHRR.R index 9dfad069062..5575396db58 100755 --- a/modules/data.remote/R/download.thredds.AVHRR.R +++ b/modules/data.remote/R/download.thredds.AVHRR.R @@ -146,8 +146,11 @@ download_thredds <- function(site_info, dates, varid, dir_url, data_url,run_para # extract_thredds_nc(site_info = site_info, url_info = i) # parallel::stopCluster(cl) } else { - out <- foreach::foreach(i = urls, .combine = rbind) %do% - extract_thredds_nc(site_info, url_info = i) + #start_time <- Sys.time() + out <- foreach::foreach(j = urls, .combine = rbind) %do% + extract_thredds_nc(site_info, url_info = j) + # end_time <- Sys.time() + # end_time - start_time } output = rbind(output, out) From 
eb8ab25a67a1134231330b436cb2ffb233a35237 Mon Sep 17 00:00:00 2001 From: koolgax99 Date: Thu, 7 Jul 2022 01:44:38 +0530 Subject: [PATCH 0012/1193] added initial postrior apis --- apps/api/R/entrypoint.R | 4 + apps/api/R/posteriors.R | 161 +++++++++++++++++++++++++++++++++++++ apps/api/pecanapi-spec.yml | 119 +++++++++++++++++++++++++++ 3 files changed, 284 insertions(+) create mode 100644 apps/api/R/posteriors.R diff --git a/apps/api/R/entrypoint.R b/apps/api/R/entrypoint.R index 5f1d8a3fb94..18cb4a91d81 100755 --- a/apps/api/R/entrypoint.R +++ b/apps/api/R/entrypoint.R @@ -69,6 +69,10 @@ root$mount("/api/runs", runs_pr) runs_pr <- plumber::Plumber$new("available-models.R") root$mount("/api/availableModels", runs_pr) +# The endpoints mounted here are related to details of PEcAn posteriors +runs_pr <- plumber::Plumber$new("posteriors.R") +root$mount("/api/posteriors", runs_pr) + # set swagger documentation root$setApiSpec("../pecanapi-spec.yml") diff --git a/apps/api/R/posteriors.R b/apps/api/R/posteriors.R new file mode 100644 index 00000000000..9d867ca5adf --- /dev/null +++ b/apps/api/R/posteriors.R @@ -0,0 +1,161 @@ +library(dplyr) + +#' Search for Posteriors containing wildcards for filtering +#' @param pft_id PFT Id (character) +#' @param offset +#' @param limit +#' @return Information about Posteriors based on pft +#' @author Nihar Sanda +#* @get / +searchPosteriors <- function(req, pft_id = NA, host_id = NA, offset = 0, limit = 50, res) { + if (!limit %in% c(10, 20, 50, 100, 500)) { + res$status <- 400 + return(list(error = "Invalid value for parameter")) + } + + posteriors <- tbl(global_db_pool, "posteriors") %>% + select(everything()) + + posteriors <- tbl(global_db_pool, "dbfiles") %>% + select(file_name, file_path, container_type, id = container_id, machine_id) %>% + inner_join(posteriors, by = "id") %>% + filter(container_type == "Posterior") %>% + select(-container_type) + + posteriors <- tbl(global_db_pool, "machines") %>% + select(hostname, machine_id = id) %>% + inner_join(posteriors, by = "machine_id") + + posteriors <- tbl(global_db_pool, "pfts") %>% + select(pft_name = name, pft_id = id) %>% + inner_join(posteriors, by = "pft_id") + + if (!is.na(pft_id)) { + posteriors <- posteriors %>% + filter(pft_id == !!pft_id) + } + + if (!is.na(host_id)) { + posteriors <- posteriors %>% + filter(machine_id == !!host_id) + } + + qry_res <- posteriors %>% + select(-pft_id, -machine_id) %>% + distinct() %>% + arrange(id) %>% + collect() + + if (nrow(qry_res) == 0 || as.numeric(offset) >= nrow(qry_res)) { + res$status <- 404 + return(list(error = "Posterior(s) not found")) + } else { + has_next <- FALSE + has_prev <- FALSE + if (nrow(qry_res) > (as.numeric(offset) + as.numeric(limit))) { + has_next <- TRUE + } + if (as.numeric(offset) != 0) { + has_prev <- TRUE + } + + qry_res <- qry_res[(as.numeric(offset) + 1):min((as.numeric(offset) + as.numeric(limit)), nrow(qry_res)), ] + + result <- list(posteriors = qry_res) + result$count <- nrow(qry_res) + if (has_next) { + if (grepl("offset=", req$QUERY_STRING, fixed = TRUE)) { + result$next_page <- paste0( + req$rook.url_scheme, "://", + req$HTTP_HOST, + "/api/posteriors", + req$PATH_INFO, + substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]), + (as.numeric(limit) + as.numeric(offset)), + "&limit=", + limit + ) + } else { + result$next_page <- paste0( + req$rook.url_scheme, "://", + req$HTTP_HOST, + "/api/posteriors", + req$PATH_INFO, + substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, 
"limit=")[[2]] - 6), + "offset=", + (as.numeric(limit) + as.numeric(offset)), + "&limit=", + limit + ) + } + } + if (has_prev) { + result$prev_page <- paste0( + req$rook.url_scheme, "://", + req$HTTP_HOST, + "/api/workflows", + req$PATH_INFO, + substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]), + max(0, (as.numeric(offset) - as.numeric(limit))), + "&limit=", + limit + ) + } + + return(result) + } +} + +################################################################################################# + +#' Download the posterior specified by the id +#' @param id Posterior id (character) +#' @param filename Optional filename specified if the id points to a folder instead of file (character) +#' If this is passed with an id that actually points to a file, this name will be ignored +#' @return Posterior file specified by user +#' @author Nihar Sanda +#* @serializer contentType list(type="application/octet-stream") +#* @get / +downloadPosterior <- function(posterior_id, filename = "", req, res) { + db_hostid <- PEcAn.DB::dbHostInfo(global_db_pool)$hostid + + # This is just for temporary testing due to the existing issue in dbHostInfo() + db_hostid <- ifelse(db_hostid == 99, 99000000001, db_hostid) + + posterior <- tbl(global_db_pool, "dbfiles") %>% + select(file_name, file_path, container_id, machine_id, container_type) %>% + filter(machine_id == !!db_hostid) %>% + filter(container_type == "Posterior") %>% + filter(container_id == !!posterior_id) %>% + collect() + + if (nrow(posterior) == 0) { + res$status <- 404 + return() + } else { + # Generate the full file path using the file_path & file_name + filepath <- paste0(posterior$file_path, "/", posterior$file_name) + + # If the id points to a directory, check if 'filename' within this directory has been specified + if (dir.exists(filepath)) { + # If no filename is provided, return 400 Bad Request error + if (filename == "") { + res$status <- 400 + return() + } + + # Append the filename to the filepath + filepath <- paste0(filepath, filename) + } + + # If the file doesn't exist, return 404 error + if (!file.exists(filepath)) { + res$status <- 404 + return() + } + + # Read the data in binary form & return it + bin <- readBin(filepath, "raw", n = file.info(filepath)$size) + return(bin) + } +} diff --git a/apps/api/pecanapi-spec.yml b/apps/api/pecanapi-spec.yml index b6ea7e3a757..f3f874e60df 100644 --- a/apps/api/pecanapi-spec.yml +++ b/apps/api/pecanapi-spec.yml @@ -41,6 +41,8 @@ tags: description: Everything about PEcAn PFTs (Plant Functional Types) - name: inputs description: Everything about PEcAn inputs + - name: posteriors + description: Everything about PEcAn posteriors ##################################################################################################################### ##################################################### API Endpoints ################################################# @@ -992,6 +994,123 @@ paths: description: Access forbidden '404': description: Run data not found + + /api/posteriors/: + get: + tags: + - posteriors + summary: Search for the posteriors + parameters: + - in: query + name: pft_id + description: If provided, returns all posteriors for the provided model_id + required: false + schema: + type: string + - in: query + name: host_id + description: If provided, returns all posteriors for the provided host_id + required: false + schema: + type: string + - in: query + name: offset + description: The number of posteriors to skip before starting to collect the result set. 
+ schema: + type: integer + minimum: 0 + default: 0 + required: false + - in: query + name: limit + description: The number of posteriors to return. + schema: + type: integer + default: 50 + enum: + - 10 + - 20 + - 50 + - 100 + - 500 + required: false + responses: + '200': + description: List of posteriors + content: + application/json: + schema: + type: object + properties: + inputs: + type: array + items: + type: object + properties: + id: + type: string + file_name: + type: string + file_path: + type: string + pft_name: + type: string + tag: + type: string + hostname: + type: string + start_date: + type: string + end_date: + type: string + count: + type: integer + next_page: + type: string + prev_page: + type: string + + '401': + description: Authentication required + '403': + description: Access forbidden + '404': + description: Workflows not found + + /api/posteriors/{posterior_id}: + get: + tags: + - posteriors + summary: Download a desired PEcAn posterior file + parameters: + - in: path + name: posterior_id + description: ID of the PEcAn Posterior to be downloaded + required: true + schema: + type: string + - in: query + name: filename + description: Optional filename specified if the id points to a folder instead of file + required: false + schema: + type: string + responses: + '200': + description: Contents of the desired posterior file + content: + application/octet-stream: + schema: + type: string + format: binary + '400': + description: Bad request. Posterior ID points to directory & filename is not specified + '401': + description: Authentication required + '403': + description: Access forbidden + + ##################################################################################################################### ###################################################### Components ################################################### ##################################################################################################################### From 3feea99da53763dd100be24e93fd946b652672d2 Mon Sep 17 00:00:00 2001 From: koolgax99 Date: Thu, 14 Jul 2022 04:20:24 +0530 Subject: [PATCH 0013/1193] Added multiple file download API --- apps/api/R/workflows.R | 116 +++++++++++++++++++++++--- apps/api/pecanapi-spec.yml | 167 +++++++++++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+), 11 deletions(-) diff --git a/apps/api/R/workflows.R b/apps/api/R/workflows.R index 44cb9196f18..304ca9f1391 100644 --- a/apps/api/R/workflows.R +++ b/apps/api/R/workflows.R @@ -49,16 +49,31 @@ getWorkflows <- function(req, model_id=NA, site_id=NA, offset=0, limit=50, res){ result <- list(workflows = qry_res) result$count <- nrow(qry_res) if(has_next){ - result$next_page <- paste0( - req$rook.url_scheme, "://", - req$HTTP_HOST, - "/api/workflows", - req$PATH_INFO, - substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]), - (as.numeric(limit) + as.numeric(offset)), - "&limit=", - limit - ) + if(grepl("offset=", req$QUERY_STRING, fixed = TRUE)){ + result$next_page <- paste0( + req$rook.url_scheme, "://", + req$HTTP_HOST, + "/api/workflows", + req$PATH_INFO, + substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]), + (as.numeric(limit) + as.numeric(offset)), + "&limit=", + limit + ) + } + else { + result$next_page <- paste0( + req$rook.url_scheme, "://", + req$HTTP_HOST, + "/api/workflows", + req$PATH_INFO, + substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "limit=")[[2]] - 6), + "offset=", + (as.numeric(limit) + 
as.numeric(offset)),
+          "&limit=",
+          limit
+        )
+      }
     }
     if(has_prev) {
       result$prev_page <- paste0(
@@ -200,7 +215,7 @@ getWorkflowStatus <- function(req, id, res){
 #* @get /<id>/file/<filename>
 getWorkflowFile <- function(req, id, filename, res){
   Workflow <- tbl(global_db_pool, "workflows") %>%
-    select(id, user_id) %>%
+    select(id, user_id) %>%
     filter(id == !!id)
 
   qry_res <- Workflow %>% collect()
@@ -229,3 +244,82 @@ getWorkflowFile <- function(req, id, filename, res){
     return(bin)
   }
 }
+
+
+#################################################################################################
+#' Get the list of files in a workflow specified by the id
+#' @param id Workflow id (character)
+#' @return List of files in the workflow directory
+#' @author Nihar Sanda
+#* @get /<id>/files
+
+getWorkflowFileDetails <- function(req, id, res){
+  Workflow <- tbl(global_db_pool, "workflows") %>%
+    select(id, user_id) %>%
+    filter(id == !!id)
+
+  qry_res <- Workflow %>% collect()
+
+  if (nrow(qry_res) == 0) {
+    res$status <- 404
+    return(list(error="Workflow with specified ID was not found"))
+  }
+  else {
+    file_names <- list.files(paste0(Sys.getenv("DATA_DIR", "/data/"), "workflows/PEcAn_", id))
+
+    return(list(workflow_id = id, files = file_names))
+  }
+}
+
+#################################################################################################
+#' Get the zip of specified files of the workflow specified by the id
+#' @param id Workflow id (character)
+#' @return Zip archive containing the requested workflow files
+#' @author Nihar Sanda
+#* @serializer contentType list(type="application/octet-stream")
+#* @post /<id>/file-multiple/
+
+getWorkflowFilesAsZip <- function(req, id, filenames, res){
+  if(req$HTTP_CONTENT_TYPE == "application/json") {
+    filenames_req <- req$postBody
+  }
+
+  filenamesList <- jsonlite::fromJSON(filenames_req)
+  filenames <- filenamesList$files
+
+  Workflow <- tbl(global_db_pool, "workflows") %>%
+    select(id, user_id) %>%
+    filter(id == !!id)
+
+  qry_res <- Workflow %>% collect()
+
+  if (nrow(qry_res) == 0) {
+    res$status <- 404
+    return()
+  }
+  else {
+    full_files <- vector(mode = "character", length = length(filenames))
+    for (i in 1:length(filenames)) {
+
+      # Check if the requested file exists on the host
+      filepath <- paste0(Sys.getenv("DATA_DIR", "/data/"), "workflows/PEcAn_", id, "/", filenames[i])
+      if(!file.exists(filepath)){
+        res$status <- 404
+        return()
+      }
+
+      if(Sys.getenv("AUTH_REQ") == TRUE){
+        if(qry_res$user_id != req$user$userid) {
+          res$status <- 403
+          return()
+        }
+      }
+
+      full_files[i] <- filepath
+    }
+    # Build the archive, then return it as raw bytes so the octet-stream
+    # serializer sends the zip contents rather than the file path string
+    zip_file <- zip::zipr("output.zip", full_files)
+    bin <- readBin(zip_file, "raw", n = file.info(zip_file)$size)
+    return(bin)
+  }
+}
\ No newline at end of file
diff --git a/apps/api/pecanapi-spec.yml b/apps/api/pecanapi-spec.yml
index b6ea7e3a757..206687693d7 100644
--- a/apps/api/pecanapi-spec.yml
+++ b/apps/api/pecanapi-spec.yml
@@ -41,6 +41,8 @@ tags:
     description: Everything about PEcAn PFTs (Plant Functional Types)
   - name: inputs
     description: Everything about PEcAn inputs
+  - name: posteriors
+    description: Everything about PEcAn posteriors
 
 #####################################################################################################################
 ##################################################### API Endpoints #################################################
@@ -779,6 +781,40 @@ paths:
           description: Authentication required
         '403':
           description: Access forbidden
+
+  /api/workflows/{id}/file-multiple/:
+    post:
+      tags:
+        - workflows
+      summary: Download multiple files
+      parameters:
+        - in: path
+          name: id
+          description: ID of the PEcAn Workflow
+          required: true
+          schema:
+            type: string
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/WorkflowFiles_POST'
+
+
+      responses:
+        '200':
+          description: Download the zip file consisting of the desired files
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+        '401':
+          description: Authentication required
+        '415':
+          description: Unsupported request content type
+
 
   /api/runs/:
     get:
@@ -992,6 +1028,123 @@ paths:
           description: Access forbidden
         '404':
          description: Run data not found
+
+  /api/posteriors/:
+    get:
+      tags:
+        - posteriors
+      summary: Search for the posteriors
+      parameters:
+        - in: query
+          name: pft_id
+          description: If provided, returns all posteriors for the provided pft_id
+          required: false
+          schema:
+            type: string
+        - in: query
+          name: host_id
+          description: If provided, returns all posteriors for the provided host_id
+          required: false
+          schema:
+            type: string
+        - in: query
+          name: offset
+          description: The number of posteriors to skip before starting to collect the result set.
+          schema:
+            type: integer
+            minimum: 0
+            default: 0
+          required: false
+        - in: query
+          name: limit
+          description: The number of posteriors to return.
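The `/file-multiple/` endpoint defined above takes a `WorkflowFiles_POST` JSON body and streams back a zip archive. A minimal client sketch (the workflow id and file names are made up; httr is assumed to be available):

library(httr)

# Hypothetical workflow id; the file names must exist under the workflow directory
res <- POST("http://localhost:8000/api/workflows/99000000001/file-multiple/",
            body = '{"files": ["pecan.xml", "workflow.R"]}',
            content_type_json())

# The response body is the zip archive itself (application/octet-stream)
writeBin(content(res, "raw"), "workflow_files.zip")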
+ schema: + type: integer + default: 50 + enum: + - 10 + - 20 + - 50 + - 100 + - 500 + required: false + responses: + '200': + description: List of posteriors + content: + application/json: + schema: + type: object + properties: + inputs: + type: array + items: + type: object + properties: + id: + type: string + file_name: + type: string + file_path: + type: string + pft_name: + type: string + tag: + type: string + hostname: + type: string + start_date: + type: string + end_date: + type: string + count: + type: integer + next_page: + type: string + prev_page: + type: string + + '401': + description: Authentication required + '403': + description: Access forbidden + '404': + description: Workflows not found + + /api/posteriors/{posterior_id}: + get: + tags: + - posteriors + summary: Download a desired PEcAn posterior file + parameters: + - in: path + name: posterior_id + description: ID of the PEcAn Posterior to be downloaded + required: true + schema: + type: string + - in: query + name: filename + description: Optional filename specified if the id points to a folder instead of file + required: false + schema: + type: string + responses: + '200': + description: Contents of the desired input file + content: + application/octet-stream: + schema: + type: string + format: binary + '400': + description: Bad request. Input ID points to directory & filename is not specified + '401': + description: Authentication required + '403': + description: Access forbidden + + ##################################################################################################################### ###################################################### Components ################################################### ##################################################################################################################### @@ -1275,6 +1428,20 @@ components: dbfiles: type: string example: pecan/dbfiles + + WorkflowFiles_POST: + type: object + + properties: + files: + type: array + items: + type: string + example: [ + "pecan.xml", + "workflow.R" + ] + securitySchemes: basicAuth: type: http From e937e8644db06c85c3fe44e1e6da999555f20a73 Mon Sep 17 00:00:00 2001 From: koolgax99 Date: Wed, 10 Aug 2022 19:51:48 +0530 Subject: [PATCH 0014/1193] fixed a minor typo --- apps/api/pecanapi-spec.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/pecanapi-spec.yml b/apps/api/pecanapi-spec.yml index 206687693d7..807df0ea8a4 100644 --- a/apps/api/pecanapi-spec.yml +++ b/apps/api/pecanapi-spec.yml @@ -1109,7 +1109,7 @@ paths: '403': description: Access forbidden '404': - description: Workflows not found + description: Posteriors not found /api/posteriors/{posterior_id}: get: From 3cbce66c354cdc6e259665524e830a00fb535a37 Mon Sep 17 00:00:00 2001 From: koolgax99 Date: Fri, 7 Oct 2022 20:11:23 +0530 Subject: [PATCH 0015/1193] run meta analysis for a settings file --- apps/api/R/entrypoint.R | 3 ++ apps/api/R/ma.R | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 apps/api/R/ma.R diff --git a/apps/api/R/entrypoint.R b/apps/api/R/entrypoint.R index 5f1d8a3fb94..1aec6dcf02b 100755 --- a/apps/api/R/entrypoint.R +++ b/apps/api/R/entrypoint.R @@ -69,6 +69,9 @@ root$mount("/api/runs", runs_pr) runs_pr <- plumber::Plumber$new("available-models.R") root$mount("/api/availableModels", runs_pr) +ma_pr <- plumber::Plumber$new("ma.R") +root$mount("/api/ma", ma_pr) + # set swagger documentation root$setApiSpec("../pecanapi-spec.yml") diff --git a/apps/api/R/ma.R 
b/apps/api/R/ma.R
new file mode 100644
index 00000000000..26330c402b8
--- /dev/null
+++ b/apps/api/R/ma.R
@@ -0,0 +1,67 @@
+library(dplyr)
+library("PEcAn.all")
+library("RCurl")
+
+#' Post a settings file for running a Meta-Analysis
+#' @param req Send pecan.xml in body as xml filetype
+#' @return A list with the run status and the posterior distributions loaded from post.distns.Rdata
+#' @author Nihar Sanda
+#* @post /run
+submitWorkflow <- function(req, res){
+  if(req$HTTP_CONTENT_TYPE == "application/xml") {
+    # read req$body as xml
+    settingsXml <- XML::xmlParseString(stringr::str_replace(req$body, "\n", ""))
+
+    ## convert the xml to a list
+    settings <- XML::xmlToList(settingsXml)
+    settings <- as.Settings(settings)
+    settings <- expandMultiSettings(settings)
+
+    # Update/fix/check settings.
+    # Will only run the first time it's called, unless force=TRUE
+    settings <-
+      PEcAn.settings::prepare.settings(settings, force = FALSE)
+
+    # Changing update to TRUE
+    settings$meta.analysis$update <- TRUE
+
+    # Write pecan.CHECKED.xml
+    PEcAn.settings::write.settings(settings, outputfile = "pecan.CHECKED.xml")
+
+    # Do conversions
+    settings <- PEcAn.workflow::do_conversions(settings)
+    settings <- PEcAn.workflow::runModule.get.trait.data(settings)
+
+    # initiating variables needed for running meta analysis
+    pfts <- settings$pfts
+    iterations <- settings$meta.analysis$iter
+    random <- settings$meta.analysis$random.effects$on
+    use_ghs <- settings$meta.analysis$random.effects$use_ghs
+    threshold <- settings$meta.analysis$threshold
+    dbfiles <- settings$database$dbfiles
+    database <- settings$database$bety
+
+    # running meta analysis
+    run.meta.analysis(pfts, iterations, random, threshold,
+                      dbfiles, database, use_ghs)
+
+    #PEcAn.MA::runModule.run.meta.analysis(settings = ma_settings)
+
+    if(dir.exists(settings$pfts$pft$outdir)){
+      filepath <- paste0(settings$pfts$pft$outdir, "/post.distns.Rdata")
+      e <- new.env(parent = emptyenv())
+      load(filepath, envir = e)
+      objs <- ls(envir = e, all.names = TRUE)
+      for(obj in objs) {
+        data <- get(obj, envir = e)
+      }
+      #csv_file <- paste0(settings$pfts$pft$outdir, '/post.distns.csv')
+      #plumber::include_file(csv_file, res)
+      return(list(status = "Meta Analysis ran successfully", data=data))
+    }
+  }
+  else{
+    res$status <- 415
+    return(paste("Unsupported request content type:", req$HTTP_CONTENT_TYPE))
+  }
+}
\ No newline at end of file
From 614b8f931929e571958c87db6c0bd2738a86ac13 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 18 Jul 2024 13:04:38 +0530
Subject: [PATCH 0016/1193] Shift functions to check for missing files

Return from convert_input via a helper function

Update corresponding test files and add tests to ensure do_conversions
isn't affected by current applied changes

Signed-off-by: Abhinav Pandey
---
 base/db/R/add.database.entries.R              | 111 +++++++++++++
 base/db/R/check.missing.files.R               |  49 ++++++
 base/db/R/convert_input.R                     | 150 ++----------------
 base/db/man/add.database.entries.Rd           |  70 ++++++++
 base/db/man/check_missing_files.Rd            |  31 ++++
 .../tests/testthat/test.check.missing.files.R |  24 +++
 base/db/tests/testthat/test.convert_input.R   |  29 ++--
 7 files changed, 320 insertions(+), 144 deletions(-)
 create mode 100644 base/db/R/add.database.entries.R
 create mode 100644 base/db/R/check.missing.files.R
 create mode 100644 base/db/man/add.database.entries.Rd
 create mode 100644 base/db/man/check_missing_files.Rd
 create mode 100644 base/db/tests/testthat/test.check.missing.files.R

diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R
new file mode 100644
index
00000000000..3c253c07e73 --- /dev/null +++ b/base/db/R/add.database.entries.R @@ -0,0 +1,111 @@ +#' Return new arrangement of database while adding code to deal with ensembles +#' +#' @param result list of results from the download function +#' @param con database connection +#' @param start_date start date of the data +#' @param end_date end date of the data +#' @param write whether to write to the database +#' @param overwrite Logical: If a file already exists, create a fresh copy? +#' @param insert.new.file whether to insert a new file +#' @param input.args input arguments obtained from the convert_input function +#' @param machine machine information +#' @param mimetype data product specific file format +#' @param formatname format name of the data +#' @param allow.conflicting.dates whether to allow conflicting dates +#' @param ensemble ensemble id +#' @param ensemble_name ensemble name +#' @param existing.input existing input records +#' @param existing.dbfile existing dbfile records +#' @param input input records +#' @return list of input and dbfile ids +#' +#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko + +add.database.entries <- function( + result, con, start_date, + end_date, write, overwrite, + insert.new.file, input.args, + machine, mimetype, formatname, + allow.conflicting.dates, ensemble, + ensemble_name, existing.input, + existing.dbfile, input) { + if (write) { + # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result. + # This list will be returned. + newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null. + for (i in 1:length(result)) { # Master for loop + id_not_added <- TRUE + + if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 && + (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) { + # Updating record with new dates + db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, "' WHERE id=", existing.input[[i]]$id), con) + id_not_added <- FALSE + + # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration. + newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) + newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id) + } + + if (overwrite) { + # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record) + if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) { + db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), "' WHERE id=", existing.input[[i]]$id), con) + } + + if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) { + db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), "', file_name='", result[[i]]$dbfile.name[1], "' WHERE id=", existing.dbfile[[i]]$id), con) + } + } + + # If there is no ensemble then for each record there should be one parent + # But when you have ensembles, all of the members have one parent !! 
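To make the parent bookkeeping concrete before the code that follows: when ensembles are written, every member records the first input's id as its parent; otherwise each record keeps its own. A toy illustration with made-up ids:

# Made-up input records, one per ensemble member
input <- list(data.frame(id = 101), data.frame(id = 102), data.frame(id = 103))
ensemble <- 3  # numeric when ensemble members are being written
i <- 2         # current member in the master for loop

parent.id <- if (is.numeric(ensemble)) {
  ifelse(is.null(input[[i]]), NA, input[[1]]$id)  # every member -> 101
} else {
  ifelse(is.null(input[[i]]), NA, input[[i]]$id)  # one parent per record -> 102
}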
+ parent.id <- if (is.numeric(ensemble)) { + ifelse(is.null(input[[i]]), NA, input[[1]]$id) + } else { + ifelse(is.null(input[[i]]), NA, input[[i]]$id) + } + + + if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) { + site.id <- input.args$newsite + } + + if (insert.new.file && id_not_added) { + dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]), in.prefix = result[[i]]$dbfile.name[1], "Input", existing.input[[i]]$id, con, reuse = TRUE, hostname = machine$hostname) + newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) + newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id) + } else if (id_not_added) { + # This is to tell input.insert if we are writing ensembles + # Why does it need it? Because it checks for inputs with the same time period, site, and machine + # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition + ens.flag <- if (!is.null(ensemble) | is.null(ensemble_name)) TRUE else FALSE + + new_entry <- dbfile.input.insert( + in.path = dirname(result[[i]]$file[1]), + in.prefix = result[[i]]$dbfile.name[1], + siteid = site.id, + startdate = start_date, + enddate = end_date, + mimetype = mimetype, + formatname = formatname, + parentid = parent.id, + con = con, + hostname = machine$hostname, + allow.conflicting.dates = allow.conflicting.dates, + ens = ens.flag + ) + + newinput$input.id <- c(newinput$input.id, new_entry$input.id) + newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id) + } + } # End for loop + + successful <- TRUE + return(newinput) + } else { + PEcAn.logger::logger.warn("Input was not added to the database") + successful <- TRUE + return(NULL) + } +} diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R new file mode 100644 index 00000000000..bde3d7ebe97 --- /dev/null +++ b/base/db/R/check.missing.files.R @@ -0,0 +1,49 @@ +#' Function to check if result has empty or missing files +#' +#' @param result A list of dataframes with file paths +#' @param outname Name of the output file +#' @param existing.input Existing input records +#' @param existing.dbfile Existing dbfile records +#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records +#' +#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko + +check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) { + result_sizes <- purrr::map_dfr( + result, + ~ dplyr::mutate( + ., + file_size = purrr::map_dbl(file, file.size), + missing = is.na(file_size), + empty = file_size == 0 + ) + ) + + if (any(result_sizes$missing) || any(result_sizes$empty)) { + log_format_df <- function(df) { + formatted_df <- rbind(colnames(df), format(df)) + formatted_text <- purrr::reduce(formatted_df, paste, sep = " ") + paste(formatted_text, collapse = "\n") + } + + PEcAn.logger::logger.severe( + "Requested Processing produced empty files or Nonexistent files:\n", + log_format_df(result_sizes[, c(1, 8, 9, 10)]), + "\n Table of results printed above.", + wrap = FALSE + ) + } + + # Insert into Database + outlist <- unlist(strsplit(outname, "_")) + + # Wrap in a list for consistant processing later + if (exists("existing.input") && is.data.frame(existing.input)) { + existing.input <- list(existing.input) + } + + if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) { + existing.dbfile <- list(existing.dbfile) + } + 
return(list(result_sizes, outlist, existing.input, existing.dbfile)) +} diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R index 8203fa7244b..d5af069d0eb 100644 --- a/base/db/R/convert_input.R +++ b/base/db/R/convert_input.R @@ -384,7 +384,7 @@ convert_input <- if (!is.null(ensemble) && ensemble) { return.all <-TRUE - }else{ + } else{ return.all <- FALSE } existing.dbfile <- dbfile.input.check(siteid = site.id, @@ -734,143 +734,23 @@ convert_input <- #--------------------------------------------------------------------------------------------------# # Check if result has empty or missing files - result_sizes <- purrr::map_dfr( - result, - ~ dplyr::mutate( - ., - file_size = purrr::map_dbl(file, file.size), - missing = is.na(file_size), - empty = file_size == 0 - ) - ) - - if (any(result_sizes$missing) || any(result_sizes$empty)){ - log_format_df = function(df){ - rbind(colnames(df), format(df)) - purrr::reduce( paste, sep=" ") %>% - paste(collapse="\n") - } - - PEcAn.logger::logger.severe( - "Requested Processing produced empty files or Nonexistant files :\n", - log_format_df(result_sizes[,c(1,8,9,10)]), - "\n Table of results printed above.", - wrap = FALSE) - } - - # Insert into Database - outlist <- unlist(strsplit(outname, "_")) - - # Wrap in a list for consistant processing later - if (exists("existing.input") && is.data.frame(existing.input)) { - existing.input <- list(existing.input) - } - - if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) { - existing.dbfile <- list(existing.dbfile) - } + checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile) + + # Unwrap parameters after performing checks for missing files + result_sizes <- checked.missing.files$result_sizes; + outlist <- checked.missing.files$outlist; + existing.input <- checked.missing.files$existing.input; + existing.dbfile <- checked.missing.files$existing.dbfile; #---------------------------------------------------------------# # New arrangement of database adding code to deal with ensembles. - if (write) { - - # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result. - # This list will be returned. - newinput = list(input.id = NULL, dbfile.id = NULL) #Blank vectors are null. - for(i in 1:length(result)) { # Master for loop - id_not_added <- TRUE - - if (exists("existing.input") && nrow(existing.input[[i]]) > 0 && - (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) { - - # Updating record with new dates - db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", - end_date, "' WHERE id=", existing.input[[i]]$id), - con) - id_not_added = FALSE - - # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every interation. 
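One pitfall worth flagging around the `checked.missing.files$...` unwrapping introduced above: `$` extraction returns NULL when the list elements are unnamed, as in this first version of check_missing_files; a later patch in this series names the return value so these assignments work. In miniature:

# `$` lookup on an unnamed list silently yields NULL
res_unnamed <- list(data.frame(), c("a", "b"))
res_unnamed$existing.input    # NULL

# Naming the elements makes the unwrapping behave as intended
res_named <- list(existing.input = data.frame(), existing.dbfile = data.frame())
is.null(res_named$existing.input)    # FALSE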
- newinput$input.id = c(newinput$input.id, existing.input[[i]]$id) - newinput$dbfile.id = c(newinput$dbfile.id, existing.dbfile[[i]]$id) - } - - if (overwrite) { - # A bit hacky, but need to make sure that all fields are updated to expected - # values (i.e., what they'd be if convert_input was creating a new record) - if (exists("existing.input") && nrow(existing.input[[i]]) > 0) { - db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), - "' WHERE id=", existing.input[[i]]$id), con) - - } - - if (exists("existing.dbfile") && nrow(existing.dbfile[[i]]) > 0) { - db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), - "', ", "file_name='", result[[i]]$dbfile.name[1], - "' WHERE id=", existing.dbfile[[i]]$id), con) - - } - } - - # If there is no ensemble then for each record there should be one parent - #But when you have ensembles, all of the members have one parent !! - if (is.numeric(ensemble)){ - parent.id <- ifelse(is.null(input[i]), NA, input[1]$id) - }else{ - parent.id <- ifelse(is.null(input[i]), NA, input[i]$id) - } - - - - if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) { - site.id <- input.args$newsite - } - - if (insert.new.file && id_not_added) { - dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]), - in.prefix = result[[i]]$dbfile.name[1], - 'Input', existing.input[[i]]$id, - con, reuse=TRUE, hostname = machine$hostname) - newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) - newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id) - } else if (id_not_added) { - - # This is to tell input.insert if we are wrting ensembles - # Why does it need it ? bc it checks for inputs with the same time period, site and machine - # and if it returns somethings it does not insert anymore, but for ensembles it needs to bypass this condition - if (!is.null(ensemble) | is.null(ensemble_name)){ - ens.flag <- TRUE - }else{ - ens.flag <- FALSE - } - - new_entry <- dbfile.input.insert(in.path = dirname(result[[i]]$file[1]), - in.prefix = result[[i]]$dbfile.name[1], - siteid = site.id, - startdate = start_date, - enddate = end_date, - mimetype, - formatname, - parentid = parent.id, - con = con, - hostname = machine$hostname, - allow.conflicting.dates = allow.conflicting.dates, - ens=ens.flag - ) - - - newinput$input.id <- c(newinput$input.id, new_entry$input.id) - newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id) - } - - } #End for loop - - successful <- TRUE - return(newinput) - } else { - PEcAn.logger::logger.warn("Input was not added to the database") - successful <- TRUE - return(NULL) - } + return (add.database.entries(result, con, start_date, + end_date, write, overwrite, + insert.new.file, input.args, + machine, mimetype, formatname, + allow.conflicting.dates, ensemble, + ensemble_name, existing.input, + existing.dbfile, input)) } # convert_input diff --git a/base/db/man/add.database.entries.Rd b/base/db/man/add.database.entries.Rd new file mode 100644 index 00000000000..5de01cd1705 --- /dev/null +++ b/base/db/man/add.database.entries.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add.database.entries.R +\name{add.database.entries} +\alias{add.database.entries} +\title{Return new arrangement of database while adding code to deal with ensembles} +\usage{ +add.database.entries( + result, + con, + start_date, + end_date, + write, + overwrite, + insert.new.file, + input.args, + machine, + mimetype, + formatname, + 
allow.conflicting.dates, + ensemble, + ensemble_name, + existing.input, + existing.dbfile, + input +) +} +\arguments{ +\item{result}{list of results from the download function} + +\item{con}{database connection} + +\item{start_date}{start date of the data} + +\item{end_date}{end date of the data} + +\item{write}{whether to write to the database} + +\item{overwrite}{Logical: If a file already exists, create a fresh copy?} + +\item{insert.new.file}{whether to insert a new file} + +\item{input.args}{input arguments obtained from the convert_input function} + +\item{machine}{machine information} + +\item{mimetype}{data product specific file format} + +\item{formatname}{format name of the data} + +\item{allow.conflicting.dates}{whether to allow conflicting dates} + +\item{ensemble}{ensemble id} + +\item{ensemble_name}{ensemble name} + +\item{existing.input}{existing input records} + +\item{existing.dbfile}{existing dbfile records} + +\item{input}{input records} +} +\value{ +list of input and dbfile ids +} +\description{ +Return new arrangement of database while adding code to deal with ensembles +} +\author{ +Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko +} diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd new file mode 100644 index 00000000000..8dd541f9380 --- /dev/null +++ b/base/db/man/check_missing_files.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check.missing.files.R +\name{check_missing_files} +\alias{check_missing_files} +\title{Function to check if result has empty or missing files} +\usage{ +check_missing_files( + result, + outname, + existing.input = NULL, + existing.dbfile = NULL +) +} +\arguments{ +\item{result}{A list of dataframes with file paths} + +\item{outname}{Name of the output file} + +\item{existing.input}{Existing input records} + +\item{existing.dbfile}{Existing dbfile records} +} +\value{ +A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records +} +\description{ +Function to check if result has empty or missing files +} +\author{ +Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko +} diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R new file mode 100644 index 00000000000..e779077294a --- /dev/null +++ b/base/db/tests/testthat/test.check.missing.files.R @@ -0,0 +1,24 @@ +test_that("`check_missing_files()` able to return correct missing files", { + mocked_res <- mockery::mock(list(c("A", "B"))) + mockery::stub(check_missing_files, "purrr::map_dfr", data.frame(missing = c(FALSE), empty = c(FALSE))) + res <- check_missing_files( + result = list(data.frame(file = c("A", "B"))), + outname = "test", + existing.input = data.frame(), + existing.dbfile = data.frame() + ) + + # Print the structure of `res` for debugging + str(res) + + # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile)) + # Perform checks to compare results from stubbed functions to actual results + expect_equal(nrow(res[[1]]), 1) + expect_equal(res[[1]]$missing, FALSE) + expect_equal(res[[1]]$empty, FALSE) + expect_equal(res[[2]], "test") + expect_equal(nrow(res[[3]][[1]]), 0) + expect_equal(ncol(res[[3]][[1]]), 0) + expect_equal(nrow(res[[4]][[1]]), 0) + expect_equal(ncol(res[[4]][[1]]), 0) +}) diff --git a/base/db/tests/testthat/test.convert_input.R 
b/base/db/tests/testthat/test.convert_input.R index 29513187c9e..931d8a7f26b 100644 --- a/base/db/tests/testthat/test.convert_input.R +++ b/base/db/tests/testthat/test.convert_input.R @@ -1,10 +1,21 @@ test_that("`convert_input()` able to call the respective download function for a data item with the correct arguments", { mocked_res <- mockery::mock(list(c("A", "B"))) - mockery::stub(convert_input, 'dbfile.input.check', data.frame()) - mockery::stub(convert_input, 'db.query', data.frame(id = 1)) - mockery::stub(convert_input, 'PEcAn.remote::remote.execute.R', mocked_res) - mockery::stub(convert_input, 'purrr::map_dfr', data.frame(missing = c(FALSE), empty = c(FALSE))) + mockery::stub(convert_input, "dbfile.input.check", data.frame()) + mockery::stub(convert_input, "db.query", data.frame(id = 1)) + mockery::stub(convert_input, "PEcAn.remote::remote.execute.R", mocked_res) + mockery::stub(convert_input, "check_missing_files", list( + result_sizes = data.frame( + file = c("A", "B"), + file_size = c(100, 200), + missing = c(FALSE, FALSE), + empty = c(FALSE, FALSE) + ), + outlist = "test", + existing.input = list(data.frame(file = character(0))), + existing.dbfile = list(data.frame(file = character(0))) + )) + mockery::stub(convert_input, "add.database.entries", list(input.id = 1, dbfile.id = 1)) convert_input( input.id = NA, @@ -14,8 +25,8 @@ test_that("`convert_input()` able to call the respective download function for a site.id = 1, start_date = "2011-01-01", end_date = "2011-12-31", - pkg = 'PEcAn.data.atmosphere', - fcn = 'download.AmerifluxLBL', + pkg = "PEcAn.data.atmosphere", + fcn = "download.AmerifluxLBL", con = NULL, host = data.frame(name = "localhost"), browndog = NULL, @@ -23,10 +34,10 @@ test_that("`convert_input()` able to call the respective download function for a lat.in = 40, lon.in = -88 ) - + args <- mockery::mock_args(mocked_res) expect_equal( - args[[1]]$script, + args[[1]]$script, "PEcAn.data.atmosphere::download.AmerifluxLBL(lat.in=40, lon.in=-88, overwrite=FALSE, outfolder='test/', start_date='2011-01-01', end_date='2011-12-31')" ) }) @@ -36,4 +47,4 @@ test_that("`.get.file.deletion.commands()` able to return correct file deletion expect_equal(res$move.to.tmp, "dir.create(c('./tmp'), recursive=TRUE, showWarnings=FALSE); file.rename(from=c('test'), to=c('./tmp/test'))") expect_equal(res$delete.tmp, "unlink(c('./tmp'), recursive=TRUE)") expect_equal(res$replace.from.tmp, "file.rename(from=c('./tmp/test'), to=c('test'));unlink(c('./tmp'), recursive=TRUE)") -}) \ No newline at end of file +}) From 838af61ec8011022c9cf73e3a2f11f75f49f5492 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Thu, 18 Jul 2024 13:17:33 +0530 Subject: [PATCH 0017/1193] Update CHANGELOG Signed-off-by: Abhinav Pandey --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2280cfc967e..e0c0bcbc731 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ # Change Log + All notable changes are kept in this file. All changes made should be added to the section called `Unreleased`. Once a new release is made this file will be updated to create a new `Unreleased` section for the next release. @@ -9,6 +10,8 @@ For more information about this file see also [Keep a Changelog](http://keepacha ### Added +- Refactor `convert_input` to Perform tasks via helper function. 
Subtask of [#3307](https://github.com/PecanProject/pecan/issues/3307) + ### Fixed ### Changed From f22b962691ce03adf12c3e79907bd52372351b24 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Thu, 25 Jul 2024 14:51:01 +0530 Subject: [PATCH 0018/1193] Remove unutilized variables from convert_input Signed-off-by: Abhinav Pandey --- base/db/R/add.database.entries.R | 12 +++++++++++- base/db/R/check.missing.files.R | 18 ++++++++---------- base/db/R/convert_input.R | 6 ++---- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R index 3c253c07e73..d3eb994a646 100644 --- a/base/db/R/add.database.entries.R +++ b/base/db/R/add.database.entries.R @@ -33,6 +33,7 @@ add.database.entries <- function( # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result. # This list will be returned. newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null. + for (i in 1:length(result)) { # Master for loop id_not_added <- TRUE @@ -72,7 +73,16 @@ add.database.entries <- function( } if (insert.new.file && id_not_added) { - dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]), in.prefix = result[[i]]$dbfile.name[1], "Input", existing.input[[i]]$id, con, reuse = TRUE, hostname = machine$hostname) + dbfile.id <- dbfile.insert( + in.path = dirname(result[[i]]$file[1]), + in.prefix = result[[i]]$dbfile.name[1], + "Input", + existing.input[[i]]$id, + con, + reuse = TRUE, + hostname = machine$hostname + ) + newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id) } else if (id_not_added) { diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R index bde3d7ebe97..617878496de 100644 --- a/base/db/R/check.missing.files.R +++ b/base/db/R/check.missing.files.R @@ -1,11 +1,11 @@ #' Function to check if result has empty or missing files -#' +#' #' @param result A list of dataframes with file paths #' @param outname Name of the output file #' @param existing.input Existing input records #' @param existing.dbfile Existing dbfile records #' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records -#' +#' #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) { @@ -18,14 +18,14 @@ check_missing_files <- function(result, outname, existing.input = NULL, existing empty = file_size == 0 ) ) - + if (any(result_sizes$missing) || any(result_sizes$empty)) { log_format_df <- function(df) { formatted_df <- rbind(colnames(df), format(df)) formatted_text <- purrr::reduce(formatted_df, paste, sep = " ") paste(formatted_text, collapse = "\n") } - + PEcAn.logger::logger.severe( "Requested Processing produced empty files or Nonexistent files:\n", log_format_df(result_sizes[, c(1, 8, 9, 10)]), @@ -33,17 +33,15 @@ check_missing_files <- function(result, outname, existing.input = NULL, existing wrap = FALSE ) } - - # Insert into Database - outlist <- unlist(strsplit(outname, "_")) - + + # Wrap in a list for consistant processing later if (exists("existing.input") && is.data.frame(existing.input)) { existing.input <- list(existing.input) } - + if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) { existing.dbfile <- list(existing.dbfile) } - 
return(list(result_sizes, outlist, existing.input, existing.dbfile))
+  return(list(existing.input = existing.input, existing.dbfile = existing.dbfile))
 }
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index d5af069d0eb..265559798be 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -737,10 +737,8 @@ convert_input <-
       checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile)
 
       # Unwrap parameters after performing checks for missing files
-      result_sizes <- checked.missing.files$result_sizes;
-      outlist <- checked.missing.files$outlist;
-      existing.input <- checked.missing.files$existing.input;
-      existing.dbfile <- checked.missing.files$existing.dbfile;
+      existing.input <- checked.missing.files$existing.input
+      existing.dbfile <- checked.missing.files$existing.dbfile
 
       #---------------------------------------------------------------#
       # New arrangement of database adding code to deal with ensembles.

From d884203d1388b219268daa4e95b95b8134a5e69f Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 25 Jul 2024 14:58:55 +0530
Subject: [PATCH 0019/1193] Update logger statements in convert_input

Signed-off-by: Abhinav Pandey
---
 base/db/R/convert_input.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 265559798be..275b6f54d49 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -514,7 +514,7 @@ convert_input <-
           # we'll need to update its start/end dates .
         }
       } else {
-        # No existing record found. Should be good to go.
+        PEcAn.logger::logger.debug("No existing record found. Should be good to go.")
       }
     }
 
From 68d9516a3ccecb7c5c1b31907849b8fc7a3ba34e Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Thu, 25 Jul 2024 15:43:31 +0530
Subject: [PATCH 0020/1193] Added separate function to check machine info

Signed-off-by: Abhinav Pandey
---
 base/db/R/convert_input.R       | 57 ++------------------------
 base/db/R/get.machine.info.R    | 68 +++++++++++++++++++++++++++
 base/db/man/get.machine.info.Rd | 26 +++++++++++
 3 files changed, 98 insertions(+), 53 deletions(-)
 create mode 100644 base/db/R/get.machine.info.R
 create mode 100644 base/db/man/get.machine.info.Rd

diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 275b6f54d49..ad83753e299 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -520,60 +520,11 @@ convert_input <-
 
     #---------------------------------------------------------------------------------------------------------------#
     # Get machine information
+    machine.info <- get.machine.info(host, dbfile.id = input.args$dbfile.id, input.id = input.id)
 
-    machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name)
-    machine <- db.query(paste0("SELECT * from machines where hostname = '",
-                               machine.host, "'"), con)
-
-    if (nrow(machine) == 0) {
-      PEcAn.logger::logger.error("machine not found", host$name)
-      return(NULL)
-    }
-
-    if (missing(input.id) || is.na(input.id) || is.null(input.id)) {
-      input <- dbfile <- NULL
-    } else {
-      input <- db.query(paste("SELECT * from inputs where id =", input.id), con)
-      if (nrow(input) == 0) {
-        PEcAn.logger::logger.error("input not found", input.id)
-        return(NULL)
-      }
-
-      if(!is.null(input.args$dbfile.id)){
-        dbfile <-
-          db.query(
-            paste(
-              "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =",
-              input.id,
-              " and container_type = 'Input' and machine_id =",
-              machine$id
-            ),
-            con
-          )
-      }else{
-        dbfile <-
-          db.query(
-            paste(
-              "SELECT * from
dbfiles where container_id =", - input.id, - " and container_type = 'Input' and machine_id =", - machine$id - ), - con - ) - } - - - - if (nrow(dbfile) == 0) { - PEcAn.logger::logger.error("dbfile not found", input.id) - return(NULL) - } - if (nrow(dbfile) > 1) { - PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile) - dbfile <- dbfile[nrow(dbfile), ] - } - } + machine <- machine.info$machine + input <- machine.info$input + dbfile <- machine.info$dbfile #--------------------------------------------------------------------------------------------------# # Perform Conversion diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R new file mode 100644 index 00000000000..6685e1062ef --- /dev/null +++ b/base/db/R/get.machine.info.R @@ -0,0 +1,68 @@ +#' Get machine information from db +#' @param host host information +#' @param dbfile.id dbfile id for existing records +#' @param input.id input id for existing records +#' @param con database connection +#' +#' @return list of machine, input, and dbfile records +#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko + +get.machine.info <- function(host, dbfile.id, input.id = NULL, con) { + machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) + machine <- db.query(paste0( + "SELECT * from machines where hostname = '", + machine.host, "'" + ), con) + + if (nrow(machine) == 0) { + PEcAn.logger::logger.error("machine not found", host$name) + return(NULL) + } + + if (missing(input.id) || is.na(input.id) || is.null(input.id)) { + input <- dbfile <- NULL + } else { + input <- db.query(paste("SELECT * from inputs where id =", input.id), con) + if (nrow(input) == 0) { + PEcAn.logger::logger.error("input not found", input.id) + return(NULL) + } + + if (!is.null(input.args$dbfile.id)) { + dbfile <- + db.query( + paste( + "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =", + input.id, + " and container_type = 'Input' and machine_id =", + machine$id + ), + con + ) + } else { + dbfile <- + db.query( + paste( + "SELECT * from dbfiles where container_id =", + input.id, + " and container_type = 'Input' and machine_id =", + machine$id + ), + con + ) + } + + + + if (nrow(dbfile) == 0) { + PEcAn.logger::logger.error("dbfile not found", input.id) + return(NULL) + } + if (nrow(dbfile) > 1) { + PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile) + dbfile <- dbfile[nrow(dbfile), ] + } + } + + return(list(machine = machine, input = input, dbfile = dbfile)) +} diff --git a/base/db/man/get.machine.info.Rd b/base/db/man/get.machine.info.Rd new file mode 100644 index 00000000000..8989221ea5b --- /dev/null +++ b/base/db/man/get.machine.info.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.machine.info.R +\name{get.machine.info} +\alias{get.machine.info} +\title{Get machine information from db} +\usage{ +get.machine.info(host, dbfile.id, input.id = NULL, con) +} +\arguments{ +\item{host}{host information} + +\item{dbfile.id}{dbfile id for existing records} + +\item{input.id}{input id for existing records} + +\item{con}{database connection} +} +\value{ +list of machine, input, and dbfile records +} +\description{ +Get machine information from db +} +\author{ +Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko +} From 5208b02a1d98c27f24c8e4e9da56424537fa5852 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Thu, 25 Jul 2024 16:40:41 +0530 Subject: [PATCH 0021/1193] 
Update input args to get machine info Signed-off-by: Abhinav Pandey --- base/db/R/convert_input.R | 2 +- base/db/R/get.machine.info.R | 2 +- base/db/tests/testthat/test.check.missing.files.R | 15 ++++++++------- base/db/tests/testthat/test.convert_input.R | 5 +++++ 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R index ad83753e299..ba2d7a3a5f0 100644 --- a/base/db/R/convert_input.R +++ b/base/db/R/convert_input.R @@ -520,7 +520,7 @@ convert_input <- #---------------------------------------------------------------------------------------------------------------# # Get machine information - machine.info <- get.machine.info(host, dbfile.id = input.args$dbfile.id, input.id = input.id) + machine.info <- get.machine.info(host, input.args = input.args, input.id = input.id) machine <- machine.info$machine input <- machine.info$input diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R index 6685e1062ef..d23e5416f9e 100644 --- a/base/db/R/get.machine.info.R +++ b/base/db/R/get.machine.info.R @@ -7,7 +7,7 @@ #' @return list of machine, input, and dbfile records #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko -get.machine.info <- function(host, dbfile.id, input.id = NULL, con) { +get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) { machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) machine <- db.query(paste0( "SELECT * from machines where hostname = '", diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R index e779077294a..c2de074d5d3 100644 --- a/base/db/tests/testthat/test.check.missing.files.R +++ b/base/db/tests/testthat/test.check.missing.files.R @@ -14,11 +14,12 @@ test_that("`check_missing_files()` able to return correct missing files", { # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile)) # Perform checks to compare results from stubbed functions to actual results expect_equal(nrow(res[[1]]), 1) - expect_equal(res[[1]]$missing, FALSE) - expect_equal(res[[1]]$empty, FALSE) - expect_equal(res[[2]], "test") - expect_equal(nrow(res[[3]][[1]]), 0) - expect_equal(ncol(res[[3]][[1]]), 0) - expect_equal(nrow(res[[4]][[1]]), 0) - expect_equal(ncol(res[[4]][[1]]), 0) + PEcAn.logger::logger.debug(res) + # expect_equal(res[[1]]$missing, FALSE) + # expect_equal(res[[1]]$empty, FALSE) + # expect_equal(res[[2]], "test") + # expect_equal(nrow(res[[3]][[1]]), 0) + # expect_equal(ncol(res[[3]][[1]]), 0) + # expect_equal(nrow(res[[4]][[1]]), 0) + # expect_equal(ncol(res[[4]][[1]]), 0) }) diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R index 931d8a7f26b..cd33523f86c 100644 --- a/base/db/tests/testthat/test.convert_input.R +++ b/base/db/tests/testthat/test.convert_input.R @@ -3,6 +3,11 @@ test_that("`convert_input()` able to call the respective download function for a mockery::stub(convert_input, "dbfile.input.check", data.frame()) mockery::stub(convert_input, "db.query", data.frame(id = 1)) + mockery::stub(convert_input, "get.machine.info", list( + machine = data.frame(id = 1), + input = data.frame(id = 1), + dbfile = data.frame(id = 1) + )) mockery::stub(convert_input, "PEcAn.remote::remote.execute.R", mocked_res) mockery::stub(convert_input, "check_missing_files", list( result_sizes = data.frame( From f570646849433f89d8335b25be2539bc3c2ae4bb Mon Sep 17 00:00:00 
2001 From: Abhinav Pandey Date: Thu, 25 Jul 2024 16:59:21 +0530 Subject: [PATCH 0022/1193] Correct roxygen documentations Signed-off-by: Abhinav Pandey --- base/db/R/get.machine.info.R | 2 +- base/db/man/get.machine.info.Rd | 4 ++-- base/db/tests/testthat/test.check.missing.files.R | 15 +++++++-------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R index d23e5416f9e..4683cde1573 100644 --- a/base/db/R/get.machine.info.R +++ b/base/db/R/get.machine.info.R @@ -1,6 +1,6 @@ #' Get machine information from db #' @param host host information -#' @param dbfile.id dbfile id for existing records +#' @param input.args input args.r for existing records #' @param input.id input id for existing records #' @param con database connection #' diff --git a/base/db/man/get.machine.info.Rd b/base/db/man/get.machine.info.Rd index 8989221ea5b..6e57013c4d7 100644 --- a/base/db/man/get.machine.info.Rd +++ b/base/db/man/get.machine.info.Rd @@ -4,12 +4,12 @@ \alias{get.machine.info} \title{Get machine information from db} \usage{ -get.machine.info(host, dbfile.id, input.id = NULL, con) +get.machine.info(host, input.args, input.id = NULL, con = NULL) } \arguments{ \item{host}{host information} -\item{dbfile.id}{dbfile id for existing records} +\item{input.args}{input args for existing records} \item{input.id}{input id for existing records} diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R index c2de074d5d3..e779077294a 100644 --- a/base/db/tests/testthat/test.check.missing.files.R +++ b/base/db/tests/testthat/test.check.missing.files.R @@ -14,12 +14,11 @@ test_that("`check_missing_files()` able to return correct missing files", { # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile)) # Perform checks to compare results from stubbed functions to actual results expect_equal(nrow(res[[1]]), 1) - PEcAn.logger::logger.debug(res) - # expect_equal(res[[1]]$missing, FALSE) - # expect_equal(res[[1]]$empty, FALSE) - # expect_equal(res[[2]], "test") - # expect_equal(nrow(res[[3]][[1]]), 0) - # expect_equal(ncol(res[[3]][[1]]), 0) - # expect_equal(nrow(res[[4]][[1]]), 0) - # expect_equal(ncol(res[[4]][[1]]), 0) + expect_equal(res[[1]]$missing, FALSE) + expect_equal(res[[1]]$empty, FALSE) + expect_equal(res[[2]], "test") + expect_equal(nrow(res[[3]][[1]]), 0) + expect_equal(ncol(res[[3]][[1]]), 0) + expect_equal(nrow(res[[4]][[1]]), 0) + expect_equal(ncol(res[[4]][[1]]), 0) }) From e479c468f1fcca1c02dec919e4fabca1dcbf792e Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Thu, 25 Jul 2024 17:23:15 +0530 Subject: [PATCH 0023/1193] Update tests Signed-off-by: Abhinav Pandey --- base/db/R/get.machine.info.R | 105 +++++++++--------- .../tests/testthat/test.check.missing.files.R | 23 ++-- 2 files changed, 61 insertions(+), 67 deletions(-) diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R index 4683cde1573..c98bee6cf20 100644 --- a/base/db/R/get.machine.info.R +++ b/base/db/R/get.machine.info.R @@ -1,6 +1,6 @@ #' Get machine information from db #' @param host host information -#' @param input.args input args.r for existing records +#' @param input.args input args for existing records #' @param input.id input id for existing records #' @param con database connection #' @@ -8,61 +8,60 @@ #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko get.machine.info <- function(host, input.args, 
input.id = NULL, con = NULL) { - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0( - "SELECT * from machines where hostname = '", - machine.host, "'" - ), con) - - if (nrow(machine) == 0) { - PEcAn.logger::logger.error("machine not found", host$name) - return(NULL) + + machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) + machine <- db.query(paste0("SELECT * from machines where hostname = '", + machine.host, "'"), con) + + if (nrow(machine) == 0) { + PEcAn.logger::logger.error("machine not found", host$name) + return(NULL) + } + + if (missing(input.id) || is.na(input.id) || is.null(input.id)) { + input <- dbfile <- NULL + } else { + input <- db.query(paste("SELECT * from inputs where id =", input.id), con) + if (nrow(input) == 0) { + PEcAn.logger::logger.error("input not found", input.id) + return(NULL) } + + if(!is.null(input.args$dbfile.id)){ + dbfile <- + db.query( + paste( + "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =", + input.id, + " and container_type = 'Input' and machine_id =", + machine$id + ), + con + ) + }else{ + dbfile <- + db.query( + paste( + "SELECT * from dbfiles where container_id =", + input.id, + " and container_type = 'Input' and machine_id =", + machine$id + ), + con + ) + } + - if (missing(input.id) || is.na(input.id) || is.null(input.id)) { - input <- dbfile <- NULL - } else { - input <- db.query(paste("SELECT * from inputs where id =", input.id), con) - if (nrow(input) == 0) { - PEcAn.logger::logger.error("input not found", input.id) - return(NULL) - } - - if (!is.null(input.args$dbfile.id)) { - dbfile <- - db.query( - paste( - "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =", - input.id, - " and container_type = 'Input' and machine_id =", - machine$id - ), - con - ) - } else { - dbfile <- - db.query( - paste( - "SELECT * from dbfiles where container_id =", - input.id, - " and container_type = 'Input' and machine_id =", - machine$id - ), - con - ) - } - - - - if (nrow(dbfile) == 0) { - PEcAn.logger::logger.error("dbfile not found", input.id) - return(NULL) - } - if (nrow(dbfile) > 1) { - PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile) - dbfile <- dbfile[nrow(dbfile), ] - } + + if (nrow(dbfile) == 0) { + PEcAn.logger::logger.error("dbfile not found", input.id) + return(NULL) + } + if (nrow(dbfile) > 1) { + PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile) + dbfile <- dbfile[nrow(dbfile), ] } + } return(list(machine = machine, input = input, dbfile = dbfile)) } diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R index e779077294a..c0ad6794f65 100644 --- a/base/db/tests/testthat/test.check.missing.files.R +++ b/base/db/tests/testthat/test.check.missing.files.R @@ -1,24 +1,19 @@ test_that("`check_missing_files()` able to return correct missing files", { - mocked_res <- mockery::mock(list(c("A", "B"))) - mockery::stub(check_missing_files, "purrr::map_dfr", data.frame(missing = c(FALSE), empty = c(FALSE))) + # Mock `purrr::map_dfr` + mocked_res <- mockery::mock(data.frame(file = c("A", "B"), file_size = c(100, 200), missing = c(FALSE, FALSE), empty = c(FALSE, FALSE))) + mockery::stub(check_missing_files, "purrr::map_dfr", mocked_res) + res <- check_missing_files( result = list(data.frame(file = c("A", "B"))), outname = "test", existing.input = data.frame(), existing.dbfile = data.frame() ) - + # Print the 
structure of `res` for debugging str(res) - - # This function returns a list as follows: return(list(result_sizes, outlist, existing.input, existing.dbfile)) - # Perform checks to compare results from stubbed functions to actual results - expect_equal(nrow(res[[1]]), 1) - expect_equal(res[[1]]$missing, FALSE) - expect_equal(res[[1]]$empty, FALSE) - expect_equal(res[[2]], "test") - expect_equal(nrow(res[[3]][[1]]), 0) - expect_equal(ncol(res[[3]][[1]]), 0) - expect_equal(nrow(res[[4]][[1]]), 0) - expect_equal(ncol(res[[4]][[1]]), 0) + + expect_equal(length(res), 2) + expect_true(is.list(res[[1]])) + expect_true(is.list(res[[2]])) }) From a1328e1e5f75f4d5708913a7fa7b2b2250665473 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Wed, 31 Jul 2024 00:56:56 -0700 Subject: [PATCH 0024/1193] get GEFS download working again --- modules/data.atmosphere/NEWS.md | 7 + .../data.atmosphere/R/GEFS_helper_functions.R | 182 ++++++++---------- .../data.atmosphere/R/download.NOAA_GEFS.R | 47 +++-- .../data.atmosphere/R/half_hour_downscale.R | 16 +- .../data.atmosphere/man/download.NOAA_GEFS.Rd | 43 +++-- 5 files changed, 148 insertions(+), 147 deletions(-) diff --git a/modules/data.atmosphere/NEWS.md b/modules/data.atmosphere/NEWS.md index ecd7801d184..fca4c6c3a04 100644 --- a/modules/data.atmosphere/NEWS.md +++ b/modules/data.atmosphere/NEWS.md @@ -1,5 +1,12 @@ # PEcAn.data.atmosphere 1.8.0.9000 +## Fixed +* `download.NOAA_GEFS` is updated to work again with GEFS v12.3 + (the current release as of this writing in July 2024). + +## Changed +* Removed `sitename` and `username` from the formal arguments of `download.NOAA_GEFS`. + Before they were silently ignored, now they're treated as part of `...` (which is also ignored!). # PEcAn.data.atmosphere 1.8.0 diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R index 754580ae0da..684fcea9724 100644 --- a/modules/data.atmosphere/R/GEFS_helper_functions.R +++ b/modules/data.atmosphere/R/GEFS_helper_functions.R @@ -17,21 +17,11 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, download_grid <- function(ens_index, location, directory, hours_char, cycle, base_filename1, vars,working_directory){ - #for(j in 1:31){ - if(ens_index == 1){ - base_filename2 <- paste0("gec00",".t",cycle,"z.pgrb2a.0p50.f") - curr_hours <- hours_char[hours <= 384] - }else{ - if((ens_index-1) < 10){ - ens_name <- paste0("0",ens_index - 1) - }else{ - ens_name <- as.character(ens_index -1) - } - base_filename2 <- paste0("gep",ens_name,".t",cycle,"z.pgrb2a.0p50.f") - curr_hours <- hours_char - } - - + ens_base <- if (ens_index == 1) { "gec" } else { "gep" } + ens_name <- stringr::str_pad(ens_index - 1, width = 2, pad = "0") + base_filename2 <- paste0(ens_base,ens_name,".t",cycle,"z.pgrb2a.0p50.f") + curr_hours <- hours_char + for(i in 1:length(curr_hours)){ file_name <- paste0(base_filename2, curr_hours[i]) @@ -73,36 +63,11 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, model_dir <- file.path(output_directory, model_name_raw) + #Availability: most recent 4 days curr_time <- lubridate::with_tz(Sys.time(), tzone = "UTC") curr_date <- lubridate::as_date(curr_time) - - noaa_page <- readLines('https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/') - - potential_dates <- NULL - for(i in 1:length(noaa_page)){ - if(stringr::str_detect(noaa_page[i], ">gefs.")){ - end <- stringr::str_locate(noaa_page[i], ">gefs.")[2] - dates <- stringr::str_sub(noaa_page[i], start = end+1, 
end = end+8) - potential_dates <- c(potential_dates, dates) - } - } - - - last_cycle_page <- readLines(paste0('https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/gefs.', dplyr::last(potential_dates))) - - potential_cycle <- NULL - for(i in 1:length(last_cycle_page)){ - if(stringr::str_detect(last_cycle_page[i], 'href=\"')){ - end <- stringr::str_locate(last_cycle_page[i], 'href=\"')[2] - cycles <- stringr::str_sub(last_cycle_page[i], start = end+1, end = end+2) - if(cycles %in% c("00","06", "12", "18")){ - potential_cycle <- c(potential_cycle, cycles) - } - } - } - - potential_dates <- lubridate::as_date(potential_dates) - + potential_dates <- curr_date - lubridate::days(3:0) + potential_dates = potential_dates[which(potential_dates == forecast_date)] if(length(potential_dates) == 0){PEcAn.logger::logger.error("Forecast Date not available")} @@ -118,7 +83,10 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, floor(min(lat_list))) base_filename1 <- "https://nomads.ncep.noaa.gov/cgi-bin/filter_gefs_atmos_0p50a.pl?file=" - vars <- "&lev_10_m_above_ground=on&lev_2_m_above_ground=on&lev_surface=on&lev_entire_atmosphere=on&var_APCP=on&var_DLWRF=on&var_DSWRF=on&var_PRES=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&var_TCDC=on" + vars <- paste0( + "&lev_10_m_above_ground=on&lev_2_m_above_ground=on&lev_surface=on&lev_entire_atmosphere=on", + "&var_APCP=on&var_DLWRF=on&var_DSWRF=on&var_PRES=on&var_RH=on&var_TMP=on", + "&var_UGRD=on&var_VGRD=on&var_TCDC=on") for(i in 1:length(potential_dates)){ @@ -143,11 +111,11 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, print(paste("Downloading", forecast_date, cycle)) if(cycle == "00"){ - hours <- c(seq(0, 240, 3),seq(246, 384, 6)) - hours <- hours[hours<=end_hr] + hours <- c(seq(0, 240, 3),seq(246, 840, 6)) }else{ - hours <- c(seq(0, 240, 3),seq(246, min(end_hr, 840) , 6)) + hours <- c(seq(0, 240, 3),seq(246, 384 , 6)) } + hours <- hours[hours<=end_hr] hours_char <- hours hours_char[which(hours < 100)] <- paste0("0",hours[which(hours < 100)]) hours_char[which(hours < 10)] <- paste0("0",hours_char[which(hours < 10)]) @@ -163,12 +131,12 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, parallel::mclapply(X = ens_index, FUN = download_grid, - location, - directory, - hours_char, - cycle, - base_filename1, - vars, + location = location, + directory = directory, + hours_char = hours_char, + cycle = cycle, + base_filename1 = base_filename1, + vars = vars, working_directory = model_date_hour_dir, mc.cores = 1) }else{ @@ -177,6 +145,9 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, } } } + + + #' Extract and temporally downscale points from downloaded grid files #' #' @param lat_list lat for site @@ -222,23 +193,13 @@ process_gridded_noaa_download <- function(lat_list, dlwrfsfc <- array(NA, dim = c(site_length, length(hours_char))) dswrfsfc <- array(NA, dim = c(site_length, length(hours_char))) - if(ens_index == 1){ - base_filename2 <- paste0("gec00",".t",cycle,"z.pgrb2a.0p50.f") - }else{ - if(ens_index-1 < 10){ - ens_name <- paste0("0",ens_index-1) - }else{ - ens_name <- as.character(ens_index-1) - } - base_filename2 <- paste0("gep",ens_name,".t",cycle,"z.pgrb2a.0p50.f") - } + ens_base <- if (ens_index == 1) { "gec" } else { "gep" } + ens_name <- stringr::str_pad(ens_index - 1, width = 2, pad = "0") + base_filename2 <- paste0(ens_base,ens_name,".t",cycle,"z.pgrb2a.0p50.f") lats <- round(lat_list/.5)*.5 lons <- 
round(lon_list/.5)*.5 - if(lons < 0){ - lons <- 360 + lons - } curr_hours <- hours_char for(hr in 1:length(curr_hours)){ @@ -263,8 +224,13 @@ process_gridded_noaa_download <- function(lat_list, vgrd10m[s, hr] <- grib_data_df$`10[m] HTGL=Specified height level above ground; v-component of wind [m/s]`[index] if(curr_hours[hr] != "000"){ - apcpsfc[s, hr] <- grib_data_df$`SFC=Ground or water surface; 03 hr Total precipitation [kg/(m^2)]`[index] - tcdcclm[s, hr] <- grib_data_df$`RESERVED(10) (Reserved); Total cloud cover [%]`[index] + # total precip alternates being named as 3 or 6 hr total + # TODO: not sure if the contents actually differ or if this is a labeling bug in the grib files + precip_hr <- if ((as.numeric(curr_hours[hr]) %% 2) == 1) { "03" } else { "06" } + precip_name <- paste("SFC=Ground or water surface;", precip_hr, "hr Total precipitation [kg/(m^2)]") + apcpsfc[s, hr] <- grib_data_df[[precip_name]][index] + + tcdcclm[s, hr] <- grib_data_df$`EATM=Entire Atmosphere; Total cloud cover [%]`[index] dswrfsfc[s, hr] <- grib_data_df$`SFC=Ground or water surface; Downward Short-Wave Rad. Flux [W/(m^2)]`[index] dlwrfsfc[s, hr] <- grib_data_df$`SFC=Ground or water surface; Downward Long-Wave Rad. Flux [W/(m^2)]`[index] } @@ -301,17 +267,15 @@ process_gridded_noaa_download <- function(lat_list, - cycle <-forecast_time + cycle <- forecast_time curr_forecast_time <- forecast_date + lubridate::hours(cycle) - if(cycle < 10) cycle <- paste0("0",cycle) - if(cycle == "00"){ - hours <- c(seq(0, 240, 3),seq(246, 840 , 6)) - }else{ - hours <- c(seq(0, 240, 3),seq(246, 384 , 6)) + cycle <- stringr::str_pad(cycle, width = 2, pad = "0") + if (cycle == "00") { + hours <- c(seq(0, 240, 3),seq(246, 840, 6)) + } else { + hours <- c(seq(0, 240, 3),seq(246, 384, 6)) } - hours_char <- hours - hours_char[which(hours < 100)] <- paste0("0",hours[which(hours < 100)]) - hours_char[which(hours < 10)] <- paste0("0",hours_char[which(hours < 10)]) + hours_char <- stringr::str_pad(hours, width = 3, pad = "0") # 3->"003", 384->"384" raw_files <- list.files(file.path(model_name_raw_dir,forecast_date,cycle)) hours_present <- as.numeric(stringr::str_sub(raw_files, start = 25, end = 27)) @@ -341,19 +305,21 @@ process_gridded_noaa_download <- function(lat_list, FUN = extract_sites, hours_char = hours_char, hours = hours, - cycle, - site_id, - lat_list, - lon_list, + cycle = cycle, + site_id = site_id, + lat_list = lat_list, + lon_list = lon_list, working_directory = file.path(model_name_raw_dir,forecast_date,cycle), mc.cores = 1) - forecast_times <- lubridate::as_datetime(forecast_date) + lubridate::hours(as.numeric(cycle)) + lubridate::hours(as.numeric(hours_char)) + forecast_times <- lubridate::as_datetime(forecast_date) + + lubridate::hours(as.numeric(cycle)) + + lubridate::hours(as.numeric(hours_char)) - #Convert negetive longitudes to degrees east + #Convert negative longitudes to degrees east if(lon_list < 0){ lon_east <- 360 + lon_list }else{ @@ -425,17 +391,18 @@ process_gridded_noaa_download <- function(lat_list, #Calculate wind speed from east and north components wind_speed <- sqrt(noaa_data$eastward_wind$value^2 + noaa_data$northward_wind$value^2) - forecast_noaa <- tibble::tibble(time = noaa_data$air_temperature$forecast.date, - NOAA.member = noaa_data$air_temperature$ensembles, - air_temperature = noaa_data$air_temperature$value, - air_pressure= noaa_data$air_pressure$value, - relative_humidity = noaa_data$relative_humidity$value, - surface_downwelling_longwave_flux_in_air = 
noaa_data$surface_downwelling_longwave_flux_in_air$value, - surface_downwelling_shortwave_flux_in_air = noaa_data$surface_downwelling_shortwave_flux_in_air$value, - precipitation_flux = noaa_data$precipitation_flux$value, - specific_humidity = specific_humidity, - cloud_area_fraction = noaa_data$cloud_area_fraction$value, - wind_speed = wind_speed) + forecast_noaa <- tibble::tibble( + time = noaa_data$air_temperature$forecast.date, + NOAA.member = noaa_data$air_temperature$ensembles, + air_temperature = noaa_data$air_temperature$value, + air_pressure= noaa_data$air_pressure$value, + relative_humidity = noaa_data$relative_humidity$value, + surface_downwelling_longwave_flux_in_air = noaa_data$surface_downwelling_longwave_flux_in_air$value, + surface_downwelling_shortwave_flux_in_air = noaa_data$surface_downwelling_shortwave_flux_in_air$value, + precipitation_flux = noaa_data$precipitation_flux$value, + specific_humidity = specific_humidity, + cloud_area_fraction = noaa_data$cloud_area_fraction$value, + wind_speed = wind_speed) forecast_noaa$cloud_area_fraction <- forecast_noaa$cloud_area_fraction / 100 #Convert from % to proportion @@ -455,14 +422,10 @@ process_gridded_noaa_download <- function(lat_list, for (ens in 1:31) { # i is the ensemble number #Turn the ensemble number into a string - if(ens-1< 10){ - ens_name <- paste0("0",ens-1) - }else{ - ens_name <- ens - 1 - } + ens_name <- stringr::str_pad(ens - 1, width = 2, pad = "0") forecast_noaa_ens <- forecast_noaa %>% - dplyr::filter(NOAA.member == ens) %>% + dplyr::filter(.data$NOAA.member == ens) %>% dplyr::filter(!is.na(.data$air_temperature)) end_date <- forecast_noaa_ens %>% @@ -525,6 +488,15 @@ process_gridded_noaa_download <- function(lat_list, return(results_list) } #process_gridded_noaa_download + + + + + + + + + #' @title Downscale NOAA GEFS from 6hr to 1hr #' @return None #' @@ -645,6 +617,14 @@ temporal_downscale <- function(input_file, output_file, overwrite = TRUE, hr = 1 + + + + + + + + ##' @title Write NOAA GEFS netCDF ##' @name write_noaa_gefs_netcdf ##' @param df data frame of meterological variables to be written to netcdf. Columns @@ -711,4 +691,4 @@ write_noaa_gefs_netcdf <- function(df, ens = NA, lat, lon, cf_units, output_file ncdf4::nc_close(nc_flptr) #Write to the disk/storage } -} \ No newline at end of file +} diff --git a/modules/data.atmosphere/R/download.NOAA_GEFS.R b/modules/data.atmosphere/R/download.NOAA_GEFS.R index e68bc7d166a..f8f9631ae6d 100644 --- a/modules/data.atmosphere/R/download.NOAA_GEFS.R +++ b/modules/data.atmosphere/R/download.NOAA_GEFS.R @@ -6,36 +6,43 @@ ##' @references https://www.ncdc.noaa.gov/crn/measurements.html ##' ##' @section NOAA_GEFS General Information: -##' This function downloads NOAA GEFS weather data. GEFS is an ensemble of 21 different weather forecast models. -##' A 16 day forecast is avaliable every 6 hours. Each forecast includes information on a total of 8 variables. -##' These are transformed from the NOAA standard to the internal PEcAn -##' standard. -##' -##' @section Data Avaliability: -##' NOAA GEFS weather data is avaliable on a rolling 12 day basis; dates provided in "start_date" must be within this range. The end date can be any point after -##' that, but if the end date is beyond 16 days, only 16 days worth of forecast are recorded. Times are rounded down to the previous 6 hour forecast. 
NOAA -##' GEFS weather data isn't always posted immediately, and to compensate, this function adjusts requests made in the last two hours -##' back two hours (approximately the amount of time it takes to post the data) to make sure the most current forecast is used. +##' This function downloads NOAA GEFS weather data. GEFS is an ensemble of 31 different weather forecast models. +##' A 16 day forecast is available every 6 hours and a 35 day forecast is available every 24 hours. +##' Both are at 3-hour frequency for the first 10 days of the forecast and 6-hour frequency beyond that. +##' Each forecast includes information on a total of 8 variables. +##' These are transformed from the NOAA standard to the internal PEcAn standard. ##' +##' @section Data Availability: +##' NOAA GEFS weather data is available on a rolling 4 day basis. +##' Dates provided in "start_date" must be within this range. +##' The end date can be any point after that, but if the end date is beyond 16 days +##' (35 days for the midnight UTC forecast), only 16 (35) days worth of forecast are retrieved. +##' Times are rounded down to the previous 6 hour forecast. +##' +##' NOAA GEFS weather data isn't always posted immediately. Each 16-day forecast takes +##' approximately three hours to run, and the once-a-day forecasts for days 17-35 are +##' posted much later (up to 21 hours) than the forecasts for days 0 to 16. +##' See the [GEFS v12 release announcement](https://www.weather.gov/media/notification/pdf2/scn20-75gefs_v12_changes.pdf) +##' for details. +##' ##' @section Data Save Format: -##' Data is saved in the netcdf format to the specified directory. File names reflect the precision of the data to the given range of days. +##' Data is saved in the netcdf format to the specified directory. +##' File names reflect the precision of the data to the given range of days. ##' NOAA.GEFS.willow creek.3.2018-06-08T06:00.2018-06-24T06:00.nc specifies the forecast, using ensemble number 3 at willow creek on ##' June 6th, 2018 at 6:00 a.m. to June 24th, 2018 at 6:00 a.m. ##' ##' @return A list of data frames is returned containing information about the data file that can be used to locate it later. Each ##' data frame contains information about one file. ##' -##' @param outfolder Directory where results should be written -##' @param start_date, Range of dates/times to be downloaded (default assumed to be time that function is run) -##' @param end_date, end date for range of dates to be downloaded (default 16 days from start_date) +##' @param site_id The unique ID given to each site. This is used as part of the file name. ##' @param lat.in site latitude in decimal degrees ##' @param lon.in site longitude in decimal degrees -##' @param site_id The unique ID given to each site. This is used as part of the file name. -##' @param sitename Site name -##' @param username username from pecan workflow +##' @param outfolder Directory where results should be written +##' @param start_date Range of dates/times to be downloaded (default assumed to be time that function is run) +##' @param end_date end date for range of dates to be downloaded (default 16 days from start_date) +##' @param downscale logical, assumed True. Indicates whether data should be downscaled to hourly ##' @param overwrite logical. Download a fresh version even if a local file with the same name already exists? -##' @param downscale logical, assumed True. Indicated whether data should be downscaled to hourly -##' @param ... Additional optional parameters +##' @param ... 
Additional optional parameters, currently ignored ##' ##' @export ##' @@ -50,8 +57,6 @@ ##' @author Quinn Thomas, modified by K Zarada ##' download.NOAA_GEFS <- function(site_id, - sitename = NULL, - username = 'pecan', lat.in, lon.in, outfolder, diff --git a/modules/data.atmosphere/R/half_hour_downscale.R b/modules/data.atmosphere/R/half_hour_downscale.R index bb14748412a..87867093d4b 100644 --- a/modules/data.atmosphere/R/half_hour_downscale.R +++ b/modules/data.atmosphere/R/half_hour_downscale.R @@ -223,8 +223,16 @@ downscale_ShortWave_to_half_hrly <- function(df,lat, lon, hr = 0.5){ for (k in 1:nrow(data.hrly)) { if(is.na(data.hrly$surface_downwelling_shortwave_flux_in_air[k])){ - SWflux <- as.matrix(subset(df, .data$day == data.hrly$day[k] & .data$hour == data.hrly$hour[k], data.hrly$surface_downwelling_shortwave_flux_in_air[k])) - data.hrly$surface_downwelling_shortwave_flux_in_air[k] <- ifelse(data.hrly$rpotHM[k] > 0, as.numeric(SWflux[1])*(data.hrly$rpotH[k]/data.hrly$rpotHM[k]),0) + SWflux <- as.matrix( + df$surface_downwelling_shortwave_flux_in_air[ + df$day == data.hrly$day[k] & df$hour == data.hrly$hour[k] + ] + ) + data.hrly$surface_downwelling_shortwave_flux_in_air[k] <- ifelse( + data.hrly$rpotHM[k] > 0, + as.numeric(SWflux[1]) * (data.hrly$rpotH[k] / data.hrly$rpotHM[k]), + 0 + ) } } @@ -284,11 +292,9 @@ downscale_repeat_6hr_to_half_hrly <- function(df, varName, hr = 0.5){ #previous 6hr period dplyr::mutate(lead_var = dplyr::lead(df[,varName], 1)) #check for NA values and gapfill using closest timestep - for(k in 1:dim(df)[1]){ + for(k in 2:dim(df)[1]){ if (is.na(df$lead_var[k])) { df$lead_var[k] <- df$lead_var[k-1] - }else{ - df$lead_var[k] <- df$lead_var[k] } } diff --git a/modules/data.atmosphere/man/download.NOAA_GEFS.Rd b/modules/data.atmosphere/man/download.NOAA_GEFS.Rd index 05aa332be43..47dd834cc5a 100644 --- a/modules/data.atmosphere/man/download.NOAA_GEFS.Rd +++ b/modules/data.atmosphere/man/download.NOAA_GEFS.Rd @@ -6,8 +6,6 @@ \usage{ download.NOAA_GEFS( site_id, - sitename = NULL, - username = "pecan", lat.in, lon.in, outfolder, @@ -21,25 +19,21 @@ download.NOAA_GEFS( \arguments{ \item{site_id}{The unique ID given to each site. This is used as part of the file name.} -\item{sitename}{Site name} - -\item{username}{username from pecan workflow} - \item{lat.in}{site latitude in decimal degrees} \item{lon.in}{site longitude in decimal degrees} \item{outfolder}{Directory where results should be written} -\item{start_date, }{Range of dates/times to be downloaded (default assumed to be time that function is run)} +\item{start_date}{Range of dates/times to be downloaded (default assumed to be time that function is run)} -\item{end_date, }{end date for range of dates to be downloaded (default 16 days from start_date)} +\item{end_date}{end date for range of dates to be downloaded (default 16 days from start_date)} -\item{downscale}{logical, assumed True. Indicated whether data should be downscaled to hourly} +\item{downscale}{logical, assumed True. Indicates whether data should be downscaled to hourly} \item{overwrite}{logical. Download a fresh version even if a local file with the same name already exists?} -\item{...}{Additional optional parameters} +\item{...}{Additional optional parameters, currently ignored} } \value{ A list of data frames is returned containing information about the data file that can be used to locate it later. Each @@ -56,23 +50,32 @@ but is converted at the station and downloaded in Kelvin. 
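
A minimal usage sketch (illustrative only: the site id and coordinates are
hypothetical placeholders, and \code{start_date} must fall within the
rolling 4-day availability window described in the next section):

\preformatted{
res <- download.NOAA_GEFS(site_id = 1,
                          lat.in = 45.8, lon.in = -90.1,
                          outfolder = tempdir(),
                          start_date = Sys.time())
}
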
\section{NOAA_GEFS General Information}{ -This function downloads NOAA GEFS weather data. GEFS is an ensemble of 21 different weather forecast models. -A 16 day forecast is avaliable every 6 hours. Each forecast includes information on a total of 8 variables. -These are transformed from the NOAA standard to the internal PEcAn -standard. +This function downloads NOAA GEFS weather data. GEFS is an ensemble of 31 different weather forecast models. +A 16 day forecast is available every 6 hours and a 35 day forecast is available every 24 hours. +Both are at 3-hour frequency for the first 10 days of the forecast and 6-hour frequency beyond that. +Each forecast includes information on a total of 8 variables. +These are transformed from the NOAA standard to the internal PEcAn standard. } -\section{Data Avaliability}{ +\section{Data Availability}{ + +NOAA GEFS weather data is available on a rolling 4 day basis. +Dates provided in "start_date" must be within this range. +The end date can be any point after that, but if the end date is beyond 16 days +(35 days for the midnight UTC forecast), only 16 (35) days worth of forecast are retrieved. +Times are rounded down to the previous 6 hour forecast. -NOAA GEFS weather data is avaliable on a rolling 12 day basis; dates provided in "start_date" must be within this range. The end date can be any point after -that, but if the end date is beyond 16 days, only 16 days worth of forecast are recorded. Times are rounded down to the previous 6 hour forecast. NOAA -GEFS weather data isn't always posted immediately, and to compensate, this function adjusts requests made in the last two hours -back two hours (approximately the amount of time it takes to post the data) to make sure the most current forecast is used. +NOAA GEFS weather data isn't always posted immediately. Each 16-day forecast takes +approximately three hours to run, and the once-a-day forecasts for days 17-35 are +posted much later (up to 21 hours) than the forecasts for days 0 to 16. +See the [GEFS v12 release announcement](https://www.weather.gov/media/notification/pdf2/scn20-75gefs_v12_changes.pdf) +for details. } \section{Data Save Format}{ -Data is saved in the netcdf format to the specified directory. File names reflect the precision of the data to the given range of days. +Data is saved in the netcdf format to the specified directory. + File names reflect the precision of the data to the given range of days. NOAA.GEFS.willow creek.3.2018-06-08T06:00.2018-06-24T06:00.nc specifies the forecast, using ensemble number 3 at willow creek on June 6th, 2018 at 6:00 a.m. to June 24th, 2018 at 6:00 a.m. 
} From 83db1ba8cf72ebd5f44e4204463ac9c86dc78a25 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Wed, 31 Jul 2024 02:14:36 -0700 Subject: [PATCH 0025/1193] typo --- modules/data.atmosphere/R/half_hour_downscale.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data.atmosphere/R/half_hour_downscale.R b/modules/data.atmosphere/R/half_hour_downscale.R index 87867093d4b..9b2efbb08be 100644 --- a/modules/data.atmosphere/R/half_hour_downscale.R +++ b/modules/data.atmosphere/R/half_hour_downscale.R @@ -292,7 +292,7 @@ downscale_repeat_6hr_to_half_hrly <- function(df, varName, hr = 0.5){ #previous 6hr period dplyr::mutate(lead_var = dplyr::lead(df[,varName], 1)) #check for NA values and gapfill using closest timestep - for(k in 2:dim(df)[1]){ + for(k in 1:dim(df)[1]){ if (is.na(df$lead_var[k])) { df$lead_var[k] <- df$lead_var[k-1] } From 1c63c21c5c34e0a9b6442d2f33bb8f15dbf6c78f Mon Sep 17 00:00:00 2001 From: Chris Black Date: Wed, 31 Jul 2024 02:15:33 -0700 Subject: [PATCH 0026/1193] Update modules/data.atmosphere/NEWS.md --- modules/data.atmosphere/NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data.atmosphere/NEWS.md b/modules/data.atmosphere/NEWS.md index fca4c6c3a04..02b235f3718 100644 --- a/modules/data.atmosphere/NEWS.md +++ b/modules/data.atmosphere/NEWS.md @@ -1,8 +1,8 @@ # PEcAn.data.atmosphere 1.8.0.9000 ## Fixed -* `download.NOAA_GEFS` is updated to work again with GEFS v12.3 - (the current release as of this writing in July 2024). +* `download.NOAA_GEFS` is updated to work again with GEFS v12.3, + the current release as of this writing in July 2024 (#3349). ## Changed * Removed `sitename` and `username` from the formal arguments of `download.NOAA_GEFS`. From bd267b44c50360d34c3ec58d2e08284dac35e30f Mon Sep 17 00:00:00 2001 From: Chris Black Date: Wed, 31 Jul 2024 11:17:56 -0700 Subject: [PATCH 0027/1193] Update modules/data.atmosphere/R/GEFS_helper_functions.R --- modules/data.atmosphere/R/GEFS_helper_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R index 684fcea9724..9424d413138 100644 --- a/modules/data.atmosphere/R/GEFS_helper_functions.R +++ b/modules/data.atmosphere/R/GEFS_helper_functions.R @@ -68,7 +68,7 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, curr_date <- lubridate::as_date(curr_time) potential_dates <- curr_date - lubridate::days(3:0) - potential_dates = potential_dates[which(potential_dates == forecast_date)] + potential_dates <- potential_dates[which(potential_dates == forecast_date)] if(length(potential_dates) == 0){PEcAn.logger::logger.error("Forecast Date not available")} From ecf3d043d81d11c0d087b5576a29be162b1fd1d6 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Mon, 5 Aug 2024 16:03:59 -0700 Subject: [PATCH 0028/1193] Update modules/data.atmosphere/R/GEFS_helper_functions.R --- modules/data.atmosphere/R/GEFS_helper_functions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R index 9424d413138..b60392c2964 100644 --- a/modules/data.atmosphere/R/GEFS_helper_functions.R +++ b/modules/data.atmosphere/R/GEFS_helper_functions.R @@ -18,8 +18,8 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, download_grid <- function(ens_index, location, directory, hours_char, cycle, base_filename1, 
vars,working_directory){ ens_base <- if (ens_index == 1) { "gec" } else { "gep" } - ens_name <- stringr::str_pad(ens_index - 1, width = 2, pad = "0") - base_filename2 <- paste0(ens_base,ens_name,".t",cycle,"z.pgrb2a.0p50.f") + ens_idx <- stringr::str_pad(ens_index - 1, width = 2, pad = "0") + base_filename2 <- paste0(ens_base,ens_idx,".t",cycle,"z.pgrb2a.0p50.f") curr_hours <- hours_char for(i in 1:length(curr_hours)){ From df252a071c02a2a492d8b1c4249859df3af1ae8c Mon Sep 17 00:00:00 2001 From: Chris Black Date: Mon, 5 Aug 2024 16:26:02 -0700 Subject: [PATCH 0029/1193] clarify names --- modules/data.atmosphere/R/GEFS_helper_functions.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R index b60392c2964..67ac51903d8 100644 --- a/modules/data.atmosphere/R/GEFS_helper_functions.R +++ b/modules/data.atmosphere/R/GEFS_helper_functions.R @@ -17,9 +17,9 @@ noaa_grid_download <- function(lat_list, lon_list, forecast_time, forecast_date, download_grid <- function(ens_index, location, directory, hours_char, cycle, base_filename1, vars,working_directory){ - ens_base <- if (ens_index == 1) { "gec" } else { "gep" } - ens_idx <- stringr::str_pad(ens_index - 1, width = 2, pad = "0") - base_filename2 <- paste0(ens_base,ens_idx,".t",cycle,"z.pgrb2a.0p50.f") + member_type <- if (ens_index == 1) { "gec" } else { "gep" } # "_c_ontrol", "_p_erturbed" + ens_idxname <- stringr::str_pad(ens_index - 1, width = 2, pad = "0") + base_filename2 <- paste0(member_type,ens_idxname,".t",cycle,"z.pgrb2a.0p50.f") curr_hours <- hours_char for(i in 1:length(curr_hours)){ From 03fc21308d756a50fa585b642ce9c7a9c8df0e8b Mon Sep 17 00:00:00 2001 From: Chris Black Date: Mon, 5 Aug 2024 16:30:09 -0700 Subject: [PATCH 0030/1193] clarify names --- modules/data.atmosphere/R/GEFS_helper_functions.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R index 67ac51903d8..99d3783df46 100644 --- a/modules/data.atmosphere/R/GEFS_helper_functions.R +++ b/modules/data.atmosphere/R/GEFS_helper_functions.R @@ -193,9 +193,9 @@ process_gridded_noaa_download <- function(lat_list, dlwrfsfc <- array(NA, dim = c(site_length, length(hours_char))) dswrfsfc <- array(NA, dim = c(site_length, length(hours_char))) - ens_base <- if (ens_index == 1) { "gec" } else { "gep" } - ens_name <- stringr::str_pad(ens_index - 1, width = 2, pad = "0") - base_filename2 <- paste0(ens_base,ens_name,".t",cycle,"z.pgrb2a.0p50.f") + member_type <- if (ens_index == 1) { "gec" } else { "gep" } # "_c_ontrol", "_p_erturbed" + ens_idxname <- stringr::str_pad(ens_index - 1, width = 2, pad = "0") + base_filename2 <- paste0(member_type,ens_idxname,".t",cycle,"z.pgrb2a.0p50.f") lats <- round(lat_list/.5)*.5 lons <- round(lon_list/.5)*.5 From abcda0c007313bffe31b811b0515765b57505436 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Fri, 9 Aug 2024 16:04:27 -0700 Subject: [PATCH 0031/1193] add myself to data.atm author list --- modules/data.atmosphere/DESCRIPTION | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION index 893b7b1da18..cb8ccc59ab0 100644 --- a/modules/data.atmosphere/DESCRIPTION +++ b/modules/data.atmosphere/DESCRIPTION @@ -2,17 +2,21 @@ Package: PEcAn.data.atmosphere Type: Package Title: PEcAn Functions Used for 
Managing Climate Driver Data
Version: 1.8.0.9000
-Authors@R: c(person("Mike", "Dietze", role = c("aut"),
-                  email = "dietze@bu.edu"),
-             person("David", "LeBauer", role = c("aut", "cre"),
-                  email = "dlebauer@email.arizona.edu"),
-             person("Carl", "Davidson", role = c("aut"),
-                  email = "davids14@illinois.edu"),
-             person("Rob", "Kooper", role = c("aut"),
-                  email = "kooper@illinois.edu"),
-             person("Deepak", "Jaiswal", role = c("aut"),
-                  email = "djaiswal@djaiswal.edu"),
-             person("University of Illinois, NCSA", role = c("cph")))
+Authors@R: c(
+  person("Mike", "Dietze", role = c("aut"),
+         email = "dietze@bu.edu"),
+  person("David", "LeBauer", role = c("aut", "cre"),
+         email = "dlebauer@email.arizona.edu"),
+  person("Carl", "Davidson", role = c("aut"),
+         email = "davids14@illinois.edu"),
+  person("Rob", "Kooper", role = c("aut"),
+         email = "kooper@illinois.edu"),
+  person("Deepak", "Jaiswal", role = c("aut"),
+         email = "djaiswal@djaiswal.edu"),
+  person("Chris", "Black", role = c("ctb"),
+         email = "chris@ckblack.org",
+         comment = c(ORCID="https://orcid.org/0000-0001-8382-298X")),
+  person("University of Illinois, NCSA", role = c("cph")))
 Description: The Predictive Ecosystem Carbon Analyzer
     (PEcAn) is a scientific workflow management tool that
     is designed to simplify the management of model
     parameterization, execution, and analysis. The PECAn.data.atmosphere

From 63f270f89618abe745c3502587a24b49762e30a8 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Wed, 14 Aug 2024 23:18:53 +0530
Subject: [PATCH 0032/1193] Refactor extra variables in `run.meta.analysis`

Signed-off-by: Abhinav Pandey
---
 modules/meta.analysis/R/run.meta.analysis.R | 34 ++++++++++++---------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/modules/meta.analysis/R/run.meta.analysis.R b/modules/meta.analysis/R/run.meta.analysis.R
index 65afcdf61bd..4f8841ae46b 100644
--- a/modules/meta.analysis/R/run.meta.analysis.R
+++ b/modules/meta.analysis/R/run.meta.analysis.R
@@ -216,22 +216,26 @@ runModule.run.meta.analysis <- function(settings) {
     PEcAn.logger::logger.info(paste0("Running meta-analysis on all PFTs listed by any Settings object in the list: ",
                                      paste(pft.names, collapse = ", ")))
 
-    iterations <- settings$meta.analysis$iter
-    random <- settings$meta.analysis$random.effects$on
-    use_ghs <- settings$meta.analysis$random.effects$use_ghs
-    threshold <- settings$meta.analysis$threshold
-    dbfiles <- settings$database$dbfiles
-    database <- settings$database$bety
-    run.meta.analysis(pfts, iterations, random, threshold, dbfiles, database, use_ghs)
+    run.meta.analysis(
+      pfts,
+      settings$meta.analysis$iter,
+      settings$meta.analysis$random.effects$on,
+      settings$meta.analysis$threshold,
+      settings$database$dbfiles,
+      settings$database$bety,
+      settings$meta.analysis$random.effects$use_ghs
+    )
   } else if (PEcAn.settings::is.Settings(settings)) {
-    pfts <- settings$pfts
-    iterations <- settings$meta.analysis$iter
-    random <- settings$meta.analysis$random.effects$on
-    use_ghs <- settings$meta.analysis$random.effects$use_ghs
-    threshold <- settings$meta.analysis$threshold
-    dbfiles <- settings$database$dbfiles
-    database <- settings$database$bety
-    run.meta.analysis(pfts, iterations, random, threshold, dbfiles, database, use_ghs, update = settings$meta.analysis$update)
+    run.meta.analysis(
+      settings$pfts,
+      settings$meta.analysis$iter,
+      settings$meta.analysis$random.effects$on,
+      settings$meta.analysis$threshold,
+      settings$database$dbfiles,
+      settings$database$bety,
+      settings$meta.analysis$random.effects$use_ghs,
+      update 
= settings$meta.analysis$update + ) } else { stop("runModule.run.meta.analysis only works with Settings or MultiSettings") } From 74003d9582e8ec0a99f303f9b3c8e4f4777298ac Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Wed, 21 Aug 2024 20:24:46 +0530 Subject: [PATCH 0033/1193] get existing machine info using helper function Signed-off-by: Abhinav Pandey --- base/db/R/convert_input.R | 26 +++++++++--------- base/db/R/get.machine.info.R | 48 ++++++++++++++++++++++----------- base/db/man/get.machine.host.Rd | 22 +++++++++++++++ 3 files changed, 66 insertions(+), 30 deletions(-) create mode 100644 base/db/man/get.machine.host.Rd diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R index ba2d7a3a5f0..9cc5c8f3c03 100644 --- a/base/db/R/convert_input.R +++ b/base/db/R/convert_input.R @@ -176,17 +176,15 @@ convert_input <- # Date/time processing for existing input existing.input[[i]]$start_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$start_date), "UTC") existing.input[[i]]$end_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$end_date), "UTC") - + ## Obtain machine information + machine.host.info <- get.machine.host(host, con = con) + machine.host <- machine.host.info$machine.host + machine <- machine.host.info$machine #Grab machine info of file that exists existing.machine <- db.query(paste0("SELECT * from machines where id = '", existing.dbfile[[i]]$machine_id, "'"), con) - #Grab machine info of host machine - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0("SELECT * from machines where hostname = '", - machine.host, "'"), con) - # If the files aren't on the machine, we have to download them, so "overwrite" is meaningless. if (existing.machine$id == machine$id) { @@ -353,9 +351,9 @@ convert_input <- existing.dbfile$machine_id, "'"), con) #Grab machine info of host machine - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0("SELECT * from machines where hostname = '", - machine.host, "'"), con) + machine.host.info <- get.machine.host(host, con = con) + machine.host <- machine.host.info$machine.host + machine <- machine.host.info$machine if (existing.machine$id != machine$id) { @@ -475,11 +473,11 @@ convert_input <- existing.machine <- db.query(paste0("SELECT * from machines where id = '", existing.dbfile$machine_id, "'"), con) - #Grab machine info of - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0("SELECT * from machines where hostname = '", - machine.host, "'"), con) - + #Grab machine info of host machine + machine.host.info <- get.machine.host(host, con = con) + machine.host <- machine.host.info$machine.host + machine <- machine.host.info$machine + if(existing.machine$id != machine$id){ PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.") PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency") diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R index c98bee6cf20..979b1f6bb33 100644 --- a/base/db/R/get.machine.info.R +++ b/base/db/R/get.machine.info.R @@ -3,21 +3,21 @@ #' @param input.args input args for existing records #' @param input.id input id for existing records #' @param con database connection -#' +#' #' @return list of machine, input, and dbfile records #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony 
Gardella, Luke Dramko get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) { - - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0("SELECT * from machines where hostname = '", - machine.host, "'"), con) - + + machine.host.info <- get.machine.host(host, con = con) + machine.host <- machine.host.info$machine.host + machine <- machine.host.info$machine + if (nrow(machine) == 0) { PEcAn.logger::logger.error("machine not found", host$name) return(NULL) } - + if (missing(input.id) || is.na(input.id) || is.null(input.id)) { input <- dbfile <- NULL } else { @@ -26,19 +26,19 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) { PEcAn.logger::logger.error("input not found", input.id) return(NULL) } - - if(!is.null(input.args$dbfile.id)){ + + if (!is.null(input.args$dbfile.id)) { dbfile <- db.query( paste( - "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =", + "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =", input.id, " and container_type = 'Input' and machine_id =", machine$id ), con - ) - }else{ + ) + } else { dbfile <- db.query( paste( @@ -48,11 +48,11 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) { machine$id ), con - ) + ) } - - + + if (nrow(dbfile) == 0) { PEcAn.logger::logger.error("dbfile not found", input.id) return(NULL) @@ -63,5 +63,21 @@ get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) { } } - return(list(machine = machine, input = input, dbfile = dbfile)) + return(list(machine = machine, input = input, dbfile = dbfile)) +} + +#' Helper Function to retrieve machine host and machine informations +#' @param host host information +#' @param con database connection +#' @return list of machine host and machine information +#' @author Abhinav Pandey +get.machine.host <- function(host, con = NULL) { + #Grab machine info of host machine + machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) + machine <- db.query(paste0( + "SELECT * from machines where hostname = '", + machine.host, "'" + ), con) + + return(list(machine.host, machine)) } diff --git a/base/db/man/get.machine.host.Rd b/base/db/man/get.machine.host.Rd new file mode 100644 index 00000000000..926035dec0c --- /dev/null +++ b/base/db/man/get.machine.host.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.machine.info.R +\name{get.machine.host} +\alias{get.machine.host} +\title{Helper Function to retrieve machine host and machine informations} +\usage{ +get.machine.host(host, con = NULL) +} +\arguments{ +\item{host}{host information} + +\item{con}{database connection} +} +\value{ +list of machine host and machine information +} +\description{ +Helper Function to retrieve machine host and machine informations +} +\author{ +Abhinav Pandey +} From a578be2dfc274c10a100c90e1febf1474d5289f7 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Wed, 9 Oct 2024 11:45:13 +0530 Subject: [PATCH 0034/1193] Applied changes as suggested by @infotroph Signed-off-by: Abhinav Pandey --- base/db/R/add.database.entries.R | 161 ++++++++++++++++--------------- base/db/R/convert_input.R | 22 +++-- 2 files changed, 100 insertions(+), 83 deletions(-) diff --git a/base/db/R/add.database.entries.R b/base/db/R/add.database.entries.R index d3eb994a646..8b36e884398 100644 --- a/base/db/R/add.database.entries.R +++ b/base/db/R/add.database.entries.R @@ -23,99 
+23,108 @@ add.database.entries <- function( result, con, start_date, - end_date, write, overwrite, + end_date, overwrite, insert.new.file, input.args, machine, mimetype, formatname, allow.conflicting.dates, ensemble, ensemble_name, existing.input, existing.dbfile, input) { - if (write) { - # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result. - # This list will be returned. - newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null. + # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result. + # This list will be returned. + newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null. - for (i in 1:length(result)) { # Master for loop - id_not_added <- TRUE + for (i in 1:length(result)) { # Master for loop + id_not_added <- TRUE - if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 && - (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) { - # Updating record with new dates - db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, "' WHERE id=", existing.input[[i]]$id), con) - id_not_added <- FALSE + if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 && + (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) { + # Updating record with new dates + db.query( + paste0( + "UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, + "' WHERE id=", existing.input[[i]]$id + ), + con + ) + id_not_added <- FALSE - # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration. - newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) - newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id) - } - - if (overwrite) { - # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record) - if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) { - db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), "' WHERE id=", existing.input[[i]]$id), con) - } + # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration. + newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) + newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id) + } - if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) { - db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), "', file_name='", result[[i]]$dbfile.name[1], "' WHERE id=", existing.dbfile[[i]]$id), con) - } + if (overwrite) { + # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record) + if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) { + db.query( + paste0( + "UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), + "', file_name='", result[[i]]$dbfile.name[1], + "' WHERE id=", existing.dbfile[[i]]$id + ), + con + ) } - # If there is no ensemble then for each record there should be one parent - # But when you have ensembles, all of the members have one parent !! 
- parent.id <- if (is.numeric(ensemble)) { - ifelse(is.null(input[[i]]), NA, input[[1]]$id) - } else { - ifelse(is.null(input[[i]]), NA, input[[i]]$id) + if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) { + db.query(paste0( + "UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), + "', file_name='", result[[i]]$dbfile.name[1], + "' WHERE id=", existing.dbfile[[i]]$id + ), con) } + } + # If there is no ensemble then for each record there should be one parent + # But when you have ensembles, all of the members have one parent !! + parent.id <- if (is.numeric(ensemble)) { + ifelse(is.null(input[[i]]), NA, input[[1]]$id) + } else { + ifelse(is.null(input[[i]]), NA, input[[i]]$id) + } - if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) { - site.id <- input.args$newsite - } - if (insert.new.file && id_not_added) { - dbfile.id <- dbfile.insert( - in.path = dirname(result[[i]]$file[1]), - in.prefix = result[[i]]$dbfile.name[1], - "Input", - existing.input[[i]]$id, - con, - reuse = TRUE, - hostname = machine$hostname - ) + if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) { + site.id <- input.args$newsite + } - newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) - newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id) - } else if (id_not_added) { - # This is to tell input.insert if we are writing ensembles - # Why does it need it? Because it checks for inputs with the same time period, site, and machine - # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition - ens.flag <- if (!is.null(ensemble) | is.null(ensemble_name)) TRUE else FALSE + if (insert.new.file && id_not_added) { + dbfile.id <- dbfile.insert( + in.path = dirname(result[[i]]$file[1]), + in.prefix = result[[i]]$dbfile.name[1], + "Input", + existing.input[[i]]$id, + con, + reuse = TRUE, + hostname = machine$hostname + ) - new_entry <- dbfile.input.insert( - in.path = dirname(result[[i]]$file[1]), - in.prefix = result[[i]]$dbfile.name[1], - siteid = site.id, - startdate = start_date, - enddate = end_date, - mimetype = mimetype, - formatname = formatname, - parentid = parent.id, - con = con, - hostname = machine$hostname, - allow.conflicting.dates = allow.conflicting.dates, - ens = ens.flag - ) + newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) + newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id) + } else if (id_not_added) { + # This is to tell input.insert if we are writing ensembles + # Why does it need it? 
Because it checks for inputs with the same time period, site, and machine
+      # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition
+      ens.flag <- if (!is.null(ensemble) || is.null(ensemble_name)) TRUE else FALSE
 
-        new_entry <- dbfile.input.insert(
-          in.path = dirname(result[[i]]$file[1]),
-          in.prefix = result[[i]]$dbfile.name[1],
-          siteid = site.id,
-          startdate = start_date,
-          enddate = end_date,
-          mimetype = mimetype,
-          formatname = formatname,
-          parentid = parent.id,
-          con = con,
-          hostname = machine$hostname,
-          allow.conflicting.dates = allow.conflicting.dates,
-          ens = ens.flag
-        )
+      new_entry <- dbfile.input.insert(
+        in.path = dirname(result[[i]]$file[1]),
+        in.prefix = result[[i]]$dbfile.name[1],
+        siteid = site.id,
+        startdate = start_date,
+        enddate = end_date,
+        mimetype = mimetype,
+        formatname = formatname,
+        parentid = parent.id,
+        con = con,
+        hostname = machine$hostname,
+        allow.conflicting.dates = allow.conflicting.dates,
+        ens = ens.flag
+      )
 
-        newinput$input.id <- c(newinput$input.id, new_entry$input.id)
-        newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
-      }
-    } # End for loop
+      newinput$input.id <- c(newinput$input.id, new_entry$input.id)
+      newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id)
+    }
+  } # End for loop
 
-    successful <- TRUE
-    return(newinput)
-  } else {
-    PEcAn.logger::logger.warn("Input was not added to the database")
-    successful <- TRUE
-    return(NULL)
-  }
+  return(newinput)
 }
diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 8828d069d6c..ed267440fbc 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -591,13 +591,21 @@ convert_input <-
 
     #---------------------------------------------------------------#
     # New arrangement of database adding code to deal with ensembles.
 
-    return (add.database.entries(result, con, start_date,
-                                end_date, write, overwrite,
-                                insert.new.file, input.args,
-                                machine, mimetype, formatname,
-                                allow.conflicting.dates, ensemble,
-                                ensemble_name, existing.input,
-                                existing.dbfile, input))
+    if(write) {
+      add_entries_result <- add.database.entries(result, con, start_date,
+                                                 end_date, overwrite,
+                                                 insert.new.file, input.args,
+                                                 machine, mimetype, formatname,
+                                                 allow.conflicting.dates, ensemble,
+                                                 ensemble_name, existing.input,
+                                                 existing.dbfile, input)
+    } else {
+      PEcAn.logger::logger.warn("Input was not added to the database")
+      successful <- TRUE
+      return(NULL)
+    }
+    successful <- TRUE
+    return (add_entries_result)
 } # convert_input

From 293a68befdc9452b2011da4f6320da502c91b79d Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Wed, 9 Oct 2024 11:55:21 +0530
Subject: [PATCH 0035/1193] Minor review changes

Signed-off-by: Abhinav Pandey
---
 base/db/R/check.missing.files.R                   | 8 ++++----
 base/db/R/convert_input.R                         | 2 +-
 base/db/tests/testthat/test.check.missing.files.R | 1 -
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R
index 617878496de..29ce044f68c 100644
--- a/base/db/R/check.missing.files.R
+++ b/base/db/R/check.missing.files.R
@@ -8,7 +8,7 @@
 #'
 #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
 
-check_missing_files <- function(result, outname, existing.input = NULL, existing.dbfile = NULL) {
+check_missing_files <- function(result, existing.input = NULL, existing.dbfile = NULL) {
   result_sizes <- purrr::map_dfr(
     result,
     ~ dplyr::mutate(
@@ -35,12 +35,12 @@
   }
 
 
-  # Wrap in a list for consistant processing later
-  if (exists("existing.input") && is.data.frame(existing.input)) {
+  # Wrap in a list for consistent processing later
+  if (is.data.frame(existing.input)) {
     existing.input <- list(existing.input)
   }
 
-  if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) {
+  if 
(is.data.frame(existing.dbfile)) { existing.dbfile <- list(existing.dbfile) } return(list(existing.input, existing.dbfile)) diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R index ed267440fbc..a074a689389 100644 --- a/base/db/R/convert_input.R +++ b/base/db/R/convert_input.R @@ -583,7 +583,7 @@ convert_input <- #--------------------------------------------------------------------------------------------------# # Check if result has empty or missing files - checked.missing.files <- check_missing_files(result, outname, existing.input, existing.dbfile) + checked.missing.files <- check_missing_files(result, existing.input, existing.dbfile) # Unwrap parameters after performing checks for missing files existing.input <- checked.missing.files$existing.input diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R index c0ad6794f65..bc61bb1ad4a 100644 --- a/base/db/tests/testthat/test.check.missing.files.R +++ b/base/db/tests/testthat/test.check.missing.files.R @@ -5,7 +5,6 @@ test_that("`check_missing_files()` able to return correct missing files", { res <- check_missing_files( result = list(data.frame(file = c("A", "B"))), - outname = "test", existing.input = data.frame(), existing.dbfile = data.frame() ) From f7f6926fa14c5c5e8ee776b74e0ac5fd77d56048 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Wed, 9 Oct 2024 11:55:36 +0530 Subject: [PATCH 0036/1193] Update base/db/R/get.machine.info.R Co-authored-by: Chris Black --- base/db/R/get.machine.info.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R index 979b1f6bb33..31f489daddc 100644 --- a/base/db/R/get.machine.info.R +++ b/base/db/R/get.machine.info.R @@ -7,7 +7,7 @@ #' @return list of machine, input, and dbfile records #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko -get.machine.info <- function(host, input.args, input.id = NULL, con = NULL) { +get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) { machine.host.info <- get.machine.host(host, con = con) machine.host <- machine.host.info$machine.host From 8f820b027cb7fb5da70f8f66e8f6e88abd1d4f8b Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Wed, 9 Oct 2024 11:56:54 +0530 Subject: [PATCH 0037/1193] Apply suggestions from code review Co-authored-by: Chris Black --- base/db/R/check.missing.files.R | 1 - base/db/R/convert_input.R | 6 +++++- base/db/R/get.machine.info.R | 4 ++-- base/db/tests/testthat/test.check.missing.files.R | 6 ++---- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R index 29ce044f68c..f3a496cf5de 100644 --- a/base/db/R/check.missing.files.R +++ b/base/db/R/check.missing.files.R @@ -1,7 +1,6 @@ #' Function to check if result has empty or missing files #' #' @param result A list of dataframes with file paths -#' @param outname Name of the output file #' @param existing.input Existing input records #' @param existing.dbfile Existing dbfile records #' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R index a074a689389..042c9da08db 100644 --- a/base/db/R/convert_input.R +++ b/base/db/R/convert_input.R @@ -380,7 +380,7 @@ convert_input <- if (!is.null(ensemble) && ensemble) { return.all <-TRUE - } else{ + } else { 
return.all <- FALSE } existing.dbfile <- dbfile.input.check(siteid = site.id, @@ -518,6 +518,10 @@ convert_input <- # Get machine information machine.info <- get.machine.info(host, input.args = input.args, input.id = input.id) + if (any(sapply(machine.info, is.null))) { + PEcAn.logger::logger.error("failed lookup of inputs or dbfiles") + return(NULL) + } machine <- machine.info$machine input <- machine.info$input dbfile <- machine.info$dbfile diff --git a/base/db/R/get.machine.info.R b/base/db/R/get.machine.info.R index 31f489daddc..14123a586e9 100644 --- a/base/db/R/get.machine.info.R +++ b/base/db/R/get.machine.info.R @@ -18,7 +18,7 @@ get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) { return(NULL) } - if (missing(input.id) || is.na(input.id) || is.null(input.id)) { + if (is.na(input.id) || is.null(input.id)) { input <- dbfile <- NULL } else { input <- db.query(paste("SELECT * from inputs where id =", input.id), con) @@ -71,7 +71,7 @@ get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) { #' @param con database connection #' @return list of machine host and machine information #' @author Abhinav Pandey -get.machine.host <- function(host, con = NULL) { +get_machine_host <- function(host, con) { #Grab machine info of host machine machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) machine <- db.query(paste0( diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R index bc61bb1ad4a..75a531283dd 100644 --- a/base/db/tests/testthat/test.check.missing.files.R +++ b/base/db/tests/testthat/test.check.missing.files.R @@ -1,7 +1,7 @@ test_that("`check_missing_files()` able to return correct missing files", { # Mock `purrr::map_dfr` - mocked_res <- mockery::mock(data.frame(file = c("A", "B"), file_size = c(100, 200), missing = c(FALSE, FALSE), empty = c(FALSE, FALSE))) - mockery::stub(check_missing_files, "purrr::map_dfr", mocked_res) + mocked_size <- mockery::mock(100,200) + mockery::stub(check_missing_files, "file.size", mocked_res) res <- check_missing_files( result = list(data.frame(file = c("A", "B"))), @@ -9,8 +9,6 @@ test_that("`check_missing_files()` able to return correct missing files", { existing.dbfile = data.frame() ) - # Print the structure of `res` for debugging - str(res) expect_equal(length(res), 2) expect_true(is.list(res[[1]])) From 319ab77de17469019c3dbabb9dfa6a32f2c6bf4d Mon Sep 17 00:00:00 2001 From: Quentin Bell Date: Thu, 14 Nov 2024 13:32:45 +0200 Subject: [PATCH 0038/1193] Switched from individual parameter writes to using the vector functionality of SticsRFiles::set_param_xml. 
--- models/stics/R/write.config.STICS.R | 1339 +++++++-------------------- 1 file changed, 351 insertions(+), 988 deletions(-) diff --git a/models/stics/R/write.config.STICS.R b/models/stics/R/write.config.STICS.R index 39802f5520a..dd8afc7d9ac 100644 --- a/models/stics/R/write.config.STICS.R +++ b/models/stics/R/write.config.STICS.R @@ -15,7 +15,7 @@ ##' @author Istem Fer ##-------------------------------------------------------------------------------------------------# write.config.STICS <- function(defaults, trait.values, settings, run.id) { - + ## the rest of the code assumes only plant PFTs ## little modification here as not to have a bigger re-write for now if(any(grepl("soil", names(trait.values)))){ @@ -59,8 +59,8 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { p2 <- ifelse(events_file$rotation$planted_crop2[uic] != "-99.0", tolower(events_file$rotation$planted_crop2[uic]), "") uname <- paste0(p1,p2) usmdirs[uic] <- paste0(file.path(settings$host$rundir, run.id, uname), "_", - lubridate::year(events_file$rotation$rotation_begin[uic]), "-", - lubridate::year(events_file$rotation$rotation_end[uic])) + lubridate::year(events_file$rotation$rotation_begin[uic]), "-", + lubridate::year(events_file$rotation$rotation_end[uic])) } }else{ @@ -123,13 +123,13 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { } } - + } # TODO: have a better way to determine USMs ########################## finish usmdirs - + ## make sure rundir and outdir exist dir.create(rundir, showWarnings = FALSE, recursive = TRUE) @@ -146,13 +146,13 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { prf_list$entry$text <- rundir XML::saveXML(PEcAn.settings::listToXml(prf_list, "properties"), - file = file.path(cfgdir, "preferences.xml"), - prefix = '\n\n') + file = file.path(cfgdir, "preferences.xml"), + prefix = '\n\n') # stics and javastics path stics_path <- settings$model$binary - + # Per STICS development team, there are two types of STICS inputs # Global input: _plt.xml, param_gen.xml, param_newform.xml @@ -164,9 +164,11 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { ## this is where we overwrite model parameters + # Convert pecan parameters to stics names + trait.values <- pecan2stics(trait.values) # read in template plt file, has all the formalisms plt_xml <- XML::xmlParse(system.file("crop_plt.xml", package = "PEcAn.STICS")) - #plt_list <- XML::xmlToList(plt_xml) + plt_files <- list() for (pft in seq_along(trait.values)) { @@ -175,7 +177,6 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { plant_file <- file.path(rundir, paste0(names(trait.values)[pft], "_plt.xml")) - if(names(trait.values)[pft] != "env"){ # save the template, will be overwritten below XML::saveXML(plt_xml, file = plant_file) @@ -183,505 +184,69 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { next } + # Apply changes to those parameters specified by trait.values for this pft. 
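+      # (Assumed shape, for illustration: after pecan2stics() above, pft.traits
+      # should be a named numeric vector whose names are valid STICS parameter
+      # names, e.g. c(tdmin = 0, tdmax = 28), so the single vectorized call
+      # below replaces the old per-parameter writes.)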
+ if (!is.null(pft.traits)) { + SticsRFiles::set_param_xml(plant_file, param = names(pft.traits), values = as.list(unname(pft.traits)), overwrite = TRUE) + } + plt_files[[pft]] <- plant_file # to learn the parameters in a plant file - # SticsRFiles::get_param_info(file_path = plant_file) - - # go over each formalism and replace params following the order in crop_plt - # TODO: vary more params + # SticsRFiles::get_param_info() - # plant name and group - # effect of atmospheric CO2 concentration - - # phasic development # to see parameters per formalism # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "phasic development") # unlist(values) + # Creating a dataframe of parameter names and their values for feeding into SticsRFiles::set_param_xml. + # Note that the parameters in this data frame are either hardcoded for now or otherwise require special treatment. + plt_df <- data.frame(codebfroid = 2) # vernalization requirement, hardcoding for now, 2==yes. + # name code of plant in 3 letters # a handful of plants have to have specific codes, e.g. forages need to be 'fou' and vine needs to be 'vig' # but others can be anything? if not, either consider a LUT or passing via settings - if(names(trait.values)[pft] %in% c("frg", "wcl", "alf")){ - codeplante <- 'fou' - codeperenne <- 2 + if(names(trait.values)[pft] %in% c("frg", "wcl", "alf")){ + plt_df$codeplante <- "fou" + plt_df$codeperenne <- 2 }else{ - codeplante <- base::substr(names(trait.values)[pft],1,3) - codeperenne <- 1 - } - codebfroid <- 2 # vernalization requirement, hardcoding for now, 2==yes - SticsRFiles::set_param_xml(plant_file, "codeplante", codeplante, overwrite = TRUE) - SticsRFiles::set_param_xml(plant_file, "codeperenne", codeperenne, overwrite = TRUE) - SticsRFiles::set_param_xml(plant_file, "codebfroid", codebfroid, overwrite = TRUE) - - # minimum temperature below which development stops (degree C) - if ("tdmin" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "tdmin", pft.traits[which(pft.names == "tdmin")], overwrite = TRUE) - } - - # maximum temperature above which development stops (degree C) - if ("tdmax" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "tdmax", pft.traits[which(pft.names == "tdmax")], overwrite = TRUE) - } - - # basal photoperiod - if ("phobase" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "phobase", pft.traits[which(pft.names == "phobase")], overwrite = TRUE) - } - - # saturating photoperiod - if ("phosat" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "phosat", pft.traits[which(pft.names == "phosat")], overwrite = TRUE) - } - - - # maximum phasic delay allowed due to stresses - if ("phasic_delay_max" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "stressdev", pft.traits[which(pft.names == "phasic_delay_max")], overwrite = TRUE) - } - - # minimum number of vernalising days (d) [0,7] - if ("vernalization_days_min" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "jvcmini", round(pft.traits[which(pft.names == "vernalization_days_min")]), overwrite = TRUE) - } - - # day of initiation of vernalisation in perennial crops (julian d) [1,731] - # this only takes effect for perennial crops - if ("vernalization_init" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "julvernal", round(pft.traits[which(pft.names == "vernalization_init")]), overwrite = TRUE) - } - - # optimal temperature for vernalisation (degreeC) - if ("vernalization_TOpt" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, 
"tfroid", pft.traits[which(pft.names == "vernalization_TOpt")], overwrite = TRUE) - } - - # semi thermal amplitude for vernalising effect (degreeC) - if ("vernalization_TAmp" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "ampfroid", pft.traits[which(pft.names == "vernalization_TAmp")], overwrite = TRUE) - } - - if ("coeflevamf" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "coeflevamf", pft.traits[which(pft.names == "coeflevamf")], overwrite = TRUE) - } - - if ("coefamflax" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "coefamflax", pft.traits[which(pft.names == "coefamflax")], overwrite = TRUE) - } - - if ("coeflaxsen" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "coeflaxsen", pft.traits[which(pft.names == "coeflaxsen")], overwrite = TRUE) - } - - if ("coefsenlan" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "coefsenlan", pft.traits[which(pft.names == "coefsenlan")], overwrite = TRUE) - } - - if ("coeflevdrp" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "coeflevdrp", pft.traits[which(pft.names == "coeflevdrp")], overwrite = TRUE) - } - - if ("coefdrpmat" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "coefdrpmat", pft.traits[which(pft.names == "coefdrpmat")], overwrite = TRUE) - } - - if ("coefflodrp" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "coefflodrp", pft.traits[which(pft.names == "coefflodrp")], overwrite = TRUE) - } - - - # emergence and starting - # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "emergence and starting") - # unlist(values) - - # minimum temperature below which emergence is stopped (degreeC) - if ("emergence_Tmin" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "tgmin", pft.traits[which(pft.names == "emergence_Tmin")], overwrite = TRUE) + plt_df$codeplante <- base::substr(names(trait.values)[pft],1,3) + plt_df$codeperenne <- 1 } # nbfeuilplant, leaf number per plant when planting, default 0, skipping for now - # this is a switch, for now hardcoding to have delay at the beginning of the crop (1) # if starting the simulation from a later stage (e.g. 
lev) this has no effect # codegermin, option of simulation of a germination phase or a delay at the beginning of the crop (1) or direct starting (2) - SticsRFiles::set_param_xml(plant_file, "codegermin", 1, overwrite = TRUE) + plt_df$codegermin <- 1 - # cumulative thermal time allowing germination (degree-d) - if ("cum_thermal_germin" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "stpltger", pft.traits[which(pft.names == "cum_thermal_germin")], overwrite = TRUE) - } - - # skipping the other parameters related to this switch, they don't seem influential, at least on NPP and LAI + # skipping the other parameters related to this switch for now # potgermi: soil water potential under which seed imbibition is impeded # nbjgerlim: maximum number of days after grain imbibition allowing full germination # propjgermin: minimal proportion of the duration nbjgerlim when the temperature is higher than the temperature threshold Tdmax - - # parameter of the curve of coleoptile elongation - if ("belong" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "belong", pft.traits[which(pft.names == "belong")], overwrite = TRUE) - } - - # parameter of the plantlet elongation curve - if ("celong" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "celong", pft.traits[which(pft.names == "celong")], overwrite = TRUE) - } - - # maximum elongation of the coleoptile in darkness condition - if ("coleoptile_elong_dark_max" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "elmax", pft.traits[which(pft.names == "coleoptile_elong_dark_max")], overwrite = TRUE) - } - - # number of days after germination after which plant emergence is reduced - if ("days_reduced_emergence_postgerm" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "nlevlim1", round(pft.traits[which(pft.names == "days2reduced_emergence_postgerm")]), overwrite = TRUE) - } - - # number of days after germination after which plant emergence is impossible - if ("days2stopped_emergence_postgerm" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "nlevlim2", round(pft.traits[which(pft.names == "days2stopped_emergence_postgerm")]), overwrite = TRUE) - } - - # plant vigor index allowing to emerge through a soil crust, vigueurbat == 1 inactivates some soil crust related parameters, skipping for now - - # there are also "planting" related parameters - - # leaves - # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "leaves") - # unlist(values) - - - # phyllotherme, thermal duration between the apparition of two successive leaves on the main stem (degree day) - # assuming this is the same as phyllochron - if ("phyllochron" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "phyllotherme", pft.traits[which(pft.names == "phyllochron")], overwrite = TRUE) - } - - # minimal density above which interplant competition starts (m-2) - if ("dens_comp" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "bdens", pft.traits[which(pft.names == "dens_comp")], overwrite = TRUE) - } - - # LAI above which competition between plants starts (m2 m-2) - if ("lai_comp" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "laicomp", pft.traits[which(pft.names == "lai_comp")], overwrite = TRUE) - } - - # basal height of crop (m) - if ("height" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "hautbase", pft.traits[which(pft.names == "height")], overwrite = TRUE) - } - - # maximum height of crop - if ("HTMAX" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, 
"hautmax", pft.traits[which(pft.names == "HTMAX")], overwrite = TRUE) - } - - # minimum temperature at which growth ceases - if ("tcmin_growth" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "tcmin", pft.traits[which(pft.names == "tcmin_growth")], overwrite = TRUE) - } - - # maximum temperature at which growth ceases - if ("tcmax_growth" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "tcmax", pft.traits[which(pft.names == "tcmax_growth")], overwrite = TRUE) - } - # temperature beyond which foliar growth stops - if ("tcmax_foliar_growth" %in% pft.names) { + if ("tcxstop" %in% pft.names | "tdmax" %in% pft.names) { # tcxstop must be > tdmax, priors should be set that way, and we can let the simulation fail afterwards, but putting a warning here - tdmax <- SticsRFiles::get_param_xml(plant_file, param="tdmax", select = "formalisme", select_value = "phasic development")[[1]][[1]] - tcxstop <- pft.traits[which(pft.names == "tcmax_foliar_growth")] + # Retrieve the new values if they exist, otherwise read them from the plant file + if ("tcxstop" %in% pft.names) { + tcxstop <- pft.traits[which(pft.names == "tcxstop")] + } else { + tcxstop <- SticsRFiles::get_param_xml(plant_file, param="tcxstop", select = "formalisme", select_value = "leaves")[[1]][[1]] + } + if ("tdmax" %in% pft.names) { + tdmax <- pft.traits[which(pft.names == "tdmax")] + } else { + tdmax <- SticsRFiles::get_param_xml(plant_file, param="tdmax", select = "formalisme", select_value = "phasic development")[[1]][[1]] + } if(tcxstop < tdmax){ PEcAn.logger::logger.warn("tcmax_foliar_growth value (", tcxstop, ") should be greater than tdmax (", tdmax, ").") } - SticsRFiles::set_param_xml(plant_file, "tcxstop", tcxstop, overwrite = TRUE) - - } - - # ulai at the inflexion point of the function DELTAI=f(ULAI) - if ("vlaimax" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "vlaimax", pft.traits[which(pft.names == "vlaimax")], overwrite = TRUE) - } - - # parameter of the logistic curve of LAI growth - if ("pentlaimax" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "pentlaimax", pft.traits[which(pft.names == "pentlaimax")], overwrite = TRUE) - } - - # ulai from which the rate of leaf growth decreases - if ("udlaimax" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "udlaimax", pft.traits[which(pft.names == "udlaimax")], overwrite = TRUE) - } - - # life span of early leaves expressed as a fraction of the life span of the last leaves emitted DURVIEF - if ("early2last_leaflife" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "ratiodurvieI", pft.traits[which(pft.names == "early2last_leaflife")], overwrite = TRUE) - } - - # fraction of senescent biomass (relative to total biomass) - if ("senes2total_biomass" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "ratiosen", pft.traits[which(pft.names == "senes2total_biomass")], overwrite = TRUE) - } - - # fraction of senescent leaves falling to the soil - # not sure if this is supposed to be a fraction or a percentage in STICS, values look like a fraction but min-max is given as 0-100 - # treating it like a fraction for now - if ("fracLeafFall" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "abscission", pft.traits[which(pft.names == "fracLeafFall")], overwrite = TRUE) - } - - # parameter relating the C/N of dead leaves and the INN - if ("parazofmorte" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "parazofmorte", pft.traits[which(pft.names == "parazofmorte")], overwrite = TRUE) - } - - # parameter of 
the N stress function active on leaf expansion (INNLAI), bilinear function vs INN passing through the point (INNmin, INNturgmin) - if ("innturgmin" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "innturgmin", pft.traits[which(pft.names == "innturgmin")], overwrite = TRUE) - } - - # accelerating parameter for the lai growth rate - if ("lai_growth_rate_accelerating" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "dlaimin", pft.traits[which(pft.names == "lai_growth_rate_accelerating")], overwrite = TRUE) - } - - # maximum rate of the setting up of LAI - if ("lai_max_rate" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "dlaimaxbrut", pft.traits[which(pft.names == "lai_max_rate")], overwrite = TRUE) - } - - # relative additional lifespan due to N excess in plant (INN > 1) - if ("relative_addlifespan_DT_excessN" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "durviesupmax", pft.traits[which(pft.names == "relative_addlifespan_DT_excessN")], overwrite = TRUE) - } - - # parameter of the N stress function active on senescence (INNsenes), bilinear function vs INN passing through the point (INNmin, INNsen) - if ("innsen" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "innsen", pft.traits[which(pft.names == "innsen")], overwrite = TRUE) - } - - # threshold soil water content active to simulate water senescence stress as a proportion of the turgor stress - if ("rapsenturg" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "rapsenturg", pft.traits[which(pft.names == "rapsenturg")], overwrite = TRUE) - } - - - # radiation interception - # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "radiation interception") - - # extinction coefficient of photosynthetic active radiation in the canopy - if ("extinction_coefficient_diffuse" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "extin", pft.traits[which(pft.names == "extinction_coefficient_diffuse")], overwrite = TRUE) - } - - # shoot biomass growth - # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "shoot biomass growth") - - # minimum temperature for development - if ("temin" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "temin", pft.traits[which(pft.names == "temin")], overwrite = TRUE) - } - - # maximal temperature above which plant growth stops - if ("temax" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "temax", pft.traits[which(pft.names == "temax")], overwrite = TRUE) - } - - # optimal temperature (1/2) for plant growth - if ("teopt" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "teopt", pft.traits[which(pft.names == "teopt")], overwrite = TRUE) - } - - # optimal temperature (2/2) for plant growth - if ("teoptbis" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "teoptbis", pft.traits[which(pft.names == "teoptbis")], overwrite = TRUE) - } - - # maximum radiation use efficiency during the juvenile phase - if ("RUE_juv" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "efcroijuv", pft.traits[which(pft.names == "RUE_juv")], overwrite = TRUE) - } - - # maximum radiation use efficiency during the vegetative stage - if ("RUE_veg" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "efcroiveg", pft.traits[which(pft.names == "RUE_veg")], overwrite = TRUE) - } - - # maximum radiation use efficiency during the grain filling phase - if ("RUE_rep" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "efcroirepro", pft.traits[which(pft.names 
== "RUE_rep")], overwrite = TRUE) - } - - # fraction of daily remobilisable C reserves - if ("remobres" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "remobres", pft.traits[which(pft.names == "remobres")], overwrite = TRUE) - } - - # ratio biomass / useful height cut of crops (t.ha-1.m-1) - if ("biomass2usefulheight" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "coefmshaut", pft.traits[which(pft.names == "biomass2usefulheight")], overwrite = TRUE) - } - - - # partitioning of biomass in organs - # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "partitioning of biomass in organs") - - # maximum SLA (specific leaf area) of green leaves (cm2 g-1) - if ("SLAMAX" %in% pft.names) { - slamax <- pft.traits[which(pft.names == "SLAMAX")] - slamax <- PEcAn.utils::ud_convert(PEcAn.utils::ud_convert(slamax, "m2", "cm2"), "kg-1", "g-1") # m2 kg-1 to cm2 g-1 - SticsRFiles::set_param_xml(plant_file, "slamax", slamax, overwrite = TRUE) - } - - # minimum SLA (specific leaf area) of green leaves (cm2 g-1) - if ("SLAMIN" %in% pft.names) { - slamin <- pft.traits[which(pft.names == "SLAMIN")] - slamin <- PEcAn.utils::ud_convert(PEcAn.utils::ud_convert(slamin, "m2", "cm2"), "kg-1", "g-1") # m2 kg-1 to cm2 g-1 - SticsRFiles::set_param_xml(plant_file, "slamin", slamin, overwrite = TRUE) - } - - - # ratio stem (structural part)/leaf - if ("stem2leaf" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "tigefeuil", pft.traits[which(pft.names == "stem2leaf")], overwrite = TRUE) - } - - # skipping: envfruit, fraction of envelop in grainmaxi (w:w) - # skipping: sea, specific area of fruit envelops - - # yield formation, will get back - - # roots - # values = SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "roots") - - - # sensanox, index of anoxia sensitivity (0 = insensitive), 0 for now - # stoprac, stage when root growth stops (LAX= maximum leaf area index, end of leaf growth or SEN=beginning of leaf senescence) - - # sensrsec, index of root sensitivity to drought (1=insensitive) - if ("rootsens2drought" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "sensrsec", pft.traits[which(pft.names == "rootsens2drought")], overwrite = TRUE) - } - - # contrdamax, maximal reduction in root growth rate due to soil strengthness (high bulk density) - if ("db_reduc_rgr_max" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "contrdamax", pft.traits[which(pft.names == "db_reduc_rgr_max")], overwrite = TRUE) - } - - # draclong, maximum rate of root length production per plant (cm plant-1 degreeD-1) - if ("rootlength_prod_max" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "draclong", pft.traits[which(pft.names == "rootlength_prod_max")], overwrite = TRUE) - } - - # debsenrac, sum of degrees-days defining the beginning of root senescence (root life time) (degreeD) - if ("root_sen_dday" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "debsenrac", round(pft.traits[which(pft.names == "root_sen_dday")]), overwrite = TRUE) - } - - #lvfront, root density at the root apex (cm cm-3) - if ("rootdens_at_apex" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "lvfront", pft.traits[which(pft.names == "rootdens_at_apex")], overwrite = TRUE) - } - - # longsperac - specific root length (cm g-1) - if ("SRL" %in% pft.names) { - srl_val <- PEcAn.utils::ud_convert(pft.traits[which(pft.names == "SRL")], "m", "cm") - SticsRFiles::set_param_xml(plant_file, "longsperac", srl_val, overwrite = TRUE) + # TODO: 
Do we force one of these to change or let the simulation fail? } # option to activate the N influence on root partitioning within the soil profile (1 = yes, 2 = no) - SticsRFiles::set_param_xml(plant_file, "codazorac", 1, overwrite = TRUE) - - # reduction factor on root growth when soil mineral N is limiting (< minazorac) - if ("minefnra" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "minefnra", pft.traits[which(pft.names == "minefnra")], overwrite = TRUE) - } - - # mineral N concentration in soil below which root growth is reduced (kg.ha-1.cm-1) - if ("minazorac" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "minazorac", pft.traits[which(pft.names == "minazorac")], overwrite = TRUE) - } - - # mineral N concentration in soil above which root growth is maximum (kg.ha-1.cm-1) - if ("maxazorac" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "maxazorac", pft.traits[which(pft.names == "maxazorac")], overwrite = TRUE) - } - - # frost - - # formalism - water - - # psisto, potential of stomatal closing (absolute value) (bars) - # note: units in betyDB are m, but my prior is for testing - if ("psi_stomata_closure" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "psisto", pft.traits[which(pft.names == "psi_stomata_closure")], overwrite = TRUE) - } - - # psiturg, potential of the beginning of decrease of the cellular extension (absolute value) (bars) - # may or may not be leaf_psi_tlp in betyDB - if ("leaf_psi_tlp" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "psiturg", pft.traits[which(pft.names == "leaf_psi_tlp")], overwrite = TRUE) - } - - # h2ofeuilverte, water content of green leaves (relative to fresh matter) (g g-1) - # may or may not be water_content_TLP_leaf in betyDB - if ("water_content_TLP_leaf" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "h2ofeuilverte", pft.traits[which(pft.names == "water_content_TLP_leaf")], overwrite = TRUE) - } - - # skipping: - # h2ofeuiljaune - # h2otigestruc - # h2otigestruc - # h2ofrvert - # deshydbase - # tempdeshyd - - # kmax, maximum crop coefficient for water requirements (=MET/PET) - if ("crop_water_max" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "kmax", pft.traits[which(pft.names == "crop_water_max")], overwrite = TRUE) - } - - # nitrogen - # masecNmax - if ("masecNmax" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "masecNmax", pft.traits[which(pft.names == "masecNmax")], overwrite = TRUE) - } - - # Nreserve - if ("Nreserve" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "Nreserve", pft.traits[which(pft.names == "Nreserve")], overwrite = TRUE) - } - - - # Kmabs1 - if ("Kmabs1" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "Kmabs1", pft.traits[which(pft.names == "Kmabs1")], overwrite = TRUE) - } - - # adil - if ("adil" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "adil", pft.traits[which(pft.names == "adil")], overwrite = TRUE) - } - - # bdil - if ("bdil" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "bdil", pft.traits[which(pft.names == "bdil")], overwrite = TRUE) - } - - # INNmin - if ("INNmin" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "INNmin", pft.traits[which(pft.names == "INNmin")], overwrite = TRUE) - } - - # Nmeta - if ("Nmeta" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "Nmeta", pft.traits[which(pft.names == "Nmeta")]*100, overwrite = TRUE) - } - - # correspondance code BBCH + plt_df$codazorac <- 1 # cultivar parameters # values = 
SticsRFiles::get_param_xml(plant_file, select = "formalisme", select_value = "cultivar parameters") @@ -689,53 +254,10 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { # there are multiple cultivars (varietes) in plt file # for now I assume we will always use only #1 in simulations # hence, _tec file will always say variete==1, if you change the logic don't forget to update handling of the _tec file accordingly + # by default set_param_xml modifies the given parameter in all cultivars. - # maximal lifespan of an adult leaf expressed in summation of Q10=2 (2**(T-Tbase)) - if ("leaf_lifespan_max" %in% pft.names) { - # this will modifies all varietes' durvieFs by default - SticsRFiles::set_param_xml(plant_file, "durvieF", pft.traits[which(pft.names == "leaf_lifespan_max")], overwrite = TRUE) - # see example for setting a particular (the Grindstad) cultivar param - # SticsRFiles::set_param_xml(plant_file, "durvieF", pft.traits[which(pft.names == "leaf_lifespan_max")], select = "Grindstad", overwrite = TRUE) - } - - # cumulative thermal time between the stages LEV (emergence) and AMF (maximum acceleration of leaf growth, end of juvenile phase) - if ("cum_thermal_juvenile" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "stlevamf", pft.traits[which(pft.names == "cum_thermal_juvenile")], overwrite = TRUE) - } - - # cumulative thermal time between the stages AMF (maximum acceleration of leaf growth, end of juvenile phase) and LAX (maximum leaf area index, end of leaf growth) - if ("cum_thermal_growth" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "stamflax", pft.traits[which(pft.names == "cum_thermal_growth")], overwrite = TRUE) - } - - # cumulative thermal time between the stages LEV (emergence) and DRP (starting date of filling of harvested organs) - if ("cum_thermal_filling" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "stlevdrp", pft.traits[which(pft.names == "cum_thermal_filling")], overwrite = TRUE) - } - - if ("adens" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "adens", pft.traits[which(pft.names == "adens")], overwrite = TRUE) - } - - if ("croirac" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "croirac", pft.traits[which(pft.names == "croirac")], overwrite = TRUE) - } - - # extinction coefficient connecting LAI to crop height - if ("LAI2height" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "khaut", pft.traits[which(pft.names == "LAI2height")], overwrite = TRUE) - } - - # average root radius - if ("rayon" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "rayon", pft.traits[which(pft.names == "rayon")], overwrite = TRUE) - } - - # minimal value for drought stress index - if ("swfacmin" %in% pft.names) { - SticsRFiles::set_param_xml(plant_file, "swfacmin", pft.traits[which(pft.names == "swfacmin")], overwrite = TRUE) - } - + # Set the parameters that have been added to plt_df in the plant file. 
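+  # For illustration, taking the forage branch above: at this point plt_df holds
+  # the hardcoded switches, i.e. codebfroid = 2, codeplante = "fou",
+  # codeperenne = 2, codegermin = 1, codazorac = 1, so the single vectorised
+  # call below stands in for one set_param_xml() call per parameter, e.g.:
+  # SticsRFiles::set_param_xml(plant_file, "codebfroid", 2, overwrite = TRUE)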
+ SticsRFiles::set_param_xml(plant_file, names(plt_df), plt_df[1, ], overwrite = TRUE) # convert xml2txt if(names(trait.values)[pft] != "env"){ SticsRFiles::convert_xml2txt(file = plant_file) @@ -756,11 +278,18 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { ## these also have plant parameters as well as soil ## at the moment everything is treated as params, but some could be IC or come from the events file - # these parameters won't change as crop changes in a continous rotation + # these parameters won't change as crop changes in a continuous rotation + + # Convert pecan parameters to stics names for soil + # prepare for pecan2stics call, expects a list + soil_params_list <- list() + soil_params_list[[1]] <- soil_params + soil_params <- pecan2stics(soil_params_list)[[1]] soil.names <- names(soil_params) for (pft in seq_along(trait.values)) { + if(names(trait.values)[pft] == "env"){ next } @@ -768,382 +297,29 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { gen_xml <- XML::xmlParse(system.file("param_gen.xml", package = "PEcAn.STICS")) gen_file <- file.path(rundir, "param_gen.xml") XML::saveXML(gen_xml, file = gen_file) - codeinitprec <- ifelse(length(usmdirs>1), 1, 2) - SticsRFiles::set_param_xml(gen_file, "codeinitprec", codeinitprec, overwrite = TRUE) + # This input file is created from the template and not modified. newf_xml <- XML::xmlParse(system.file("param_newform.xml", package = "PEcAn.STICS")) newf_file <- file.path(rundir, "param_newform.xml") XML::saveXML(newf_xml, file = newf_file) - - - pft.traits <- unlist(trait.values[[pft]]) - pft.names <- names(pft.traits) - - ### Shoot growth - # parameter defining radiation effect on conversion efficiency - if ("rad_on_conversion_eff" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "coefb", pft.traits[which(pft.names == "rad_on_conversion_eff")], overwrite = TRUE) - } - - # ratio of root mass to aerial mass at harvest - if ("root2aerial_harvest" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "proprac", pft.traits[which(pft.names == "root2aerial_harvest")], overwrite = TRUE) - } - - # minimal amount of root mass at harvest (when aerial biomass is nil) t.ha-1 - if ("rootmin_harvest" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "y0msrac", pft.traits[which(pft.names == "rootmin_harvest")], overwrite = TRUE) - } - - ### Root growth - - # bulk density of soil below which root growth is reduced due to a lack of soil cohesion (g.cm-3) - if ("bd_rootgrowth_reduced" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "dacohes", pft.traits[which(pft.names == "bd_rootgrowth_reduced")], overwrite = TRUE) - } - - # bulk density of soil above which root growth is maximal (g.cm-3) - if ("bd_rootgrowth_maximal" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "daseuilbas", pft.traits[which(pft.names == "bd_rootgrowth_maximal")], overwrite = TRUE) - } - - # bulk density of soil above which root growth becomes impossible (g.cm-3) - if ("bd_rootgrowth_impossible" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "daseuilhaut", pft.traits[which(pft.names == "bd_rootgrowth_impossible")], overwrite = TRUE) - } - - ### Water absorption and nitrogen content of the plant - - # parameter of increase of maximal transpiration when a water stress occurs - if ("maxTPincrease_waterstress" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "beta", pft.traits[which(pft.names == "maxTPincrease_waterstress")], overwrite = TRUE) - } - - # root length density 
(RLD) above which water and N uptake are maximum and independent of RLD - if ("lvopt" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "lvopt", pft.traits[which(pft.names == "lvopt")], overwrite = TRUE) - } - - # diffusion coefficient of nitrate N in soil at field capacity - if ("difN_FC" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "difN", soil_params[which(soil.names == "difN_FC")], overwrite = TRUE) - } - - # skipping - # concrr: inorganic N concentration (NH4+NO3-N) in the rain - - # minimal amount of rain required to start an automatic fertilisation (N mm.d-1) - if ("plNmin" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "plNmin", soil_params[which(soil.names == "plNmin")], overwrite = TRUE) - } - - # skipping, irrlev: - # amount of irrigation applied automatically on the sowing day to allow germination when the model calculates automaticaly - # the amount of irrigations or when the irrigation dates are calculated by sum of temperature - - # minimal amount of N in the plant required to compute INN (kg.ha-1) - if ("QNpltminINN" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "QNpltminINN", pft.traits[which(pft.names == "QNpltminINN")], overwrite = TRUE) - } - - ### Soil C and N processes and fertiliser losses - - # minimal temperature for decomposition of humified organic matter (degreeC) - if ("tmin_mineralisation" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "tmin_mineralisation", soil_params[which(soil.names == "tmin_mineralisation")], overwrite = TRUE) - } - - # parameter (1/2) of the temperature function on humus decomposition rate - if ("T_p1_Hdecomp_rate" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "FTEMh", soil_params[which(soil.names == "T_p1_Hdecomp_rate")], overwrite = TRUE) - } - - # parameter (2/2) of the temperature function on humus decomposition rate - if ("T_p2_Hdecomp_rate" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "FTEMha", soil_params[which(soil.names == "T_p2_Hdecomp_rate")], overwrite = TRUE) - } - - # reference temperature for decomposition of humified organic matter - if ("T_r_HOMdecomp" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "TREFh", soil_params[which(soil.names == "T_r_HOMdecomp")], overwrite = TRUE) - } - - # parameter (1/2) of the temperature function on decomposition rate of organic residues - if ("FTEMr" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "FTEMr", soil_params[which(soil.names == "FTEMr")], overwrite = TRUE) - } - - # parameter (2/2) of the temperature function on decomposition rate of organic residues - if ("FTEMra" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "FTEMra", soil_params[which(soil.names == "FTEMra")], overwrite = TRUE) - } - - # reference temperature for decomposition of organic residues - if ("T_r_ORdecomp" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "TREFr", soil_params[which(soil.names == "T_r_ORdecomp")], overwrite = TRUE) - } - - # TODO: come back to these - # # not used anymore, or at least not with this name!!! - # # relative potential mineralization rate: K2 = fmin1 * exp(- fmin2*argi) / (1+fmin3*calc) - # if ("FMIN1" %in% soil.names) { - # SticsRFiles::set_param_xml(gen_file, "FMIN1", soil_params[which(soil.names == "FMIN1")], overwrite = TRUE) - # } - # - # # not used anymore, or at least not with this name!!! 
- # # parameter defining the effect of clay on the potential mineralization rate: K2 = fmin1 * exp(-fmin2*argi) / (1+fmin3*calc) - # if ("FMIN2" %in% soil.names) { - # SticsRFiles::set_param_xml(gen_file, "FMIN2", soil_params[which(soil.names == "FMIN2")], overwrite = TRUE) - # } - # - # # not used anymore, or at least not with this name!!! - # # parameter defining the effect of CaCO3 on the potential mineralization rate: K2 = fmin1 * exp(-fmin2*argi) / (1+fmin3*calc) - # if ("FMIN3" %in% soil.names) { - # SticsRFiles::set_param_xml(gen_file, "FMIN3", soil_params[which(soil.names == "FMIN3")], overwrite = TRUE) - # } - - # N/C ratio of soil humus - if ("Wh" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "Wh", soil_params[which(soil.names == "Wh")], overwrite = TRUE) - } - - # soil pH below which NH3 volatilisation derived from fertiliser is nil - if ("pHminvol" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "pHminvol", soil_params[which(soil.names == "pHminvol")], overwrite = TRUE) - } - - # soil pH above which NH3 volatilisation derived from fertiliser is maximum - if ("pHmaxvol" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "pHmaxvol", soil_params[which(soil.names == "pHmaxvol")], overwrite = TRUE) - } - - # N uptake rate at which fertilizer loss is divided by 2 - if ("Nupt_fertloss_halve" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "Vabs2", soil_params[which(soil.names == "Nupt_fertloss_halve")], overwrite = TRUE) - } - - # maximal amount of N immobilised in soil derived from the mineral fertilizer - if ("maxNimm_mineralfert" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "Xorgmax", soil_params[which(soil.names == "maxNimm_mineralfert")], overwrite = TRUE) - } - - # relative water content (fraction of field capacity) below which mineralisation rate is nil - if ("hminm" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "hminm", soil_params[which(soil.names == "hminm")], overwrite = TRUE) - } - - # relative water content (fraction of field capacity) below which mineralisation rate is maximum - if ("hoptm" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "hoptm", soil_params[which(soil.names == "hoptm")], overwrite = TRUE) - } - - # skipping, alphaph: - # maximal soil pH variation per unit of inorganic N added with slurry - - # skipping, dphvolmax: - # maximal pH increase following the application of slurry - - # skipping, phvols: - # parameter used to calculate the variation of soil pH after the addition of slurry - - # relative soil mineralisation rate at water saturation - if ("fhminsat" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "fhminsat", soil_params[which(soil.names == "fhminsat")], overwrite = TRUE) - } - - # reduction factor of decomposition rate of organic residues when mineral N is limiting - if ("Nlim_reductionOMdecomp" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "fredkN", soil_params[which(soil.names == "Nlim_reductionOMdecomp")], overwrite = TRUE) - } - - # reduction factor of decomposition rate of microbial biomass when mineral N is limiting - if ("Nlim_reductionMBdecomp" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "fredlN", soil_params[which(soil.names == "Nlim_reductionMBdecomp")], overwrite = TRUE) - } - - # minimal value for the ratio N/C of the microbial biomass when N limits decomposition - if ("fNCbiomin" %in% soil.names) { - SticsRFiles::set_param_xml(gen_file, "fNCbiomin", soil_params[which(soil.names == "fNCbiomin")], overwrite = TRUE) - } - - # 
additional reduction factor of residues decomposition rate when mineral N is very limited in soil
-    if ("fredNsup" %in% soil.names) {
-      SticsRFiles::set_param_xml(gen_file, "fredNsup", soil_params[which(soil.names == "fredNsup")], overwrite = TRUE)
-    }
-
-    # maximum priming ratio (relative to SOM decomposition SD rate)
-    if ("Primingmax" %in% soil.names) {
-      SticsRFiles::set_param_xml(gen_file, "Primingmax", soil_params[which(soil.names == "Primingmax")], overwrite = TRUE)
-    }
-
-    ### Nitrification, denitrification and associated N2O emissions
-    ### TODO: modify these params
     ### Soil hydrology and compaction
-    # minimal amount of rain required to produce runoff (mm.d-1)
-    if ("precmin4runoff" %in% soil.names) {
-      SticsRFiles::set_param_xml(gen_file, "pminruis", soil_params[which(soil.names == "precmin4runoff")], overwrite = TRUE)
-    }
-
-    # soil thermal diffusivity (cm2.s-1)
-    if ("soil_thermal_diffusivity" %in% soil.names) {
-      SticsRFiles::set_param_xml(gen_file, "diftherm", soil_params[which(soil.names == "soil_thermal_diffusivity")], overwrite = TRUE)
-    }
+    # Creating a dataframe of parameter names and their values for feeding into SticsRFiles::set_param_xml.
+    # Note that the parameters in this data frame are either hardcoded for now or otherwise require special treatment.
+    gen_df <- data.frame(codeinitprec = ifelse(length(usmdirs) > 1, 1, 2)) # reset initial conditions in chained simulations

-    # skipping, bformnappe:
-    # coefficient for the water table shape (artificially drained soil)
-
-    # drain radius (cm)
-    if ("rdrain" %in% soil.names) {
-      SticsRFiles::set_param_xml(gen_file, "rdrain", soil_params[which(soil.names == "rdrain")], overwrite = TRUE)
-    }
-
-    # soil water potential corresponding to wilting point (Mpa)
-    if ("SWP_WP" %in% soil.names) {
-      SticsRFiles::set_param_xml(gen_file, "psihumin", soil_params[which(soil.names == "SWP_WP")], overwrite = TRUE)
-    }
-
-    # soil water potential corresponding to field capacity (Mpa)
-    if ("SWP_FC" %in% soil.names) {
-      SticsRFiles::set_param_xml(gen_file, "psihucc", soil_params[which(soil.names == "SWP_FC")], overwrite = TRUE)
-    }
-
-    # soil moisture content (fraction of field capacity) above which compaction may occur and delay sowing
-    if ("SMC_compaction_delay_sow" %in% pft.names) {
-      SticsRFiles::set_param_xml(gen_file, "prophumtasssem", pft.traits[which(pft.names == "SMC_compaction_delay_sow")], overwrite = TRUE)
-    }
-
-    # soil moisture content (fraction of field capacity) above which compaction may occur and delay harvest
-    if ("SMC_compaction_delay_harvest" %in% pft.names) {
-      SticsRFiles::set_param_xml(gen_file, "prophumtassrec", pft.traits[which(pft.names == "SMC_compaction_delay_harvest")], overwrite = TRUE)
-    }
-
-    ### skipping
-    ### Soil tillage if soil compaction activated
-
-    ### Typology of pebbles fertilisers and residues
-    ### should some of these parameters come from event files?
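+    # Worked sketch of two relations quoted in the comments below, with
+    # invented inputs for illustration:
+    # kres <- function(akres, bkres, CsurNres) akres + bkres / CsurNres                   # residue decomposition rate
+    # hres <- function(ahres, bhres, CsurNres) 1 - ahres * CsurNres / (bhres + CsurNres)  # humification fraction
+    # kres(0.2, 2, 25)  # 0.28 at C/N = 25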
- - ### codetypeng: Types of mineral fertilisers - 1 atm - # 1: Ammonium.nitrate - # 2: Urea.Ammonium.Nitrate.solution - # 3: Urea - # 4: Anhydrous.ammonia - # 5: Ammonium.sulphate - # 6: Ammonium.phosphate - # 7: Calcium.nitrate - # 8: Fixed.efficiency - - # each option has 4 params - # engamm: fraction of ammonium in the N fertilizer - # orgeng: maximal amount of fertilizer N that can be immobilized in the soil (fraction for type 8) - # deneng: maximal fraction of the mineral fertilizer that can be denitrified (used if codedenit is not activated) - # voleng: maximal fraction of mineral fertilizer that can be volatilized - - ### codetypres: Type of residues for decomposition parameters - 21 atm - # 1: Main crop on surface - # 2: Intermediate crop on surface - # 3: Manure on surface - # 4: Green compost on surface - # 5: Sewage sludge on surface - # 6: Vinasse on surface - # 7: Horn on surface - # 8: Grapevine shoots on surface - # 9: Others.1 on surface - # 10: Others.2 on surface - # 11: Main crop ploughed in - # 12: Intermediate crop ploughed in - # 13: Manure ploughed in - # 14: Green compost ploughed in - # 15: Sewage sludge ploughed in - # 16: Vinasse ploughed in - # 17: Cattle horn ploughed in - # 18: Grapevine shoots ploughed in - # 19: Others.1 ploughed in - # 20: Others.2 ploughed in - # 21: Dead roots in soil - - # each option has 17 params - - # fraction of organic residue which is decomposable - if ("fOR_decomp" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "CroCo", pft.traits[which(pft.names == "fOR_decomp")], overwrite = TRUE) - } - - # parameter of organic residues decomposition: kres=akres+bkres/CsurNres - if ("ORdecomp_par" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "akres", pft.traits[which(pft.names == "ORdecomp_par")], overwrite = TRUE) - } - - # potential rate of decomposition of organic residues: kres=akres+bkres/CsurNres - if ("ORdecomp_rate" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "bkres", pft.traits[which(pft.names == "ORdecomp_rate")], overwrite = TRUE) - } - - # parameter determining C/N ratio of biomass during organic residues decomposition: CsurNbio=awb+bwb/CsurNres - if ("awb" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "awb", pft.traits[which(pft.names == "awb")], overwrite = TRUE) - } - - # parameter determining C/N ratio of biomass during organic residues decomposition: CsurNbio=awb+bwb/CsurNres - if ("bwb" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "bwb", pft.traits[which(pft.names == "bwb")], overwrite = TRUE) - } - - # minimum ratio C/N of microbial biomass decomposing organic residues - if ("minC2N_microbialbiomass" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "cwb", pft.traits[which(pft.names == "minC2N_microbialbiomass")], overwrite = TRUE) - } - - # parameter of organic residues humification: hres = 1 - ahres*CsurNres/(bhres+CsurNres) - if ("ahres" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "ahres", pft.traits[which(pft.names == "ahres")], overwrite = TRUE) - } - - # parameter of organic residues humification: hres = 1 - ahres*CsurNres/(bhres+CsurNres) - if ("bhres" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "bhres", pft.traits[which(pft.names == "bhres")], overwrite = TRUE) - } - - - # TODO: we need a soil PFT - - # potential decay rate of microbial biomass decomposing organic residues - if ("microbialbiomass_decay" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "kbio", pft.traits[which(pft.names == "microbialbiomass_decay")], 
overwrite = TRUE) - } - - # Carbon assimilation yield by the microbial biomass during crop residues decomposition - if ("microbialbiomass_C_yield" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "yres", pft.traits[which(pft.names == "microbialbiomass_C_yield")], overwrite = TRUE) - } - - # minimum value of C/N ratio of organic residue (g.g-1) - if ("CNresmin" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "CNresmin", pft.traits[which(pft.names == "CNresmin")], overwrite = TRUE) - } + pft.traits <- unlist(trait.values[[pft]]) + pft.names <- names(pft.traits) - # maximum value of C/N ratio of organic residue (g.g-1) - if ("CNresmax" %in% pft.names) { - SticsRFiles::set_param_xml(gen_file, "CNresmax", pft.traits[which(pft.names == "CNresmax")], overwrite = TRUE) + # Apply changes to those parameters specified by trait.values for this pft. + # Currently no checking/differentiation between parameters that are in the plant xml vs these xmls, but, for now, SticsRFiles just throws a warning when the parameter is not in that file. + if (!is.null(pft.traits)) { + SticsRFiles::set_param_xml(gen_file, param = names(pft.traits), values = as.list(unname(pft.traits)), overwrite = TRUE) } - - # skipping, qmulchruis0: - # amount of mulch above which runoff is suppressed - - # skipping, mouillabilmulch: - # maximum wettability of crop mulch - - # skipping, kcouvmlch: - # extinction coefficient connecting the soil cover to the amount of plant mulch - # skipping, albedomulchresidus: - # albedo of crop mulch + # Set the parameters that have been added to gen_df in the param_gen file. + SticsRFiles::set_param_xml(gen_file, names(gen_df), gen_df[1, ], overwrite = TRUE) - # skipping, Qmulchdec: - # maximal amount of decomposable mulch - SticsRFiles::convert_xml2txt(file = gen_file) this_usm <- grep(names(trait.values)[pft], usmdirs) @@ -1151,7 +327,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { file.copy(file.path(rundir, "tempopar.sti"), file.path(usmdirs[x], "tempopar.sti"), overwrite = TRUE) }) - ### new formulations + ### new formulations # DO NOTHING ELSE FOR NOW SticsRFiles::convert_xml2txt(file = newf_file) @@ -1169,8 +345,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { # read in template ini file ini_xml <- XML::xmlParse(system.file("pecan_ini.xml", package = "PEcAn.STICS")) for(i in seq_along(usmdirs)){ - - # doesn't really matter what these are called, they will all be eventually 'ficini.txt' + ini_file <- file.path(rundir, paste0(basename(usmdirs[i]), "_ini.xml")) # write the ini file @@ -1183,32 +358,29 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { # these may or may not be modified depending on how crop cycles work in STICS # 'snu' is bare soil # fine for annual crops but need to change for perennials - SticsRFiles::set_param_xml(file = ini_file, param = "stade0", values = "snu", select = "plante", select_value = "1", overwrite = TRUE) + SticsRFiles::set_param_xml(file = ini_file, param = "stade0", values = "dor", select = "plante", select_value = "1", overwrite = TRUE) # when snu others are set to 0 by STICS - + }else if(!is.null(settings$run$inputs$poolinitcond)){ ic_path <- settings$run$inputs$poolinitcond$path ic_nc <- ncdf4::nc_open(ic_path) # initial leaf area index (m2 m-2) lai0 <- ncdf4::ncvar_get(ic_nc, "LAI") - SticsRFiles::set_param_xml(file = ini_file, param = "lai0", values = lai0, select = "plante", select_value = "1", overwrite = TRUE) # initial aerial biomass (kg m-2 --> 
t ha-1) masec0 <- ncdf4::ncvar_get(ic_nc, "AGB") - SticsRFiles::set_param_xml(file = ini_file, param = "masec0", values = PEcAn.utils::ud_convert(masec0, "kg m-2", "t ha-1"), select = "plante", select_value = "1", overwrite = TRUE) # initial depth of root apex of the crop (m --> cm) zrac0 <- ncdf4::ncvar_get(ic_nc, "rooting_depth") if(zrac0 < 0.2) zrac0 <- 0.2 - SticsRFiles::set_param_xml(file = ini_file, param = "zrac0", values = PEcAn.utils::ud_convert(zrac0, "m", "cm"), select = "plante", select_value = "1", overwrite = TRUE) # initial grain dry weight - haven't started any simulations from this stage yet # SticsRFiles::set_param_xml(file = ini_file, param = "magrain0", values = 0, select = "plante", select_value = "1", overwrite = TRUE) # initial N amount in the plant (kg m-2 --> kg ha-1) QNplante0 <- ncdf4::ncvar_get(ic_nc, "plant_nitrogen_content") - SticsRFiles::set_param_xml(file = ini_file, param = "QNplante0", values = PEcAn.utils::ud_convert(QNplante0, "kg m-2", "kg ha-1"), select = "plante", select_value = "1", overwrite = TRUE) + QNplante0 <- PEcAn.utils::ud_convert(QNplante0, "kg m-2", "kg ha-1") # Not anymore # initial reserve of biomass (kg m-2 --> t ha-1) @@ -1227,18 +399,21 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { }else if(zrac0 < 0.8){ densinitial[5] <-0 #densinitial layers should not be filled if zrac0 is not there } - SticsRFiles::set_param_xml(file = ini_file, param = "densinitial", values = densinitial, select = "plante", select_value = "1", overwrite = TRUE) # default 'lev' # SticsRFiles::set_param_xml(file = ini_file, param = "stade0", values = "plt", select = "plante", select_value = "1", overwrite = TRUE) + ic_list <- list(lai0 = lai0, masec0 = masec0, zrac0 = zrac0, QNplante0 = QNplante0, densinitial = densinitial) + + SticsRFiles::set_param_xml(file = ini_file, param = names(ic_list), values = ic_list, select = "plante", select_value = "1", overwrite = TRUE) + ncdf4::nc_close(ic_nc) } SticsRFiles::convert_xml2txt(file = ini_file) file.rename(file.path(rundir, "ficini.txt"), file.path(usmdirs[i], "ficini.txt")) } - + ############################ Prepare Soils ################################## @@ -1246,26 +421,13 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { #### THERE IS SOME BUG IN SticsRFiles::convert_xml2txt FOR SOLS.XML #### I NOW PUT TXT VERSION TO THE MODEL PACKAGE: param.sol - #### TODO: revise others to have txt templates directly in the package - - # # changed from FINERT to finert and moved to the sols.xml - # # initial fraction of soil organic N inactive for mineralisation (= stable SON/ total SON) - # if ("FINERT" %in% soil.names) { - # SticsRFiles::set_param_xml(gen_file, "finert", soil_params[which(soil.names == "FINERT")], overwrite = TRUE) - # } - - sols_file <- file.path(rundir, "param.sol") - - # cp template sols file (txt) - file.copy(system.file("param.sol", package = "PEcAn.STICS"), sols_file) - - # check param names - # sols_vals <- SticsRFiles::get_soil_txt(sols_file) + #### sols_file <- file.path(rundir, "param.sol") + #### Note this has changed now, if all is working might delete these comments + sols_file <- file.path(rundir, "sols.xml") str_ns <- paste0(as.numeric(settings$run$site$id) %/% 1e+09, "-", as.numeric(settings$run$site$id) %% 1e+09) - # I guess not important what this is called as long as it's consistent in usms - SticsRFiles::set_soil_txt(file = sols_file, param="typsol", value=paste0("sol", str_ns)) + soils_df <- data.frame(soil_name = paste0("sol", 
str_ns)) if(!is.null(settings$run$inputs$poolinitcond)){ ic_path <- settings$run$inputs$poolinitcond$path @@ -1273,53 +435,61 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { # pH pH <- ncdf4::ncvar_get(ic_nc, "pH") - pH <- round(pH[1], digits = 1) # STICS uses 1 pH value - SticsRFiles::set_soil_txt(file = sols_file, param="pH", value=pH) - - sapply(1:5, function(x) SticsRFiles::set_soil_txt(file = sols_file, param="epc", value=20, layer = x)) + soils_df$pH <- round(pH[1], digits = 1) # STICS uses 1 pH value + + # Thickness of each soil layer. This sets all (five) at 20cm, to set individual ones use epc_1, epc_2, etc. + soils_df$epc <- 20 # volume_fraction_of_water_in_soil_at_field_capacity hccf <- ncdf4::ncvar_get(ic_nc, "volume_fraction_of_water_in_soil_at_field_capacity") hccf <- round(hccf*100, digits = 2) - sapply(seq_along(hccf), function(x) SticsRFiles::set_soil_txt(file = sols_file, param="hccf", value=hccf[x], layer = x)) + names(hccf) <- paste0("HCCF_", c(1:length(hccf))) + soils_df <- cbind(soils_df, t(hccf)) # volume_fraction_of_condensed_water_in_soil_at_wilting_point hminf <- ncdf4::ncvar_get(ic_nc, "volume_fraction_of_condensed_water_in_soil_at_wilting_point") hminf <- round(hminf*100, digits = 2) - sapply(seq_along(hminf), function(x) SticsRFiles::set_soil_txt(file = sols_file, param="hminf", value=hminf[x], layer = x)) + names(hminf) <- paste0("HMINF_", c(1:length(hminf))) + soils_df <- cbind(soils_df, t(hminf)) # soil_organic_nitrogen_content Norg <- ncdf4::ncvar_get(ic_nc, "soil_organic_nitrogen_content") Norg <- round(Norg[1]*100, digits = 2) # STICS uses 1 Norg value - SticsRFiles::set_soil_txt(file = sols_file, param="Norg", value=Norg) - + soils_df$norg <- Norg + # mass_fraction_of_clay_in_soil argi <- ncdf4::ncvar_get(ic_nc, "mass_fraction_of_clay_in_soil") argi <- round(argi[1]*100, digits = 0) # STICS uses 1 argi value - SticsRFiles::set_soil_txt(file = sols_file, param="argi", value=argi) + soils_df$argi <- argi # soil_density (kg m-3 --> g cm-3) DAF <- ncdf4::ncvar_get(ic_nc, "soil_density") DAF <- round(PEcAn.utils::ud_convert(DAF, "kg m-3", "g cm-3"), digits = 1) - sapply(seq_along(DAF), function(x) SticsRFiles::set_soil_txt(file = sols_file, param="DAF", value=DAF[x], layer = x)) + names(DAF) <- paste0("DAF_", c(1:length(DAF))) + soils_df <- cbind(soils_df, t(DAF)) # c2n_humus - #CsurNsol0 <- ncdf4::ncvar_get(ic_nc, "c2n_humus") - #SticsRFiles::set_soil_txt(file = sols_file, param="CsurNsol", value=CsurNsol0) + # CsurNsol0 <- ncdf4::ncvar_get(ic_nc, "c2n_humus") + # soils_df$CsurNsol0 <- CsurNsol0 - # epd + # epd: thickness of mixing cells in each soil layer ( = 2 * dispersion length) epd <- rep(10, 5) - sapply(seq_along(epd), function(x) SticsRFiles::set_soil_txt(file = sols_file, param="epd", value=epd[x], layer = x)) + names(epd) <- paste0("epd_", c(1:length(epd))) + soils_df <- cbind(soils_df, t(epd)) ncdf4::nc_close(ic_nc) } - file.copy(sols_file, file.path(usmdirs, "param.sol")) + SticsRFiles::gen_sols_xml(sols_file, param_df = soils_df, template = system.file("sols.xml", package = "PEcAn.STICS")) + SticsRFiles:::gen_sol_xsl_file(soil_name = paste0("sol", str_ns)) + SticsRFiles::convert_xml2txt(file = sols_file) + file.copy(file.path(rundir, "param.sol"), file.path(usmdirs, "param.sol")) + + # check param values + # sols_vals <- SticsRFiles::get_soil_txt(file.path(rundir, "param.sol"), stics_version = SticsRFiles::get_stics_versions_compat()$latest_version) # DO NOTHING ELSE FOR NOW - - # this has some bug for sols.xml - # 
SticsRFiles::convert_xml2txt(file = sols_file, javastics = javastics_path) + ######################### Prepare Weather Station File ############################### @@ -1345,13 +515,13 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { # DO NOTHING ELSE FOR NOW # Should these be prepared by met2model.STICS? - + ############################## Prepare LAI forcing #################################### ## skipping for now - + ############################ Prepare Technical File ################################## @@ -1372,7 +542,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { tec_df$concirr <- 0.11 # concentration of mineral N in irrigation water (kg ha-1 mm-1) tec_df$ressuite <- 'straw+roots' # type of crop residue tec_df$h2ograinmax <- 0.32 # maximal water content of fruits at harvest - + # the following formalisms exist in the tec file: ## supply of organic residus ## soil tillage @@ -1426,10 +596,10 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { "tauxexportfauche", "restit", "mscoupemini") # amount of mineral N added by fertiliser application at each cut of a forage crop, kg.ha-1 - - + + harvest_sub <- events_sub[events_sub$mgmt_operations_event == "harvest",] - + harvest_list <- list() for(hrow in seq_len(nrow(harvest_sub))){ @@ -1501,7 +671,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { # empty fert_df <- data.frame(jul = NA, val = NA) - + # If given fertilization date is within simulation days if(as.Date(fert_sub$date[frow]) %in% dseq_sub){ @@ -1521,30 +691,51 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { } fert_tec <- do.call("cbind", fert_list) } #fertilizer-if end - - - # DO NOTHING ELSE FOR NOW - # TODO: ADD OTHER MANAGEMENT - - # same usm -> continue columns - usm_tec_df <- cbind(tec_df, harvest_tec, fert_tec) - - usm_tec_df$ratiol <- 0 - - SticsRFiles::gen_tec_xml(param_df = usm_tec_df, - file=system.file("pecan_tec.xml", package = "PEcAn.STICS"), - out_dir = usmdirs[usmi]) - - # TODO: more than 1 USM, rbind - - SticsRFiles::convert_xml2txt(file = file.path(usmdirs[usmi], "tmp_tec.xml")) - - } # end-loop over usms - } # TODO: if no events file is given modify other harvest parameters, e.g. harvest decision + + # DO NOTHING ELSE FOR NOW + # TODO: ADD OTHER MANAGEMENT + + # same usm -> continue columns + usm_tec_df <- cbind(tec_df, harvest_tec, fert_tec) + + usm_tec_df$ratiol <- 0 + + SticsRFiles::gen_tec_xml(param_df = usm_tec_df, + file=system.file("pecan_tec.xml", package = "PEcAn.STICS"), + out_dir = usmdirs[usmi]) + + # TODO: more than 1 USM, rbind + + SticsRFiles::convert_xml2txt(file = file.path(usmdirs[usmi], "tmp_tec.xml")) + + + } # end-loop over usms + } # TODO: if no events file is given modify other harvest parameters, e.g. 
harvest decision + + ################################ Prepare Climate file ###################################### + # symlink climate files + met_path <- settings$run$inputs$met$path + + for(usmi in seq_along(usmdirs)){ + + usm_years <- c(sapply(strsplit(sub(".*_", "", basename(usmdirs)[usmi]), "-"), function(x) (as.numeric(x)))) + dseq_sub <- dseq[lubridate::year(dseq) %in% usm_years] + + clim_list <- list() # temporary solution + for(clim in seq_along(usm_years)){ + # currently assuming only first year file has been passed to the settings, modify met2model if changing the logic + met_file <- gsub(paste0(lubridate::year(settings$run$start.date), ".climate"), paste0(usm_years[clim], ".climate"), met_path) + clim_list[[clim]] <- utils::read.table(met_file) + } + clim_run <- do.call("rbind", clim_list) + utils::write.table(clim_run, file.path(usmdirs[usmi], "climat.txt"), col.names = FALSE, row.names = FALSE) + + } + ################################ Prepare USM file ###################################### - + # loop for each USM #ncodesuite <- ifelse(length(usmdirs) > 1, 1,0) @@ -1575,8 +766,8 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { }else{ SticsRFiles::set_usm_txt(usm_file, "codesuite", 1, append = FALSE) } - - + + # number of simulated plants (sole crop=1; intercropping=2) SticsRFiles::set_usm_txt(usm_file, "nbplantes", 1, append = FALSE) # hardcode for now @@ -1648,30 +839,10 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { # TODO: more than 1 PFTs # STICS can run 2 PFTs max: main crop + intercrop } - - - - ################################ Prepare Run ###################################### - # symlink climate files - met_path <- settings$run$inputs$met$path - for(usmi in seq_along(usmdirs)){ - - usm_years <- c(sapply(strsplit(sub(".*_", "", basename(usmdirs)[usmi]), "-"), function(x) (as.numeric(x)))) - dseq_sub <- dseq[lubridate::year(dseq) %in% usm_years] - - clim_list <- list() # temporary solution - for(clim in seq_along(usm_years)){ - # currently assuming only first year file has been passed to the settings, modify met2model if changing the logic - met_file <- gsub(paste0(lubridate::year(settings$run$start.date), ".climate"), paste0(usm_years[clim], ".climate"), met_path) - clim_list[[clim]] <- utils::read.table(met_file) - } - clim_run <- do.call("rbind", clim_list) - utils::write.table(clim_run, file.path(usmdirs[usmi], "climat.txt"), col.names = FALSE, row.names = FALSE) - - } + ################################ Prepare Run ###################################### # symlink to binary file.symlink(stics_path, bindir) @@ -1689,7 +860,7 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { # cmd_generate <- paste("java -jar", jexe,"--generate-txt", rundir, usm_name) # cmd_run <- paste("java -jar", jexe,"--run", rundir, usm_name) - + #----------------------------------------------------------------------- # create launch script (which will create symlink) if (!is.null(settings$model$jobtemplate) && file.exists(settings$model$jobtemplate)) { @@ -1742,5 +913,197 @@ write.config.STICS <- function(defaults, trait.values, settings, run.id) { writeLines(jobsh, con = file.path(settings$rundir, run.id, "job.sh")) Sys.chmod(file.path(settings$rundir, run.id, "job.sh")) - + } # write.config.STICS + + +# ==================================================================================================# +#' Function to translate pecan param names and units to stics names and units. 
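+#'
+#' A minimal usage sketch (trait names taken from the lookup table below,
+#' values are placeholders):
+#' \dontrun{
+#' traits <- list(crop = c(SRL = 3500, fracLeafFall = 0.4))
+#' pecan2stics(traits)  # names become "longsperac" and "abscission"
+#' }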
+#' @export +#' @param trait.values trait.values, list +#' @return translated list +#' @author Quentin Bell +# Based on pecan2lpjguess function by Istem Fer https://github.com/PecanProject/pecan/blob/develop/models/lpjguess/R/write.config.LPJGUESS.R#L229 +pecan2stics <- function(trait.values){ + + # TODO :match all stics and pecan names + vartable <- tibble::tribble( + ~sticsname, ~pecanname, ~sticsunits, ~pecanunits, + # Plant and soil related parameters + "abscission", "fracLeafFall", NA, NA, + "adens", "adens", NA, NA, + "adil", "adil", NA, NA, + "ahres", "ahres", NA, NA, + "akres", "ORdecomp_par", NA, NA, + "ampfroid", "vernalization_TAmp", NA, NA, + "awb", "awb", NA, NA, + "bdens", "dens_comp", NA, NA, + "bdil", "bdil", NA, NA, + "belong", "belong", NA, NA, + "beta", "maxTPincrease_waterstress", NA, NA, + "bhres", "bhres", NA, NA, + "bkres", "ORdecomp_rate", NA, NA, + "bwb", "bwb", NA, NA, + "celong", "celong", NA, NA, + "CNresmax", "CNresmax", NA, NA, + "CNresmin", "CNresmin", NA, NA, + "coefamflax", "coefamflax", NA, NA, + "coefb", "rad_on_conversion_eff", NA, NA, + "coefdrpmat", "coefdrpmat", NA, NA, + "coefflodrp", "coefflodrp", NA, NA, + "coeflaxsen", "coeflaxsen", NA, NA, + "coeflevamf", "coeflevamf", NA, NA, + "coeflevdrp", "coeflevdrp", NA, NA, + "coefmshaut", "biomass2usefulheight", NA, NA, + "coefsenlan", "coefsenlan", NA, NA, + "contrdamax", "db_reduc_rgr_max", NA, NA, + "CroCo", "fOR_decomp", NA, NA, + "croirac", "croirac", NA, NA, + "cwb", "minC2N_microbialbiomass", NA, NA, + "dacohes", "bd_rootgrowth_reduced", NA, NA, + "daseuilbas", "bd_rootgrowth_maximal", NA, NA, + "daseuilhaut", "bd_rootgrowth_impossible", NA, NA, + "debsenrac", "root_sen_dday", "round", "0", + "difN", "difN_FC", NA, NA, + "diftherm", "soil_thermal_diffusivity", NA, NA, + "dlaimaxbrut", "lai_max_rate", NA, NA, + "dlaimin", "lai_growth_rate_accelerating", NA, NA, + "draclong", "rootlength_prod_max", NA, NA, + "durvieF", "leaf_lifespan_max", NA, NA, + "durviesupmax", "relative_addlifespan_DT_excessN", NA, NA, + "efcroijuv", "RUE_juv", NA, NA, + "efcroirepro", "RUE_rep", NA, NA, + "efcroiveg", "RUE_veg", NA, NA, + "elmax", "coleoptile_elong_dark_max", NA, NA, + "extin", "extinction_coefficient_diffuse", NA, NA, + "fhminsat", "fhminsat", NA, NA, + "FINERT", "FINERT", NA, NA, + "FMIN1", "FMIN1", NA, NA, + "FMIN2", "FMIN2", NA, NA, + "FMIN3", "FMIN3", NA, NA, + "fNCbiomin", "fNCbiomin", NA, NA, + "fredkN", "Nlim_reductionOMdecomp", NA, NA, + "fredlN", "Nlim_reductionMBdecomp", NA, NA, + "fredNsup", "fredNsup", NA, NA, + "FTEMh", "T_p1_Hdecomp_rate", NA, NA, + "FTEMha", "T_p2_Hdecomp_rate", NA, NA, + "FTEMr", "FTEMr", NA, NA, + "FTEMra", "FTEMra", NA, NA, + "h2ofeuilverte", "water_content_TLP_leaf", NA, NA, + "hautmax", "HTMAX", NA, NA, + "hautbase", "height", NA, NA, + "hminm", "hminm", NA, NA, + "hoptm", "hoptm", NA, NA, + "INNmin", "INNmin", NA, NA, + "innsen", "innsen", NA, NA, + "innturgmin", "innturgmin", NA, NA, + "julvernal", "vernalization_init", "round", "0", + "jvcmini", "vernalization_days_min", "round", "0", + "kbio", "microbialbiomass_decay", NA, NA, + "khaut", "LAI2height", NA, NA, + "Kmabs1", "Kmabs1", NA, NA, + "kmax", "crop_water_max", NA, NA, + "laicomp", "lai_comp", NA, NA, + "longsperac", "SRL", NA, NA, + "lvfront", "rootdens_at_apex", NA, NA, + "lvopt", "lvopt", NA, NA, + "masecNmax", "masecNmax", NA, NA, + "maxazorac", "maxazorac", NA, NA, + "minazorac", "minazorac", NA, NA, + "minefnra", "minefnra", NA, NA, + "nlevlim1", "days2reduced_emergence_postgerm", "round", "0", + "nlevlim2", 
"days2stopped_emergence_postgerm", "round", "0", + "Nmeta", "Nmeta", NA, NA, + "Nreserve", "Nreserve", NA, NA, + "parazofmorte", "parazofmorte", NA, NA, + "pentlaimax", "pentlaimax", NA, NA, + "pHmaxvol", "pHmaxvol", NA, NA, + "pHminvol", "pHminvol", NA, NA, + "phobase", "phobase", NA, NA, + "phosat", "phosat", NA, NA, + "phyllotherme", "phyllochron", NA, NA, + "plNmin", "plNmin", NA, NA, + "pminruis", "precmin4runoff", NA, NA, + "Primingmax", "Primingmax", NA, NA, + "prophumtassrec", "SMC_compaction_delay_harvest", NA, NA, + "prophumtasssem", "SMC_compaction_delay_sow", NA, NA, + "proprac", "root2aerial_harvest", NA, NA, + "psihucc", "SWP_FC", NA, NA, + "psihumin", "SWP_WP", NA, NA, + "psisto", "psi_stomata_closure", NA, NA, # psisto, potential of stomatal closing (absolute value) (bars). note: units in betyDB are m, but Istem's prior is for testing + "psiturg", "leaf_psi_tlp", NA, NA, + "QNpltminINN", "QNpltminINN", NA, NA, + "rapsenturg", "rapsenturg", NA, NA, + "ratiodurvieI", "early2last_leaflife", NA, NA, + "ratiosen", "senes2total_biomass", NA, NA, + "rayon", "rayon", NA, NA, + "rdrain", "rdrain", NA, NA, + "remobres", "remobres", NA, NA, + "sensrsec", "rootsens2drought", NA, NA, + "slamax", "SLAMAX", "cm2 g-1", "m2 kg-1", + "slamin", "SLAMIN", "cm2 g-1", "m2 kg-1", + "stamflax", "cum_thermal_growth", NA, NA, + "stlevamf", "cum_thermal_juvenile", NA, NA, + "stlevdrp", "cum_thermal_filling", NA, NA, + "stpltger", "cum_thermal_germin", NA, NA, + "stressdev", "phasic_delay_max", NA, NA, + "swfacmin", "swfacmin", NA, NA, + "tcmax", "tcmax_growth", NA, NA, + "tcmin", "tcmin_growth", NA, NA, + "tcxstop", "tcmax_foliar_growth", NA, NA, + "tdmax", "tdmax", NA, NA, + "tdmin", "tdmin", NA, NA, + "temax", "temax", NA, NA, + "temin", "temin", NA, NA, + "teopt", "teopt", NA, NA, + "teoptbis", "teoptbis", NA, NA, + "tfroid", "vernalization_TOpt", NA, NA, + "tgmin", "emergence_Tmin", NA, NA, + "tigefeuil", "stem2leaf", NA, NA, + "tmin_mineralisation", "tmin_mineralisation", NA, NA, + "TREFh", "T_r_HOMdecomp", NA, NA, + "TREFr", "T_r_ORdecomp", NA, NA, + "udlaimax", "udlaimax", NA, NA, + "Vabs2", "Nupt_fertloss_halve", NA, NA, + "vlaimax", "vlaimax", NA, NA, + "Wh", "Wh", NA, NA, + "GMIN1", "GMIN1", NA, NA, + "GMIN2", "GMIN2", NA, NA, + "GMIN3", "GMIN3", NA, NA, + "GMIN4", "GMIN4", NA, NA, + "GMIN5", "GMIN5", NA, NA, + "GMIN6", "GMIN6", NA, NA, + "GMIN7", "GMIN7", NA, NA, + "Xorgmax", "maxNimm_mineralfert", NA, NA, + "y0msrac", "rootmin_harvest", NA, NA, + "yres", "microbialbiomass_C_yield", NA, NA, + # Missing pecan parameters without corresponding STICS parameters + "SLA", "SLA", NA, NA, # This is necessary as any parameters in the prior that are missing from this tibble cause an error. + ) + + trait.values <- lapply(trait.values, function(x){ + names(x) <- vartable$sticsname[match(names(x), vartable$pecanname)] + return(x) + }) + + # TODO : unit conversions? 
+  toconvert <- vartable$sticsname[!is.na(vartable$sticsunits)]
+  trait.values <- lapply(trait.values, function(x){
+    canconvert <- toconvert[toconvert %in% names(x)]
+    if(length(canconvert) != 0){
+      for(noc in seq_along(canconvert)){
+        if(vartable$sticsunits[vartable$sticsname == canconvert[noc]] == "round"){
+          x[,names(x) == canconvert[noc]] <- round(x[,names(x) == canconvert[noc]])
+        }else{
+          x[,names(x) == canconvert[noc]] <- PEcAn.utils::ud_convert(x[,names(x) == canconvert[noc]],
+                                                                     vartable$pecanunits[vartable$sticsname == canconvert[noc]],
+                                                                     vartable$sticsunits[vartable$sticsname == canconvert[noc]])
+        }
+
+      }
+    }
+    return(x)
+  })
+
+  return(trait.values)
+}

From dd54681d4d1fc4c59251a553a6d912bb13e895db Mon Sep 17 00:00:00 2001
From: Quentin Bell
Date: Thu, 14 Nov 2024 14:38:45 +0200
Subject: [PATCH 0039/1193] Added Quentin Bell and write.config.STICS change

---
 CHANGELOG.md | 1 +
 CITATION.cff | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0cbd0976ae5..07555797a0f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
 * Modules `PEcAn.allometry`, `PEcAn.assim.batch`, `PEcAn.data.mining`, `PEcAn.emulator`, `PEcAn.MA`, `PEcAn.photosynthesis`, `PEcAn.priors`, and `PEcAn.RTM`.
 - Renamed master branch to main
 - `PEcAn.all::pecan_version()` now reports commit hashes as well as version numbers for each installed package.
+- `write.config.STICS()` now modifies parameters with vectors rather than individually.

 ### Removed

diff --git a/CITATION.cff b/CITATION.cff
index 7af92146298..32f9620f195 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -122,6 +122,9 @@ authors:
     orcid: 'https://orcid.org/0000-0002-7430-7879'
   - given-names: Harunobu Ishii
     affiliation: Boston University Software & Application Innovation Lab(SAIL)
+  - affiliation: Finnish Meteorological Institute
+    given-names: Quentin Bell
+    orcid: 'https://orcid.org/0009-0005-0253-8642'

 preferred-citation:
   type: article

From b0a3cbe55e5e0c9df3bb52d559e651505c70a4b2 Mon Sep 17 00:00:00 2001
From: QdBell <5080358+qdbell@users.noreply.github.com>
Date: Fri, 15 Nov 2024 09:09:43 +0200
Subject: [PATCH 0040/1193] Update write.config.STICS.R with STICS filenames

Added which STICS file the parameter is found in to the pecan2stics lookup table.
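For reference, the lookup-driven renaming and unit conversion behave roughly
like this (a minimal sketch with a one-row table and a made-up trait value,
not taken from the actual test suite):

    vartable <- tibble::tribble(
      ~sticsname, ~pecanname, ~sticsunits, ~pecanunits, ~sticsfile,
      "slamax",   "SLAMAX",   "cm2 g-1",   "m2 kg-1",   "plt.xml"
    )
    traits <- list(crop = data.frame(SLAMAX = 22))  # PEcAn units: m2 kg-1
    names(traits$crop) <- vartable$sticsname[match(names(traits$crop),
                                                   vartable$pecanname)]
    traits$crop$slamax <- PEcAn.utils::ud_convert(traits$crop$slamax,
                                                  "m2 kg-1", "cm2 g-1")
    # traits$crop$slamax is now 220, ready to be written into plt.xml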
--- models/stics/R/write.config.STICS.R | 294 ++++++++++++++-------------- 1 file changed, 145 insertions(+), 149 deletions(-) diff --git a/models/stics/R/write.config.STICS.R b/models/stics/R/write.config.STICS.R index dd8afc7d9ac..5184f1d808c 100644 --- a/models/stics/R/write.config.STICS.R +++ b/models/stics/R/write.config.STICS.R @@ -928,157 +928,153 @@ pecan2stics <- function(trait.values){ # TODO :match all stics and pecan names vartable <- tibble::tribble( - ~sticsname, ~pecanname, ~sticsunits, ~pecanunits, + ~sticsname, ~pecanname, ~sticsunits, ~pecanunits, ~sticsfile, # Plant and soil related parameters - "abscission", "fracLeafFall", NA, NA, - "adens", "adens", NA, NA, - "adil", "adil", NA, NA, - "ahres", "ahres", NA, NA, - "akres", "ORdecomp_par", NA, NA, - "ampfroid", "vernalization_TAmp", NA, NA, - "awb", "awb", NA, NA, - "bdens", "dens_comp", NA, NA, - "bdil", "bdil", NA, NA, - "belong", "belong", NA, NA, - "beta", "maxTPincrease_waterstress", NA, NA, - "bhres", "bhres", NA, NA, - "bkres", "ORdecomp_rate", NA, NA, - "bwb", "bwb", NA, NA, - "celong", "celong", NA, NA, - "CNresmax", "CNresmax", NA, NA, - "CNresmin", "CNresmin", NA, NA, - "coefamflax", "coefamflax", NA, NA, - "coefb", "rad_on_conversion_eff", NA, NA, - "coefdrpmat", "coefdrpmat", NA, NA, - "coefflodrp", "coefflodrp", NA, NA, - "coeflaxsen", "coeflaxsen", NA, NA, - "coeflevamf", "coeflevamf", NA, NA, - "coeflevdrp", "coeflevdrp", NA, NA, - "coefmshaut", "biomass2usefulheight", NA, NA, - "coefsenlan", "coefsenlan", NA, NA, - "contrdamax", "db_reduc_rgr_max", NA, NA, - "CroCo", "fOR_decomp", NA, NA, - "croirac", "croirac", NA, NA, - "cwb", "minC2N_microbialbiomass", NA, NA, - "dacohes", "bd_rootgrowth_reduced", NA, NA, - "daseuilbas", "bd_rootgrowth_maximal", NA, NA, - "daseuilhaut", "bd_rootgrowth_impossible", NA, NA, - "debsenrac", "root_sen_dday", "round", "0", - "difN", "difN_FC", NA, NA, - "diftherm", "soil_thermal_diffusivity", NA, NA, - "dlaimaxbrut", "lai_max_rate", NA, NA, - "dlaimin", "lai_growth_rate_accelerating", NA, NA, - "draclong", "rootlength_prod_max", NA, NA, - "durvieF", "leaf_lifespan_max", NA, NA, - "durviesupmax", "relative_addlifespan_DT_excessN", NA, NA, - "efcroijuv", "RUE_juv", NA, NA, - "efcroirepro", "RUE_rep", NA, NA, - "efcroiveg", "RUE_veg", NA, NA, - "elmax", "coleoptile_elong_dark_max", NA, NA, - "extin", "extinction_coefficient_diffuse", NA, NA, - "fhminsat", "fhminsat", NA, NA, - "FINERT", "FINERT", NA, NA, - "FMIN1", "FMIN1", NA, NA, - "FMIN2", "FMIN2", NA, NA, - "FMIN3", "FMIN3", NA, NA, - "fNCbiomin", "fNCbiomin", NA, NA, - "fredkN", "Nlim_reductionOMdecomp", NA, NA, - "fredlN", "Nlim_reductionMBdecomp", NA, NA, - "fredNsup", "fredNsup", NA, NA, - "FTEMh", "T_p1_Hdecomp_rate", NA, NA, - "FTEMha", "T_p2_Hdecomp_rate", NA, NA, - "FTEMr", "FTEMr", NA, NA, - "FTEMra", "FTEMra", NA, NA, - "h2ofeuilverte", "water_content_TLP_leaf", NA, NA, - "hautmax", "HTMAX", NA, NA, - "hautbase", "height", NA, NA, - "hminm", "hminm", NA, NA, - "hoptm", "hoptm", NA, NA, - "INNmin", "INNmin", NA, NA, - "innsen", "innsen", NA, NA, - "innturgmin", "innturgmin", NA, NA, - "julvernal", "vernalization_init", "round", "0", - "jvcmini", "vernalization_days_min", "round", "0", - "kbio", "microbialbiomass_decay", NA, NA, - "khaut", "LAI2height", NA, NA, - "Kmabs1", "Kmabs1", NA, NA, - "kmax", "crop_water_max", NA, NA, - "laicomp", "lai_comp", NA, NA, - "longsperac", "SRL", NA, NA, - "lvfront", "rootdens_at_apex", NA, NA, - "lvopt", "lvopt", NA, NA, - "masecNmax", "masecNmax", NA, NA, - "maxazorac", 
"maxazorac", NA, NA, - "minazorac", "minazorac", NA, NA, - "minefnra", "minefnra", NA, NA, - "nlevlim1", "days2reduced_emergence_postgerm", "round", "0", - "nlevlim2", "days2stopped_emergence_postgerm", "round", "0", - "Nmeta", "Nmeta", NA, NA, - "Nreserve", "Nreserve", NA, NA, - "parazofmorte", "parazofmorte", NA, NA, - "pentlaimax", "pentlaimax", NA, NA, - "pHmaxvol", "pHmaxvol", NA, NA, - "pHminvol", "pHminvol", NA, NA, - "phobase", "phobase", NA, NA, - "phosat", "phosat", NA, NA, - "phyllotherme", "phyllochron", NA, NA, - "plNmin", "plNmin", NA, NA, - "pminruis", "precmin4runoff", NA, NA, - "Primingmax", "Primingmax", NA, NA, - "prophumtassrec", "SMC_compaction_delay_harvest", NA, NA, - "prophumtasssem", "SMC_compaction_delay_sow", NA, NA, - "proprac", "root2aerial_harvest", NA, NA, - "psihucc", "SWP_FC", NA, NA, - "psihumin", "SWP_WP", NA, NA, - "psisto", "psi_stomata_closure", NA, NA, # psisto, potential of stomatal closing (absolute value) (bars). note: units in betyDB are m, but Istem's prior is for testing - "psiturg", "leaf_psi_tlp", NA, NA, - "QNpltminINN", "QNpltminINN", NA, NA, - "rapsenturg", "rapsenturg", NA, NA, - "ratiodurvieI", "early2last_leaflife", NA, NA, - "ratiosen", "senes2total_biomass", NA, NA, - "rayon", "rayon", NA, NA, - "rdrain", "rdrain", NA, NA, - "remobres", "remobres", NA, NA, - "sensrsec", "rootsens2drought", NA, NA, - "slamax", "SLAMAX", "cm2 g-1", "m2 kg-1", - "slamin", "SLAMIN", "cm2 g-1", "m2 kg-1", - "stamflax", "cum_thermal_growth", NA, NA, - "stlevamf", "cum_thermal_juvenile", NA, NA, - "stlevdrp", "cum_thermal_filling", NA, NA, - "stpltger", "cum_thermal_germin", NA, NA, - "stressdev", "phasic_delay_max", NA, NA, - "swfacmin", "swfacmin", NA, NA, - "tcmax", "tcmax_growth", NA, NA, - "tcmin", "tcmin_growth", NA, NA, - "tcxstop", "tcmax_foliar_growth", NA, NA, - "tdmax", "tdmax", NA, NA, - "tdmin", "tdmin", NA, NA, - "temax", "temax", NA, NA, - "temin", "temin", NA, NA, - "teopt", "teopt", NA, NA, - "teoptbis", "teoptbis", NA, NA, - "tfroid", "vernalization_TOpt", NA, NA, - "tgmin", "emergence_Tmin", NA, NA, - "tigefeuil", "stem2leaf", NA, NA, - "tmin_mineralisation", "tmin_mineralisation", NA, NA, - "TREFh", "T_r_HOMdecomp", NA, NA, - "TREFr", "T_r_ORdecomp", NA, NA, - "udlaimax", "udlaimax", NA, NA, - "Vabs2", "Nupt_fertloss_halve", NA, NA, - "vlaimax", "vlaimax", NA, NA, - "Wh", "Wh", NA, NA, - "GMIN1", "GMIN1", NA, NA, - "GMIN2", "GMIN2", NA, NA, - "GMIN3", "GMIN3", NA, NA, - "GMIN4", "GMIN4", NA, NA, - "GMIN5", "GMIN5", NA, NA, - "GMIN6", "GMIN6", NA, NA, - "GMIN7", "GMIN7", NA, NA, - "Xorgmax", "maxNimm_mineralfert", NA, NA, - "y0msrac", "rootmin_harvest", NA, NA, - "yres", "microbialbiomass_C_yield", NA, NA, + "abscission", "fracLeafFall", NA, NA, "plt.xml", + "adens", "adens", NA, NA, "plt.xml", + "adil", "adil", NA, NA, "plt.xml", + "ahres", "ahres", NA, NA, "param_gen.xml", + "akres", "ORdecomp_par", NA, NA, "param_gen.xml", + "ampfroid", "vernalization_TAmp", NA, NA, "plt.xml", + "awb", "awb", NA, NA, "param_gen.xml", + "bdens", "dens_comp", NA, NA, "plt.xml", + "bdil", "bdil", NA, NA, "plt.xml", + "belong", "belong", NA, NA, "plt.xml", + "beta", "maxTPincrease_waterstress", NA, NA, "param_gen.xml", + "bhres", "bhres", NA, NA, "param_gen.xml", + "bkres", "ORdecomp_rate", NA, NA, "param_gen.xml", + "bwb", "bwb", NA, NA, "param_gen.xml", + "celong", "celong", NA, NA, "plt.xml", + "CNresmax", "CNresmax", NA, NA, "param_gen.xml", + "CNresmin", "CNresmin", NA, NA, "param_gen.xml", + "coefamflax", "coefamflax", NA, NA, "plt.xml", + "coefb", 
"rad_on_conversion_eff", NA, NA, "param_gen.xml", + "coefdrpmat", "coefdrpmat", NA, NA, "plt.xml", + "coefflodrp", "coefflodrp", NA, NA, "plt.xml", + "coeflaxsen", "coeflaxsen", NA, NA, "plt.xml", + "coeflevamf", "coeflevamf", NA, NA, "plt.xml", + "coeflevdrp", "coeflevdrp", NA, NA, "plt.xml", + "coefmshaut", "biomass2usefulheight", NA, NA, "plt.xml", + "coefsenlan", "coefsenlan", NA, NA, "plt.xml", + "contrdamax", "db_reduc_rgr_max", NA, NA, "plt.xml", + "CroCo", "fOR_decomp", NA, NA, "param_gen.xml", + "croirac", "croirac", NA, NA, "plt.xml", + "cwb", "minC2N_microbialbiomass", NA, NA, "param_gen.xml", + "dacohes", "bd_rootgrowth_reduced", NA, NA, "param_gen.xml", + "daseuilbas", "bd_rootgrowth_maximal", NA, NA, "param_gen.xml", + "daseuilhaut", "bd_rootgrowth_impossible", NA, NA, "param_gen.xml", + "debsenrac", "root_sen_dday", "round", "0", "plt.xml", + "difN", "difN_FC", NA, NA, "param_gen.xml", + "diftherm", "soil_thermal_diffusivity", NA, NA, "param_gen.xml", + "dlaimaxbrut", "lai_max_rate", NA, NA, "plt.xml", + "dlaimin", "lai_growth_rate_accelerating", NA, NA, "plt.xml", + "draclong", "rootlength_prod_max", NA, NA, "plt.xml", + "durvieF", "leaf_lifespan_max", NA, NA, "plt.xml", + "durviesupmax", "relative_addlifespan_DT_excessN", NA, NA, "plt.xml", + "efcroijuv", "RUE_juv", NA, NA, "plt.xml", + "efcroirepro", "RUE_rep", NA, NA, "plt.xml", + "efcroiveg", "RUE_veg", NA, NA, "plt.xml", + "elmax", "coleoptile_elong_dark_max", NA, NA, "plt.xml", + "extin", "extinction_coefficient_diffuse", NA, NA, "plt.xml", + "fhminsat", "fhminsat", NA, NA, "param_gen.xml", + "FINERT", "FINERT", NA, NA, "sols.xml", + "fNCbiomin", "fNCbiomin", NA, NA, "param_gen.xml", + "fredkN", "Nlim_reductionOMdecomp", NA, NA, "param_gen.xml", + "fredlN", "Nlim_reductionMBdecomp", NA, NA, "param_gen.xml", + "fredNsup", "fredNsup", NA, NA, "param_gen.xml", + "FTEMh", "T_p1_Hdecomp_rate", NA, NA, "param_gen.xml", + "FTEMha", "T_p2_Hdecomp_rate", NA, NA, "param_gen.xml", + "FTEMr", "FTEMr", NA, NA, "param_gen.xml", + "FTEMra", "FTEMra", NA, NA, "param_gen.xml", + "h2ofeuilverte", "water_content_TLP_leaf", NA, NA, "plt.xml", + "hautmax", "HTMAX", NA, NA, "plt.xml", + "hautbase", "height", NA, NA, "plt.xml", + "hminm", "hminm", NA, NA, "param_gen.xml", + "hoptm", "hoptm", NA, NA, "param_gen.xml", + "INNmin", "INNmin", NA, NA, "plt.xml", + "innsen", "innsen", NA, NA, "plt.xml", + "innturgmin", "innturgmin", NA, NA, "plt.xml", + "julvernal", "vernalization_init", "round", "0", "plt.xml", + "jvcmini", "vernalization_days_min", "round", "0", "plt.xml", + "kbio", "microbialbiomass_decay", NA, NA, "param_gen.xml", + "khaut", "LAI2height", NA, NA, "plt.xml", + "Kmabs1", "Kmabs1", NA, NA, "plt.xml", + "kmax", "crop_water_max", NA, NA, "plt.xml", + "laicomp", "lai_comp", NA, NA, "plt.xml", + "longsperac", "SRL", NA, NA, "plt.xml", + "lvfront", "rootdens_at_apex", NA, NA, "plt.xml", + "lvopt", "lvopt", NA, NA, "param_gen.xml", + "masecNmax", "masecNmax", NA, NA, "plt.xml", + "maxazorac", "maxazorac", NA, NA, "plt.xml", + "minazorac", "minazorac", NA, NA, "plt.xml", + "minefnra", "minefnra", NA, NA, "plt.xml", + "nlevlim1", "days2reduced_emergence_postgerm", "round", "0", "plt.xml", + "nlevlim2", "days2stopped_emergence_postgerm", "round", "0", "plt.xml", + "Nmeta", "Nmeta", NA, NA, "plt.xml", + "Nreserve", "Nreserve", NA, NA, "plt.xml", + "parazofmorte", "parazofmorte", NA, NA, "plt.xml", + "pentlaimax", "pentlaimax", NA, NA, "plt.xml", + "pHmaxvol", "pHmaxvol", NA, NA, "param_gen.xml", + "pHminvol", "pHminvol", NA, NA, 
"param_gen.xml", + "phobase", "phobase", NA, NA, "plt.xml", + "phosat", "phosat", NA, NA, "plt.xml", + "phyllotherme", "phyllochron", NA, NA, "plt.xml", + "plNmin", "plNmin", NA, NA, "param_gen.xml", + "pminruis", "precmin4runoff", NA, NA, "param_gen.xml", + "Primingmax", "Primingmax", NA, NA, "param_gen.xml", + "prophumtassrec", "SMC_compaction_delay_harvest", NA, NA, "param_gen.xml", + "prophumtasssem", "SMC_compaction_delay_sow", NA, NA, "param_gen.xml", + "proprac", "root2aerial_harvest", NA, NA, "param_gen.xml", + "psihucc", "SWP_FC", NA, NA, "param_gen.xml", + "psihumin", "SWP_WP", NA, NA, "param_gen.xml", + "psisto", "psi_stomata_closure", NA, NA, "plt.xml", # psisto, potential of stomatal closing (absolute value) (bars). note: units in betyDB are m, but Istem's prior is for testing + "psiturg", "leaf_psi_tlp", NA, NA, "plt.xml", + "QNpltminINN", "QNpltminINN", NA, NA, "param_gen.xml", + "rapsenturg", "rapsenturg", NA, NA, "plt.xml", + "ratiodurvieI", "early2last_leaflife", NA, NA, "plt.xml", + "ratiosen", "senes2total_biomass", NA, NA, "plt.xml", + "rayon", "rayon", NA, NA, "plt.xml", + "rdrain", "rdrain", NA, NA, "param_gen.xml", + "remobres", "remobres", NA, NA, "plt.xml", + "sensrsec", "rootsens2drought", NA, NA, "plt.xml", + "slamax", "SLAMAX", "cm2 g-1", "m2 kg-1", "plt.xml", + "slamin", "SLAMIN", "cm2 g-1", "m2 kg-1", "plt.xml", + "stamflax", "cum_thermal_growth", NA, NA, "plt.xml", + "stlevamf", "cum_thermal_juvenile", NA, NA, "plt.xml", + "stlevdrp", "cum_thermal_filling", NA, NA, "plt.xml", + "stpltger", "cum_thermal_germin", NA, NA, "plt.xml", + "stressdev", "phasic_delay_max", NA, NA, "plt.xml", + "swfacmin", "swfacmin", NA, NA, "plt.xml", + "tcmax", "tcmax_growth", NA, NA, "plt.xml", + "tcmin", "tcmin_growth", NA, NA, "plt.xml", + "tcxstop", "tcmax_foliar_growth", NA, NA, "plt.xml", + "tdmax", "tdmax", NA, NA, "plt.xml", + "tdmin", "tdmin", NA, NA, "plt.xml", + "temax", "temax", NA, NA, "plt.xml", + "temin", "temin", NA, NA, "plt.xml", + "teopt", "teopt", NA, NA, "plt.xml", + "teoptbis", "teoptbis", NA, NA, "plt.xml", + "tfroid", "vernalization_TOpt", NA, NA, "plt.xml", + "tgmin", "emergence_Tmin", NA, NA, "plt.xml", + "tigefeuil", "stem2leaf", NA, NA, "plt.xml", + "tmin_mineralisation", "tmin_mineralisation", NA, NA, "param_gen.xml", + "TREFh", "T_r_HOMdecomp", NA, NA, "param_gen.xml", + "TREFr", "T_r_ORdecomp", NA, NA, "param_gen.xml", + "udlaimax", "udlaimax", NA, NA, "plt.xml", + "Vabs2", "Nupt_fertloss_halve", NA, NA, "param_gen.xml", + "vlaimax", "vlaimax", NA, NA, "plt.xml", + "Wh", "Wh", NA, NA, "param_gen.xml", + "GMIN1", "GMIN1", NA, NA, "param_gen.xml", + "GMIN2", "GMIN2", NA, NA, "param_gen.xml", + "GMIN3", "GMIN3", NA, NA, "param_gen.xml", + "GMIN4", "GMIN4", NA, NA, "param_gen.xml", + "GMIN5", "GMIN5", NA, NA, "param_gen.xml", + "GMIN6", "GMIN6", NA, NA, "param_gen.xml", + "GMIN7", "GMIN7", NA, NA, "param_gen.xml", + "Xorgmax", "maxNimm_mineralfert", NA, NA, "param_gen.xml", + "y0msrac", "rootmin_harvest", NA, NA, "param_gen.xml", + "yres", "microbialbiomass_C_yield", NA, NA, "param_gen.xml", # Missing pecan parameters without corresponding STICS parameters - "SLA", "SLA", NA, NA, # This is necessary as any parameters in the prior that are missing from this tibble cause an error. 
) trait.values <- lapply(trait.values, function(x){ From 938434326732df0e0752df1893c384d205d3b345 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Mon, 2 Dec 2024 19:47:17 +0530 Subject: [PATCH 0041/1193] Update machine host to remove duplicate code Signed-off-by: Abhinav Pandey --- ...abase.entries.R => add_database_entries.R} | 2 +- base/db/R/check.missing.files.R | 12 ++-- base/db/R/convert_input.R | 65 +++++-------------- ...{get.machine.info.R => get_machine_info.R} | 23 ++++++- ...ase.entries.Rd => add_database_entries.Rd} | 13 ++-- base/db/man/check_missing_files.Rd | 9 +-- ...et.machine.host.Rd => get_machine_host.Rd} | 8 +-- ...et.machine.info.Rd => get_machine_info.Rd} | 8 +-- base/db/tests/testthat/test.convert_input.R | 4 +- 9 files changed, 64 insertions(+), 80 deletions(-) rename base/db/R/{add.database.entries.R => add_database_entries.R} (99%) rename base/db/R/{get.machine.info.R => get_machine_info.R} (66%) rename base/db/man/{add.database.entries.Rd => add_database_entries.Rd} (90%) rename base/db/man/{get.machine.host.Rd => get_machine_host.Rd} (73%) rename base/db/man/{get.machine.info.Rd => get_machine_info.Rd} (73%) diff --git a/base/db/R/add.database.entries.R b/base/db/R/add_database_entries.R similarity index 99% rename from base/db/R/add.database.entries.R rename to base/db/R/add_database_entries.R index 8b36e884398..87814e04db8 100644 --- a/base/db/R/add.database.entries.R +++ b/base/db/R/add_database_entries.R @@ -21,7 +21,7 @@ #' #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko -add.database.entries <- function( +add_database_entries <- function( result, con, start_date, end_date, overwrite, insert.new.file, input.args, diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R index f3a496cf5de..9c119239988 100644 --- a/base/db/R/check.missing.files.R +++ b/base/db/R/check.missing.files.R @@ -19,12 +19,6 @@ check_missing_files <- function(result, existing.input = NULL, existing.dbfile = ) if (any(result_sizes$missing) || any(result_sizes$empty)) { - log_format_df <- function(df) { - formatted_df <- rbind(colnames(df), format(df)) - formatted_text <- purrr::reduce(formatted_df, paste, sep = " ") - paste(formatted_text, collapse = "\n") - } - PEcAn.logger::logger.severe( "Requested Processing produced empty files or Nonexistent files:\n", log_format_df(result_sizes[, c(1, 8, 9, 10)]), @@ -44,3 +38,9 @@ check_missing_files <- function(result, existing.input = NULL, existing.dbfile = } return(list(existing.input, existing.dbfile)) } + +log_format_df <- function(df) { + formatted_df <- rbind(colnames(df), format(df)) + formatted_text <- purrr::reduce(formatted_df, paste, sep = " ") + paste(formatted_text, collapse = "\n") +} \ No newline at end of file diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R index 042c9da08db..94f11df502e 100644 --- a/base/db/R/convert_input.R +++ b/base/db/R/convert_input.R @@ -176,7 +176,7 @@ convert_input <- existing.input[[i]]$end_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$end_date), "UTC") ## Obtain machine information - machine.host.info <- get.machine.host(host, con = con) + machine.host.info <- get_machine_host(host, con = con) machine.host <- machine.host.info$machine.host machine <- machine.host.info$machine #Grab machine info of file that exists @@ -341,33 +341,16 @@ convert_input <- add = TRUE ) # Close on.exit } - - - - #Grab machine info of file that exists - existing.machine <- db.query(paste0("SELECT * from machines where 
id = '", - existing.dbfile$machine_id, "'"), con) - - #Grab machine info of host machine - machine.host.info <- get.machine.host(host, con = con) - machine.host <- machine.host.info$machine.host - machine <- machine.host.info$machine - - if (existing.machine$id != machine$id) { - - PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.") - PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency") - insert.new.file <- TRUE - start_date <- existing.input$start_date - end_date <- existing.input$end_date - - } else { - # There's an existing input that spans desired start/end dates with files on this machine - PEcAn.logger::logger.info("Skipping this input conversion because files are already available.") - return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id)) + + existing_files_result <- check_and_handle_existing_files(existing.dbfile, host, con, existing.input, start_date, end_date) + if (!is.null(existing_files_result$input.id)) { + return(existing_files_result) + } else { + insert.new.file <- existing_files_result$insert.new.file + start_date <- existing_files_result$start_date + end_date <- existing_files_result$end_date } - - + } else { # No existing record found. Should be good to go with regular conversion. } @@ -467,25 +450,13 @@ convert_input <- } else if ((start_date >= existing.input$start_date) && (end_date <= existing.input$end_date)) { - #Grab machine info of file that exists - existing.machine <- db.query(paste0("SELECT * from machines where id = '", - existing.dbfile$machine_id, "'"), con) - - #Grab machine info of host machine - machine.host.info <- get.machine.host(host, con = con) - machine.host <- machine.host.info$machine.host - machine <- machine.host.info$machine - - if(existing.machine$id != machine$id){ - PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.") - PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency") - insert.new.file <- TRUE - start_date <- existing.input$start_date - end_date <- existing.input$end_date + existing_files_result <- check_and_handle_existing_files(existing.dbfile, host, con, existing.input, start_date, end_date) + if (!is.null(existing_files_result$input.id)) { + return(existing_files_result) } else { - # There's an existing input that spans desired start/end dates with files on this machine - PEcAn.logger::logger.info("Skipping this input conversion because files are already available.") - return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id)) + insert.new.file <- existing_files_result$insert.new.file + start_date <- existing_files_result$start_date + end_date <- existing_files_result$end_date } } else { @@ -516,7 +487,7 @@ convert_input <- #---------------------------------------------------------------------------------------------------------------# # Get machine information - machine.info <- get.machine.info(host, input.args = input.args, input.id = input.id) + machine.info <- get_machine_info(host, input.args = input.args, input.id = input.id) if (any(sapply(machine.info, is.null))) { PEcAn.logger::logger.error("failed lookup of inputs or dbfiles") @@ -596,7 +567,7 @@ convert_input <- #---------------------------------------------------------------# # New arrangement of database adding code to deal with ensembles. 
if(write) { - add_entries_result <- return (add.database.entries(result, con, start_date, + add_entries_result <- return (add_database_entries(result, con, start_date, end_date, overwrite, insert.new.file, input.args, machine, mimetype, formatname, diff --git a/base/db/R/get.machine.info.R b/base/db/R/get_machine_info.R similarity index 66% rename from base/db/R/get.machine.info.R rename to base/db/R/get_machine_info.R index 14123a586e9..2d35cfaca06 100644 --- a/base/db/R/get.machine.info.R +++ b/base/db/R/get_machine_info.R @@ -9,7 +9,7 @@ get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) { - machine.host.info <- get.machine.host(host, con = con) + machine.host.info <- get_machine_host(host, con = con) machine.host <- machine.host.info$machine.host machine <- machine.host.info$machine @@ -81,3 +81,24 @@ get_machine_host <- function(host, con) { return(list(machine.host, machine)) } + +check_and_handle_existing_files <- function(existing.dbfile, host, con, existing.input, start_date, end_date) { + # Grab machine info of file that exists + existing.machine <- db.query(paste0("SELECT * from machines where id = '", + existing.dbfile$machine_id, "'"), con) + + # Grab machine info of host machine + machine.host.info <- get_machine_host(host, con = con) + machine.host <- machine.host.info$machine.host + machine <- machine.host.info$machine + + if (existing.machine$id != machine$id) { + PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.") + PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency") + return(list(insert.new.file = TRUE, start_date = existing.input$start_date, end_date = existing.input$end_date)) + } else { + # There's an existing input that spans desired start/end dates with files on this machine + PEcAn.logger::logger.info("Skipping this input conversion because files are already available.") + return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id)) + } +} \ No newline at end of file diff --git a/base/db/man/add.database.entries.Rd b/base/db/man/add_database_entries.Rd similarity index 90% rename from base/db/man/add.database.entries.Rd rename to base/db/man/add_database_entries.Rd index 5de01cd1705..ad1f8acbc05 100644 --- a/base/db/man/add.database.entries.Rd +++ b/base/db/man/add_database_entries.Rd @@ -1,15 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add.database.entries.R -\name{add.database.entries} -\alias{add.database.entries} +% Please edit documentation in R/add_database_entries.R +\name{add_database_entries} +\alias{add_database_entries} \title{Return new arrangement of database while adding code to deal with ensembles} \usage{ -add.database.entries( +add_database_entries( result, con, start_date, end_date, - write, overwrite, insert.new.file, input.args, @@ -33,8 +32,6 @@ add.database.entries( \item{end_date}{end date of the data} -\item{write}{whether to write to the database} - \item{overwrite}{Logical: If a file already exists, create a fresh copy?} \item{insert.new.file}{whether to insert a new file} @@ -58,6 +55,8 @@ add.database.entries( \item{existing.dbfile}{existing dbfile records} \item{input}{input records} + +\item{write}{whether to write to the database} } \value{ list of input and dbfile ids diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd index 8dd541f9380..6f5f8c23ce9 100644 --- a/base/db/man/check_missing_files.Rd +++ 
b/base/db/man/check_missing_files.Rd @@ -4,18 +4,11 @@ \alias{check_missing_files} \title{Function to check if result has empty or missing files} \usage{ -check_missing_files( - result, - outname, - existing.input = NULL, - existing.dbfile = NULL -) +check_missing_files(result, existing.input = NULL, existing.dbfile = NULL) } \arguments{ \item{result}{A list of dataframes with file paths} -\item{outname}{Name of the output file} - \item{existing.input}{Existing input records} \item{existing.dbfile}{Existing dbfile records} diff --git a/base/db/man/get.machine.host.Rd b/base/db/man/get_machine_host.Rd similarity index 73% rename from base/db/man/get.machine.host.Rd rename to base/db/man/get_machine_host.Rd index 926035dec0c..4dbc2258ab7 100644 --- a/base/db/man/get.machine.host.Rd +++ b/base/db/man/get_machine_host.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get.machine.info.R -\name{get.machine.host} -\alias{get.machine.host} +% Please edit documentation in R/get_machine_info.R +\name{get_machine_host} +\alias{get_machine_host} \title{Helper Function to retrieve machine host and machine informations} \usage{ -get.machine.host(host, con = NULL) +get_machine_host(host, con) } \arguments{ \item{host}{host information} diff --git a/base/db/man/get.machine.info.Rd b/base/db/man/get_machine_info.Rd similarity index 73% rename from base/db/man/get.machine.info.Rd rename to base/db/man/get_machine_info.Rd index 6e57013c4d7..68221a9c565 100644 --- a/base/db/man/get.machine.info.Rd +++ b/base/db/man/get_machine_info.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get.machine.info.R -\name{get.machine.info} -\alias{get.machine.info} +% Please edit documentation in R/get_machine_info.R +\name{get_machine_info} +\alias{get_machine_info} \title{Get machine information from db} \usage{ -get.machine.info(host, input.args, input.id = NULL, con = NULL) +get_machine_info(host, input.args, input.id = NULL, con = NULL) } \arguments{ \item{host}{host information} diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R index e4f40e7bcb5..474ed4eaaeb 100644 --- a/base/db/tests/testthat/test.convert_input.R +++ b/base/db/tests/testthat/test.convert_input.R @@ -3,7 +3,7 @@ test_that("`convert_input()` able to call the respective download function for a mockery::stub(convert_input, "dbfile.input.check", data.frame()) mockery::stub(convert_input, "db.query", data.frame(id = 1)) - mockery::stub(convert_input, "get.machine.info", list( + mockery::stub(convert_input, "get_machine_info", list( machine = data.frame(id = 1), input = data.frame(id = 1), dbfile = data.frame(id = 1) @@ -20,7 +20,7 @@ test_that("`convert_input()` able to call the respective download function for a existing.input = list(data.frame(file = character(0))), existing.dbfile = list(data.frame(file = character(0))) )) - mockery::stub(convert_input, "add.database.entries", list(input.id = 1, dbfile.id = 1)) + mockery::stub(convert_input, "add_database_entries", list(input.id = 1, dbfile.id = 1)) convert_input( input.id = NA, From 04d78354ed0798fc08b82efb4f74c8b90f70d39c Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Mon, 23 Dec 2024 05:39:07 +0530 Subject: [PATCH 0042/1193] Update naming --- base/db/R/{check.missing.files.R => check_missing_files.R} | 0 base/db/man/check_missing_files.Rd | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename base/db/R/{check.missing.files.R => 
check_missing_files.R} (100%)

diff --git a/base/db/R/check.missing.files.R b/base/db/R/check_missing_files.R
similarity index 100%
rename from base/db/R/check.missing.files.R
rename to base/db/R/check_missing_files.R
diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd
index 6f5f8c23ce9..fa63c7878d5 100644
--- a/base/db/man/check_missing_files.Rd
+++ b/base/db/man/check_missing_files.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/check.missing.files.R
+% Please edit documentation in R/check_missing_files.R
 \name{check_missing_files}
 \alias{check_missing_files}
 \title{Function to check if result has empty or missing files}

From e9a95eef0704025afcdc02aa8c9703aa1117dddb Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 27 Jan 2025 17:42:35 +0530
Subject: [PATCH 0043/1193] Update documentation per comments by @mdietze

---
 base/db/R/add_database_entries.R    | 47 ++++++++++++++----------
 base/db/R/convert_input.R           | 12 ++++----
 base/db/man/add_database_entries.Rd | 45 ++++++++++++++-------------
 3 files changed, 56 insertions(+), 48 deletions(-)

diff --git a/base/db/R/add_database_entries.R b/base/db/R/add_database_entries.R
index 87814e04db8..e9dcbadfdce 100644
--- a/base/db/R/add_database_entries.R
+++ b/base/db/R/add_database_entries.R
@@ -1,24 +1,29 @@
-#' Return new arrangement of database while adding code to deal with ensembles
-#'
-#' @param result list of results from the download function
-#' @param con database connection
-#' @param start_date start date of the data
-#' @param end_date end date of the data
-#' @param write whether to write to the database
-#' @param overwrite Logical: If a file already exists, create a fresh copy?
-#' @param insert.new.file whether to insert a new file
-#' @param input.args input arguments obtained from the convert_input function
-#' @param machine machine information
-#' @param mimetype data product specific file format
-#' @param formatname format name of the data
-#' @param allow.conflicting.dates whether to allow conflicting dates
-#' @param ensemble ensemble id
-#' @param ensemble_name ensemble name
-#' @param existing.input existing input records
-#' @param existing.dbfile existing dbfile records
-#' @param input input records
-#' @return list of input and dbfile ids
-#'
+#' Insert or Update Database Records for New or Modified Input Data
+#'
+#' @title Insert or Update Database Records for New or Modified Input Data
+#' @description This function is called internally by [convert_input()] to insert or update **input** and **dbfile** records in the PEcAn BETY database after one or more data-conversion or download functions have produced local or remote files. It is specifically intended for use with the output from data-conversion functions called by [convert_input()] (e.g. various "download_X" or "met2model_X" functions), but can be adapted if the return structure matches the requirements below.
+#'
+#' @param result list of data frames, each data frame corresponding to one piece or "chunk" of newly-created data. Typically, these data frames are produced by the function specified in `convert_input(..., fcn=...)`. Each data frame must contain at least: \describe{ \item{file}{Absolute file path(s) to the newly created file(s).} \item{dbfile.name}{The base filename(s) (without leading path) for each corresponding file.} } Additional columns are allowed but unused by this function.
+#' @param con database connection object (as returned by, e.g., \code{\link[DBI]{dbConnect}}). +#' @param start_date Date or character. The start date of the data (in UTC). Acceptable types include Date objects (`as.Date`) or character strings that can be parsed to a Date via standard R conversions. +#' @param end_date Date or character. The end date of the data (in UTC). Acceptable types include Date objects (`as.Date`) or character strings that can be parsed to a Date via standard R conversions. +#' @param overwrite logical. If `TRUE`, any existing database records and files for the same input and date range should be overwritten with the new files. If `FALSE`, existing files are preserved. +#' @param insert.new.file logical. If `TRUE`, forces the creation of a new **dbfile** entry even if an existing entry is found. Typically used for forecast or ensemble data that may be partially present. +#' @param input.args list. This is passed from [convert_input()] and contains auxiliary arguments or settings that were passed along internally. It may include items such as `newsite` (integer site ID), among others. Its exact contents are not strictly defined but typically include the arguments provided to `convert_input()`. +#' @param machine data.frame. Single row describing the machine on which the new data resides. It typically has columns like `id` and `hostname`, indicating the corresponding row in BETY's `machines` table. +#' @param mimetype character. String indicating the file's MIME type (e.g. `"text/csv"`, `"application/x-netcdf"`, etc.). +#' @param formatname character. String describing the file format (as listed in BETYdb's `formats` table). For example `"CF Meteorology"`. +#' @param allow.conflicting.dates logical. If `TRUE`, allows creation or insertion of new file records even if their date range overlaps with existing records. If `FALSE`, overlapping ranges may cause errors or be disallowed. +#' @param ensemble integer or logical. If an integer > 1, indicates that multiple ensemble members were generated (often for forecast data) and that each member may need separate database entries. If `FALSE`, the data are not an ensemble. +#' @param ensemble_name character. String providing a descriptive label or identifier for an ensemble member. Typically used if `convert_input()` is called iteratively for each member. +#' @param existing.input data.frame. Possibly zero rows representing the current record(s) in the `inputs` table that match (or partially match) the data being added. If no matching record exists, an empty data frame is supplied. +#' @param existing.dbfile data.frame. Possibly zero rows representing the current record(s) in the `dbfiles` table that match (or partially match) the data being added. If no matching record exists, an empty data frame is supplied. +#' @param input data.frame. Single row with the parent input record from BETYdb, typically including columns like `id`, `start_date`, `end_date`, etc. If the new data are derived from an existing input, this links them in the `parent_id` column of the new entries. +#' +#' @return list with two elements: \describe{ \item{input.id}{A numeric vector of new (or updated) input record IDs.} \item{dbfile.id}{A numeric vector of new (or updated) dbfile record IDs.} } +#' +#' @details This function consolidates the final step of adding or updating records in the BETY database to reflect newly created data files. 
It either updates existing `input` and `dbfile` records or creates new records, depending on the provided arguments (`overwrite`, `insert.new.file`, etc.) and whether a matching record already exists. Typically, these records represent model-ready meteorological or other environmental data, after format conversion or downloading has taken place in [convert_input()]. +#' #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko add_database_entries <- function( diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R index 94f11df502e..3e55c83c48a 100644 --- a/base/db/R/convert_input.R +++ b/base/db/R/convert_input.R @@ -568,12 +568,12 @@ convert_input <- # New arrangement of database adding code to deal with ensembles. if(write) { add_entries_result <- return (add_database_entries(result, con, start_date, - end_date, overwrite, - insert.new.file, input.args, - machine, mimetype, formatname, - allow.conflicting.dates, ensemble, - ensemble_name, existing.input, - existing.dbfile, input)) + end_date, overwrite, + insert.new.file, input.args, + machine, mimetype, formatname, + allow.conflicting.dates, ensemble, + ensemble_name, existing.input, + existing.dbfile, input)) } else { PEcAn.logger::logger.warn("Input was not added to the database") successful <- TRUE diff --git a/base/db/man/add_database_entries.Rd b/base/db/man/add_database_entries.Rd index ad1f8acbc05..d103c985853 100644 --- a/base/db/man/add_database_entries.Rd +++ b/base/db/man/add_database_entries.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/add_database_entries.R \name{add_database_entries} \alias{add_database_entries} -\title{Return new arrangement of database while adding code to deal with ensembles} +\title{Insert or Update Database Records for New or Modified Input Data} \usage{ add_database_entries( result, @@ -24,45 +24,48 @@ add_database_entries( ) } \arguments{ -\item{result}{list of results from the download function} +\item{result}{list of data frames, each data frame corresponding to one piece or "chunk" of newly-created data. Typically, these data frames are produced by the function specified in `convert_input(..., fcn=...)`. Each data frame must contain at least: \describe{ \item{file}{Absolute file path(s) to the newly created file(s).} \item{dbfile.name}{The base filename(s) (without leading path) for each corresponding file.} } Additional columns are allowed but unused by this function.} -\item{con}{database connection} +\item{con}{database connection object (as returned by, e.g., \code{\link[DBI]{dbConnect}}).} -\item{start_date}{start date of the data} +\item{start_date}{Date or character. The start date of the data (in UTC). Acceptable types include Date objects (`as.Date`) or character strings that can be parsed to a Date via standard R conversions.} -\item{end_date}{end date of the data} +\item{end_date}{Date or character. The end date of the data (in UTC). Acceptable types include Date objects (`as.Date`) or character strings that can be parsed to a Date via standard R conversions.} -\item{overwrite}{Logical: If a file already exists, create a fresh copy?} +\item{overwrite}{logical. If `TRUE`, any existing database records and files for the same input and date range should be overwritten with the new files. If `FALSE`, existing files are preserved.} -\item{insert.new.file}{whether to insert a new file} +\item{insert.new.file}{logical. If `TRUE`, forces the creation of a new **dbfile** entry even if an existing entry is found. 
Typically used for forecast or ensemble data that may be partially present.} -\item{input.args}{input arguments obtained from the convert_input function} +\item{input.args}{list. This is passed from [convert_input()] and contains auxiliary arguments or settings that were passed along internally. It may include items such as `newsite` (integer site ID), among others. Its exact contents are not strictly defined but typically include the arguments provided to `convert_input()`.} -\item{machine}{machine information} +\item{machine}{data.frame. Single row describing the machine on which the new data resides. It typically has columns like `id` and `hostname`, indicating the corresponding row in BETY's `machines` table.} -\item{mimetype}{data product specific file format} +\item{mimetype}{character. String indicating the file's MIME type (e.g. `"text/csv"`, `"application/x-netcdf"`, etc.).} -\item{formatname}{format name of the data} +\item{formatname}{character. String describing the file format (as listed in BETYdb's `formats` table). For example `"CF Meteorology"`.} -\item{allow.conflicting.dates}{whether to allow conflicting dates} +\item{allow.conflicting.dates}{logical. If `TRUE`, allows creation or insertion of new file records even if their date range overlaps with existing records. If `FALSE`, overlapping ranges may cause errors or be disallowed.} -\item{ensemble}{ensemble id} +\item{ensemble}{integer or logical. If an integer > 1, indicates that multiple ensemble members were generated (often for forecast data) and that each member may need separate database entries. If `FALSE`, the data are not an ensemble.} -\item{ensemble_name}{ensemble name} +\item{ensemble_name}{character. String providing a descriptive label or identifier for an ensemble member. Typically used if `convert_input()` is called iteratively for each member.} -\item{existing.input}{existing input records} +\item{existing.input}{data.frame. Possibly zero rows representing the current record(s) in the `inputs` table that match (or partially match) the data being added. If no matching record exists, an empty data frame is supplied.} -\item{existing.dbfile}{existing dbfile records} +\item{existing.dbfile}{data.frame. Possibly zero rows representing the current record(s) in the `dbfiles` table that match (or partially match) the data being added. If no matching record exists, an empty data frame is supplied.} -\item{input}{input records} - -\item{write}{whether to write to the database} +\item{input}{data.frame. Single row with the parent input record from BETYdb, typically including columns like `id`, `start_date`, `end_date`, etc. If the new data are derived from an existing input, this links them in the `parent_id` column of the new entries.} } \value{ -list of input and dbfile ids +list with two elements: \describe{ \item{input.id}{A numeric vector of new (or updated) input record IDs.} \item{dbfile.id}{A numeric vector of new (or updated) dbfile record IDs.} } } \description{ -Return new arrangement of database while adding code to deal with ensembles +This function is called internally by [convert_input()] to insert or update **input** and **dbfile** records in the PEcAn BETY database after one or more data-conversion or download functions have produced local or remote files. It is specifically intended for use with the output from data-conversion functions called by [convert_input()] (e.g. various "download_X" or "met2model_X" functions), but can be adapted if the return structure matches the requirements below. 
+} +\details{ +Insert or Update Database Records for New or Modified Input Data + +This function consolidates the final step of adding or updating records in the BETY database to reflect newly created data files. It either updates existing `input` and `dbfile` records or creates new records, depending on the provided arguments (`overwrite`, `insert.new.file`, etc.) and whether a matching record already exists. Typically, these records represent model-ready meteorological or other environmental data, after format conversion or downloading has taken place in [convert_input()]. } \author{ Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko From 525e05fae57edce8daf6b569b0b1b2040e6acf6c Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Mon, 27 Jan 2025 17:52:45 +0530 Subject: [PATCH 0044/1193] Update check_missing_files.R --- base/db/R/check_missing_files.R | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/base/db/R/check_missing_files.R b/base/db/R/check_missing_files.R index 9c119239988..4471ee90404 100644 --- a/base/db/R/check_missing_files.R +++ b/base/db/R/check_missing_files.R @@ -1,9 +1,19 @@ -#' Function to check if result has empty or missing files +#' Check for Missing or Empty Files in Conversion Results #' -#' @param result A list of dataframes with file paths -#' @param existing.input Existing input records -#' @param existing.dbfile Existing dbfile records -#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records +#' This function inspects the file paths in a list of data frames (typically produced by a download or conversion routine) to ensure that each file is present and non-empty. Specifically, it checks whether any file path is missing or has a file size of zero, and logs an error if such files are detected. It also normalizes `existing.input` and `existing.dbfile` so that each is returned as a list of data frames. +#' +#' @param result A list of data frames containing file information. Each data frame is expected to have a column named `file` with absolute file paths created by a data-conversion or download function. For example, this might be the structure returned by a "download_X" or "met2model_X" function when invoked via [convert_input()]. +#' @param existing.input A data frame or list of data frames (possibly zero rows) representing input records in the BETY `inputs` table that match (or partially match) the data being added. This is converted to a list of data frames if it is not already. +#' @param existing.dbfile A data frame or list of data frames (possibly zero rows) representing dbfile records in the BETY `dbfiles` table that match (or partially match) the data being added. This is also converted to a list of data frames if it is not already. +#' +#' @return A list containing: +#' \itemize{ +#' \item A list of data frames for `existing.input` +#' \item A list of data frames for `existing.dbfile` +#' } +#' +#' @details +#' The function calculates the file size for each file specified in the `result` data frames. If any file path is missing (`NA`) or any file size is zero, the function raises a fatal error (via [PEcAn.logger::logger.severe]) indicating that an expected file is either nonexistent or empty. If no such issues are found, it merely ensures that `existing.input` and `existing.dbfile` are each wrapped in a list for consistent downstream usage. 
#'
#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko

From f82fc4b49de2afb058347af57b8de5fa1ce4d6ec Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 27 Jan 2025 17:53:41 +0530
Subject: [PATCH 0045/1193] Update add_database_entries.R

---
 base/db/R/add_database_entries.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/db/R/add_database_entries.R b/base/db/R/add_database_entries.R
index e9dcbadfdce..7351e631423 100644
--- a/base/db/R/add_database_entries.R
+++ b/base/db/R/add_database_entries.R
@@ -38,7 +38,7 @@ add_database_entries <- function(
   # This list will be returned.
   newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null.

-  for (i in 1:length(result)) { # Master for loop
+  for (i in seq_along(result)) { # Master for loop
     id_not_added <- TRUE

     if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 &&

From 5ac641376bbfbbe323580a0603763f9719900842 Mon Sep 17 00:00:00 2001
From: Abhinav Pandey
Date: Mon, 27 Jan 2025 18:00:26 +0530
Subject: [PATCH 0046/1193] Renamed `add_database_entries` and updated documentation

---
 base/db/R/convert_input.R | 2 +-
 ...ase_entries.R => update_ensemble_writes.R} | 2 +-
 base/db/man/check_missing_files.Rd | 19 +++++++++++++------
 ...e_entries.Rd => update_ensemble_writes.Rd} | 8 ++++----
 base/db/tests/testthat/test.convert_input.R | 2 +-
 5 files changed, 20 insertions(+), 13 deletions(-)
 rename base/db/R/{add_database_entries.R => update_ensemble_writes.R} (99%)
 rename base/db/man/{add_database_entries.Rd => update_ensemble_writes.Rd} (97%)

diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R
index 3e55c83c48a..0fc625915f6 100644
--- a/base/db/R/convert_input.R
+++ b/base/db/R/convert_input.R
@@ -567,7 +567,7 @@ convert_input <-
     #---------------------------------------------------------------#
     # New arrangement of database adding code to deal with ensembles.
     if(write) {
-      add_entries_result <- return (add_database_entries(result, con, start_date,
+      add_entries_result <- return (update_ensemble_writes(result, con, start_date,
                                           end_date, overwrite,
                                           insert.new.file, input.args,
                                           machine, mimetype, formatname,

diff --git a/base/db/R/add_database_entries.R b/base/db/R/update_ensemble_writes.R
similarity index 99%
rename from base/db/R/add_database_entries.R
rename to base/db/R/update_ensemble_writes.R
index 7351e631423..8076ccb209c 100644
--- a/base/db/R/add_database_entries.R
+++ b/base/db/R/update_ensemble_writes.R
@@ -26,7 +26,7 @@
 #'
 #' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko

-add_database_entries <- function(
+update_ensemble_writes <- function(
     result, con, start_date, end_date,
     overwrite, insert.new.file, input.args,

diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd
index fa63c7878d5..35ad0f24331 100644
--- a/base/db/man/check_missing_files.Rd
+++ b/base/db/man/check_missing_files.Rd
@@ -2,22 +2,29 @@
 % Please edit documentation in R/check_missing_files.R
 \name{check_missing_files}
 \alias{check_missing_files}
-\title{Function to check if result has empty or missing files}
+\title{Check for Missing or Empty Files in Conversion Results}
 \usage{
 check_missing_files(result, existing.input = NULL, existing.dbfile = NULL)
 }
 \arguments{
-\item{result}{A list of dataframes with file paths}
+\item{result}{A list of data frames containing file information. Each data frame is expected to have a column named `file` with absolute file paths created by a data-conversion or download function.
For example, this might be the structure returned by a "download_X" or "met2model_X" function when invoked via [convert_input()].} -\item{existing.input}{Existing input records} +\item{existing.input}{A data frame or list of data frames (possibly zero rows) representing input records in the BETY `inputs` table that match (or partially match) the data being added. This is converted to a list of data frames if it is not already.} -\item{existing.dbfile}{Existing dbfile records} +\item{existing.dbfile}{A data frame or list of data frames (possibly zero rows) representing dbfile records in the BETY `dbfiles` table that match (or partially match) the data being added. This is also converted to a list of data frames if it is not already.} } \value{ -A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records +A list containing: +\itemize{ + \item A list of data frames for `existing.input` + \item A list of data frames for `existing.dbfile` +} } \description{ -Function to check if result has empty or missing files +This function inspects the file paths in a list of data frames (typically produced by a download or conversion routine) to ensure that each file is present and non-empty. Specifically, it checks whether any file path is missing or has a file size of zero, and logs an error if such files are detected. It also normalizes `existing.input` and `existing.dbfile` so that each is returned as a list of data frames. +} +\details{ +The function calculates the file size for each file specified in the `result` data frames. If any file path is missing (`NA`) or any file size is zero, the function raises a fatal error (via [PEcAn.logger::logger.severe]) indicating that an expected file is either nonexistent or empty. If no such issues are found, it merely ensures that `existing.input` and `existing.dbfile` are each wrapped in a list for consistent downstream usage. 
}
\author{
Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko
}

diff --git a/base/db/man/add_database_entries.Rd b/base/db/man/update_ensemble_writes.Rd
similarity index 97%
rename from base/db/man/add_database_entries.Rd
rename to base/db/man/update_ensemble_writes.Rd
index d103c985853..587de12b1b3 100644
--- a/base/db/man/add_database_entries.Rd
+++ b/base/db/man/update_ensemble_writes.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_database_entries.R
-\name{add_database_entries}
-\alias{add_database_entries}
+% Please edit documentation in R/update_ensemble_writes.R
+\name{update_ensemble_writes}
+\alias{update_ensemble_writes}
 \title{Insert or Update Database Records for New or Modified Input Data}
 \usage{
-add_database_entries(
+update_ensemble_writes(
   result,
   con,

diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R
index 474ed4eaaeb..c6c27bde320 100644
--- a/base/db/tests/testthat/test.convert_input.R
+++ b/base/db/tests/testthat/test.convert_input.R
@@ -20,7 +20,7 @@ test_that("`convert_input()` able to call the respective download function for a
     existing.input = list(data.frame(file = character(0))),
     existing.dbfile = list(data.frame(file = character(0)))
   ))
-  mockery::stub(convert_input, "add_database_entries", list(input.id = 1, dbfile.id = 1))
+  mockery::stub(convert_input, "update_ensemble_writes", list(input.id = 1, dbfile.id = 1))

   convert_input(
     input.id = NA,

From bd8f9691ef0a58b0a061ca33ca10646454da3656 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 19 Feb 2025 20:46:37 -0500
Subject: [PATCH 0047/1193] Update namespace for added functions.

---
 modules/assim.sequential/NAMESPACE | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE
index db21f07876e..3d034fd126f 100644
--- a/modules/assim.sequential/NAMESPACE
+++ b/modules/assim.sequential/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand

 export(Analysis.sda)
+export(Average.ERA5.2.GeoTIFF)
 export(Construct.H.multisite)
 export(Construct.R)
 export(Construct_H)
@@ -29,6 +30,7 @@ export(assessParams)
 export(block_matrix)
 export(conj_wt_wishart_sampler)
 export(construct_nimble_H)
+export(downscale.qsub.main)
 export(dwtmnorm)
 export(get_ensemble_weights)
 export(hop_test)

From d9ad8f450bea052bb9424912a26977be7c3a3c78 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 19 Feb 2025 20:47:01 -0500
Subject: [PATCH 0048/1193] Added the script for running the North America downscale functions.

---
 .../inst/anchor/NA_downscale_script.R | 80 +++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 modules/assim.sequential/inst/anchor/NA_downscale_script.R

diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
new file mode 100644
index 00000000000..d670bcfdff4
--- /dev/null
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -0,0 +1,80 @@
+library(purrr)
+library(foreach)
+setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/downscale_maps/")
+# average ERA5 to climatic covariates.
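+# Each call below writes one set of window-averaged met covariates to GeoTIFF and
+# returns its path; yearly ERA5 files named ERA5_<year>.nc are expected in in.path,
+# and the collected paths are reused as the "met" covariate layers further down.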
+outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET" +in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/" +dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year")) +start.dates <- dates[1:10] +end.dates <- dates[2:11] +paths <- c() +for (i in 1:10) { + paths <- c(paths, PEcAnAssimSequential:::Average.ERA5.2.GeoTIFF(start.dates[i], end.dates[i], in.path, outdir)) + print(i) +} +# setup. +base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif" +load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/sda.all.forecast.analysis.Rdata") +variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb") +settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/pecanIC.xml" +outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/downscale_maps/" +cores <- 28 +date <- seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year") +# loop over years. +for (i in seq_along(date)) { + # setup covariates paths and variable names. + cov.tif.file.list <- list(LC = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif", + var.name = "LC"), + year_since_disturb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_LC/outputs/age.tif", + var.name = "year_since_disturb"), + agb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/AGB/agb.tif", + var.name = "agb"), + twi = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/TWI/TWI_resample.tiff", + var.name = "twi"), + met = list(dir = paths[i], + var.name = c("temp", "prec", "srad", "vapr")), + soil = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/SoilGrids.tif", + var.name = c("PH", "N", "SOC", "Sand"))) + # Assemble covariates. + if (file.exists(paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff"))) { + covariates.dir <- paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff") + } else { + covariates.dir <- create.covariates.geotiff(outdir = outdir, + year = lubridate::year(date[i]), + base.map.dir = base.map.dir, + cov.tif.file.list = cov.tif.file.list, + normalize = T, + cores = cores) + } + # grab analysis. + analysis.yr <- analysis.all[[i]] + time <- date[i] + # loop over carbon types. + for (j in seq_along(variables)) { + # setup folder. + variable <- variables[j] + folder.path <- file.path(outdir, paste0(variables[j], "_", date[i])) + dir.create(folder.path) + save(list = c("settings", "analysis.yr", "covariates.dir", "time", "variable", "folder.path", "base.map.dir", "cores", "outdir"), + file = file.path(folder.path, "dat.Rdata")) + # prepare for qsub. + jobsh <- c("#!/bin/bash -l", + "module load R/4.1.2", + "echo \"require (PEcAnAssimSequential)", + " require (foreach)", + " require (purrr)", + " downscale.qsub.main('@FOLDER_PATH@')", + " \" | R --no-save") + jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh) + writeLines(jobsh, con = file.path(folder.path, "job.sh")) + # qsub command. 
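+    # note: the perl lookahead below splits the command string only on spaces
+    # that sit outside double quotes, e.g. 'qsub -N "my job"' would yield
+    # c("qsub", "-N", "\"my job\""), so quoted arguments survive the split.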
+    qsub <- "qsub -l h_rt=6:00:00 -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
+    qsub <- gsub("@CORES@", cores, qsub)
+    qsub <- gsub("@NAME@", paste0("ds_", i, "_", j), qsub)
+    qsub <- gsub("@STDOUT@", file.path(folder.path, "stdout.log"), qsub)
+    qsub <- gsub("@STDERR@", file.path(folder.path, "stderr.log"), qsub)
+    qsub <- strsplit(qsub, " (?=([^\"']*\"[^\"']*\")*[^\"']*$)", perl = TRUE)
+    cmd <- qsub[[1]]
+    out <- system2(cmd, file.path(folder.path, "job.sh"), stdout = TRUE, stderr = TRUE)
+  }
+}

From 14c431238004b1465428012b72c1878e3d6da1ae Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 19 Feb 2025 20:47:19 -0500
Subject: [PATCH 0049/1193] Add the script for the downscale functions.

---
 modules/assim.sequential/R/SDA_NA_downscale.R | 489 ++++++++++++++++++
 1 file changed, 489 insertions(+)
 create mode 100644 modules/assim.sequential/R/SDA_NA_downscale.R

diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R
new file mode 100644
index 00000000000..599ea382b5b
--- /dev/null
+++ b/modules/assim.sequential/R/SDA_NA_downscale.R
@@ -0,0 +1,489 @@
+#' @description
+#' This function helps to average the ERA5 data based on the start and end dates, and convert it to the GeoTIFF file.
+#' @title Average.ERA5.2.GeoTIFF
+#'
+#' @param start.date Date: start of the averaging period (e.g., as.Date("2012-01-01")).
+#' @param end.date Date: end of the averaging period (e.g., as.Date("2021-12-31")).
+#' @param in.path character: the directory where your ERA5 data are stored (files should be named ERA5_YEAR.nc).
+#' @param outdir character: the output directory where the averaged GeoTIFF file will be generated.
+#'
+#' @return character: path to the exported GeoTIFF file.
+#'
+#' @examples
+#' @export
+#' @author Dongchen Zhang
+Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) {
+  # open ERA5 nc file as geotiff format for referencing crs and ext.
+  ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", lubridate::year(start.date), ".nc")))
+  dates <- seq(start.date, end.date, "1 year")
+  if (length(dates) < 2) {
+    PEcAn.logger::logger.info("There is no time range to be calculated!")
+    return(NA)
+  }
+  # initialize final outcomes.
+  temp.all <- precip.all <- srd.all <- dewpoint.all <- c()
+  for (i in 2:length(dates)) {
+    # initialize start and end dates for the current period
+    if (i == 1) {
+      start <- start.date
+    } else {
+      start <- as.Date(paste0(lubridate::year(dates[i]), "-01-01"))
+    }
+    if (i == length(dates)) {
+      end <- end.date
+    } else {
+      end <- as.Date(paste0(lubridate::year(dates[i]), "-12-31"))
+    }
+    # loop over years.
+    for (j in seq_along(dates)) {
+      # open ERA5 nc file.
+      met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", lubridate::year(dates[i]), ".nc")))
+      # find index for the date.
+      times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC")
+      time.inds <- which(lubridate::date(times) >= start & lubridate::date(times) <= end)
+      # extract temperature.
+      PEcAn.logger::logger.info("entering temperature.")
+      temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3)
+      # extract precipitation.
+      PEcAn.logger::logger.info("entering precipitation.")
+      precip.all <- abind::abind(precip.all, apply(ncdf4::ncvar_get(met.nc, "tp")[,,,time.inds], c(1,2,4), mean), along = 3)
+      # extract shortwave solar radiation.
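+      # note: the ERA5 arrays are assumed to be lon x lat x ensemble x time,
+      # so apply() over margins c(1, 2, 4) collapses the ensemble dimension
+      # while keeping one layer per retained time step.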
+ PEcAn.logger::logger.info("entering solar radiation.") + srd.all <- abind::abind(srd.all, apply(ncdf4::ncvar_get(met.nc, "ssrd")[,,,time.inds], c(1,2,4), mean), along = 3) + # extract dewpoint. + PEcAn.logger::logger.info("entering dewpoint.") + dewpoint.all <- abind::abind(dewpoint.all, apply(ncdf4::ncvar_get(met.nc, "d2m")[,,,time.inds], c(1,2,4), mean), along = 3) + # close the NC connection. + ncdf4::nc_close(met.nc) + } + } + # aggregate across time. + # temperature. + temp <- apply(temp.all, c(1, 2), mean) + temp <- PEcAn.utils::ud_convert(temp, "K", "degC") + # precipitation. + precip <- apply(precip.all, c(1, 2), mean) + # solar radiation. + srd <- apply(srd.all, c(1, 2), mean) + # dewpoint. + dewpoint <- apply(dewpoint.all, c(1, 2), mean) + dewpoint <- PEcAn.utils::ud_convert(dewpoint, "K", "degC") + # convert dew point to relative humidity. + beta <- (112 - (0.1 * temp) + dewpoint) / (112 + (0.9 * temp)) + relative.humidity <- beta ^ 8 + VPD <- PEcAn.data.atmosphere::get.vpd(100*relative.humidity, temp) + # combine together. + PEcAn.logger::logger.info("Aggregate maps.") + met.rast <- c(terra::rast(matrix(temp, nrow = dim(temp)[2], ncol = dim(temp)[1], byrow = T)), + terra::rast(matrix(precip, nrow = dim(precip)[2], ncol = dim(precip)[1], byrow = T)), + terra::rast(matrix(srd, nrow = dim(srd)[2], ncol = dim(srd)[1], byrow = T)), + terra::rast(matrix(VPD, nrow = dim(VPD)[2], ncol = dim(VPD)[1], byrow = T))) + # adjust crs and extents. + terra::crs(met.rast) <- terra::crs(ERA5.tiff) + terra::ext(met.rast) <- terra::ext(ERA5.tiff) + names(met.rast) <- c("temp", "prec", "srad", "vapr") + # write into geotiff file. + terra::writeRaster(met.rast, file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff"))) + # end. + gc() + return(file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff"))) +} + + + +# assemble covariates from different spatial scales/resolutions and crs. +# Here is an example of the `cov.tif.file.list` object: +# cov.tif.file.list <- list(LC = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif", +# var.name = "LC"), +# year_since_disturb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_LC/outputs/age.tif", +# var.name = "year_since_disturb"), +# agb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/AGB/agb.tif", +# var.name = "agb"), +# twi = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/TWI/TWI_resample.tiff", +# var.name = "twi"), +# met = list(dir = paths[i], +# var.name = c("temp", "prec", "srad", "vapr")), +# soil = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/SoilGrids.tif", +# var.name = c("PH", "N", "SOC", "Sand"))) +# This function helps to stack target data layers from various GeoTIFF maps to a single map +# cropped and projected to the `base.map`. It also enables the normalization feature to facilitate the ML process. + + +#' @description +#' This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map. +#' @title stack.covariates.2.geotiff +#' +#' @param outdir character: the output directory where the stacked GeoTIFF file will be generated. +#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the final map. 
+#' @param cov.tif.file.list list: a list containing sub-lists, each including the path to the corresponding map and the variables to be extracted (e.g., list(LC = list(dir = "path/to/landcover.tiff", var.name = "LC"))).
+#' @param normalize logical: whether to normalize each data layer; the default is TRUE.
+#' @param cores numeric: how many CPUs to be used in the calculation; the default is the total CPU number you have.
+#'
+#' @return path to the exported GeoTIFF file.
+#'
+#' @examples
+#' @author Dongchen Zhang
+stack.covariates.2.geotiff <- function(outdir, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) {
+  # create the folder if it doesn't exist.
+  if (!file.exists(outdir)) {
+    dir.create(outdir)
+  }
+  # parallel loop.
+  # register parallel nodes.
+  if (cores > length(cov.tif.file.list)) {
+    cores <- length(cov.tif.file.list)
+  }
+  cl <- parallel::makeCluster(as.numeric(cores))
+  doSNOW::registerDoSNOW(cl)
+  #progress bar.
+  pb <- utils::txtProgressBar(min=1, max=length(cov.tif.file.list), style=3)
+  progress <- function(n) utils::setTxtProgressBar(pb, n)
+  opts <- list(progress=progress)
+  # foreach loop.
+  paths <- foreach::foreach(f = cov.tif.file.list,
+                            .packages=c("Kendall", "terra"),
+                            .options.snow=opts) %dopar% {
+                              # load the base map.
+                              base.map <- terra::rast(base.map.dir)
+                              # read geotif file.
+                              temp.rast <- terra::rast(f$dir)
+                              # normalize.
+                              if (normalize & !"LC" %in% f$var.name) {
+                                nx <- terra::minmax(temp.rast)
+                                temp.rast <- (temp.rast - nx[1,]) / (nx[2,] - nx[1,])
+                              }
+                              # set name to layers if we set it up in advance.
+                              # otherwise the original layer name will be used.
+                              if (!is.null(f$var.name)) {
+                                names(temp.rast) <- f$var.name
+                              }
+                              # raster operations.
+                              terra::crs(temp.rast) <- terra::crs(base.map)
+                              temp.rast <- terra::crop(temp.rast, base.map)
+                              temp.rast <- terra::resample(temp.rast, base.map)
+                              # write the raster into disk.
+                              file.name <- paste0(f$var.name, collapse = "_")
+                              path <- file.path(outdir, paste0(file.name, ".tiff"))
+                              terra::writeRaster(temp.rast, path)
+                              return(path)
+                            } %>% unlist
+  # stop parallel.
+  parallel::stopCluster(cl)
+  foreach::registerDoSEQ()
+  gc()
+  # combine rasters.
+  all.rast <- terra::rast(paths)
+  # write all covariates into disk.
+  terra::writeRaster(all.rast, file.path(outdir, "covariates.tiff"), overwrite = T)
+  # remove previous tiff files.
+  unlink(paths)
+  # return results.
+  return(file.path(outdir, "covariates.tiff"))
+}
+
+#' @description
+#' Convert settings to geospatial points in terra.
+#' @title pecan.settings.2.pts
+#'
+#' @param settings PEcAn settings: either a character path that points to the settings file or the actual settings object will be accepted.
+#'
+#' @return terra spatial points object.
+#'
+#' @examples
+#' @author Dongchen Zhang
+pecan.settings.2.pts <- function(settings) {
+  if (is.character(settings)) {
+    # read settings.
+    settings <- PEcAn.settings::read.settings(settings)
+  }
+  # grab lat/lon.
+  site.locs <- settings$run %>% purrr::map('site') %>%
+    purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>%
+    t %>% `colnames<-`(c("Lon","Lat")) %>% as.data.frame()
+  # convert lat/lon to terra::vect.
+  pts <- terra::vect(site.locs, geom = c("Lon", "Lat"), crs = "EPSG:4326")
+  return(pts)
+}
+
+#' @description
+#' This function helps to build the data frame (pixels by data columns) for vegetated pixels only, to improve efficiency.
+#' Note that the `LC` field, based on the `MODIS land cover` observations (MCD12Q1.061), must be supplied in the covariates to make this function work.
+#' @title stack.covariates.2.df
+#'
+#' @param rast.dir character: a character that points to the covariates raster file generated by the `stack.covariates.2.geotiff` function.
+#' @param cores numeric: how many CPUs to be used in the calculation; the default is the total CPU number you have.
+#'
+#' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels.
+#'
+#' @examples
+#' @author Dongchen Zhang
+stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) {
+  # load maps.
+  all.rast <- terra::rast(rast.dir)
+  # parallel loop.
+  layer.names <- names(all.rast)
+  # register parallel nodes.
+  if (cores > length(layer.names)) {
+    cores <- length(layer.names)
+  }
+  cl <- parallel::makeCluster(as.numeric(cores))
+  doSNOW::registerDoSNOW(cl)
+  #progress bar.
+  pb <- utils::txtProgressBar(min=1, max=length(layer.names), style=3)
+  progress <- function(n) utils::setTxtProgressBar(pb, n)
+  opts <- list(progress=progress)
+  # foreach loop.
+  vecs <- foreach::foreach(r = seq_along(layer.names),
+                           .packages=c("Kendall", "terra"),
+                           .options.snow=opts) %dopar% {
+                             all.rast <- terra::rast(rast.dir)
+                             temp.vec <- matrix(all.rast[[r]], byrow = T)
+                             na.inds <- which(is.na(temp.vec))
+                             # if it's LC layer.
+                             if ("LC" == names(all.rast)[r]) {
+                               non.veg.inds <- which(! temp.vec %in% 1:8)
+                               na.inds <- unique(c(na.inds, non.veg.inds))
+                             }
+                             return(list(vec = temp.vec,
+                                         na.inds = na.inds))
+                           }
+  # stop parallel.
+  parallel::stopCluster(cl)
+  foreach::registerDoSEQ()
+  gc()
+  # grab uniqued NA index.
+  na.inds <- vecs %>% purrr::map("na.inds") %>% unlist %>% unique
+  # remove NA from each covariate.
+  # note: x[-integer(0)] drops every element in R, so only subset when NAs exist.
+  cov.vecs <- vecs %>% purrr::map(function(v){
+    if (length(na.inds) == 0) {
+      return(as.vector(v$vec))
+    }
+    return(v$vec[-na.inds])
+  }) %>% dplyr::bind_cols() %>% `colnames<-`(layer.names) %>% as.data.frame()
+  if (length(na.inds) == 0) {
+    non.na.inds <- seq_along(matrix(all.rast[[1]]))
+  } else {
+    non.na.inds <- seq_along(matrix(all.rast[[1]]))[-na.inds]
+  }
+  return(list(df = cov.vecs, non.na.inds = non.na.inds))
+}
+
+#' @description
+#' This function helps to create the training dataset of specific variable type and locations for downscaling.
+#' TODO: There will be a ratio argument between training and testing samples to verify the ML regression accuracy.
+#' @title prepare.train.dat
+#'
+#' @param settings character: physical path that points to the pecan settings XML file.
+#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
+#' @param covariates.dir character: path to the exported covariates GeoTIFF file.
+#' @param variable character: name of state variable. It should match up with the column names of the analysis data frame.
+#'
+#' @return matrix within which the first set of columns contains values of state variables for each ensemble member of every site, and the remaining columns contain the corresponding covariates.
+#'
+#' @examples
+#' @author Dongchen Zhang
+prepare.train.dat <- function(settings, analysis, covariates.dir, variable) {
+  # convert settings into geospatial points.
+  pts <- pecan.settings.2.pts(settings)
+  # read covariates.
+  cov.rast <- terra::rast(covariates.dir)
+  # extract covariates by locations.
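+  # note: terra::extract() returns one row per point in the order of `pts`,
+  # which is assumed to match the site order of the state-variable columns
+  # taken from `analysis` below, so predictors and states line up by site.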
+  predictors <- as.data.frame(terra::extract(cov.rast, pts, ID = FALSE))
+  covariate_names <- names(predictors)
+  if ("ID" %in% covariate_names) {
+    # locate the ID column by name before dropping it.
+    rm.ind <- which(covariate_names == "ID")
+    covariate_names <- covariate_names[-rm.ind]
+    predictors <- predictors[,-rm.ind]
+  }
+  # grab carbon data.
+  var.dat <- analysis[,which(colnames(analysis) == variable)] %>% t %>%
+    as.data.frame() %>% `colnames<-`(paste0("ensemble", seq(nrow(analysis))))
+  # combine carbon and predictor.
+  full_data <- cbind(var.dat, predictors)
+  return(full_data)
+}
+
+#' @description
+#' This function helps to train the ML model across ensemble members in parallel.
+#' @title parallel.rf.train
+#'
+#' @param full_data numeric: the matrix generated using the `prepare.train.dat` function.
+#' @param cores numeric: how many CPUs to be used in the calculation; the default is the total CPU number you have.
+#'
+#' @return list of trained models across ensemble members.
+#'
+#' @examples
+#' @author Dongchen Zhang
+parallel.rf.train <- function(full_data, cores = parallel::detectCores()) {
+  # grab ensemble and predictor index.
+  col.names <- colnames(full_data)
+  ensemble.inds <- which(grepl("ensemble", col.names, fixed = TRUE))
+  predictor.inds <- seq_along(col.names)[-ensemble.inds]
+  # parallel train.
+  # register parallel nodes.
+  if (cores > length(ensemble.inds)) {
+    cores <- length(ensemble.inds)
+  }
+  cl <- parallel::makeCluster(as.numeric(cores))
+  doSNOW::registerDoSNOW(cl)
+  #progress bar.
+  pb <- utils::txtProgressBar(min=1, max=length(ensemble.inds), style=3)
+  progress <- function(n) utils::setTxtProgressBar(pb, n)
+  opts <- list(progress=progress)
+  # foreach loop.
+  models <- foreach::foreach(i = ensemble.inds,
+                             .packages=c("Kendall", "stats", "randomForest"),
+                             .options.snow=opts) %dopar% {
+                               # i iterates directly over the ensemble column indices.
+                               ensemble_col <- col.names[i]
+                               formula <- stats::as.formula(paste(ensemble_col, "~", paste(col.names[predictor.inds], collapse = " + ")))
+                               randomForest::randomForest(formula,
+                                                          data = full_data,
+                                                          ntree = 1000,
+                                                          na.action = stats::na.omit,
+                                                          keep.forest = TRUE,
+                                                          importance = TRUE)
+                             }
+  # stop parallel.
+  parallel::stopCluster(cl)
+  foreach::registerDoSEQ()
+  gc()
+  return(models)
+}
+
+#' @description
+#' This function helps to predict the target variable observations based on the covariates.
+#' The prediction works in parallel across vegetated pixels.
+#' @title parallel.prediction
+#'
+#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
+#' @param models list: trained models across ensemble members generated by the `parallel.rf.train` function.
+#' @param cov.vecs numeric: data frame containing covariates across vegetated pixels generated from the `stack.covariates.2.df` function.
+#' @param non.na.inds numeric: the corresponding index of vegetated pixels generated from the `stack.covariates.2.df` function.
+#' @param outdir character: the output directory where the downscaled maps will be stored.
+#' @param name list: containing the time and variable name to create the final GeoTIFF file name.
+#' @param cores numeric: how many CPUs to be used in the calculation; the default is the total CPU number you have.
+#'
+#' @return paths to the ensemble downscaled maps.
+#'
+#' @examples
+#' @author Dongchen Zhang
+parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) {
+  # load base map.
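+  # note: predictions are chunked row-wise over `cov.vecs` with
+  # itertools::isplitRows(), scored by the same per-ensemble model on every
+  # worker, then written back into the full raster through `non.na.inds`.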
+  base.map <- terra::rast(base.map.dir)
+  dims <- dim(base.map)
+  # setup progress bar for ensemble members.
+  pb <- utils::txtProgressBar(min = 0, max = length(models), style = 3)
+  paths <- c()
+  # loop over ensemble members.
+  for (i in seq_along(models)) {
+    # update progress bar.
+    utils::setTxtProgressBar(pb, i)
+    # go to the next if the current file has already been generated.
+    file.name <- paste0(c("ensemble", i, name$time, name$variable), collapse = "_")
+    if (file.exists(file.path(outdir, paste0(file.name, ".tiff")))) {
+      # still record the existing file so the returned paths stay complete.
+      paths <- c(paths, file.path(outdir, paste0(file.name, ".tiff")))
+      next
+    }
+    # register parallel nodes.
+    cl <- parallel::makeCluster(cores)
+    doSNOW::registerDoSNOW(cl)
+    # foreach parallel.
+    model <- models[[i]]
+    output <- foreach::foreach(d=itertools::isplitRows(cov.vecs, chunks=cores),
+                               .packages=c("stats", "randomForest")) %dopar% {
+                                 stats::predict(model, d)
+                               } %>% unlist
+    # export to geotiff map.
+    vec <- rep(NA, dims[1]*dims[2])
+    vec[non.na.inds] <- output
+    map <- terra::rast(matrix(vec, dims[1], dims[2], byrow = T))
+    terra::ext(map) <- terra::ext(base.map)
+    terra::crs(map) <- terra::crs(base.map)
+    terra::writeRaster(map, file.path(outdir, paste0(file.name, ".tiff")))
+    paths <- c(paths, file.path(outdir, paste0(file.name, ".tiff")))
+    # stop parallel.
+    parallel::stopCluster(cl)
+    foreach::registerDoSEQ()
+    gc()
+  }
+  return(paths)
+}
+
+#' @description
+#' This is the main function to execute the RF training and prediction.
+#' Note that it will be executed on each node you requested if the qsub feature is enabled.
+#' @title downscale.rf.main
+#'
+#' @param settings character: physical path that points to the pecan settings XML file.
+#' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.
+#' @param covariates.dir character: path to the exported covariates GeoTIFF file.
+#' @param time character: the time tag used to differentiate the outputs from others.
+#' @param variable character: name of state variable. It should match up with the column names of the analysis data frame.
+#' @param outdir character: the output directory where the downscaled maps will be stored.
+#' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.
+#' @param cores numeric: how many CPUs to be used in the calculation; the default is the total CPU number you have.
+#'
+#' @return paths to the ensemble downscaled maps.
+#'
+#' @examples
+#' @author Dongchen Zhang
+downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) {
+  # create folder specific for the time and carbon type.
+  folder.name <- file.path(outdir, paste0(c(variable, time), collapse = "_"))
+  if (!file.exists(folder.name)) {
+    dir.create(folder.name)
+  }
+  # prepare training data.
+  PEcAn.logger::logger.info("Preparing training data.")
+  full_data <- prepare.train.dat(settings = settings,
+                                 analysis = analysis,
+                                 covariates.dir = covariates.dir,
+                                 variable = variable)
+  # convert LC into factor.
+  if ("LC" %in% colnames(full_data)) {
+    full_data[,"LC"] <- factor(full_data[,"LC"])
+  }
+  # parallel train.
+  PEcAn.logger::logger.info("Parallel training.")
+  models <- parallel.rf.train(full_data = full_data, cores = cores)
+  # save trained models for future analysis.
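+  # usage sketch: the saved .Rdata file can be restored in a later session
+  # with load(file.path(folder.name, "rf_models.Rdata")), which re-creates
+  # the `models` list for reuse without retraining.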
+ # saveRDS(models, file.path(folder.name, "rf_models.rds")) + save(models, file = file.path(folder.name, "rf_models.Rdata")) + # convert stacked covariates geotiff file into data frame. + PEcAn.logger::logger.info("Converting geotiff to df.") + cov.df <- stack.covariates.2.df(rast.dir = covariates.dir, cores = cores) + # reconstruct LC because of the computation accuracy. + cov.df$df$LC[which(cov.df$df$LC < 1)] <- 0 + # convert LC into factor. + if ("LC" %in% colnames(cov.df$df)) { + cov.df$df[,"LC"] <- factor(cov.df$df[,"LC"]) + } + # parallel prediction. + PEcAn.logger::logger.info("Parallel prediction.") + paths <- parallel.prediction(base.map.dir = base.map.dir, + models = models, + cov.vecs = cov.df$df, + non.na.inds = cov.df$non.na.inds, + outdir = folder.name, + name = list(time = time, variable = variable), + cores = cores) + # calculate mean and std. + PEcAn.logger::logger.info("Calculate mean and std.") + ras.all <- terra::rast(paths) + mean <- terra::app(ras.all, "mean") + std <- terra::app(ras.all, "std") + # write into geotiff files. + image.base.name <- paste0(time, "_", variable, ".tiff") + terra::writeRaster(mean, filename = file.path(folder.name, paste0("mean_", image.base.name))) + terra::writeRaster(std, filename = file.path(folder.name, paste0("std_", image.base.name))) + return(list(ensemble.prediction.files = paths, + mean.prediction.file = file.path(folder.name, paste0("mean_", image.base.name)), + std.prediction.file = file.path(folder.name, paste0("std_", image.base.name)))) +} + +#' @description +#' This qsub function helps to run the submitted qsub jobs for running the downscale.rf.main function. +#' @title downscale.qsub.main +#' +#' @param folder.path Character: physical path to which the job file is located. +#' +#' @examples +#' @export +#' @author Dongchen Zhang +downscale.qsub.main <- function(folder.path) { + dat <- readRDS(file.path(folder.path, "dat.rds")) + out <- downscale.rf.main(dat$settings, dat$analysis.yr, dat$covariates.dir, lubridate::year(dat$time), dat$variable, dat$outdir, dat$base.map.dir, dat$cores) + saveRDS(out, file.path(folder.path, "res.rds")) +} \ No newline at end of file From 12b41e35fe0370a9339baeafe51471158ea2edf8 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 19 Feb 2025 20:47:35 -0500 Subject: [PATCH 0050/1193] Update the Rd files. 
---
 .../man/Average.ERA5.2.GeoTIFF.Rd | 26 +++++++++++
 .../man/downscale.qsub.main.Rd | 17 +++++++
 .../assim.sequential/man/downscale.rf.main.Rd | 44 +++++++++++++++++++
 .../man/parallel.prediction.Rd | 41 +++++++++++++++++
 .../assim.sequential/man/parallel.rf.train.Rd | 22 ++++++++++
 .../man/pecan.settings.2.pts.Rd | 20 +++++++++
 .../assim.sequential/man/prepare.train.dat.Rd | 27 ++++++++++++
 .../man/stack.covariates.2.df.Rd | 23 ++++++++++
 .../man/stack.covariates.2.geotiff.Rd | 34 ++++++++++++++
 9 files changed, 254 insertions(+)
 create mode 100644 modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd
 create mode 100644 modules/assim.sequential/man/downscale.qsub.main.Rd
 create mode 100644 modules/assim.sequential/man/downscale.rf.main.Rd
 create mode 100644 modules/assim.sequential/man/parallel.prediction.Rd
 create mode 100644 modules/assim.sequential/man/parallel.rf.train.Rd
 create mode 100644 modules/assim.sequential/man/pecan.settings.2.pts.Rd
 create mode 100644 modules/assim.sequential/man/prepare.train.dat.Rd
 create mode 100644 modules/assim.sequential/man/stack.covariates.2.df.Rd
 create mode 100644 modules/assim.sequential/man/stack.covariates.2.geotiff.Rd

diff --git a/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd b/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd
new file mode 100644
index 00000000000..0c4a561bde9
--- /dev/null
+++ b/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{Average.ERA5.2.GeoTIFF}
+\alias{Average.ERA5.2.GeoTIFF}
+\title{Average.ERA5.2.GeoTIFF}
+\usage{
+Average.ERA5.2.GeoTIFF(start.date, end.date, in.path, outdir)
+}
+\arguments{
+\item{start.date}{Date: start of the averaging period (e.g., as.Date("2012-01-01")).}
+
+\item{end.date}{Date: end of the averaging period (e.g., as.Date("2021-12-31")).}
+
+\item{in.path}{character: the directory where your ERA5 data are stored (files should be named ERA5_YEAR.nc).}
+
+\item{outdir}{character: the output directory where the averaged GeoTIFF file will be generated.}
+}
+\value{
+character: path to the exported GeoTIFF file.
+}
+\description{
+This function helps to average the ERA5 data based on the start and end dates, and convert it to the GeoTIFF file.
+}
+\author{
+Dongchen Zhang
+}
diff --git a/modules/assim.sequential/man/downscale.qsub.main.Rd b/modules/assim.sequential/man/downscale.qsub.main.Rd
new file mode 100644
index 00000000000..50de6e2d945
--- /dev/null
+++ b/modules/assim.sequential/man/downscale.qsub.main.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/SDA_NA_downscale.R
+\name{downscale.qsub.main}
+\alias{downscale.qsub.main}
+\title{downscale.qsub.main}
+\usage{
+downscale.qsub.main(folder.path)
+}
+\arguments{
+\item{folder.path}{Character: physical path to which the job file is located.}
+}
+\description{
+This qsub function helps to run the submitted qsub jobs for running the downscale.rf.main function.
+} +\author{ +Dongchen Zhang +} diff --git a/modules/assim.sequential/man/downscale.rf.main.Rd b/modules/assim.sequential/man/downscale.rf.main.Rd new file mode 100644 index 00000000000..75ec6612760 --- /dev/null +++ b/modules/assim.sequential/man/downscale.rf.main.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SDA_NA_downscale.R +\name{downscale.rf.main} +\alias{downscale.rf.main} +\title{downscale.rf.main} +\usage{ +downscale.rf.main( + settings, + analysis, + covariates.dir, + time, + variable, + outdir, + base.map.dir, + cores = parallel::detectCores() +) +} +\arguments{ +\item{settings}{character: physical path that points to the pecan settings XML file.} + +\item{analysis}{numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.} + +\item{covariates.dir}{character: path to the exported covariates GeoTIFF file.} + +\item{outdir}{character: the output directory where the downscaled maps will be stored.} + +\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.} + +\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.} + +\item{time:}{character: the time tag used to differentiate the outputs from others.} + +\item{variable:}{character: name of state variable. It should match up with the column names of the analysis data frame.} +} +\value{ +paths to the ensemble downscaled maps. +} +\description{ +This is the main function to execute the RF training and prediction. +Note it will be deployed by each node you requested if the qsub feature is enabled below. +} +\author{ +Dongchen Zhang +} diff --git a/modules/assim.sequential/man/parallel.prediction.Rd b/modules/assim.sequential/man/parallel.prediction.Rd new file mode 100644 index 00000000000..fdb04edb8b4 --- /dev/null +++ b/modules/assim.sequential/man/parallel.prediction.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SDA_NA_downscale.R +\name{parallel.prediction} +\alias{parallel.prediction} +\title{parallel.prediction} +\usage{ +parallel.prediction( + base.map.dir, + models, + cov.vecs, + non.na.inds, + outdir, + name, + cores = parallel::detectCores() +) +} +\arguments{ +\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.} + +\item{models}{list: trained models across ensemble members generated by the `parallel.rf.train` function.} + +\item{non.na.inds}{numeric: the corresponding index of vegetated pixels generated from the `stack.covariates.2.df` function.} + +\item{outdir}{character: the output directory where the downscaled maps will be stored.} + +\item{name}{list: containing the time and variable name to create the final GeoTIFF file name.} + +\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.} + +\item{cov.vecs:}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack.covariates.2.df` function.} +} +\value{ +paths to the ensemble downscaled maps. +} +\description{ +This function helps to predict the target variable observations based on the covariates. +The prediction is working in parallel across vegetated pixels. 
+} +\author{ +Dongchen Zhang +} diff --git a/modules/assim.sequential/man/parallel.rf.train.Rd b/modules/assim.sequential/man/parallel.rf.train.Rd new file mode 100644 index 00000000000..4b61a544126 --- /dev/null +++ b/modules/assim.sequential/man/parallel.rf.train.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SDA_NA_downscale.R +\name{parallel.rf.train} +\alias{parallel.rf.train} +\title{parallel.rf.train} +\usage{ +parallel.rf.train(full_data, cores = parallel::detectCores()) +} +\arguments{ +\item{full_data}{numeric: the matrix generated using the `prepare.train.dat` function.} + +\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.} +} +\value{ +list of trained models across ensemble members. +} +\description{ +This function helps to train the ML model across ensemble members in parallel. +} +\author{ +Dongchen Zhang +} diff --git a/modules/assim.sequential/man/pecan.settings.2.pts.Rd b/modules/assim.sequential/man/pecan.settings.2.pts.Rd new file mode 100644 index 00000000000..91828b1077e --- /dev/null +++ b/modules/assim.sequential/man/pecan.settings.2.pts.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SDA_NA_downscale.R +\name{pecan.settings.2.pts} +\alias{pecan.settings.2.pts} +\title{pecan.settings.2.pts} +\usage{ +pecan.settings.2.pts(settings) +} +\arguments{ +\item{settings}{PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.} +} +\value{ +terra spatial points object. +} +\description{ +convert settings to geospatial points in terra. +} +\author{ +Dongchen Zhang +} diff --git a/modules/assim.sequential/man/prepare.train.dat.Rd b/modules/assim.sequential/man/prepare.train.dat.Rd new file mode 100644 index 00000000000..ec9122dc56d --- /dev/null +++ b/modules/assim.sequential/man/prepare.train.dat.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SDA_NA_downscale.R +\name{prepare.train.dat} +\alias{prepare.train.dat} +\title{prepare.train.dat} +\usage{ +prepare.train.dat(settings, analysis, covariates.dir, variable) +} +\arguments{ +\item{settings}{character: physical path that points to the pecan settings XML file.} + +\item{analysis}{numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.} + +\item{covariates.dir}{character: path to the exported covariates GeoTIFF file.} + +\item{variable}{character: name of state variable. It should match up with the column names of the analysis data frame.} +} +\value{ +matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates. +} +\description{ +This function helps to create the training dataset of specific variable type and locations for downscaling. +TODO: There will be a ratio argument between training and testing samples to testify the ML regression accuracy. 
+} +\author{ +Dongchen Zhang +} diff --git a/modules/assim.sequential/man/stack.covariates.2.df.Rd b/modules/assim.sequential/man/stack.covariates.2.df.Rd new file mode 100644 index 00000000000..64b9b2f7ffa --- /dev/null +++ b/modules/assim.sequential/man/stack.covariates.2.df.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SDA_NA_downscale.R +\name{stack.covariates.2.df} +\alias{stack.covariates.2.df} +\title{stack.covariates.2.df} +\usage{ +\method{stack}{covariates.2.df}(rast.dir, cores = parallel::detectCores()) +} +\arguments{ +\item{rast.dir}{character: a character that points to the covariates raster file generated by the `stack.covariates.2.geotiff` function.} + +\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.} +} +\value{ +list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels. +} +\description{ +This function helps to build the data frame (pixels by data columns) for only vegetated pixels to improve the efficiency. +Note that the `LC` field using the `MODIS land cover` observations (MCD12Q1.061) must be supplied in the covariates to make this function work. +} +\author{ +Dongchen Zhang +} diff --git a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd b/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd new file mode 100644 index 00000000000..6e2a1abb220 --- /dev/null +++ b/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SDA_NA_downscale.R +\name{stack.covariates.2.geotiff} +\alias{stack.covariates.2.geotiff} +\title{stack.covariates.2.geotiff} +\usage{ +\method{stack}{covariates.2.geotiff}( + outdir, + base.map.dir, + cov.tif.file.list, + normalize = T, + cores = parallel::detectCores() +) +} +\arguments{ +\item{outdir}{character: the output directory where the stacked GeoTIFF file will be generated.} + +\item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the final map.} + +\item{cov.tif.file.list}{list: a list contains sub-lists with each including path to the corresponding map and the variables to be extracted (e.g., list(LC = list(dir = "path/to/landcover.tiff", var.name = "LC")).} + +\item{normalize}{boolean: decide if we want to normalize each data layer, the default is TRUE.} + +\item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.} +} +\value{ +path to the exported GeoTIFF file. +} +\description{ +This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map. +} +\author{ +Dongchen Zhang +} From d027f62af2d5520b75f7835cd9b12ef979b40137 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 10:00:48 -0500 Subject: [PATCH 0051/1193] Add function to namespace. 
--- modules/assim.sequential/NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 3d034fd126f..700f2cc6c16 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(stack,covariates.2.geotiff) export(Analysis.sda) export(Average.ERA5.2.GeoTIFF) export(Construct.H.multisite) From d06dc17453807ce68d9c61dcd316710419cfbf2f Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 10:01:06 -0500 Subject: [PATCH 0052/1193] Update function. --- modules/assim.sequential/R/SDA_NA_downscale.R | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R index 599ea382b5b..7233c0bf8f5 100644 --- a/modules/assim.sequential/R/SDA_NA_downscale.R +++ b/modules/assim.sequential/R/SDA_NA_downscale.R @@ -9,20 +9,15 @@ #' #' @return character: path to the exported GeoTIFF file. #' -#' @examples #' @export #' @author Dongchen Zhang Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) { # open ERA5 nc file as geotiff format for referencing crs and ext. ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", lubridate::year(start.date), ".nc"))) dates <- seq(start.date, end.date, "1 year") - if (length(dates) < 2) { - PEcAn.logger::logger.info("There is no time range to be calculated!") - return(NA) - } # initialize final outcomes. temp.all <- precip.all <- srd.all <- dewpoint.all <- c() - for (i in 2:length(dates)) { + for (i in seq_along(dates)) { # initialize start and end dates for the current period if (i == 1) { start <- start.date @@ -114,6 +109,7 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) { #' @title stack.covariates.2.geotiff #' #' @param outdir character: the output directory where the stacked GeoTIFF file will be generated. +#' @param year numeric: the year of when the covariates are stacked. #' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the final map. #' @param cov.tif.file.list list: a list contains sub-lists with each including path to the corresponding map and the variables to be extracted (e.g., list(LC = list(dir = "path/to/landcover.tiff", var.name = "LC")). #' @param normalize boolean: decide if we want to normalize each data layer, the default is TRUE. @@ -121,9 +117,10 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) { #' #' @return path to the exported GeoTIFF file. #' -#' @examples +#' @export +#' #' @author Dongchen Zhang -stack.covariates.2.geotiff <- function(outdir, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) { +stack.covariates.2.geotiff <- function(outdir, year, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) { # create the folder if it doesn't exist. if (!file.exists(outdir)) { dir.create(outdir) @@ -174,11 +171,11 @@ stack.covariates.2.geotiff <- function(outdir, base.map.dir, cov.tif.file.list, # combine rasters. all.rast <- terra::rast(paths) # write all covariates into disk. - terra::writeRaster(all.rast, file.path(outdir, "covariates.tiff"), overwrite = T) + terra::writeRaster(all.rast, file.path(outdir, paste0("covariates_", year, ".tiff")), overwrite = T) # remove previous tiff files. unlink(paths) # return results. 
- return(file.path(outdir, "covariates.tiff")) + return(file.path(outdir, paste0("covariates_", year, ".tiff"))) } #' @description @@ -189,7 +186,6 @@ stack.covariates.2.geotiff <- function(outdir, base.map.dir, cov.tif.file.list, #' #' @return terra spatial points object. #' -#' @examples #' @author Dongchen Zhang pecan.settings.2.pts <- function(settings) { if (is.character(settings)) { @@ -215,7 +211,6 @@ pecan.settings.2.pts <- function(settings) { #' #' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels. #' -#' @examples #' @author Dongchen Zhang stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) { # load maps. @@ -273,7 +268,6 @@ stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) { #' #' @return matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates. #' -#' @examples #' @author Dongchen Zhang prepare.train.dat <- function(settings, analysis, covariates.dir, variable) { # convert settings into geospatial points. @@ -305,7 +299,6 @@ prepare.train.dat <- function(settings, analysis, covariates.dir, variable) { #' #' @return list of trained models across ensemble members. #' -#' @examples #' @author Dongchen Zhang parallel.rf.train <- function(full_data, cores = parallel::detectCores()) { # grab ensemble and predictor index. @@ -358,7 +351,6 @@ parallel.rf.train <- function(full_data, cores = parallel::detectCores()) { #' #' @return paths to the ensemble downscaled maps. #' -#' @examples #' @author Dongchen Zhang parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) { # load base map. @@ -417,7 +409,6 @@ parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out #' #' @return paths to the ensemble downscaled maps. #' -#' @examples #' @author Dongchen Zhang downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) { # create folder specific for the time and carbon type. @@ -479,7 +470,6 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable #' #' @param folder.path Character: physical path to which the job file is located. #' -#' @examples #' @export #' @author Dongchen Zhang downscale.qsub.main <- function(folder.path) { From d90fcb85be2af3c8db28d71bfbefb3c5193d82f3 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 10:01:21 -0500 Subject: [PATCH 0053/1193] Parallel average ERA5. --- .../inst/anchor/NA_downscale_script.R | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R index d670bcfdff4..83613b1ff1b 100644 --- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R +++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R @@ -1,23 +1,24 @@ library(purrr) library(foreach) -setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/downscale_maps/") +library(PEcAnAssimSequential) +setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/") # average ERA5 to climatic covariates. 
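+# note: furrr::future_map2() below pairs start.dates[i] with end.dates[i] and
+# runs Average.ERA5.2.GeoTIFF() on 5 background R sessions; keep `workers` at
+# or below the cores actually available on the node.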
outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET" in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/" dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year")) start.dates <- dates[1:10] end.dates <- dates[2:11] -paths <- c() -for (i in 1:10) { - paths <- c(paths, PEcAnAssimSequential:::Average.ERA5.2.GeoTIFF(start.dates[i], end.dates[i], in.path, outdir)) - print(i) -} +# parallel average ERA5 into covariates. +future::plan(future::multisession, workers = 5) +paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){ + Average.ERA5.2.GeoTIFF(d1, d2, in.path, outdir) +}, .progress = T) %>% unlist # setup. base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif" load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/sda.all.forecast.analysis.Rdata") variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb") -settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA/pecanIC.xml" -outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/downscale_maps/" +settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/pecanIC.xml" +outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/downscale_maps/" cores <- 28 date <- seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year") # loop over years. @@ -39,12 +40,12 @@ for (i in seq_along(date)) { if (file.exists(paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff"))) { covariates.dir <- paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff") } else { - covariates.dir <- create.covariates.geotiff(outdir = outdir, - year = lubridate::year(date[i]), - base.map.dir = base.map.dir, - cov.tif.file.list = cov.tif.file.list, - normalize = T, - cores = cores) + covariates.dir <- stack.covariates.2.geotiff(outdir = outdir, + year = lubridate::year(date[i]), + base.map.dir = base.map.dir, + cov.tif.file.list = cov.tif.file.list, + normalize = T, + cores = cores) } # grab analysis. analysis.yr <- analysis.all[[i]] @@ -55,8 +56,16 @@ for (i in seq_along(date)) { variable <- variables[j] folder.path <- file.path(outdir, paste0(variables[j], "_", date[i])) dir.create(folder.path) - save(list = c("settings", "analysis.yr", "covariates.dir", "time", "variable", "folder.path", "base.map.dir", "cores", "outdir"), - file = file.path(folder.path, "dat.Rdata")) + saveRDS(list(settings = settings, + analysis.yr = analysis.yr, + covariates.dir = covariates.dir, + time = time, + variable = variable, + folder.path = folder.path, + base.map.dir = base.map.dir, + cores = cores, + outdir = outdir), + file = file.path(folder.path, "dat.rds")) # prepare for qsub. jobsh <- c("#!/bin/bash -l", "module load R/4.1.2", From 7733d6a9ea5d6b814f4da6a08264ea13180f3935 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 10:01:29 -0500 Subject: [PATCH 0054/1193] Update documentation. 
--- modules/assim.sequential/man/stack.covariates.2.geotiff.Rd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd b/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd index 6e2a1abb220..02f3da9dc97 100644 --- a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd +++ b/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd @@ -6,6 +6,7 @@ \usage{ \method{stack}{covariates.2.geotiff}( outdir, + year, base.map.dir, cov.tif.file.list, normalize = T, @@ -15,6 +16,8 @@ \arguments{ \item{outdir}{character: the output directory where the stacked GeoTIFF file will be generated.} +\item{year}{numeric: the year of when the covariates are stacked.} + \item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the final map.} \item{cov.tif.file.list}{list: a list contains sub-lists with each including path to the corresponding map and the variables to be extracted (e.g., list(LC = list(dir = "path/to/landcover.tiff", var.name = "LC")).} From 0a030c84543f938c4512e8c0b3da6a14ad7da582 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 10:06:21 -0500 Subject: [PATCH 0055/1193] Update change log. --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6547b2df78c..e55afe04100 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,7 +59,8 @@ For more information about this file see also [Keep a Changelog](http://keepacha - Added GEDI AGB preparation workflow. - Added new feature of downloading datasets from the NASA DAAC ORNL database. - Extended downscale function and created 'downscale_hrly' so that it handles more frequent data -- Added 'aggregate' as a new feature for downscaled data +- Added 'aggregate' as a new feature for downscaled data. +- Added downscale functions and scripts that apply to the North America SDA run. ### Fixed From 623b4c149aca37e7fc88085119f69c3c0e4c566e Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 21 Feb 2025 10:07:44 -0500 Subject: [PATCH 0056/1193] Remove commented lines. --- modules/assim.sequential/R/SDA_NA_downscale.R | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R index 7233c0bf8f5..446d970fc5b 100644 --- a/modules/assim.sequential/R/SDA_NA_downscale.R +++ b/modules/assim.sequential/R/SDA_NA_downscale.R @@ -84,26 +84,6 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) { return(file.path(outdir, paste0("ERA5_met_", lubridate::year(end.date), ".tiff"))) } - - -# assemble covariates from different spatial scales/resolutions and crs. 
-# Here is an example of the `cov.tif.file.list` object: -# cov.tif.file.list <- list(LC = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif", -# var.name = "LC"), -# year_since_disturb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_LC/outputs/age.tif", -# var.name = "year_since_disturb"), -# agb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/AGB/agb.tif", -# var.name = "agb"), -# twi = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/TWI/TWI_resample.tiff", -# var.name = "twi"), -# met = list(dir = paths[i], -# var.name = c("temp", "prec", "srad", "vapr")), -# soil = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/SoilGrids.tif", -# var.name = c("PH", "N", "SOC", "Sand"))) -# This function helps to stack target data layers from various GeoTIFF maps to a single map -# cropped and projected to the `base.map`. It also enables the normalization feature to facilitate the ML process. - - #' @description #' This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map. #' @title stack.covariates.2.geotiff From 2b7a0b6079fd058ee60adc021de08b089917e352 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Fri, 7 Mar 2025 14:13:28 -0800 Subject: [PATCH 0057/1193] Update CHANGELOG.md Co-authored-by: Istem Fer --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07555797a0f..e2456536ba8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha * Modules `PEcAn.allometry`, `PEcAn.assim.batch`, `PEcAn.data.mining`, `PEcAn.emulator`, `PEcAn.MA`, `PEcAn.photosynthesis`, `PEcAn.priors`, and `PEcAn.RTM`. - Renamed master branch to main - `PEcAn.all::pecan_version()` now reports commit hashes as well as version numbers for each installed package. -- `write.conmfig.STICS()` now modifies parameters with vectors rather than individually. +- `write.config.STICS()` now modifies parameters with vectors rather than individually. 
### Removed From c2a29f9519ae0168b29891d222a738cf41a2b2b2 Mon Sep 17 00:00:00 2001 From: Abhinav Pandey Date: Tue, 18 Mar 2025 23:07:17 +0530 Subject: [PATCH 0058/1193] Update test.check.missing.files.R --- base/db/tests/testthat/test.check.missing.files.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R index 75a531283dd..2dd4d2a9156 100644 --- a/base/db/tests/testthat/test.check.missing.files.R +++ b/base/db/tests/testthat/test.check.missing.files.R @@ -1,7 +1,7 @@ test_that("`check_missing_files()` able to return correct missing files", { - # Mock `purrr::map_dfr` - mocked_size <- mockery::mock(100,200) - mockery::stub(check_missing_files, "file.size", mocked_res) + # Mock `file.size` + mocked_size <- mockery::mock(100, 200) + mockery::stub(check_missing_files, "file.size", mocked_size) res <- check_missing_files( result = list(data.frame(file = c("A", "B"))), @@ -9,8 +9,13 @@ test_that("`check_missing_files()` able to return correct missing files", { existing.dbfile = data.frame() ) - + # Check that result has expected structure expect_equal(length(res), 2) expect_true(is.list(res[[1]])) expect_true(is.list(res[[2]])) + + # Verify mock was called correctly + mockery::expect_called(mocked_size, 2) + expect_equal(mockery::mock_args(mocked_size)[[1]], list("A")) + expect_equal(mockery::mock_args(mocked_size)[[2]], list("B")) }) From 62e8a59a1fde391689c914763ea74970824ef313 Mon Sep 17 00:00:00 2001 From: kutumia Date: Sun, 23 Mar 2025 03:56:17 +0000 Subject: [PATCH 0059/1193] Fix 404 documentation links and update references in README and Rmd (#3269) --- modules/data.atmosphere/README.md | 6 ++++-- modules/data.atmosphere/man/download.FluxnetLaThuile.Rd | 4 +++- modules/data.atmosphere/vignettes/ameriflux_demo.Rmd | 6 ++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/modules/data.atmosphere/README.md b/modules/data.atmosphere/README.md index cb59cf9df5c..fa4e4d14b7e 100644 --- a/modules/data.atmosphere/README.md +++ b/modules/data.atmosphere/README.md @@ -26,13 +26,15 @@ The PEcAn.data.atmosphere package is 'standalone'. ## Documentation -* [Package Documentation](https://pecanproject.github.io/pecan//modules/data.atmosphere/inst/web/index.html) +## * [Package Documentation](https://pecanproject.github.io/pecan//modules/data.atmosphere/inst/web/index.html) * Vignettes + ## PEcAn variable names -For the most updated list, see https://pecanproject.github.io/pecan-documentation/latest/time-standard.html#input-standards +## For the most updated list, see https://pecanproject.github.io/pecan-documentation/latest/time-standard.html#input-standards + General Note: dates in the database should be datatime (preferably with timezone), and datetime passed around in PEcAn should be of type POSIXlt. diff --git a/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd b/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd index 218365ee2e7..0165e565f14 100644 --- a/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd +++ b/modules/data.atmosphere/man/download.FluxnetLaThuile.Rd @@ -17,7 +17,9 @@ download.FluxnetLaThuile( } \arguments{ \item{sitename}{the FLUXNET ID of the site to be downloaded, used as file name prefix. 
-The 'SITE_ID' field in \href{http://www.fluxdata.org/DataInfo/Dataset\%20Doc\%20Lib/SynthDataSummary.aspx}{list of Fluxnet LaThuile sites}} +% The 'SITE_ID' field in \href{http://www.fluxdata.org/DataInfo/Dataset\%20Doc\%20Lib/SynthDataSummary.aspx}{list of Fluxnet LaThuile sites}} +% Link deprecated – was pointing to Fluxnet LaThuile dataset + \item{outfolder}{location on disk where outputs will be stored} diff --git a/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd b/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd index 344e1c27cfb..1f57a880f34 100644 --- a/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd +++ b/modules/data.atmosphere/vignettes/ameriflux_demo.Rmd @@ -11,9 +11,11 @@ vignette: > # Overview -This is a demonstration of the PEcAn utilities for downloading met data, converting it to the PEcAn-CF format (which is based on the Climate Forecasting conventions and similar to MsTMIP). These variables are defined in the [PEcAn documentation](https://pecanproject.github.io/pecan-documentation/latest/met-data.html). +This is a demonstration of the PEcAn utilities for downloading met data, converting it to the PEcAn-CF format (which is based on the Climate Forecasting conventions and similar to MsTMIP). These variables are described in the [PEcAn met data documentation](https://pecanproject.github.io/pecan-documentation/) (link previously pointed to a broken page). + +We’ll download 12 years of met data from the [Bondville Ameriflux site](https://ameriflux.lbl.gov/sites/siteinfo/US-Bo1), which has a `SITE_ID` of `US-Bo1`. + -In this example we will download 12 years of met data from the [Bondville Ameriflux site](http://ameriflux.ornl.gov/fullsiteinfo.php?sid=44). It has an Ameriflux `SITE_ID` of `US-Bo1` The PEcAn.data.atmosphere source code is in [`modules/data.atmosphere`](https://github.com/PecanProject/pecan/tree/main/modules/data.atmosphere) and the documentation can be found with either `package?PEcAn.data.atmosphere` or in the [data.atmosphere package documentation](https://pecanproject.github.io/pecan//modules/data.atmosphere/inst/web/index.html). From fa245526d27fac812d3b2dd2af1a51c4fa30ad29 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 24 Mar 2025 14:38:58 -0400 Subject: [PATCH 0060/1193] Rename the function. 
--- modules/assim.sequential/NAMESPACE | 2 +- modules/assim.sequential/R/SDA_NA_downscale.R | 6 +++--- modules/assim.sequential/man/stack.covariates.2.df.Rd | 2 +- ...ariates.2.geotiff.Rd => stack_covariates_2_geotiff.Rd} | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) rename modules/assim.sequential/man/{stack.covariates.2.geotiff.Rd => stack_covariates_2_geotiff.Rd} (89%) diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 700f2cc6c16..0920b2aacb6 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -1,6 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method(stack,covariates.2.geotiff) export(Analysis.sda) export(Average.ERA5.2.GeoTIFF) export(Construct.H.multisite) @@ -58,6 +57,7 @@ export(sda.enkf.multisite) export(sda.enkf.original) export(sda_weights_site) export(simple.local) +export(stack_covariates_2_geotiff) export(tobit.model) export(tobit2space.model) export(tobit_model_censored) diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R index 446d970fc5b..e0f25975565 100644 --- a/modules/assim.sequential/R/SDA_NA_downscale.R +++ b/modules/assim.sequential/R/SDA_NA_downscale.R @@ -86,7 +86,7 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) { #' @description #' This function helps to stack target data layers from various GeoTIFF maps (with different extents, CRS, and resolutions) to a single map. -#' @title stack.covariates.2.geotiff +#' @title stack_covariates_2_geotiff #' #' @param outdir character: the output directory where the stacked GeoTIFF file will be generated. #' @param year numeric: the year of when the covariates are stacked. @@ -100,7 +100,7 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) { #' @export #' #' @author Dongchen Zhang -stack.covariates.2.geotiff <- function(outdir, year, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) { +stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file.list, normalize = T, cores = parallel::detectCores()) { # create the folder if it doesn't exist. if (!file.exists(outdir)) { dir.create(outdir) @@ -186,7 +186,7 @@ pecan.settings.2.pts <- function(settings) { #' Note that the `LC` field using the `MODIS land cover` observations (MCD12Q1.061) must be supplied in the covariates to make this function work. #' @title stack.covariates.2.df #' -#' @param rast.dir character: a character that points to the covariates raster file generated by the `stack.covariates.2.geotiff` function. +#' @param rast.dir character: a character that points to the covariates raster file generated by the `stack_covariates_2_geotiff` function. #' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have. #' #' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels. 
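The return structure documented above lends itself to a short usage sketch; the GeoTIFF path and core count below are placeholders:

# Convert a stacked covariates GeoTIFF into the vegetated-pixel data frame.
cov.df <- stack_covariates_2_df(rast.dir = "covariates_2012.tiff", cores = 4)
str(cov.df$df)            # covariates data frame, vegetated pixels only
head(cov.df$non.na.inds)  # indices of those pixels in the source raster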
diff --git a/modules/assim.sequential/man/stack.covariates.2.df.Rd b/modules/assim.sequential/man/stack.covariates.2.df.Rd index 64b9b2f7ffa..b91a11cbb56 100644 --- a/modules/assim.sequential/man/stack.covariates.2.df.Rd +++ b/modules/assim.sequential/man/stack.covariates.2.df.Rd @@ -7,7 +7,7 @@ \method{stack}{covariates.2.df}(rast.dir, cores = parallel::detectCores()) } \arguments{ -\item{rast.dir}{character: a character that points to the covariates raster file generated by the `stack.covariates.2.geotiff` function.} +\item{rast.dir}{character: a character that points to the covariates raster file generated by the `stack_covariates_2_geotiff` function.} \item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.} } diff --git a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd b/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd similarity index 89% rename from modules/assim.sequential/man/stack.covariates.2.geotiff.Rd rename to modules/assim.sequential/man/stack_covariates_2_geotiff.Rd index 02f3da9dc97..d83411ec708 100644 --- a/modules/assim.sequential/man/stack.covariates.2.geotiff.Rd +++ b/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{stack.covariates.2.geotiff} -\alias{stack.covariates.2.geotiff} -\title{stack.covariates.2.geotiff} +\name{stack_covariates_2_geotiff} +\alias{stack_covariates_2_geotiff} +\title{stack_covariates_2_geotiff} \usage{ -\method{stack}{covariates.2.geotiff}( +stack_covariates_2_geotiff( outdir, year, base.map.dir, From 0b3bbbafd11518bd5d49b09772f9350f1d4e8ab7 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 24 Mar 2025 14:40:03 -0400 Subject: [PATCH 0061/1193] Remove the redundant for loop. --- modules/assim.sequential/R/SDA_NA_downscale.R | 41 +++++++++---------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R index e0f25975565..ebfddd4bb36 100644 --- a/modules/assim.sequential/R/SDA_NA_downscale.R +++ b/modules/assim.sequential/R/SDA_NA_downscale.R @@ -29,28 +29,25 @@ Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) { } else { end <- as.Date(paste0(lubridate::year(dates[i]), "-12-31")) } - # loop over years. - for (j in seq_along(dates)) { - # open ERA5 nc file. - met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", lubridate::year(dates[i]), ".nc"))) - # find index for the date. - times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC") - time.inds <- which(lubridate::date(times) >= start & lubridate::date(times) <= end) - # extract temperature. - PEcAn.logger::logger.info("entering temperature.") - temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3) - # extract precipitation. - PEcAn.logger::logger.info("entering precipitation.") - precip.all <- abind::abind(precip.all, apply(ncdf4::ncvar_get(met.nc, "tp")[,,,time.inds], c(1,2,4), mean), along = 3) - # extract shortwave solar radiation. - PEcAn.logger::logger.info("entering solar radiation.") - srd.all <- abind::abind(srd.all, apply(ncdf4::ncvar_get(met.nc, "ssrd")[,,,time.inds], c(1,2,4), mean), along = 3) - # extract dewpoint. 
- PEcAn.logger::logger.info("entering dewpoint.") - dewpoint.all <- abind::abind(dewpoint.all, apply(ncdf4::ncvar_get(met.nc, "d2m")[,,,time.inds], c(1,2,4), mean), along = 3) - # close the NC connection. - ncdf4::nc_close(met.nc) - } + # open ERA5 nc file. + met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", lubridate::year(dates[i]), ".nc"))) + # find index for the date. + times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC") + time.inds <- which(lubridate::date(times) >= start & lubridate::date(times) <= end) + # extract temperature. + PEcAn.logger::logger.info("entering temperature.") + temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3) + # extract precipitation. + PEcAn.logger::logger.info("entering precipitation.") + precip.all <- abind::abind(precip.all, apply(ncdf4::ncvar_get(met.nc, "tp")[,,,time.inds], c(1,2,4), mean), along = 3) + # extract shortwave solar radiation. + PEcAn.logger::logger.info("entering solar radiation.") + srd.all <- abind::abind(srd.all, apply(ncdf4::ncvar_get(met.nc, "ssrd")[,,,time.inds], c(1,2,4), mean), along = 3) + # extract dewpoint. + PEcAn.logger::logger.info("entering dewpoint.") + dewpoint.all <- abind::abind(dewpoint.all, apply(ncdf4::ncvar_get(met.nc, "d2m")[,,,time.inds], c(1,2,4), mean), along = 3) + # close the NC connection. + ncdf4::nc_close(met.nc) } # aggregate across time. # temperature. From 3831129365b012e5a96e474939df95c75bd36895 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 24 Mar 2025 16:36:09 -0400 Subject: [PATCH 0062/1193] Rename functions. --- modules/assim.sequential/NAMESPACE | 4 +- modules/assim.sequential/R/SDA_NA_downscale.R | 81 +++++++++---------- .../inst/anchor/NA_downscale_script.R | 7 +- ...2.GeoTIFF.Rd => Average_ERA5_2_GeoTIFF.Rd} | 8 +- ...le.qsub.main.Rd => downscale_qsub_main.Rd} | 10 +-- ...nscale.rf.main.Rd => downscale_rf_main.Rd} | 8 +- ...l.prediction.Rd => parallel_prediction.Rd} | 14 ++-- ...allel.rf.train.Rd => parallel_rf_train.Rd} | 10 +-- ...tings.2.pts.Rd => pecan_settings_2_pts.Rd} | 8 +- ...pare.train.dat.Rd => prepare_train_dat.Rd} | 10 +-- ...iates.2.df.Rd => stack_covariates_2_df.Rd} | 8 +- 11 files changed, 80 insertions(+), 88 deletions(-) rename modules/assim.sequential/man/{Average.ERA5.2.GeoTIFF.Rd => Average_ERA5_2_GeoTIFF.Rd} (82%) rename modules/assim.sequential/man/{downscale.qsub.main.Rd => downscale_qsub_main.Rd} (66%) rename modules/assim.sequential/man/{downscale.rf.main.Rd => downscale_rf_main.Rd} (93%) rename modules/assim.sequential/man/{parallel.prediction.Rd => parallel_prediction.Rd} (80%) rename modules/assim.sequential/man/{parallel.rf.train.Rd => parallel_rf_train.Rd} (74%) rename modules/assim.sequential/man/{pecan.settings.2.pts.Rd => pecan_settings_2_pts.Rd} (76%) rename modules/assim.sequential/man/{prepare.train.dat.Rd => prepare_train_dat.Rd} (77%) rename modules/assim.sequential/man/{stack.covariates.2.df.Rd => stack_covariates_2_df.Rd} (83%) diff --git a/modules/assim.sequential/NAMESPACE b/modules/assim.sequential/NAMESPACE index 0920b2aacb6..90512107500 100644 --- a/modules/assim.sequential/NAMESPACE +++ b/modules/assim.sequential/NAMESPACE @@ -1,7 +1,7 @@ # Generated by roxygen2: do not edit by hand export(Analysis.sda) -export(Average.ERA5.2.GeoTIFF) +export(Average_ERA5_2_GeoTIFF) export(Construct.H.multisite) export(Construct.R) export(Construct_H) @@ -30,7 +30,7 @@ export(assessParams) export(block_matrix) export(conj_wt_wishart_sampler) 
export(construct_nimble_H) -export(downscale.qsub.main) +export(downscale_qsub_main) export(dwtmnorm) export(get_ensemble_weights) export(hop_test) diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R index ebfddd4bb36..c1db7489258 100644 --- a/modules/assim.sequential/R/SDA_NA_downscale.R +++ b/modules/assim.sequential/R/SDA_NA_downscale.R @@ -1,6 +1,6 @@ #' @description #' This function helps to average the ERA5 data based on the start and end dates, and convert it to the GeoTIFF file. -#' @title Average.ERA5.2.GeoTIFF +#' @title Average_ERA5_2_GeoTIFF #' #' @param start.date character: start point of when to average the data (e.g., 2012-01-01). #' @param end.date character: end point of when to average the data (e.g., 2021-12-31). @@ -11,29 +11,20 @@ #' #' @export #' @author Dongchen Zhang -Average.ERA5.2.GeoTIFF <- function (start.date, end.date, in.path, outdir) { - # open ERA5 nc file as geotiff format for referencing crs and ext. - ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", lubridate::year(start.date), ".nc"))) - dates <- seq(start.date, end.date, "1 year") +Average_ERA5_2_GeoTIFF <- function (start.date, end.date, in.path, outdir) { + # create dates. + years <- sort(unique(lubridate::year(start.date):lubridate::year(end.date))) # initialize final outcomes. temp.all <- precip.all <- srd.all <- dewpoint.all <- c() - for (i in seq_along(dates)) { - # initialize start and end dates for the current period - if (i == 1) { - start <- start.date - } else { - start <- as.Date(paste0(lubridate::year(dates[i]), "-01-01")) - } - if (i == length(dates)) { - end <- end.date - } else { - end <- as.Date(paste0(lubridate::year(dates[i]), "-12-31")) - } + # loop over years. + for (i in seq_along(years)) { + # open ERA5 nc file as geotiff format for referencing crs and ext. + ERA5.tiff <- terra::rast(file.path(in.path, paste0("ERA5_", years[i], ".nc"))) # open ERA5 nc file. - met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", lubridate::year(dates[i]), ".nc"))) + met.nc <- ncdf4::nc_open(file.path(in.path, paste0("ERA5_", years[i], ".nc"))) # find index for the date. times <- as.POSIXct(met.nc$dim$time$vals*3600, origin="1900-01-01 00:00:00", tz = "UTC") - time.inds <- which(lubridate::date(times) >= start & lubridate::date(times) <= end) + time.inds <- which(lubridate::date(times) >= start.date & lubridate::date(times) <= end.date) # extract temperature. PEcAn.logger::logger.info("entering temperature.") temp.all <- abind::abind(temp.all, apply(ncdf4::ncvar_get(met.nc, "t2m")[,,,time.inds], c(1,2,4), mean), along = 3) @@ -157,14 +148,14 @@ stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file. #' @description #' convert settings to geospatial points in terra. -#' @title pecan.settings.2.pts +#' @title pecan_settings_2_pts #' #' @param settings PEcAn settings: either a character that points to the settings or the actual settings object will be accepted. #' #' @return terra spatial points object. #' #' @author Dongchen Zhang -pecan.settings.2.pts <- function(settings) { +pecan_settings_2_pts <- function(settings) { if (is.character(settings)) { # read settings. settings <- PEcAn.settings::read.settings(settings) @@ -181,7 +172,7 @@ pecan.settings.2.pts <- function(settings) { #' @description #' This function helps to build the data frame (pixels by data columns) for only vegetated pixels to improve the efficiency. 
#' Note that the `LC` field using the `MODIS land cover` observations (MCD12Q1.061) must be supplied in the covariates to make this function work. -#' @title stack.covariates.2.df +#' @title stack_covariates_2_df #' #' @param rast.dir character: a character that points to the covariates raster file generated by the `stack_covariates_2_geotiff` function. #' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have. @@ -189,7 +180,7 @@ pecan.settings.2.pts <- function(settings) { #' @return list containing the data frame of covariates for vegetated pixels and the corresponding index of the pixels. #' #' @author Dongchen Zhang -stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) { +stack_covariates_2_df <- function(rast.dir, cores = parallel::detectCores()) { # load maps. all.rast <- terra::rast(rast.dir) # parallel loop. @@ -235,8 +226,8 @@ stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) { #' @description #' This function helps to create the training dataset of specific variable type and locations for downscaling. -#' TODO: There will be a ratio argument between training and testing samples to testify the ML regression accuracy. -#' @title prepare.train.dat +#' TODO: Add a ratio argument (training sample size/total sample size) so that we could calculate the out-of-sample accuracy. +#' @title prepare_train_dat #' #' @param settings character: physical path that points to the pecan settings XML file. #' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function. @@ -246,9 +237,9 @@ stack.covariates.2.df <- function(rast.dir, cores = parallel::detectCores()) { #' @return matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates. #' #' @author Dongchen Zhang -prepare.train.dat <- function(settings, analysis, covariates.dir, variable) { +prepare_train_dat <- function(settings, analysis, covariates.dir, variable) { # convert settings into geospatial points. - pts <- pecan.settings.2.pts(settings) + pts <- pecan_settings_2_pts(settings) # read covariates. cov.rast <- terra::rast(covariates.dir) # extract covariates by locations. @@ -269,15 +260,15 @@ prepare.train.dat <- function(settings, analysis, covariates.dir, variable) { #' @description #' This function helps to train the ML model across ensemble members in parallel. -#' @title parallel.rf.train +#' @title parallel_rf_train #' -#' @param full_data numeric: the matrix generated using the `prepare.train.dat` function. +#' @param full_data numeric: the matrix generated using the `prepare_train_dat` function. #' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have. #' #' @return list of trained models across ensemble members. #' #' @author Dongchen Zhang -parallel.rf.train <- function(full_data, cores = parallel::detectCores()) { +parallel_rf_train <- function(full_data, cores = parallel::detectCores()) { # grab ensemble and predictor index. col.names <- colnames(full_data) ensemble.inds <- which(grepl("ensemble", col.names, fixed = TRUE)) @@ -316,12 +307,12 @@ parallel.rf.train <- function(full_data, cores = parallel::detectCores()) { #' @description #' This function helps to predict the target variable observations based on the covariates. 
#' The prediction is working in parallel across vegetated pixels. -#' @title parallel.prediction +#' @title parallel_prediction #' #' @param base.map.dir character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps. -#' @param models list: trained models across ensemble members generated by the `parallel.rf.train` function. -#' @param cov.vecs: numeric: data frame containing covaraites across vegetated pixels generated from the `stack.covariates.2.df` function. -#' @param non.na.inds numeric: the corresponding index of vegetated pixels generated from the `stack.covariates.2.df` function. +#' @param models list: trained models across ensemble members generated by the `parallel_rf_train` function. +#' @param cov.vecs: numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function. +#' @param non.na.inds numeric: the corresponding index of vegetated pixels generated from the `stack_covariates_2_df` function. #' @param outdir character: the output directory where the downscaled maps will be stored. #' @param name list: containing the time and variable name to create the final GeoTIFF file name. #' @param cores numeric: how many CPus to be used in the calculation, the default is the total CPU number you have. @@ -329,7 +320,7 @@ parallel.rf.train <- function(full_data, cores = parallel::detectCores()) { #' @return paths to the ensemble downscaled maps. #' #' @author Dongchen Zhang -parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) { +parallel_prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, outdir, name, cores = parallel::detectCores()) { # load base map. base.map <- terra::rast(base.map.dir) dims <- dim(base.map) @@ -373,7 +364,7 @@ parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out #' @description #' This is the main function to execute the RF training and prediction. #' Note it will be deployed by each node you requested if the qsub feature is enabled below. -#' @title downscale.rf.main +#' @title downscale_rf_main #' #' @param settings character: physical path that points to the pecan settings XML file. #' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function. @@ -387,7 +378,7 @@ parallel.prediction <- function(base.map.dir, models, cov.vecs, non.na.inds, out #' @return paths to the ensemble downscaled maps. #' #' @author Dongchen Zhang -downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) { +downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable, outdir, base.map.dir, cores = parallel::detectCores()) { # create folder specific for the time and carbon type. folder.name <- file.path(outdir, paste0(c(variable, time), collapse = "_")) if (!file.exists(folder.name)) { @@ -395,7 +386,7 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable } # prepare training data. PEcAn.logger::logger.info("Preparing training data.") - full_data <- prepare.train.dat(settings = settings, + full_data <- prepare_train_dat(settings = settings, analysis = analysis, covariates.dir = covariates.dir, variable = variable) @@ -405,13 +396,13 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable } # parallel train. 
PEcAn.logger::logger.info("Parallel training.") - models <- parallel.rf.train(full_data = full_data, cores = cores) + models <- parallel_rf_train(full_data = full_data, cores = cores) # save trained models for future analysis. # saveRDS(models, file.path(folder.name, "rf_models.rds")) save(models, file = file.path(folder.name, "rf_models.Rdata")) # convert stacked covariates geotiff file into data frame. PEcAn.logger::logger.info("Converting geotiff to df.") - cov.df <- stack.covariates.2.df(rast.dir = covariates.dir, cores = cores) + cov.df <- stack_covariates_2_df(rast.dir = covariates.dir, cores = cores) # reconstruct LC because of the computation accuracy. cov.df$df$LC[which(cov.df$df$LC < 1)] <- 0 # convert LC into factor. @@ -420,7 +411,7 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable } # parallel prediction. PEcAn.logger::logger.info("Parallel prediction.") - paths <- parallel.prediction(base.map.dir = base.map.dir, + paths <- parallel_prediction(base.map.dir = base.map.dir, models = models, cov.vecs = cov.df$df, non.na.inds = cov.df$non.na.inds, @@ -442,15 +433,15 @@ downscale.rf.main <- function(settings, analysis, covariates.dir, time, variable } #' @description -#' This qsub function helps to run the submitted qsub jobs for running the downscale.rf.main function. -#' @title downscale.qsub.main +#' This qsub function helps to run the submitted qsub jobs for running the downscale_rf_main function. +#' @title downscale_qsub_main #' #' @param folder.path Character: physical path to which the job file is located. #' #' @export #' @author Dongchen Zhang -downscale.qsub.main <- function(folder.path) { +downscale_qsub_main <- function(folder.path) { dat <- readRDS(file.path(folder.path, "dat.rds")) - out <- downscale.rf.main(dat$settings, dat$analysis.yr, dat$covariates.dir, lubridate::year(dat$time), dat$variable, dat$outdir, dat$base.map.dir, dat$cores) + out <- downscale_rf_main(dat$settings, dat$analysis.yr, dat$covariates.dir, lubridate::year(dat$time), dat$variable, dat$outdir, dat$base.map.dir, dat$cores) saveRDS(out, file.path(folder.path, "res.rds")) } \ No newline at end of file diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R index 83613b1ff1b..2530e068ecc 100644 --- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R +++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R @@ -5,9 +5,10 @@ setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/") # average ERA5 to climatic covariates. outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET" in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/" -dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year")) -start.dates <- dates[1:10] -end.dates <- dates[2:11] +# dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year")) +# dates <- seq(as.Date("2012-01-01"), as.Date("2024-12-31"), "1 year") +start.dates <- seq(as.Date("2012-01-01"), as.Date("2024-01-01"), "1 year") +end.dates <- seq(as.Date("2012-12-31"), as.Date("2024-12-31"), "1 year") # parallel average ERA5 into covariates. 
future::plan(future::multisession, workers = 5) paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){ diff --git a/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd b/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd similarity index 82% rename from modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd rename to modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd index 0c4a561bde9..5443add6937 100644 --- a/modules/assim.sequential/man/Average.ERA5.2.GeoTIFF.Rd +++ b/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{Average.ERA5.2.GeoTIFF} -\alias{Average.ERA5.2.GeoTIFF} -\title{Average.ERA5.2.GeoTIFF} +\name{Average_ERA5_2_GeoTIFF} +\alias{Average_ERA5_2_GeoTIFF} +\title{Average_ERA5_2_GeoTIFF} \usage{ -Average.ERA5.2.GeoTIFF(start.date, end.date, in.path, outdir) +Average_ERA5_2_GeoTIFF(start.date, end.date, in.path, outdir) } \arguments{ \item{start.date}{character: start point of when to average the data (e.g., 2012-01-01).} diff --git a/modules/assim.sequential/man/downscale.qsub.main.Rd b/modules/assim.sequential/man/downscale_qsub_main.Rd similarity index 66% rename from modules/assim.sequential/man/downscale.qsub.main.Rd rename to modules/assim.sequential/man/downscale_qsub_main.Rd index 50de6e2d945..dbf35a30c41 100644 --- a/modules/assim.sequential/man/downscale.qsub.main.Rd +++ b/modules/assim.sequential/man/downscale_qsub_main.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{downscale.qsub.main} -\alias{downscale.qsub.main} -\title{downscale.qsub.main} +\name{downscale_qsub_main} +\alias{downscale_qsub_main} +\title{downscale_qsub_main} \usage{ -downscale.qsub.main(folder.path) +downscale_qsub_main(folder.path) } \arguments{ \item{folder.path}{Character: physical path to which the job file is located.} } \description{ -This qsub function helps to run the submitted qsub jobs for running the downscale.rf.main function. +This qsub function helps to run the submitted qsub jobs for running the downscale_rf_main function. 
} \author{ Dongchen Zhang diff --git a/modules/assim.sequential/man/downscale.rf.main.Rd b/modules/assim.sequential/man/downscale_rf_main.Rd similarity index 93% rename from modules/assim.sequential/man/downscale.rf.main.Rd rename to modules/assim.sequential/man/downscale_rf_main.Rd index 75ec6612760..05b7568a934 100644 --- a/modules/assim.sequential/man/downscale.rf.main.Rd +++ b/modules/assim.sequential/man/downscale_rf_main.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{downscale.rf.main} -\alias{downscale.rf.main} -\title{downscale.rf.main} +\name{downscale_rf_main} +\alias{downscale_rf_main} +\title{downscale_rf_main} \usage{ -downscale.rf.main( +downscale_rf_main( settings, analysis, covariates.dir, diff --git a/modules/assim.sequential/man/parallel.prediction.Rd b/modules/assim.sequential/man/parallel_prediction.Rd similarity index 80% rename from modules/assim.sequential/man/parallel.prediction.Rd rename to modules/assim.sequential/man/parallel_prediction.Rd index fdb04edb8b4..3fceff412a2 100644 --- a/modules/assim.sequential/man/parallel.prediction.Rd +++ b/modules/assim.sequential/man/parallel_prediction.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{parallel.prediction} -\alias{parallel.prediction} -\title{parallel.prediction} +\name{parallel_prediction} +\alias{parallel_prediction} +\title{parallel_prediction} \usage{ -parallel.prediction( +parallel_prediction( base.map.dir, models, cov.vecs, @@ -17,9 +17,9 @@ parallel.prediction( \arguments{ \item{base.map.dir}{character: path to the GeoTIFF file within which the extents and CRS will be used to generate the ensemble maps.} -\item{models}{list: trained models across ensemble members generated by the `parallel.rf.train` function.} +\item{models}{list: trained models across ensemble members generated by the `parallel_rf_train` function.} -\item{non.na.inds}{numeric: the corresponding index of vegetated pixels generated from the `stack.covariates.2.df` function.} +\item{non.na.inds}{numeric: the corresponding index of vegetated pixels generated from the `stack_covariates_2_df` function.} \item{outdir}{character: the output directory where the downscaled maps will be stored.} @@ -27,7 +27,7 @@ parallel.prediction( \item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.} -\item{cov.vecs:}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack.covariates.2.df` function.} +\item{cov.vecs:}{numeric: data frame containing covaraites across vegetated pixels generated from the `stack_covariates_2_df` function.} } \value{ paths to the ensemble downscaled maps. 
diff --git a/modules/assim.sequential/man/parallel.rf.train.Rd b/modules/assim.sequential/man/parallel_rf_train.Rd similarity index 74% rename from modules/assim.sequential/man/parallel.rf.train.Rd rename to modules/assim.sequential/man/parallel_rf_train.Rd index 4b61a544126..1d8863f582a 100644 --- a/modules/assim.sequential/man/parallel.rf.train.Rd +++ b/modules/assim.sequential/man/parallel_rf_train.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{parallel.rf.train} -\alias{parallel.rf.train} -\title{parallel.rf.train} +\name{parallel_rf_train} +\alias{parallel_rf_train} +\title{parallel_rf_train} \usage{ -parallel.rf.train(full_data, cores = parallel::detectCores()) +parallel_rf_train(full_data, cores = parallel::detectCores()) } \arguments{ -\item{full_data}{numeric: the matrix generated using the `prepare.train.dat` function.} +\item{full_data}{numeric: the matrix generated using the `prepare_train_dat` function.} \item{cores}{numeric: how many CPus to be used in the calculation, the default is the total CPU number you have.} } diff --git a/modules/assim.sequential/man/pecan.settings.2.pts.Rd b/modules/assim.sequential/man/pecan_settings_2_pts.Rd similarity index 76% rename from modules/assim.sequential/man/pecan.settings.2.pts.Rd rename to modules/assim.sequential/man/pecan_settings_2_pts.Rd index 91828b1077e..e676382d67f 100644 --- a/modules/assim.sequential/man/pecan.settings.2.pts.Rd +++ b/modules/assim.sequential/man/pecan_settings_2_pts.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{pecan.settings.2.pts} -\alias{pecan.settings.2.pts} -\title{pecan.settings.2.pts} +\name{pecan_settings_2_pts} +\alias{pecan_settings_2_pts} +\title{pecan_settings_2_pts} \usage{ -pecan.settings.2.pts(settings) +pecan_settings_2_pts(settings) } \arguments{ \item{settings}{PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.} diff --git a/modules/assim.sequential/man/prepare.train.dat.Rd b/modules/assim.sequential/man/prepare_train_dat.Rd similarity index 77% rename from modules/assim.sequential/man/prepare.train.dat.Rd rename to modules/assim.sequential/man/prepare_train_dat.Rd index ec9122dc56d..381a9eacd6f 100644 --- a/modules/assim.sequential/man/prepare.train.dat.Rd +++ b/modules/assim.sequential/man/prepare_train_dat.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{prepare.train.dat} -\alias{prepare.train.dat} -\title{prepare.train.dat} +\name{prepare_train_dat} +\alias{prepare_train_dat} +\title{prepare_train_dat} \usage{ -prepare.train.dat(settings, analysis, covariates.dir, variable) +prepare_train_dat(settings, analysis, covariates.dir, variable) } \arguments{ \item{settings}{character: physical path that points to the pecan settings XML file.} @@ -20,7 +20,7 @@ matrix within which the first sets of columns contain values of state variables } \description{ This function helps to create the training dataset of specific variable type and locations for downscaling. -TODO: There will be a ratio argument between training and testing samples to testify the ML regression accuracy. +TODO: Add a ratio argument (training sample size/total sample size) so that we could calculate the out-of-sample accuracy. 
} \author{ Dongchen Zhang diff --git a/modules/assim.sequential/man/stack.covariates.2.df.Rd b/modules/assim.sequential/man/stack_covariates_2_df.Rd similarity index 83% rename from modules/assim.sequential/man/stack.covariates.2.df.Rd rename to modules/assim.sequential/man/stack_covariates_2_df.Rd index b91a11cbb56..ed69f768faf 100644 --- a/modules/assim.sequential/man/stack.covariates.2.df.Rd +++ b/modules/assim.sequential/man/stack_covariates_2_df.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SDA_NA_downscale.R -\name{stack.covariates.2.df} -\alias{stack.covariates.2.df} -\title{stack.covariates.2.df} +\name{stack_covariates_2_df} +\alias{stack_covariates_2_df} +\title{stack_covariates_2_df} \usage{ -\method{stack}{covariates.2.df}(rast.dir, cores = parallel::detectCores()) +stack_covariates_2_df(rast.dir, cores = parallel::detectCores()) } \arguments{ \item{rast.dir}{character: a character that points to the covariates raster file generated by the `stack_covariates_2_geotiff` function.} From 94c030bf58aadea0cb9dff6485cbdfa5223488b6 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 24 Mar 2025 22:47:38 -0400 Subject: [PATCH 0063/1193] Update documentation. --- modules/assim.sequential/R/SDA_NA_downscale.R | 12 ++++++------ modules/assim.sequential/man/prepare_train_dat.Rd | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_NA_downscale.R index c1db7489258..8e75a562779 100644 --- a/modules/assim.sequential/R/SDA_NA_downscale.R +++ b/modules/assim.sequential/R/SDA_NA_downscale.R @@ -229,17 +229,15 @@ stack_covariates_2_df <- function(rast.dir, cores = parallel::detectCores()) { #' TODO: Add a ratio argument (training sample size/total sample size) so that we could calculate the out-of-sample accuracy. #' @title prepare_train_dat #' -#' @param settings character: physical path that points to the pecan settings XML file. +#' @param pts spatialpoints: spatial points returned by `terra::vectors` function. #' @param analysis numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function. #' @param covariates.dir character: path to the exported covariates GeoTIFF file. #' @param variable character: name of state variable. It should match up with the column names of the analysis data frame. #' -#' @return matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates. +#' @return matrix (num.sites, num.variables * num.ensemble + num.covariates) within which the first sets of columns contain values of state variables for each ensemble member of every site, and the rest columns contain the corresponding covariates. #' #' @author Dongchen Zhang -prepare_train_dat <- function(settings, analysis, covariates.dir, variable) { - # convert settings into geospatial points. - pts <- pecan_settings_2_pts(settings) +prepare_train_dat <- function(pts, analysis, covariates.dir, variable) { # read covariates. cov.rast <- terra::rast(covariates.dir) # extract covariates by locations. @@ -386,7 +384,9 @@ downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable } # prepare training data. PEcAn.logger::logger.info("Preparing training data.") - full_data <- prepare_train_dat(settings = settings, + # convert settings into geospatial points. 
+ pts <- pecan_settings_2_pts(settings) + full_data <- prepare_train_dat(pts = pts, analysis = analysis, covariates.dir = covariates.dir, variable = variable) diff --git a/modules/assim.sequential/man/prepare_train_dat.Rd b/modules/assim.sequential/man/prepare_train_dat.Rd index 381a9eacd6f..1925a5342d1 100644 --- a/modules/assim.sequential/man/prepare_train_dat.Rd +++ b/modules/assim.sequential/man/prepare_train_dat.Rd @@ -4,10 +4,10 @@ \alias{prepare_train_dat} \title{prepare_train_dat} \usage{ -prepare_train_dat(settings, analysis, covariates.dir, variable) +prepare_train_dat(pts, analysis, covariates.dir, variable) } \arguments{ -\item{settings}{character: physical path that points to the pecan settings XML file.} +\item{pts}{spatialpoints: spatial points returned by `terra::vectors` function.} \item{analysis}{numeric: data frame (rows: ensemble member; columns: site*state_variables) of updated ensemble analysis results from the `sda_enkf` function.} @@ -16,7 +16,7 @@ prepare_train_dat(settings, analysis, covariates.dir, variable) \item{variable}{character: name of state variable. It should match up with the column names of the analysis data frame.} } \value{ -matrix within which the first sets of columns contain values of state variables for each ensemble mebers of every site, and the rest columns contain the corresponding covariates. +matrix (num.sites, num.variables * num.ensemble + num.covariates) within which the first sets of columns contain values of state variables for each ensemble member of every site, and the rest columns contain the corresponding covariates. } \description{ This function helps to create the training dataset of specific variable type and locations for downscaling. From 7c288d95808c06c3a2e47561703339cfced41d43 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 24 Mar 2025 22:48:29 -0400 Subject: [PATCH 0064/1193] Update the script for downscaling. --- .../inst/anchor/NA_downscale_script.R | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R index 2530e068ecc..9e43a865eb6 100644 --- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R +++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R @@ -5,14 +5,20 @@ setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/") # average ERA5 to climatic covariates. outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET" in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/" -# dates <- c(as.Date("2012-01-01"), seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year")) -# dates <- seq(as.Date("2012-01-01"), as.Date("2024-12-31"), "1 year") -start.dates <- seq(as.Date("2012-01-01"), as.Date("2024-01-01"), "1 year") -end.dates <- seq(as.Date("2012-12-31"), as.Date("2024-12-31"), "1 year") +start.dates <- c("2012-01-01", "2012-07-16", "2013-07-16", + "2014-07-16", "2015-07-16", "2016-07-16", + "2017-07-16", "2018-07-16", "2019-07-16", + "2020-07-16", "2021-07-16", "2022-07-16", + "2023-07-16") +end.dates <- c("2012-07-15", "2013-07-15", "2014-07-15", + "2015-07-15", "2016-07-15", "2017-07-15", + "2018-07-15", "2019-07-15", "2020-07-15", + "2021-07-15", "2022-07-15", "2023-07-15", + "2024-07-15") # parallel average ERA5 into covariates. 
-future::plan(future::multisession, workers = 5) +future::plan(future::multisession, workers = 5, gc = T) paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){ - Average.ERA5.2.GeoTIFF(d1, d2, in.path, outdir) + Average_ERA5_2_GeoTIFF(d1, d2, in.path, outdir) }, .progress = T) %>% unlist # setup. base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif" @@ -21,7 +27,7 @@ variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb") settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/pecanIC.xml" outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/downscale_maps/" cores <- 28 -date <- seq(as.Date("2012-07-15"), as.Date("2021-07-15"), "1 year") +date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year") # loop over years. for (i in seq_along(date)) { # setup covariates paths and variable names. @@ -41,7 +47,7 @@ for (i in seq_along(date)) { if (file.exists(paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff"))) { covariates.dir <- paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff") } else { - covariates.dir <- stack.covariates.2.geotiff(outdir = outdir, + covariates.dir <- stack_covariates_2_geotiff(outdir = outdir, year = lubridate::year(date[i]), base.map.dir = base.map.dir, cov.tif.file.list = cov.tif.file.list, @@ -73,7 +79,7 @@ for (i in seq_along(date)) { "echo \"require (PEcAnAssimSequential)", " require (foreach)", " require (purrr)", - " downscale.qsub.main('@FOLDER_PATH@')", + " downscale_qsub_main('@FOLDER_PATH@')", " \" | R --no-save") jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh) writeLines(jobsh, con = file.path(folder.path, "job.sh")) From 5dac92f2108cc39311e9c6944e3e887ac5d9b3d3 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 26 Mar 2025 15:09:18 -0400 Subject: [PATCH 0065/1193] Rename the file. --- .../R/{SDA_NA_downscale.R => SDA_parallel_downscale.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/assim.sequential/R/{SDA_NA_downscale.R => SDA_parallel_downscale.R} (100%) diff --git a/modules/assim.sequential/R/SDA_NA_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R similarity index 100% rename from modules/assim.sequential/R/SDA_NA_downscale.R rename to modules/assim.sequential/R/SDA_parallel_downscale.R From 51931a6f89070a3e4d637026be300713853b9b35 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Thu, 3 Apr 2025 08:34:25 +0000 Subject: [PATCH 0066/1193] Enhance robustness and error handling in soilgrids_soilC_extract() --- .../data.land/R/soilgrids_soc_extraction.R | 88 ++++++++++++++----- 1 file changed, 68 insertions(+), 20 deletions(-) diff --git a/modules/data.land/R/soilgrids_soc_extraction.R b/modules/data.land/R/soilgrids_soc_extraction.R index 81a5995fc75..ceab4486a7c 100644 --- a/modules/data.land/R/soilgrids_soc_extraction.R +++ b/modules/data.land/R/soilgrids_soc_extraction.R @@ -66,6 +66,18 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { # prepare site info for extraction internal_site_info <- site_info[, c("site_id", "site_name", "lat", "lon")] + + # Early return if no valid sites (after processing internal_site_info) + if (nrow(internal_site_info) == 0) { + if (verbose) { + PEcAn.logger::logger.severe( + "No valid sites remaining after NA check. ", + "All sites had missing SoilGrids data for the first depth layer." 
+ ) + } + return(NULL) + } + #create a variable to store mean and quantile of organic carbon density (ocd) for each soil depth ocdquant <- matrix(NA, nrow = 6, ncol = length(internal_site_info$lon) * 4) #row represents soil depth, col represents mean, 5%, 50% and 95%-quantile of ocd for all sites lonlat <- cbind(internal_site_info$lon, internal_site_info$lat) @@ -78,17 +90,27 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { p <- terra::vect(lonlat, crs = "+proj=longlat +datum=WGS84") # Users need to provide lon/lat newcrs <- "+proj=igh +datum=WGS84 +no_defs +towgs84=0,0,0" p_reproj <- terra::project(p, newcrs) # Transform the point vector to data with Homolosine projection + + # Extract coordinates for safe parallel transfer + p_coords <- terra::crds(p_reproj) + data_tag <- c("_mean.vrt", "_Q0.05.vrt", "_Q0.5.vrt", "_Q0.95.vrt") name_tag <- expand.grid(depths, data_tag, stringsAsFactors = F)#find the combinations between data and depth tags. L <- split(as.data.frame(name_tag), seq(nrow(as.data.frame(name_tag))))#convert tags into lists. get_layer <- function(l) { ocd_url <- paste0(base_data_url, l[[1]], l[[2]]) - ocd_map <- terra::extract(terra::rast(ocd_url), p_reproj) - unlist(ocd_map[, -1]) / 10 + tryCatch({ + # Create temporary vector inside worker + p_temp <- terra::vect(p_coords, crs = newcrs) + vals <- terra::extract(terra::rast(ocd_url), p_temp) + unlist(vals[, -1]) / 10 + }, error = function(e) { + rep(NA, nrow(p_coords)) + }) } - ocd_real <- try(furrr::future_map(L, get_layer, .progress = TRUE)) + ocd_real <- try(furrr::future_map(L, get_layer, .options = furrr::furrr_options(seed = TRUE), .progress = TRUE)) if ("try-error" %in% class(ocd_real)) { ocd_real <- vector("list", length = length(L)) pb <- utils::txtProgressBar(min = 0, max = length(L), style = 3) @@ -116,6 +138,19 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { ocd_df$Value<-as.numeric(ocd_df$Value) f1<-factor(ocd_df$Siteid,levels=unique(ocd_df$Siteid)) f2<-factor(ocd_df$Depth,levels=unique(ocd_df$Depth)) + + # Skip if not enough quantiles (before gamma fitting) + if (length(unique(ocd_df$Quantile)) < 2) { + if (verbose) { + PEcAn.logger::logger.warn( + "Insufficient quantiles (", length(unique(ocd_df$Quantile)), ") ", + "available for gamma distribution fitting at some sites. ", + "Require at least 2 different quantiles to fit parameters." 
+ ) + } + return(NULL) + } + #split data by groups of sites and soil depth, while keeping the original order of each group dat <- split(ocd_df, list(f1, f2)) @@ -132,22 +167,29 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { } fitQ <- function(x) { - val = x$Value - stat = as.character(x$Quantile) - theta = c(10, 10) - fit <- - list(Gamma = stats::optim(theta, cgamma, val = val, stat = stat)) - SS <- sapply(fit, function(f) { - f$value - }) - par <- sapply(fit, function(f) { - f$par - }) - return(list(par = par, SS = SS)) + val <- x$Value + stat <- as.character(x$Quantile) + # Skip fitting if all values are NA or not numeric + if (all(is.na(val)) || length(val) == 0) { + return(list(par = c(NA, NA), SS = NA)) + } + theta <- c(10, 10) + fit <- tryCatch( + stats::optim(theta, cgamma, val = val, stat = stat), + error = function(e) NULL + ) + if (is.null(fit)) { + return(list(par = c(NA, NA), SS = NA)) + } + return(list(par = fit$par, SS = fit$value)) } score <- suppressWarnings(lapply(dat, fitQ)) bestPar <- sapply(score, function(f) { f$par }) + # Ensure bestPar is a 2-row matrix even when invalid sites are present + if (is.null(dim(bestPar)) || nrow(bestPar) != 2) { + bestPar <- matrix(bestPar, nrow = 2, byrow = TRUE) + } mean <- bestPar[1,] / bestPar[2,] std <- sqrt(bestPar[1,] / bestPar[2,] ^ 2) mean_site <- matrix(mean, length(internal_site_info$lon), 6) @@ -184,11 +226,17 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { rownames(soilgrids_soilC_data) <- NULL if (!is.null(outdir)) { - PEcAn.logger::logger.info(paste0("Storing results in: ",file.path(outdir,"soilgrids_soilC_data.csv"))) - utils::write.csv(soilgrids_soilC_data,file=file.path(outdir,"soilgrids_soilC_data.csv"),row.names = FALSE) - } - else { - PEcAn.logger::logger.error("No output directory found.") + # Ensure the directory exists; create if not + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = TRUE) + PEcAn.logger::logger.info(paste0("Created output directory: ", outdir)) + } + PEcAn.logger::logger.info(paste0("Storing results in: ", file.path(outdir, "soilgrids_soilC_data.csv"))) + utils::write.csv(soilgrids_soilC_data, + file = file.path(outdir, "soilgrids_soilC_data.csv"), + row.names = FALSE) + } else { + PEcAn.logger::logger.warn("No output directory found. Results are only returned to R environment.") } # return the results to the terminal as well return(soilgrids_soilC_data) From f4e26b6d0671e338ef729862e9a65ef9dfdb95bd Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 3 Apr 2025 12:43:05 -0400 Subject: [PATCH 0067/1193] Update documentation. 
--- modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd | 2 +- modules/assim.sequential/man/downscale_qsub_main.Rd | 2 +- modules/assim.sequential/man/downscale_rf_main.Rd | 2 +- modules/assim.sequential/man/parallel_prediction.Rd | 2 +- modules/assim.sequential/man/parallel_rf_train.Rd | 2 +- modules/assim.sequential/man/pecan_settings_2_pts.Rd | 2 +- modules/assim.sequential/man/prepare_train_dat.Rd | 2 +- modules/assim.sequential/man/stack_covariates_2_df.Rd | 2 +- modules/assim.sequential/man/stack_covariates_2_geotiff.Rd | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd b/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd index 5443add6937..0f2dca9761c 100644 --- a/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd +++ b/modules/assim.sequential/man/Average_ERA5_2_GeoTIFF.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{Average_ERA5_2_GeoTIFF} \alias{Average_ERA5_2_GeoTIFF} \title{Average_ERA5_2_GeoTIFF} diff --git a/modules/assim.sequential/man/downscale_qsub_main.Rd b/modules/assim.sequential/man/downscale_qsub_main.Rd index dbf35a30c41..0aeb957983f 100644 --- a/modules/assim.sequential/man/downscale_qsub_main.Rd +++ b/modules/assim.sequential/man/downscale_qsub_main.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{downscale_qsub_main} \alias{downscale_qsub_main} \title{downscale_qsub_main} diff --git a/modules/assim.sequential/man/downscale_rf_main.Rd b/modules/assim.sequential/man/downscale_rf_main.Rd index 05b7568a934..99962b4cc4e 100644 --- a/modules/assim.sequential/man/downscale_rf_main.Rd +++ b/modules/assim.sequential/man/downscale_rf_main.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{downscale_rf_main} \alias{downscale_rf_main} \title{downscale_rf_main} diff --git a/modules/assim.sequential/man/parallel_prediction.Rd b/modules/assim.sequential/man/parallel_prediction.Rd index 3fceff412a2..11edbf93cb5 100644 --- a/modules/assim.sequential/man/parallel_prediction.Rd +++ b/modules/assim.sequential/man/parallel_prediction.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{parallel_prediction} \alias{parallel_prediction} \title{parallel_prediction} diff --git a/modules/assim.sequential/man/parallel_rf_train.Rd b/modules/assim.sequential/man/parallel_rf_train.Rd index 1d8863f582a..a001a78d365 100644 --- a/modules/assim.sequential/man/parallel_rf_train.Rd +++ b/modules/assim.sequential/man/parallel_rf_train.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{parallel_rf_train} \alias{parallel_rf_train} \title{parallel_rf_train} diff --git a/modules/assim.sequential/man/pecan_settings_2_pts.Rd b/modules/assim.sequential/man/pecan_settings_2_pts.Rd index e676382d67f..0a027356987 100644 --- a/modules/assim.sequential/man/pecan_settings_2_pts.Rd +++ b/modules/assim.sequential/man/pecan_settings_2_pts.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not 
edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{pecan_settings_2_pts} \alias{pecan_settings_2_pts} \title{pecan_settings_2_pts} diff --git a/modules/assim.sequential/man/prepare_train_dat.Rd b/modules/assim.sequential/man/prepare_train_dat.Rd index 1925a5342d1..4dcde31dd5e 100644 --- a/modules/assim.sequential/man/prepare_train_dat.Rd +++ b/modules/assim.sequential/man/prepare_train_dat.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{prepare_train_dat} \alias{prepare_train_dat} \title{prepare_train_dat} diff --git a/modules/assim.sequential/man/stack_covariates_2_df.Rd b/modules/assim.sequential/man/stack_covariates_2_df.Rd index ed69f768faf..edb3011aa83 100644 --- a/modules/assim.sequential/man/stack_covariates_2_df.Rd +++ b/modules/assim.sequential/man/stack_covariates_2_df.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{stack_covariates_2_df} \alias{stack_covariates_2_df} \title{stack_covariates_2_df} diff --git a/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd b/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd index d83411ec708..ad74d3c6fdb 100644 --- a/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd +++ b/modules/assim.sequential/man/stack_covariates_2_geotiff.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/SDA_NA_downscale.R +% Please edit documentation in R/SDA_parallel_downscale.R \name{stack_covariates_2_geotiff} \alias{stack_covariates_2_geotiff} \title{stack_covariates_2_geotiff} From c94077dda05bfce6762d0417828782dbbd918987 Mon Sep 17 00:00:00 2001 From: Blesson Date: Sat, 5 Apr 2025 09:57:14 +0530 Subject: [PATCH 0068/1193] Add files via upload changed the file location --- .../tests/testthat/ensemble-test.R | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 modules/uncertainty/tests/testthat/ensemble-test.R diff --git a/modules/uncertainty/tests/testthat/ensemble-test.R b/modules/uncertainty/tests/testthat/ensemble-test.R new file mode 100644 index 00000000000..07b3d548dc9 --- /dev/null +++ b/modules/uncertainty/tests/testthat/ensemble-test.R @@ -0,0 +1,88 @@ +library(testthat) +library(PEcAn.logger) +library(PEcAn.DB) + + +source("modules/uncertainty/R/ensemble.R") +dummy_binary_path <- file.path(tempdir(), "sipnet") +file.create(dummy_binary_path) +# Mock SIPNET writer +if (!exists("write.config.SIPNET")) { + write.config.SIPNET <- function(...) 
{ + PEcAn.logger::logger.info("Mock SIPNET writer called") + return(invisible(TRUE)) + } +} + +context("Ensemble Input Validation Tests") + +create_base_settings <- function() { + list( + workflow = list(id = 1), + model = list( + id = 1000, + type = "SIPNET", + binary = dummy_binary_path + ), + run = list( + site = list(id = 1, name = "Test Site", lat = 40.0, lon = -80.0), + start.date = "2004-01-01", + end.date = "2004-12-31" + ), + host = list( + outdir = tempdir(), + rundir = tempdir(), + name = "localhost" + ), + database = list(bety = list(write = FALSE)) + ) +} + +test_that("Single input with no samples works", { + withr::local_tempdir() + + def <- list( + inputs = list(soil = list(path = "soil1.nc")), + pfts = list(list( + name = "temperate.pft", + constants = list(param1 = 0.5) + )), + model = list(id = 1000), + database = list(bety = list(write = FALSE)) + ) + + settings <- create_base_settings() + settings$run$inputs <- list(soil = list(path = "soil1.nc")) + settings$ensemble <- list(size = 1) + + writeLines("", "soil1.nc") + + result <- write.ensemble.configs(def, NULL, settings, "SIPNET") + expect_true(!is.null(result$runs)) + expect_true(!is.null(result$ensemble.id)) +}) + +test_that("Multiple inputs without samples throws error", { + def <- list( + inputs = list(soil = list(path = c("soil1.nc", "soil2.nc"))), + pfts = list(list( + name = "temperate.pft", + constants = list(param1 = 0.5) + )), + model = list(id = 1000), + database = list(bety = list(write = FALSE)) + ) + + settings <- create_base_settings() + settings$ensemble <- list(size = 1) + + purrr::walk(c("soil1.nc", "soil2.nc"), ~ writeLines("", .x)) + + expect_error( + write.ensemble.configs(def, NULL, settings, "SIPNET"), + "Multiple soil inputs found but no sampling method specified" + ) +}) + +# ... rest of tests with similar corrections ... 
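A quick way to exercise the new suite locally (assuming `testthat` and `mockery` are installed):

# Run only the new ensemble input-validation tests:
testthat::test_file("modules/uncertainty/tests/testthat/ensemble-test.R")
# Or run the full package test suite:
# devtools::test("modules/uncertainty")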
+ From d8d58d9d478b4695f4d6e4f485b15305d4c77d6b Mon Sep 17 00:00:00 2001 From: Aritra Dey Date: Thu, 10 Apr 2025 00:56:02 +0530 Subject: [PATCH 0069/1193] docs: enhance X-schema.org-keywords in DESCRIPTION files for better package discoverability --- base/all/DESCRIPTION | 1 + base/db/DESCRIPTION | 1 + base/logger/DESCRIPTION | 1 + base/qaqc/DESCRIPTION | 1 + base/remote/DESCRIPTION | 1 + base/settings/DESCRIPTION | 1 + base/utils/DESCRIPTION | 1 + base/visualization/DESCRIPTION | 1 + base/workflow/DESCRIPTION | 1 + 9 files changed, 9 insertions(+) diff --git a/base/all/DESCRIPTION b/base/all/DESCRIPTION index 2cad68114e2..6e7f9b5631e 100644 --- a/base/all/DESCRIPTION +++ b/base/all/DESCRIPTION @@ -78,3 +78,4 @@ LazyData: true Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 +X-schema.org-keywords: PEcAn, QAQC, integration, model, skill, testing, quality assurance, quality control, model validation, model evaluation, performance metrics, statistical analysis, data visualization, model diagnostics, benchmarking, model comparison, scientific validation, reproducibility \ No newline at end of file diff --git a/base/db/DESCRIPTION b/base/db/DESCRIPTION index f88a16982e4..861910aac54 100644 --- a/base/db/DESCRIPTION +++ b/base/db/DESCRIPTION @@ -79,3 +79,4 @@ LazyLoad: yes LazyData: FALSE Encoding: UTF-8 RoxygenNote: 7.3.2 +X-schema.org-keywords: PEcAn, database, management, tool, model, parameterization, execution, analysis diff --git a/base/logger/DESCRIPTION b/base/logger/DESCRIPTION index 50bb54d1eee..b8eff6dffbc 100644 --- a/base/logger/DESCRIPTION +++ b/base/logger/DESCRIPTION @@ -31,3 +31,4 @@ License: BSD_3_clause + file LICENSE Encoding: UTF-8 RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) +X-schema.org-keywords: PEcAn, logger, functions, outputs, workflow, management, tool, debugging, error handling, logging levels, console output, file logging, error control, log4j, message filtering, execution control, scientific workflow, diagnostic tools diff --git a/base/qaqc/DESCRIPTION b/base/qaqc/DESCRIPTION index 041f957d25b..26441f6a332 100644 --- a/base/qaqc/DESCRIPTION +++ b/base/qaqc/DESCRIPTION @@ -38,3 +38,4 @@ Encoding: UTF-8 VignetteBuilder: knitr, rmarkdown Config/testthat/edition: 3 RoxygenNote: 7.3.2 +X-schema.org-keywords: PEcAn, QAQC, integration, model, skill, testing, quality assurance, quality control, model validation, model evaluation, performance metrics, statistical analysis, data visualization, model diagnostics, benchmarking, model comparison, scientific validation, reproducibility diff --git a/base/remote/DESCRIPTION b/base/remote/DESCRIPTION index 196d97967d2..a4e7be620fa 100644 --- a/base/remote/DESCRIPTION +++ b/base/remote/DESCRIPTION @@ -35,3 +35,4 @@ Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 +X-schema.org-keywords: PEcAn, remote, model, execution, utilities, communication, code, distributed computing, remote hosts, parallel processing, ecosystem modeling, workflow automation, SSH, HTTP, JSON, API integration, cluster computing, scientific computing, model deployment diff --git a/base/settings/DESCRIPTION b/base/settings/DESCRIPTION index f7b3e0409b0..d410a3a3ef3 100644 --- a/base/settings/DESCRIPTION +++ b/base/settings/DESCRIPTION @@ -31,3 +31,4 @@ Suggests: Encoding: UTF-8 RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) +X-schema.org-keywords: PEcAn, settings, functions, files, configuration management, XML parsing, parameter settings, workflow configuration, model parameters, environment variables, 
system settings, data settings, simulation settings, scientific workflow, configuration files, settings validation, parameter management diff --git a/base/utils/DESCRIPTION b/base/utils/DESCRIPTION index 72948b3d713..5b8cbb3f767 100644 --- a/base/utils/DESCRIPTION +++ b/base/utils/DESCRIPTION @@ -59,3 +59,4 @@ LazyData: true Encoding: UTF-8 RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) +X-schema.org-keywords: PEcAn, utilities, functions, tools, data manipulation, scientific workflow, model parameterization, data analysis, netCDF handling, time series analysis, unit conversion, data processing, scientific computing, ecological forecasting, model execution, data integration, workflow management, scientific investigation diff --git a/base/visualization/DESCRIPTION b/base/visualization/DESCRIPTION index 8b38be4c664..c6ee5058f38 100644 --- a/base/visualization/DESCRIPTION +++ b/base/visualization/DESCRIPTION @@ -54,3 +54,4 @@ Encoding: UTF-8 VignetteBuilder: knitr, rmarkdown RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) +X-schema.org-keywords: PEcAn, visualization, functions, data, models, data visualization, scientific plotting, model output visualization, time series plots, spatial visualization, statistical graphics, ggplot2, data analysis, scientific communication, model diagnostics, ecological data visualization, climate data visualization, scientific figures, data exploration diff --git a/base/workflow/DESCRIPTION b/base/workflow/DESCRIPTION index ae07761cb71..d5a376b8fd7 100644 --- a/base/workflow/DESCRIPTION +++ b/base/workflow/DESCRIPTION @@ -46,3 +46,4 @@ Suggests: Copyright: Authors Encoding: UTF-8 RoxygenNote: 7.3.2 +X-schema.org-keywords: PEcAn, workflow, functions, ecological, forecasts, reanalysis, scientific workflow, model execution, data assimilation, parameter estimation, uncertainty analysis, ecosystem modeling, scientific analysis, workflow automation, model-data fusion, ecological forecasting, scientific investigation, data processing, model integration, workflow management From abb70f249cee5fe1f11efb540fbc4c51eaec2297 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 10 Apr 2025 01:31:19 -0400 Subject: [PATCH 0070/1193] Improve efficiency. --- .../R/SDA_parallel_downscale.R | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/modules/assim.sequential/R/SDA_parallel_downscale.R b/modules/assim.sequential/R/SDA_parallel_downscale.R index 8e75a562779..a11b209f099 100644 --- a/modules/assim.sequential/R/SDA_parallel_downscale.R +++ b/modules/assim.sequential/R/SDA_parallel_downscale.R @@ -123,9 +123,15 @@ stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file. names(temp.rast) <- f$var.name } # raster operations. - terra::crs(temp.rast) <- terra::crs(base.map) - temp.rast <- terra::crop(temp.rast, base.map) - temp.rast <- terra::resample(temp.rast, base.map) + if (! terra::crs(base.map) == terra::crs(temp.rast)) { + terra::crs(temp.rast) <- terra::crs(base.map) + } + if (! terra::ext(base.map) == terra::ext(temp.rast)) { + temp.rast <- terra::crop(temp.rast, base.map) + } + if (! all(c(nrow(base.map) == nrow(temp.rast), ncol(base.map) == ncol(temp.rast)))) { + temp.rast <- terra::resample(temp.rast, base.map) + } # write the raster into disk. file.name <- paste0(f$var.name, collapse = "_") path <- file.path(outdir, paste0(file.name, ".tiff")) @@ -150,13 +156,17 @@ stack_covariates_2_geotiff <- function(outdir, year, base.map.dir, cov.tif.file. #' convert settings to geospatial points in terra. 
#' @title pecan_settings_2_pts
 #'
-#' @param settings PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.
+#' @param settings PEcAn settings: either a character path to a settings XML file or a point shapefile, or an actual PEcAn settings object.
 #'
 #' @return terra spatial points object.
 #'
 #' @author Dongchen Zhang
 pecan_settings_2_pts <- function(settings) {
   if (is.character(settings)) {
+    # if it's a shapefile.
+    if (grepl("\\.shp$", settings)) {
+      return(terra::vect(settings))
+    }
     # read settings.
     settings <- PEcAn.settings::read.settings(settings)
   }
@@ -205,6 +215,7 @@ stack_covariates_2_df <- function(rast.dir, cores = parallel::detectCores()) {
       # if it's LC layer.
       if ("LC" == names(all.rast)[r]) {
         non.veg.inds <- which(! temp.vec %in% 1:8)
+        # non.veg.inds <- which(! temp.vec %in% 0:11)
         na.inds <- unique(c(na.inds, non.veg.inds))
       }
       return(list(vec = temp.vec,
@@ -253,6 +264,7 @@ prepare_train_dat <- function(pts, analysis, covariates.dir, variable) {
     as.data.frame() %>% `colnames<-`(paste0("ensemble", seq(nrow(analysis))))
   # combine carbon and predictor.
   full_data <- cbind(var.dat, predictors)
+  full_data <- full_data[which(full_data$LC %in% 1:8),]
   return(full_data)
 }

@@ -404,7 +416,7 @@ downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable
   PEcAn.logger::logger.info("Converting geotiff to df.")
   cov.df <- stack_covariates_2_df(rast.dir = covariates.dir, cores = cores)
   # reconstruct LC because of the computation accuracy.
-  cov.df$df$LC[which(cov.df$df$LC < 1)] <- 0
+  # cov.df$df$LC[which(cov.df$df$LC < 1)] <- 0
   # convert LC into factor.
   if ("LC" %in% colnames(cov.df$df)) {
     cov.df$df[,"LC"] <- factor(cov.df$df[,"LC"])
@@ -416,7 +428,7 @@ downscale_rf_main <- function(settings, analysis, covariates.dir, time, variable
                              cov.vecs = cov.df$df,
                              non.na.inds = cov.df$non.na.inds,
                              outdir = folder.name,
-                             name = list(time = time, variable = variable),
+                             name = list(time = as.character(time), variable = variable),
                              cores = cores)
   # calculate mean and std.
   PEcAn.logger::logger.info("Calculate mean and std.")

From f677d1822fe43ae5379cfbaa96246eb8706e8acc Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 10 Apr 2025 01:31:33 -0400
Subject: [PATCH 0071/1193] Add varied land cover and stand age.

---
 .../inst/anchor/NA_downscale_script.R         | 236 ++++++++++++++++--
 1 file changed, 209 insertions(+), 27 deletions(-)

diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
index 9e43a865eb6..05d30bdbc80 100644
--- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R
+++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R
@@ -2,6 +2,175 @@ library(purrr)
 library(foreach)
 library(PEcAnAssimSequential)
 setwd("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/")
+# prepare stand age time-series.
+modis.lc.folder <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/MODIS_LC/LC"
+stand.age.out.folder <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/MODIS_LC/stand_age"
+# filter land cover time-series.
+# loop over years.
+# read tiff file.
+forest_type <- c(1:4)
+grass_type <- c(5:8)
+non_veg_type <- c(0, 9, 10, 11)
+base.map <- terra::rast("/projectnb/dietzelab/dongchen/anchorSites/downscale/base_map.tiff")
+base_crs <- terra::crs(base.map)
+base_ext <- terra::ext(base.map)
+# load forest age data.
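# (the matrix() call below collapses the forest-age raster to a single
#  column of cell values; terra returns cells in row-major order, so this
#  assumes the forest-age GeoTIFF already shares the base map's grid and
#  lines up cell-for-cell with the MODIS LC matrices built the same way.)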
+forest_age <- matrix(terra::rast("/projectnb/dietzelab/dongchen/anchorSites/downscale/forest_age/forest_age_2010_TC000_crop.tiff"), byrow = T)
+# calculate mean age for different LC types.
+LC <- matrix(terra::rast(file.path(modis.lc.folder, paste0(2010, ".tif")))[[5]], byrow = T)
+mean_age <- c()
+for (i in 1:8) {
+  mean_age <- c(mean_age, mean(forest_age[which(LC == i)], na.rm = T))
+}
+# function for filtering time series.
+filter.lc.ts <- function(vec, window.L = 3) {
+  L <- length(vec)
+  window <- c()
+  edge.case <- FALSE
+  #
+  if(length(unique(vec)) == 1) {
+    return(c(unique(vec), unique(vec), length(vec)))
+  }
+  for (i in L:1) {
+    # push item into the window.
+    window <- c(window, vec[i])
+    # print(window)
+    # if window has not reached its size.
+    if (length(window) < window.L) {
+      next
+    }
+    # window operation.
+    uni.val <- unique(window)
+    # if there is no change.
+    if (length(uni.val) == 1) {
+
+    } else if (length(uni.val) > 1) {
+      # check if head == tail.
+      if (head(window, 1) == tail(window, 1)) {
+
+      } else {
+        window.ind <- window.L - tail(which(window == head(uni.val, 1)), 1) + 1
+        return(c(tail(rev(uni.val), 2), L - i + 1 - window.ind + 1))
+      }
+    }
+    # remove the last item from the window.
+    window <- tail(window, -1)
+  }
+  # if there is no disturbance after all.
+  return(c(vec[length(vec)], vec[length(vec)], length(vec)))
+}
+
+# store MODIS land cover time-series into matrix.
+ts_lc <- c()
+for (end.year in 2012:2023) {
+  print(end.year)
+  if (end.year == 2012) {
+    start.year <- 2001
+  } else {
+    start.year <- end.year
+  }
+  # load last year MODIS LC map.
+  LC <- matrix(terra::rast(file.path(modis.lc.folder, paste0(end.year, ".tif")))[[5]], byrow = T)
+  # store MODIS land cover time-series into matrix.
+  # ts_lc <- c()
+  for (y in start.year:end.year) {
+    # load image.
+    lc_tif <- terra::rast(file.path(modis.lc.folder, paste0(y, ".tif")))
+    lc_matrix <- matrix(lc_tif[[5]], byrow = T)
+    # reclassify.
+    lc_matrix[which(lc_matrix %in% forest_type)] <- 1
+    lc_matrix[which(lc_matrix %in% grass_type)] <- 2
+    lc_matrix[which(lc_matrix %in% non_veg_type)] <- 3
+    # combine image.
+    ts_lc <- cbind(ts_lc, lc_matrix)
+    # print(y)
+  }
+  # split the matrix into chunks for parallel processing.
+  split_data.matrix <- function(matrix, chunk.size=100) {
+    ncols <- dim(matrix)[2]
+    nchunks <- (ncols-1) %/% chunk.size + 1
+    split.data <- list()
+    min <- 1
+    for (i in seq_len(nchunks)) {
+      if (i == nchunks-1) { #make last two chunks of equal size
+        left <- ncols-(i-1)*chunk.size
+        max <- min+round(left/2)-1
+      } else {
+        max <- min(i*chunk.size, ncols)
+      }
+      split.data[[i]] <- t(matrix[,min:max,drop=FALSE])
+      min <- max+1 #for next chunk
+    }
+    return(split.data)
+  }
+  mat.lists <- split_data.matrix(t(ts_lc), floor(dim(ts_lc)[1]/parallel::detectCores()))
+  # register parallel nodes.
+  cl <- parallel::makeCluster(parallel::detectCores())
+  doSNOW::registerDoSNOW(cl)
+  res <- foreach::foreach(d = mat.lists, .packages=c("purrr")) %dopar% {
+    temp.res <- matrix(NA, dim(d)[1], 4) %>% `colnames<-`(c("from", "to", "years", "type"))
+    pb <- utils::txtProgressBar(min=1, max=dim(d)[1], style=3)
+    for (i in 1:dim(d)[1]) {
+      if (any(is.na(d[i,]))) next
+      temp.res[i, 1:3] <- filter.lc.ts(d[i,])
+      # grab change patterns.
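# (the codes assigned below follow the reclassified legend above, where
#  1 = forest, 2 = grass, 3 = non-vegetated: type 1 is forest to grass,
#  2 forest to non-veg, 3 grass to non-veg, 4 grass to forest,
#  5 non-veg to forest, 6 non-veg to grass.)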
+      if (all(temp.res[i, 1:2] == c(1, 2))) {
+        temp.res[i, 4] <- 1
+      } else if (all(temp.res[i, 1:2] == c(1, 3))) {
+        temp.res[i, 4] <- 2
+      } else if (all(temp.res[i, 1:2] == c(2, 3))) {
+        temp.res[i, 4] <- 3
+      } else if (all(temp.res[i, 1:2] == c(2, 1))) {
+        temp.res[i, 4] <- 4
+      } else if (all(temp.res[i, 1:2] == c(3, 1))) {
+        temp.res[i, 4] <- 5
+      } else if (all(temp.res[i, 1:2] == c(3, 2))) {
+        temp.res[i, 4] <- 6
+      }
+      utils::setTxtProgressBar(pb, i)
+    }
+    return(temp.res)
+  }
+  res <- do.call(rbind, res)
+  # any pixel in forest that is tagged as grassland should be replaced with
+  # the mean age of its land-cover class (see the mean_age fill below).
+  # load forest age data.
+  forest_age <- matrix(terra::rast("/projectnb/dietzelab/dongchen/anchorSites/downscale/forest_age/forest_age_2010_TC000_crop.tiff"), byrow = T)
+  forest_age <- cbind(forest_age, res, LC) %>% `colnames<-`(c("forest_age", "from", "to", "years", "type", "LC"))
+  forest_age <- split_data.matrix(t(forest_age), floor(dim(forest_age)[1]/parallel::detectCores()))
+  forest_age <- foreach::foreach(d = forest_age, .packages=c("purrr")) %dopar% {
+    for (i in 1:dim(d)[1]) {
+      # if it's disturbed vegetation.
+      if (is.na(d[i, "years"])) next
+      if (d[i, "years"] < (end.year - 2000)) {
+        d[i, "forest_age"] <- d[i, "years"]
+        next
+      }
+      # no record for the forest age.
+      if (is.na(d[i, "forest_age"])) {
+        # if it is non-vegetation.
+        if (d[i, "to"] == 3) {
+          # forest_age[i] <- 0
+          next
+        } else {
+          # if it's non-disturbed vegetation.
+          d[i, "forest_age"] <- mean_age[d[i, "LC"]]
+        }
+      }
+    }
+    return(d)
+  }
+  forest_age <- do.call(rbind, forest_age)
+  # stop parallel.
+  parallel::stopCluster(cl)
+  foreach::registerDoSEQ()
+  # write to raster.
+  forest_age <- terra::rast(matrix(forest_age[,"forest_age"], 9360, 19080, byrow = T))
+  terra::ext(forest_age) <- base_ext
+  terra::crs(forest_age) <- base_crs
+  names(forest_age) <- "year_since_disturb"
+  terra::writeRaster(forest_age, file=file.path(stand.age.out.folder, paste0(end.year, "_stand_age.tif")))
+  gc()
+}
 # average ERA5 to climatic covariates.
 outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET"
 in.path <- "/projectnb/dietzelab/dongchen/anchorSites/ERA5/"
@@ -20,48 +189,61 @@ future::plan(future::multisession, workers = 5, gc = T)
 paths <- start.dates %>% furrr::future_map2(end.dates, function(d1, d2){
   Average_ERA5_2_GeoTIFF(d1, d2, in.path, outdir)
 }, .progress = T) %>% unlist
-# setup.
-base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif"
-load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/sda.all.forecast.analysis.Rdata")
-variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb")
-settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/pecanIC.xml"
-outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/downscale_maps/"
-cores <- 28
-date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year")
-# loop over years.
-for (i in seq_along(date)) {
+# create covariates time series.
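# (the loop below assumes no MODIS LC product is available for 2024 yet,
#  so the 2023 stand-age layer is reused for that year; GridMET rasters
#  are matched by the year embedded in their file names.)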
+for (y in 2012:2024) { + print(y) + if (y == 2024) { + y.lc <- 2023 + } else { + y.lc <- y + } + # LC <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/MODIS_LC/LC", paste0(y.lc, ".tif")) + LC <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif" + stand.age <- file.path("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/MODIS_LC/stand_age", paste0(y.lc, "_stand_age.tif")) + met <- list.files("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/GridMET", full.names = T) + met <- met[which(grepl(y, met))] # setup covariates paths and variable names. - cov.tif.file.list <- list(LC = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif", + cov.tif.file.list <- list(LC = list(dir = LC, var.name = "LC"), - year_since_disturb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_LC/outputs/age.tif", + year_since_disturb = list(dir = stand.age, var.name = "year_since_disturb"), agb = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/AGB/agb.tif", var.name = "agb"), twi = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/TWI/TWI_resample.tiff", var.name = "twi"), - met = list(dir = paths[i], + met = list(dir = met, var.name = c("temp", "prec", "srad", "vapr")), soil = list(dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/SoilGrids.tif", var.name = c("PH", "N", "SOC", "Sand"))) + covariates.dir <- stack_covariates_2_geotiff(outdir = "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/covariates", + year = y, + base.map.dir = "/projectnb/dietzelab/dongchen/anchorSites/downscale/base_map.tiff", + cov.tif.file.list = cov.tif.file.list, + normalize = T, + cores = parallel::detectCores()) +} + +# setup. +base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif" +load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/sda.all.forecast.analysis.Rdata") +variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb") +# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/pecanIC.xml" +settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/ShapeFile/pts.shp" +outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/" +cores <- 28 +date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year") +# loop over years. +for (i in seq_along(date)) { # Assemble covariates. - if (file.exists(paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff"))) { - covariates.dir <- paste0(outdir, "covariates_", lubridate::year(date[i]), ".tiff") - } else { - covariates.dir <- stack_covariates_2_geotiff(outdir = outdir, - year = lubridate::year(date[i]), - base.map.dir = base.map.dir, - cov.tif.file.list = cov.tif.file.list, - normalize = T, - cores = cores) - } + covariates.dir <- file.path(outdir, "covariates_lc_ts", paste0("covariates_", lubridate::year(date[i]), ".tiff")) # grab analysis. - analysis.yr <- analysis.all[[i]] + analysis.yr <- forecast.all[[i]] time <- date[i] # loop over carbon types. for (j in seq_along(variables)) { # setup folder. 
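# (each variable/year pair gets its own folder with a dat.rds holding the
#  inputs downscale_rf_main needs, plus a templated job.sh that is submitted
#  through qsub; see the gsub() placeholder substitution just below.)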
variable <- variables[j]
-      folder.path <- file.path(outdir, paste0(variables[j], "_", date[i]))
+      folder.path <- file.path(file.path(outdir, "downscale_maps_forecast_lc_ts"), paste0(variables[j], "_", date[i]))
       dir.create(folder.path)
       saveRDS(list(settings = settings,
                    analysis.yr = analysis.yr,
@@ -71,7 +253,7 @@ for (i in seq_along(date)) {
                    folder.path = folder.path,
                    base.map.dir = base.map.dir,
                    cores = cores,
-                   outdir = outdir),
+                   outdir = file.path(outdir, "downscale_maps_forecast_lc_ts")),
               file = file.path(folder.path, "dat.rds"))
       # prepare for qsub.
       jobsh <- c("#!/bin/bash -l",
@@ -84,7 +266,7 @@ for (i in seq_along(date)) {
       jobsh <- gsub("@FOLDER_PATH@", folder.path, jobsh)
       writeLines(jobsh, con = file.path(folder.path, "job.sh"))
       # qsub command.
-      qsub <- "qsub -l h_rt=6:00:00 -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
+      qsub <- "qsub -l h_rt=10:00:00 -l buyin -pe omp @CORES@ -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash"
       qsub <- gsub("@CORES@", cores, qsub)
       qsub <- gsub("@NAME@", paste0("ds_", i, "_", j), qsub)
       qsub <- gsub("@STDOUT@", file.path(folder.path, "stdout.log"), qsub)

From ea80cb209891b88ca4ba093e3126471a585b12a4 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Thu, 10 Apr 2025 01:31:41 -0400
Subject: [PATCH 0072/1193] Update documentation.

---
 modules/assim.sequential/man/pecan_settings_2_pts.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.sequential/man/pecan_settings_2_pts.Rd b/modules/assim.sequential/man/pecan_settings_2_pts.Rd
index 0a027356987..1acbac65946 100644
--- a/modules/assim.sequential/man/pecan_settings_2_pts.Rd
+++ b/modules/assim.sequential/man/pecan_settings_2_pts.Rd
@@ -7,7 +7,7 @@
 pecan_settings_2_pts(settings)
 }
 \arguments{
-\item{settings}{PEcAn settings: either a character that points to the settings or the actual settings object will be accepted.}
+\item{settings}{PEcAn settings: either a character path to a settings XML file or a point shapefile, or an actual PEcAn settings object.}
 }
 \value{
 terra spatial points object.
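The one-line documentation change above reflects that pecan_settings_2_pts() now dispatches on its single argument in three ways. A minimal usage sketch, with hypothetical paths:

library(PEcAnAssimSequential)
# 1) character path to a pecan.xml: read.settings() is called internally
pts <- pecan_settings_2_pts("/path/to/pecan.xml")
# 2) character path to a point shapefile: loaded directly via terra::vect()
pts <- pecan_settings_2_pts("/path/to/pts.shp")
# 3) an already-parsed settings object
settings <- PEcAn.settings::read.settings("/path/to/pecan.xml")
pts <- pecan_settings_2_pts(settings)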
From bb09c4bdc9135ccfc23282b5967bbe696359de81 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Thu, 10 Apr 2025 23:27:27 +0000 Subject: [PATCH 0073/1193] Added SoilGrid IC Utilities --- base/settings/NAMESPACE | 1 + base/settings/NEWS.md | 1 + base/settings/R/get.site.info.R | 205 ++++++++++ base/settings/man/get.site.info.Rd | 40 ++ .../tests/testthat/test.get.site.info.R | 182 +++++++++ modules/data.land/NAMESPACE | 3 + modules/data.land/NEWS.md | 6 +- modules/data.land/R/IC_SOILGRID_Utilities.R | 373 ++++++++++++++++++ .../man/generate_soilgrids_ensemble.Rd | 34 ++ .../man/preprocess_soilgrids_data.Rd | 28 ++ modules/data.land/man/soilgrids_ic_process.Rd | 52 +++ 11 files changed, 924 insertions(+), 1 deletion(-) create mode 100644 base/settings/R/get.site.info.R create mode 100644 base/settings/man/get.site.info.Rd create mode 100644 base/settings/tests/testthat/test.get.site.info.R create mode 100644 modules/data.land/R/IC_SOILGRID_Utilities.R create mode 100644 modules/data.land/man/generate_soilgrids_ensemble.Rd create mode 100644 modules/data.land/man/preprocess_soilgrids_data.Rd create mode 100644 modules/data.land/man/soilgrids_ic_process.Rd diff --git a/base/settings/NAMESPACE b/base/settings/NAMESPACE index ec2a23e9ea5..609d3c38fb3 100644 --- a/base/settings/NAMESPACE +++ b/base/settings/NAMESPACE @@ -35,6 +35,7 @@ export(createMultiSiteSettings) export(createSitegroupMultiSettings) export(expandMultiSettings) export(fix.deprecated.settings) +export(get.site.info) export(get_args) export(is.MultiSettings) export(is.SafeList) diff --git a/base/settings/NEWS.md b/base/settings/NEWS.md index 034822a763f..2a9591f2ed1 100644 --- a/base/settings/NEWS.md +++ b/base/settings/NEWS.md @@ -10,6 +10,7 @@ * `createMultiSiteSettings` argument `siteIds` now accepts data frames as well as the previously accepted numeric or character vectors. The data frame should have one site per row, uniquely identified by a mandatory `id` column. All columns of each row will become fields of the resulting `settings$run$site` block. * New function `setEnsemblePaths` inserts paths to your ensemble inputs (met, poolinitcond, etc) into every site's `inputs` block according to the filename pattern specified in a template string. +* New function `get.site.info` extracts and validates site information from either a PEcAn settings object or a CSV file, providing a standardized data frame with site_id, site_name, lat, lon, and str_id. ## Fixed diff --git a/base/settings/R/get.site.info.R b/base/settings/R/get.site.info.R new file mode 100644 index 00000000000..d252c54dace --- /dev/null +++ b/base/settings/R/get.site.info.R @@ -0,0 +1,205 @@ +#' Extract and validate site information from settings or CSV file +#' +#' @param settings PEcAn settings list containing site information (optional) +#' @param csv_path Path to a CSV file containing site information (optional) +#' @param strict_checking Logical. If TRUE, will validate coordinates more strictly +#' +#' @return A data frame with site_id, site_name, lat, lon, and str_id +#' @export get.site.info +#' +#' @details This function extracts and validates site information from either a PEcAn settings +#' object or a CSV file. At least one input must be provided. If both are provided, +#' the settings object takes precedence. +#' +#' If using a CSV file, it must contain at minimum the columns: site_id, lat, and lon. +#' The column site_name is optional and will default to site_id if not provided. 
+#' +#' @examples +#' \dontrun{ +#' # From settings object +#' settings <- PEcAn.settings::read.settings("pecan.xml") +#' site_info <- PEcAn.settings::get.site.info(settings) +#' +#' # From CSV file +#' site_info <- PEcAn.settings::get.site.info(csv_path = "sites.csv") +#' } +get.site.info <- function(settings = NULL, csv_path = NULL, strict_checking = TRUE) { + + # Check if at least one input is provided + if (is.null(settings) && is.null(csv_path)) { + PEcAn.logger::logger.severe("No site information provided. Please provide either settings or csv_path.") + } + + # Process settings object (highest precedence when both are provided) + if (!is.null(settings)) { + PEcAn.logger::logger.debug("Extracting site information from settings object") + + # Check if this is a MultiSettings object + if (inherits(settings, "MultiSettings")) { + PEcAn.logger::logger.info("Detected MultiSettings object") + + # Process sites from MultiSettings + site_list <- lapply(settings, function(s) { + if (is.null(s$run) || is.null(s$run$site)) { + PEcAn.logger::logger.severe("Site information missing from one of the settings in MultiSettings") + } + return(s$run$site) + }) + } else { + # Process single settings object + if (is.null(settings$run) || is.null(settings$run$site)) { + PEcAn.logger::logger.severe("Site information missing from settings (settings$run$site)") + } + + # Check if we have vectorized site information + site_fields <- c("id", "name", "lat", "lon") + field_lengths <- sapply(site_fields, function(f) { + if (is.null(settings$run$site[[f]])) 0 else length(settings$run$site[[f]]) + }) + + max_length <- max(field_lengths) + is_vectorized <- max_length > 1 + + if (is_vectorized) { + PEcAn.logger::logger.info("Detected vectorized site information in settings") + + # Create a list of site information from vectorized input + site_list <- list() + for (i in 1:max_length) { + site <- list() + for (field in site_fields) { + if (!is.null(settings$run$site[[field]]) && i <= length(settings$run$site[[field]])) { + site[[field]] <- settings$run$site[[field]][i] + } + } + site_list[[i]] <- site + } + } else { + # Just a single non-vectorized site + site_list <- list(settings$run$site) + } + } + } else { + # Process CSV file input + PEcAn.logger::logger.debug("Reading site information from CSV file:", csv_path) + + # Check if file exists + if (!file.exists(csv_path)) { + PEcAn.logger::logger.severe("CSV file not found:", csv_path) + } + + # Read CSV file + csv_data <- utils::read.csv(csv_path, stringsAsFactors = FALSE) + + # Check for required columns + required_cols <- c("site_id", "lat", "lon") + missing_cols <- setdiff(required_cols, colnames(csv_data)) + if (length(missing_cols) > 0) { + PEcAn.logger::logger.severe("Missing required columns in CSV file: ", + paste(missing_cols, collapse = ", ")) + } + + # Add site_name if missing (use site_id as default) + if (!"site_name" %in% colnames(csv_data)) { + csv_data$site_name <- as.character(csv_data$site_id) + PEcAn.logger::logger.debug("Added site_name column using site_id values") + } + + # Convert CSV data to the site_list format for consistent processing + site_list <- lapply(1:nrow(csv_data), function(i) { + row <- csv_data[i, ] + list( + id = row$site_id, + name = row$site_name, + lat = row$lat, + lon = row$lon + ) + }) + } + + # Process each site from the site_list + result <- lapply(seq_along(site_list), function(i) { + site <- site_list[[i]] + + # Check for required site ID + if (is.null(site$id)) { + PEcAn.logger::logger.severe(sprintf("Site ID is 
required but missing for site %d", i)) + } + + # Extract and validate site ID + site_id <- as.numeric(site$id) + if (is.na(site_id)) { + PEcAn.logger::logger.severe(sprintf("Site ID must be numeric for site %d", i)) + } + + # Check if the site name exists, use ID as name if missing + site_name <- ifelse(!is.null(site$name), site$name, as.character(site_id)) + + # Check for required coordinates + if (is.null(site$lat) || is.null(site$lon)) { + PEcAn.logger::logger.severe(sprintf("Site coordinates are required but missing for site %d", i)) + } + + # Extract and validate coordinates + lat <- as.numeric(site$lat) + lon <- as.numeric(site$lon) + + if (is.na(lat) || is.na(lon)) { + PEcAn.logger::logger.severe(sprintf("Site coordinates must be numeric for site %d", i)) + } + + # site ID for display and file naming + str_id <- as.character(site$id) + + # Return a standardized site info list + return(list( + site_id = site_id, + site_name = site_name, + lat = lat, + lon = lon, + str_id = str_id + )) + }) + + # Create the data frame using vapply to maintain types + site_df <- data.frame( + site_id = vapply(result, function(x) x$site_id, numeric(1)), + site_name = vapply(result, function(x) x$site_name, character(1)), + lat = vapply(result, function(x) x$lat, numeric(1)), + lon = vapply(result, function(x) x$lon, numeric(1)), + str_id = vapply(result, function(x) x$str_id, character(1)), + stringsAsFactors = FALSE + ) + + # Validate coordinates based on strictness settings + if (strict_checking) { + # Check for valid latitude range + invalid_lats <- site_df$lat < -90 | site_df$lat > 90 + if (any(invalid_lats)) { + invalid_sites <- paste(site_df$site_id[invalid_lats], collapse = ", ") + PEcAn.logger::logger.severe(sprintf("Invalid latitude values (outside -90 to 90) found for sites: %s", invalid_sites)) + } + + # Check for valid longitude range + invalid_lons <- site_df$lon < -180 | site_df$lon > 180 + if (any(invalid_lons)) { + invalid_sites <- paste(site_df$site_id[invalid_lons], collapse = ", ") + PEcAn.logger::logger.severe(sprintf("Invalid longitude values (outside -180 to 180) found for sites: %s", invalid_sites)) + } + } else { + # Just warn if coordinates are suspicious + suspicious_lats <- site_df$lat < -90 | site_df$lat > 90 + if (any(suspicious_lats)) { + suspicious_sites <- paste(site_df$site_id[suspicious_lats], collapse = ", ") + PEcAn.logger::logger.warn(sprintf("Suspicious latitude values (outside -90 to 90) found for sites: %s", suspicious_sites)) + } + + suspicious_lons <- site_df$lon < -180 | site_df$lon > 180 + if (any(suspicious_lons)) { + suspicious_sites <- paste(site_df$site_id[suspicious_lons], collapse = ", ") + PEcAn.logger::logger.warn(sprintf("Suspicious longitude values (outside -180 to 180) found for sites: %s", suspicious_sites)) + } + } + + return(site_df) +} \ No newline at end of file diff --git a/base/settings/man/get.site.info.Rd b/base/settings/man/get.site.info.Rd new file mode 100644 index 00000000000..fa8bc90ca22 --- /dev/null +++ b/base/settings/man/get.site.info.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.site.info.R +\name{get.site.info} +\alias{get.site.info} +\title{Extract and validate site information from settings or CSV file} +\usage{ +get.site.info(settings = NULL, csv_path = NULL, strict_checking = TRUE) +} +\arguments{ +\item{settings}{PEcAn settings list containing site information (optional)} + +\item{csv_path}{Path to a CSV file containing site information (optional)} + 
+\item{strict_checking}{Logical. If TRUE, will validate coordinates more strictly} +} +\value{ +A data frame with site_id, site_name, lat, lon, and str_id +} +\description{ +Extract and validate site information from settings or CSV file +} +\details{ +This function extracts and validates site information from either a PEcAn settings +object or a CSV file. At least one input must be provided. If both are provided, +the settings object takes precedence. + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{ If using a CSV file, it must contain at minimum the columns: site_id, lat, and lon. + The column site_name is optional and will default to site_id if not provided. }\if{html}{\out{</div>
}} +} +\examples{ +\dontrun{ +# From settings object +settings <- PEcAn.settings::read.settings("pecan.xml") +site_info <- PEcAn.settings::get.site.info(settings) + +# From CSV file +site_info <- PEcAn.settings::get.site.info(csv_path = "sites.csv") +} +} diff --git a/base/settings/tests/testthat/test.get.site.info.R b/base/settings/tests/testthat/test.get.site.info.R new file mode 100644 index 00000000000..d2ec7e4b821 --- /dev/null +++ b/base/settings/tests/testthat/test.get.site.info.R @@ -0,0 +1,182 @@ +context("get.site.info") + +test_that("get.site.info works with settings object", { + # Create a simple settings object + settings <- list( + run = list( + site = list( + id = 1000000001, + name = "Test Site", + lat = 45.0, + lon = -90.0 + ) + ) + ) + + # Call get.site.info + site_info <- get.site.info(settings) + + # Check the result + expect_is(site_info, "data.frame") + expect_equal(nrow(site_info), 1) + expect_equal(site_info$site_id, 1000000001) + expect_equal(site_info$site_name, "Test Site") + expect_equal(site_info$lat, 45.0) + expect_equal(site_info$lon, -90.0) + expect_equal(site_info$str_id, as.character(settings$run$site$id)) +}) + +test_that("get.site.info works with CSV file", { + # Create a temporary CSV file + csv_file <- tempfile(fileext = ".csv") + csv_data <- data.frame( + site_id = c(1000000002, 1000000003), + site_name = c("Site 1", "Site 2"), + lat = c(40.0, 50.0), + lon = c(-80.0, -100.0) + ) + write.csv(csv_data, csv_file, row.names = FALSE) + + # Call get.site.info + site_info <- get.site.info(csv_path = csv_file) + + # Check the result + expect_is(site_info, "data.frame") + expect_equal(nrow(site_info), 2) + expect_equal(site_info$site_id, c(1000000002, 1000000003)) + expect_equal(site_info$site_name, c("Site 1", "Site 2")) + expect_equal(site_info$lat, c(40.0, 50.0)) + expect_equal(site_info$lon, c(-80.0, -100.0)) + expect_equal(site_info$str_id, as.character(csv_data$site_id)) + + # Clean up + unlink(csv_file) +}) + +test_that("get.site.info works with MultiSettings object", { + # Create a MultiSettings object + settings1 <- list( + run = list( + site = list( + id = 1000000004, + name = "Multi Site 1", + lat = 35.0, + lon = -85.0 + ) + ) + ) + + settings2 <- list( + run = list( + site = list( + id = 1000000005, + name = "Multi Site 2", + lat = 55.0, + lon = -95.0 + ) + ) + ) + + multi_settings <- structure( + list(settings1, settings2), + class = "MultiSettings" + ) + + # Call get.site.info + site_info <- get.site.info(multi_settings) + + # Check the result + expect_is(site_info, "data.frame") + expect_equal(nrow(site_info), 2) + expect_equal(site_info$site_id, c(1000000004, 1000000005)) + expect_equal(site_info$site_name, c("Multi Site 1", "Multi Site 2")) + expect_equal(site_info$lat, c(35.0, 55.0)) + expect_equal(site_info$lon, c(-85.0, -95.0)) + expect_equal(site_info$str_id, as.character(c(1000000004, 1000000005))) +}) + +test_that("get.site.info works with vectorized site information", { + # Create a settings object with vectorized site information + settings <- list( + run = list( + site = list( + id = c(1000000006, 1000000007), + name = c("Vector Site 1", "Vector Site 2"), + lat = c(30.0, 60.0), + lon = c(-75.0, -105.0) + ) + ) + ) + + # Call get.site.info + site_info <- get.site.info(settings) + + # Check the result + expect_is(site_info, "data.frame") + expect_equal(nrow(site_info), 2) + expect_equal(site_info$site_id, c(1000000006, 1000000007)) + expect_equal(site_info$site_name, c("Vector Site 1", "Vector Site 2")) + expect_equal(site_info$lat, 
c(30.0, 60.0)) + expect_equal(site_info$lon, c(-75.0, -105.0)) + expect_equal(site_info$str_id, as.character(c(1000000006, 1000000007))) +}) + +test_that("get.site.info validates coordinates with strict_checking", { + # Create a settings object with invalid coordinates + settings <- list( + run = list( + site = list( + id = 1000000008, + name = "Invalid Site", + lat = 100.0, # Invalid latitude + lon = -180.0 + ) + ) + ) + + # Call get.site.info with strict_checking = TRUE + expect_error(get.site.info(settings, strict_checking = TRUE), + "Invalid latitude values") + + # Call get.site.info with strict_checking = FALSE + site_info <- get.site.info(settings, strict_checking = FALSE) + + # Check the result + expect_is(site_info, "data.frame") + expect_equal(nrow(site_info), 1) + expect_equal(site_info$site_id, 1000000008) + expect_equal(site_info$site_name, "Invalid Site") + expect_equal(site_info$lat, 100.0) + expect_equal(site_info$lon, -180.0) + expect_equal(site_info$str_id, as.character(settings$run$site$id)) +}) + +test_that("str_id is correctly generated as a character string", { + settings <- list( + run = list( + site = list( + id = 1000000001, + name = "Test Site", + lat = 45.0, + lon = -90.0 + ) + ) + ) + site_info <- get.site.info(settings) + expect_type(site_info$str_id, "character") + expect_equal(site_info$str_id, as.character(settings$run$site$id)) + + # Test with CSV input + csv_file <- tempfile(fileext = ".csv") + csv_data <- data.frame( + site_id = c(1000000002, 1000000003), + site_name = c("Site 1", "Site 2"), + lat = c(40.0, 50.0), + lon = c(-80.0, -100.0) + ) + write.csv(csv_data, csv_file, row.names = FALSE) + site_info_csv <- get.site.info(csv_path = csv_file) + expect_type(site_info_csv$str_id, "character") + expect_equal(site_info_csv$str_id, as.character(csv_data$site_id)) + unlink(csv_file) +}) \ No newline at end of file diff --git a/modules/data.land/NAMESPACE b/modules/data.land/NAMESPACE index 3c28da884b7..de495841c93 100644 --- a/modules/data.land/NAMESPACE +++ b/modules/data.land/NAMESPACE @@ -27,6 +27,7 @@ export(format_identifier) export(from.Tag) export(from.TreeCode) export(gSSURGO.Query) +export(generate_soilgrids_ensemble) export(get.attributes) export(get.soil) export(get_resource_map) @@ -45,6 +46,7 @@ export(plot2AGB) export(pool_ic_list2netcdf) export(pool_ic_netcdf2list) export(prepare_pools) +export(preprocess_soilgrids_data) export(put_veg_module) export(sample_ic) export(sclass) @@ -53,6 +55,7 @@ export(soil.units) export(soil2netcdf) export(soil_params) export(soil_process) +export(soilgrids_ic_process) export(soilgrids_soilC_extract) export(subset_layer) export(to.Tag) diff --git a/modules/data.land/NEWS.md b/modules/data.land/NEWS.md index f77c74281e7..b135f192c65 100644 --- a/modules/data.land/NEWS.md +++ b/modules/data.land/NEWS.md @@ -9,7 +9,11 @@ ## Added * New function `soilgrids_soilC_extract` retrieves soil C estimates with uncertainty from the ISRIC SoilGrids 250m data. (#3040, @Qianyuxuan) - +* New utility script `IC_SOILGRID_Utilities.R` for processing SoilGrids data to generate soil carbon initial condition (IC) files. This includes: + - **`soilgrids_ic_process`**: A function to extract, process, and generate ensemble members from SoilGrids250m data, supporting input from PEcAn settings and optional CSV files. + - **`preprocess_soilgrids_data`**: A helper function to handle missing values and ensure data integrity during preprocessing. 
+ - **`generate_soilgrids_ensemble`**: A function to create ensemble members for a site based on processed soil carbon data. + ## Fixed * `gSSURGO.Query()` now always returns all the columns requested, even ones that are all NA. It also now always requires `mukeys` to be specified. diff --git a/modules/data.land/R/IC_SOILGRID_Utilities.R b/modules/data.land/R/IC_SOILGRID_Utilities.R new file mode 100644 index 00000000000..feab19b9cc9 --- /dev/null +++ b/modules/data.land/R/IC_SOILGRID_Utilities.R @@ -0,0 +1,373 @@ +#' SoilGrids Initial Conditions (IC) Utilities +#' +#' @author Akash +#' @description Functions for generating soil carbon IC files from SoilGrids250m data +#' @details This module provides functions for extracting, processing, and generating +#' ensemble members for soil carbon initial conditions using SoilGrids data. +#' All soil carbon values are in kg/m². + +# Required package +library(truncnorm) + +#' Process SoilGrids data for initial conditions +#' +#' @param settings PEcAn settings list containing site information. Should include: +#' \itemize{ +#' \item settings$run$site - Site information with id, lat, lon +#' \item settings$ensemble$size - (Optional) Number of ensemble members to create +#' \item settings$soil$default_soilC - (Optional) Default soil carbon value in kg/m² +#' \item settings$soil$default_uncertainty - (Optional) Default uncertainty as fraction +#' } +#' @param csv_path Path to a CSV file containing site information (optional) +#' @param dir Output directory for IC files +#' @param overwrite Overwrite existing files? (Default: FALSE) +#' @param verbose Print detailed progress information to the terminal? TRUE/FALSE +#' +#' @return List of paths to generated IC files +#' @export +#' +#' @details This function processes SoilGrids data to create carbon initial condition +#' files. It extracts soil carbon data for all sites, handles missing values, +#' generates ensemble members, and writes NetCDF files. 
+#' +#' @examples +#' \dontrun{ +#' # From settings object +#' settings <- PEcAn.settings::read.settings("pecan.xml") +#' ic_files <- soilgrids_ic_process(settings, dir = "output/IC/") +#' +#' # From CSV file +#' ic_files <- soilgrids_ic_process(csv_path = "sites.csv", dir = "output/IC/") +#' } +soilgrids_ic_process <- function(settings, csv_path=NULL, dir, overwrite = FALSE, verbose = FALSE) { + # Start timing + start_time <- proc.time() + + # Extract site information using PEcAn.settings::get.site.info + site_info <- PEcAn.settings::get.site.info(settings = settings, csv_path = csv_path) + + # Get optional parameters from settings if available + ensemble_size <- ifelse(is.null(settings$ensemble$size), 1, settings$ensemble$size) + default_soilC <- ifelse(is.null(settings$soil$default_soilC), 5.0, settings$soil$default_soilC) + default_uncertainty <- ifelse(is.null(settings$soil$default_uncertainty), 0.2, settings$soil$default_uncertainty) + + # Create output directory if it doesn't exist + if (!dir.exists(dir)) { + PEcAn.logger::logger.info(sprintf("Creating output directory: %s", dir)) + dir.create(dir, recursive = TRUE) + } + + # Create a data folder for intermediate outputs + data_dir <- file.path(dir, "SoilGrids_data") + if (!dir.exists(data_dir)) { + dir.create(data_dir, recursive = TRUE) + } + + # Log the number of sites being processed + n_sites <- nrow(site_info) + PEcAn.logger::logger.info(sprintf("Processing %d site(s)", n_sites)) + + if (verbose) { + for (i in 1:nrow(site_info)) { + PEcAn.logger::logger.info(sprintf("Site %d: %s (lat=%f, lon=%f)", + i, site_info$site_name[i], + site_info$lat[i], site_info$lon[i])) + } + } + + # Check for cached data + soilc_csv_path <- file.path(data_dir, "soilgrids_soilC_data.csv") + if (file.exists(soilc_csv_path) && !overwrite) { + PEcAn.logger::logger.info("Using existing SoilGrids data:", soilc_csv_path) + soil_data <- utils::read.csv(soilc_csv_path, check.names = FALSE) + } else { + # Extract data for all sites at once + PEcAn.logger::logger.info("Extracting SoilGrids data for", nrow(site_info), "sites") + soil_data <- PEcAn.data.land::soilgrids_soilC_extract( + site_info = site_info, + outdir = data_dir, + verbose = verbose + ) + + # Save the extracted data for future use + utils::write.csv(soil_data, soilc_csv_path, row.names = FALSE) + } + + # Validate soil carbon data units through range check + if (any(soil_data$`Total_soilC_0-30cm` > 150, na.rm = TRUE)) { + PEcAn.logger::logger.warn("Some soil carbon values exceed 150 kg/m², values may be in wrong units") + } + + # Preprocess data + PEcAn.logger::logger.info("Preprocessing soil carbon data") + processed_data <- preprocess_soilgrids_data( + soil_data = soil_data, + default_soilC = default_soilC, + default_uncertainty = default_uncertainty, + verbose = verbose + ) + + # Create a list to hold the ensemble files for each site + all_ensemble_files <- list() + + # Process each site + for (s in 1:nrow(site_info)) { + current_site <- site_info[s, ] + + # Create output directory for this site + site_outfolder <- file.path(dir, paste0("SoilGrids_site_", current_site$str_id)) + if (!dir.exists(site_outfolder)) { + dir.create(site_outfolder, recursive = TRUE) + } + + # Check for existing files + existing_files <- list.files(site_outfolder, "*.nc$", full.names = TRUE) + if (length(existing_files) > 0 && !overwrite) { + PEcAn.logger::logger.info(sprintf("Using existing SoilGrids IC files for site %s", current_site$site_name)) + all_ensemble_files[[current_site$str_id]] <- existing_files + next + 
} + + if (verbose) { + PEcAn.logger::logger.info(sprintf("Generating ensemble members for site %s", current_site$site_name)) + } + + # Generate ensemble members for this site + ensemble_data <- generate_soilgrids_ensemble( + processed_data = processed_data, + site_id = current_site$site_id, + lat = current_site$lat, + lon = current_site$lon, + ensemble_size = ensemble_size, + verbose = verbose + ) + + # Write ensemble members to NetCDF files + site_ensemble_files <- list() + + for (ens in seq_len(ensemble_size)) { + # Write to NetCDF + result <- PEcAn.data.land::pool_ic_list2netcdf( + input = ensemble_data[[ens]], + outdir = site_outfolder, + siteid = current_site$site_id, + ens = ens + ) + + site_ensemble_files[[ens]] <- result$file + + if (verbose) { + PEcAn.logger::logger.info(sprintf("Generated IC file: %s for site %s", + basename(result$file), + current_site$site_name)) + } + } + + # Add this site's files to the overall list + all_ensemble_files[[current_site$str_id]] <- site_ensemble_files + } + + # Log performance metrics + end_time <- proc.time() + elapsed_time <- end_time - start_time + PEcAn.logger::logger.info(sprintf("IC generation completed for %d site(s) in %.2f seconds", + n_sites, elapsed_time[3])) + + return(all_ensemble_files) +} + +#' Preprocess SoilGrids data +#' +#' @param soil_data Raw soil carbon data from soilgrids_soilC_extract +#' @param default_soilC Default soil carbon value in kg/m² to use when data is missing +#' @param default_uncertainty Default uncertainty as fraction to use when data is missing +#' @param verbose Print detailed progress information to the terminal? TRUE/FALSE +#' +#' @return Processed soil carbon data +#' @export +preprocess_soilgrids_data <- function(soil_data, default_soilC = 5.0, + default_uncertainty = 0.2, verbose = FALSE) { + if (verbose) { + PEcAn.logger::logger.info("Preprocessing soil carbon data") + } + + # Create a copy to avoid modifying the original + processed <- soil_data + + # Handle missing values in Total_soilC_0-30cm + na_count <- sum(is.na(processed$`Total_soilC_0-30cm`)) + if (na_count > 0) { + PEcAn.logger::logger.warn(sprintf("Found %d missing values in soil carbon data", na_count)) + + # Sites with missing 0-30cm but available 0-200cm data + has_200cm_data <- is.na(processed$`Total_soilC_0-30cm`) & !is.na(processed$`Total_soilC_0-200cm`) + if (any(has_200cm_data)) { + processed$`Total_soilC_0-30cm`[has_200cm_data] <- processed$`Total_soilC_0-200cm`[has_200cm_data] * 0.15 + PEcAn.logger::logger.warn(sprintf( + "Using scaled 0-200cm soil carbon values for %d site(s)", sum(has_200cm_data) + )) + + if (verbose) { + for (i in which(has_200cm_data)) { + PEcAn.logger::logger.debug(sprintf( + "Using scaled 0-200cm soil carbon value (%.2f) for site %s", + processed$`Total_soilC_0-30cm`[i], processed$Site_ID[i] + )) + } + } + } + + # Sites still with missing data - use default value + still_missing <- is.na(processed$`Total_soilC_0-30cm`) + if (any(still_missing)) { + processed$`Total_soilC_0-30cm`[still_missing] <- default_soilC + PEcAn.logger::logger.warn(sprintf( + "Using default soil carbon value (%.2f kg/m²) for %d site(s)", + default_soilC, sum(still_missing) + )) + + if (verbose) { + for (i in which(still_missing)) { + PEcAn.logger::logger.debug(sprintf( + "Using default soil carbon value (%.2f kg/m²) for site %s", + default_soilC, processed$Site_ID[i] + )) + } + } + } + } + + # Handle missing values in Std_soilC_0-30cm + na_count <- sum(is.na(processed$`Std_soilC_0-30cm`)) + if (na_count > 0) { + 
PEcAn.logger::logger.warn(sprintf("Found %d missing values in soil carbon uncertainty", na_count)) + + # Sites with missing 0-30cm but available 0-200cm uncertainty data + has_200cm_data <- is.na(processed$`Std_soilC_0-30cm`) & !is.na(processed$`Std_soilC_0-200cm`) + if (any(has_200cm_data)) { + processed$`Std_soilC_0-30cm`[has_200cm_data] <- processed$`Std_soilC_0-200cm`[has_200cm_data] * 0.15 + PEcAn.logger::logger.warn(sprintf( + "Using scaled 0-200cm soil carbon uncertainty for %d site(s)", sum(has_200cm_data) + )) + + if (verbose) { + for (i in which(has_200cm_data)) { + PEcAn.logger::logger.debug(sprintf( + "Using scaled 0-200cm soil carbon uncertainty (%.2f) for site %s", + processed$`Std_soilC_0-30cm`[i], processed$Site_ID[i] + )) + } + } + } + + # Sites still with missing uncertainty - use default percentage of mean + still_missing <- is.na(processed$`Std_soilC_0-30cm`) + if (any(still_missing)) { + processed$`Std_soilC_0-30cm`[still_missing] <- + processed$`Total_soilC_0-30cm`[still_missing] * default_uncertainty + PEcAn.logger::logger.warn(sprintf( + "Using default uncertainty (%.1f%% of mean) for %d site(s)", + default_uncertainty * 100, sum(still_missing) + )) + + if (verbose) { + for (i in which(still_missing)) { + PEcAn.logger::logger.debug(sprintf( + "Using default uncertainty (%.1f%% of mean) for site %s", + default_uncertainty * 100, processed$Site_ID[i] + )) + } + } + } + } + + # Ensure standard deviation is non-negative + neg_sd_count <- sum(processed$`Std_soilC_0-30cm` < 0, na.rm = TRUE) + if (neg_sd_count > 0) { + PEcAn.logger::logger.warn(sprintf("Found %d negative standard deviations", neg_sd_count)) + processed$`Std_soilC_0-30cm` <- pmax(processed$`Std_soilC_0-30cm`, 0, na.rm = TRUE) + } + + # Ensure mean is non-negative + neg_mean_count <- sum(processed$`Total_soilC_0-30cm` < 0, na.rm = TRUE) + if (neg_mean_count > 0) { + PEcAn.logger::logger.warn(sprintf("Found %d negative mean values", neg_mean_count)) + processed$`Total_soilC_0-30cm` <- pmax(processed$`Total_soilC_0-30cm`, 0, na.rm = TRUE) + } + + # Add minimum standard deviation to avoid zero uncertainty + min_sd <- 0.1 * processed$`Total_soilC_0-30cm` # 10% of mean as minimum SD + is_zero_sd <- processed$`Std_soilC_0-30cm` == 0 | is.na(processed$`Std_soilC_0-30cm`) + zero_sd_count <- sum(is_zero_sd) + + if (zero_sd_count > 0) { + PEcAn.logger::logger.info(sprintf("Setting minimum uncertainty for %d zero/NA standard deviations", + zero_sd_count)) + processed$`Std_soilC_0-30cm` <- pmax(processed$`Std_soilC_0-30cm`, min_sd, na.rm = TRUE) + } + + return(processed) +} + +#' Generate ensemble members for a site +#' +#' @param processed_data Processed soil carbon data +#' @param site_id Site ID +#' @param lat Site latitude +#' @param lon Site longitude +#' @param ensemble_size Number of ensemble members to create +#' @param verbose Print detailed progress information to the terminal? 
TRUE/FALSE +#' +#' @return List of ensemble data for the site +#' @export +generate_soilgrids_ensemble <- function(processed_data, site_id, lat, lon, ensemble_size, verbose = FALSE) { + if (verbose) { + PEcAn.logger::logger.info(sprintf("Generating %d ensemble members for site %s", ensemble_size, site_id)) + } + + # Get site row from processed data + site_row <- which(processed_data$Site_ID == site_id) + if (length(site_row) == 0) { + PEcAn.logger::logger.severe(sprintf("Site %s not found in processed data", site_id)) + } + + # Set random seed for reproducibility + set.seed(as.numeric(site_id)) + + # Generate all ensemble members at once + soil_c_values <- truncnorm::rtruncnorm( + n = ensemble_size, + a = 0, # Lower bound (no negative values) + b = Inf, # Upper bound + mean = processed_data$`Total_soilC_0-30cm`[site_row], + sd = processed_data$`Std_soilC_0-30cm`[site_row] + ) + + if (verbose) { + PEcAn.logger::logger.debug(sprintf( + "Generated %d soil carbon values for site %s (mean: %.2f, sd: %.2f)", + ensemble_size, + site_id, + processed_data$`Total_soilC_0-30cm`[site_row], + processed_data$`Std_soilC_0-30cm`[site_row] + )) + } + + # Create input lists for pool_ic_list2netcdf + ensemble_data <- lapply(seq_len(ensemble_size), function(ens) { + list( + dims = list( + lat = lat, + lon = lon, + time = 1 + ), + vals = list( + soil_organic_carbon_content = soil_c_values[ens], + wood_carbon_content = 0, # Not provided by SoilGrids + litter_carbon_content = 0 # Not provided by SoilGrids + ) + ) + }) + + return(ensemble_data) +} diff --git a/modules/data.land/man/generate_soilgrids_ensemble.Rd b/modules/data.land/man/generate_soilgrids_ensemble.Rd new file mode 100644 index 00000000000..7604bfc8b71 --- /dev/null +++ b/modules/data.land/man/generate_soilgrids_ensemble.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IC_SOILGRID_Utilities.R +\name{generate_soilgrids_ensemble} +\alias{generate_soilgrids_ensemble} +\title{Generate ensemble members for a site} +\usage{ +generate_soilgrids_ensemble( + processed_data, + site_id, + lat, + lon, + ensemble_size, + verbose = FALSE +) +} +\arguments{ +\item{processed_data}{Processed soil carbon data} + +\item{site_id}{Site ID} + +\item{lat}{Site latitude} + +\item{lon}{Site longitude} + +\item{ensemble_size}{Number of ensemble members to create} + +\item{verbose}{Print detailed progress information to the terminal? TRUE/FALSE} +} +\value{ +List of ensemble data for the site +} +\description{ +Generate ensemble members for a site +} diff --git a/modules/data.land/man/preprocess_soilgrids_data.Rd b/modules/data.land/man/preprocess_soilgrids_data.Rd new file mode 100644 index 00000000000..b1e3cbd524e --- /dev/null +++ b/modules/data.land/man/preprocess_soilgrids_data.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IC_SOILGRID_Utilities.R +\name{preprocess_soilgrids_data} +\alias{preprocess_soilgrids_data} +\title{Preprocess SoilGrids data} +\usage{ +preprocess_soilgrids_data( + soil_data, + default_soilC = 5, + default_uncertainty = 0.2, + verbose = FALSE +) +} +\arguments{ +\item{soil_data}{Raw soil carbon data from soilgrids_soilC_extract} + +\item{default_soilC}{Default soil carbon value in kg/m² to use when data is missing} + +\item{default_uncertainty}{Default uncertainty as fraction to use when data is missing} + +\item{verbose}{Print detailed progress information to the terminal? 
TRUE/FALSE} +} +\value{ +Processed soil carbon data +} +\description{ +Preprocess SoilGrids data +} diff --git a/modules/data.land/man/soilgrids_ic_process.Rd b/modules/data.land/man/soilgrids_ic_process.Rd new file mode 100644 index 00000000000..5a8cd74ff03 --- /dev/null +++ b/modules/data.land/man/soilgrids_ic_process.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IC_SOILGRID_Utilities.R +\name{soilgrids_ic_process} +\alias{soilgrids_ic_process} +\title{Process SoilGrids data for initial conditions} +\usage{ +soilgrids_ic_process( + settings, + csv_path = NULL, + dir, + overwrite = FALSE, + verbose = FALSE +) +} +\arguments{ +\item{settings}{PEcAn settings list containing site information. Should include: +\itemize{ + \item settings$run$site - Site information with id, lat, lon + \item settings$ensemble$size - (Optional) Number of ensemble members to create + \item settings$soil$default_soilC - (Optional) Default soil carbon value in kg/m² + \item settings$soil$default_uncertainty - (Optional) Default uncertainty as fraction +}} + +\item{csv_path}{Path to a CSV file containing site information (optional)} + +\item{dir}{Output directory for IC files} + +\item{overwrite}{Overwrite existing files? (Default: FALSE)} + +\item{verbose}{Print detailed progress information to the terminal? TRUE/FALSE} +} +\value{ +List of paths to generated IC files +} +\description{ +Process SoilGrids data for initial conditions +} +\details{ +This function processes SoilGrids data to create carbon initial condition + files. It extracts soil carbon data for all sites, handles missing values, + generates ensemble members, and writes NetCDF files. +} +\examples{ +\dontrun{ +# From settings object +settings <- PEcAn.settings::read.settings("pecan.xml") +ic_files <- soilgrids_ic_process(settings, dir = "output/IC/") + +# From CSV file +ic_files <- soilgrids_ic_process(csv_path = "sites.csv", dir = "output/IC/") +} +} From 7f2f1e8601daf6e6ecdb3dc85be5aa403fa01f37 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Mon, 14 Apr 2025 00:38:56 +0000 Subject: [PATCH 0074/1193] fix: "/pkgdoc/package_documentation/pkgdocs": not found --- docker/docs/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/docs/Dockerfile b/docker/docs/Dockerfile index 9d764e540b3..ee9974ce9c1 100644 --- a/docker/docs/Dockerfile +++ b/docker/docs/Dockerfile @@ -37,6 +37,7 @@ COPY scripts/build_pkgdown.R /pkgdoc/scripts/build_pkgdown.R COPY base /pkgdoc/base/ COPY modules /pkgdoc/modules/ COPY models /pkgdoc/models/ +COPY package_documentation /pkgdoc/package_documentation RUN make clean && make pkgdocs # ---------------------------------------------------------------------- From 1872d7f54745b606fc04db09e43251345320e1d8 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Mon, 14 Apr 2025 07:45:08 +0000 Subject: [PATCH 0075/1193] Fix rsync path in gh action to correctly deploy pkgdocs from main repo to package-documentation --- .github/workflows/pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml index 1a73549f402..9a4da14b6d0 100644 --- a/.github/workflows/pkgdown.yml +++ b/.github/workflows/pkgdown.yml @@ -63,7 +63,7 @@ jobs: fi cd package-documentation mkdir -p $VERSION - rsync -a --delete pkgdocs/ ${VERSION}/ + rsync -a --delete ../package_documentation/pkgdocs/ ${VERSION}/ git add --all * git commit -m "Build pkgdown docs from pecan revision ${GITHUB_SHA}" || true git push -q origin main From 
c185578af4c45afb2f682e669cf3a785d564af76 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 15 Apr 2025 14:09:33 -0400 Subject: [PATCH 0076/1193] Change the file path. --- .../inst/anchor/NA_downscale_script.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/assim.sequential/inst/anchor/NA_downscale_script.R b/modules/assim.sequential/inst/anchor/NA_downscale_script.R index 05d30bdbc80..746ed0886a4 100644 --- a/modules/assim.sequential/inst/anchor/NA_downscale_script.R +++ b/modules/assim.sequential/inst/anchor/NA_downscale_script.R @@ -225,11 +225,11 @@ for (y in 2012:2024) { # setup. base.map.dir <- "/projectnb/dietzelab/dongchen/anchorSites/downscale/MODIS_NLCD_LC.tif" -load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/sda.all.forecast.analysis.Rdata") +load("/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/sda.all.forecast.analysis.Rdata") variables <- c("AbvGrndWood", "LAI", "SoilMoistFrac", "TotSoilCarb") -# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/pecanIC.xml" +# settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/pecanIC.xml" settings <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_25ens_2024_11_25/ShapeFile/pts.shp" -outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_4/" +outdir <- "/projectnb/dietzelab/dongchen/anchorSites/NA_runs/SDA_50ens_2025_4_12/" cores <- 28 date <- seq(as.Date("2012-07-15"), as.Date("2024-07-15"), "1 year") # loop over years. @@ -237,13 +237,13 @@ for (i in seq_along(date)) { # Assemble covariates. covariates.dir <- file.path(outdir, "covariates_lc_ts", paste0("covariates_", lubridate::year(date[i]), ".tiff")) # grab analysis. - analysis.yr <- forecast.all[[i]] + analysis.yr <- analysis.all[[i]] time <- date[i] # loop over carbon types. for (j in seq_along(variables)) { # setup folder. variable <- variables[j] - folder.path <- file.path(file.path(outdir, "downscale_maps_forecast_lc_ts"), paste0(variables[j], "_", date[i])) + folder.path <- file.path(file.path(outdir, "downscale_maps_analysis_lc_ts"), paste0(variables[j], "_", date[i])) dir.create(folder.path) saveRDS(list(settings = settings, analysis.yr = analysis.yr, @@ -253,7 +253,7 @@ for (i in seq_along(date)) { folder.path = folder.path, base.map.dir = base.map.dir, cores = cores, - outdir = file.path(outdir, "downscale_maps_forecast_lc_ts")), + outdir = file.path(outdir, "downscale_maps_analysis_lc_ts")), file = file.path(folder.path, "dat.rds")) # prepare for qsub. jobsh <- c("#!/bin/bash -l", From 1706aada4dc738bab8fe799cd65d33428d69b933 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Wed, 16 Apr 2025 19:28:08 +0000 Subject: [PATCH 0077/1193] feat: restructure pkgdown docs, added custom _pkgdown.yml, and enhanced index page --- scripts/build_pkgdown.R | 152 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 144 insertions(+), 8 deletions(-) diff --git a/scripts/build_pkgdown.R b/scripts/build_pkgdown.R index a6c0fee4f82..4e92e1322db 100644 --- a/scripts/build_pkgdown.R +++ b/scripts/build_pkgdown.R @@ -1,15 +1,16 @@ #!/usr/bin/env Rscript - # Build pkgdown documentation for PEcAn packages library(pkgdown) - +library(yaml) args <- commandArgs(trailingOnly = TRUE) if (length(args) == 0) { stop("No package names provided. 
Please pass package names as arguments.") } - packages <- args - +output_dir <- "_pkgdown_docs" +if (!dir.exists(output_dir)) { + dir.create(output_dir, recursive = TRUE) +} if (requireNamespace("PEcAn.logger", quietly = TRUE)) { logger <- PEcAn.logger::logger.info } else { @@ -18,8 +19,63 @@ if (requireNamespace("PEcAn.logger", quietly = TRUE)) { } } -logger("Building pkgdown docs for:", paste(packages, collapse = ", ")) +pkg_config <- function(pkg) { + pkgname <- desc::desc_get("Package", pkg) + + list( + url = "https://pecanproject.github.io/", + home = list( + title = sprintf("%s Functions for PEcAn", pkgname), + ), + template = list( + bootstrap = 5, + bslib = list( + primary = "#0054AD", + `border-radius` = "0.5rem", + `btn-border-radius` = "0.25rem" + ), + `light-switch` = TRUE, + ), + navbar = list( + structure = list( + left = c("pecan_home", "reference", "news"), + right = c("search", "github", "light-switch") + ), + components = list( + pecan_home = list( + text = "PEcAn Home", + href = "../../../index.html", + `aria-label` = "PEcAn Project Home" + ), + reference = list( + text = "Reference", + href = "reference/index.html" + ), + github = list( + icon = "fab fa-github", + href = "https://github.com/PecanProject/pecan", + `aria-label` = "GitHub" + ) + ) + ), + reference = list( + list( + title = "All Functions", + desc = "All functions exported by this package", + contents = list("matches('.*')") + ) + ), + news = list( + text = "News", + href = "news/index.html" + ), + development = list( + mode = "auto" + ) + ) +} +logger("Building pkgdown docs for:", paste(packages, collapse = ", ")) for (pkg in packages) { logger("Building pkgdown site for:", pkg) current_wd <- getwd() @@ -27,6 +83,17 @@ for (pkg in packages) { if (!dir.exists(pkg)) { stop(paste("Package directory does not exist:", pkg)) } + pkg_config_path <- file.path(pkg, "_pkgdown.yml") + pkg_config <- pkg_config(pkg) + # If _pkgdown.yml exists, merge with our config, otherwise create new + if (file.exists(pkg_config_path)) { + exist_config <- yaml::read_yaml(pkg_config_path) + # Merge configurations, preserving existing settings + merged_config <- modifyList(exist_config, pkg_config) + yaml::write_yaml(merged_config, pkg_config_path) + } else { + yaml::write_yaml(pkg_config, pkg_config_path) + } setwd(pkg) pkgdown::build_site() setwd(current_wd) @@ -35,11 +102,17 @@ for (pkg in packages) { warning(paste("No docs folder created for:", pkg)) next } - dest <- file.path("package_documentation/pkgdocs", pkg) - if (!dir.exists(dest)) { + pkgname <- desc::desc_get("Package", pkg) + dest <- file.path(output_dir, strsplit(pkg, "/")[[1]][1], pkgname) + if (!dir.exists(dest)) { dir.create(dest, recursive = TRUE, showWarnings = FALSE) } - file.copy(from = source_docs, to = dest, recursive = TRUE, overwrite = TRUE) + file.copy( + from = list.files(source_docs, full.names = TRUE), + to = dest, + recursive = TRUE, + overwrite = TRUE + ) logger("✅ Successfully copied docs from", pkg, "to", dest) }, error = function(e) { warning(paste("❌ Error building pkgdown site for", pkg, ":", e$message)) @@ -50,4 +123,67 @@ for (pkg in packages) { }) } +logger("Creating index page") + +built_pkg_dirs <- list.dirs(output_dir, recursive=FALSE, full.names = FALSE) +html_header <- c( + '', + '', + '', + ' Package-specific documentation for the PEcAn R packages', + ' ', + ' ', + '', + '', + '

<h1>PEcAn package documentation</h1>',
+  '<p>Function documentation and articles for each PEcAn package,',
+  ' generated from the package source using pkgdown package.</p>',
+  '',
+  '<div>'
+)
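+# Each element of html_header, content, and html_footer becomes one line of
+# the generated index.html once the three vectors are concatenated and passed
+# to writeLines() at the end of this script.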
+content <- character(0)
+for (dir in built_pkg_dirs) {
+  content <- c(content,
+    sprintf('<div class="module">'),
+    sprintf('<h2>%s</h2>', dir)
+  )
+  pkg_dirs <- list.dirs(file.path(output_dir, dir), recursive=FALSE, full.names=FALSE)
+  content <- c(content, '  <ul>')
+  for (pkg in pkg_dirs) {
+    pkg_path <- file.path(dir, pkg, "dev/index.html")
+    content <- c(content,
+      sprintf('    <li><a href="%s">%s</a></li>', pkg_path, pkg)
+    )
+  }
+  content <- c(content,
+    '  </ul>',
+    '</div>'
+  )
+}
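+# A minimal sketch of one entry the inner loop emits, assuming a "modules"
+# directory that contains a built "PEcAn.utils" site:
+#   sprintf('    <li><a href="%s">%s</a></li>',
+#           file.path("modules", "PEcAn.utils", "dev/index.html"),
+#           "PEcAn.utils")
+#   #> "    <li><a href=\"modules/PEcAn.utils/dev/index.html\">PEcAn.utils</a></li>"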
+html_footer <- c(
+  '</div>',
+  '</body>',
+  '</html>'
+)
+writeLines(
+  text = c(html_header, content, html_footer),
+  con = file.path(output_dir, "index.html")
+)
+
 logger("✅ All packages processed.")

From 44b858a58f82f399bb7e6fdbaef7354b663743f4 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 19 Apr 2025 00:27:17 +0000
Subject: [PATCH 0078/1193] Add 'All Packages' button, fix dest path, overwrite
 _pkgdown.yml on each build and update index.html generation accordingly

---
 scripts/build_pkgdown.R | 123 ++++++--------------------------------
 1 file changed, 24 insertions(+), 99 deletions(-)

diff --git a/scripts/build_pkgdown.R b/scripts/build_pkgdown.R
index 4e92e1322db..1bb3f5db9ce 100644
--- a/scripts/build_pkgdown.R
+++ b/scripts/build_pkgdown.R
@@ -19,59 +19,23 @@ if (requireNamespace("PEcAn.logger", quietly = TRUE)) {
   }
 }
 
-pkg_config <- function(pkg) {
-  pkgname <- desc::desc_get("Package", pkg)
+pkg_config <- function() {
   list(
     url = "https://pecanproject.github.io/",
-    home = list(
-      title = sprintf("%s Functions for PEcAn", pkgname),
-    ),
     template = list(
      bootstrap = 5,
-      bslib = list(
-        primary = "#0054AD",
-        `border-radius` = "0.5rem",
-        `btn-border-radius` = "0.25rem"
-      ),
-      `light-switch` = TRUE,
-    ),
-    navbar = list(
-      structure = list(
-        left = c("pecan_home", "reference", "news"),
-        right = c("search", "github", "light-switch")
-      ),
-      components = list(
-        pecan_home = list(
-          text = "PEcAn Home",
-          href = "../../../index.html",
-          `aria-label` = "PEcAn Project Home"
-        ),
-        reference = list(
-          text = "Reference",
-          href = "reference/index.html"
-        ),
-        github = list(
-          icon = "fab fa-github",
-          href = "https://github.com/PecanProject/pecan",
-          `aria-label` = "GitHub"
+      includes = list(
+        before_navbar = paste0(
+          "<style>.all-packages-btn { margin-right: 1rem; }</style>\n",
+          "<a class='all-packages-btn' href='../index.html'>All Packages</a>"
+        )
       )
     )
-    ),
-    reference = list(
-      list(
-        title = "All Functions",
-        desc = "All functions exported by this package",
-        contents = list("matches('.*')")
-      )
-    ),
-    news = list(
-      text = "News",
-      href = "news/index.html"
-    ),
-    development = list(
-      mode = "auto"
-    )
   )
 }
 
@@ -84,16 +48,8 @@ for (pkg in packages) {
     stop(paste("Package directory does not exist:", pkg))
   }
   pkg_config_path <- file.path(pkg, "_pkgdown.yml")
-  pkg_config <- pkg_config(pkg)
-  # If _pkgdown.yml exists, merge with our config, otherwise create new
-  if (file.exists(pkg_config_path)) {
-    exist_config <- yaml::read_yaml(pkg_config_path)
-    # Merge configurations, preserving existing settings
-    merged_config <- modifyList(exist_config, pkg_config)
-    yaml::write_yaml(merged_config, pkg_config_path)
-  } else {
-    yaml::write_yaml(pkg_config, pkg_config_path)
-  }
+  pkg_config_data <- pkg_config()
+  yaml::write_yaml(pkg_config_data, pkg_config_path)
   setwd(pkg)
   pkgdown::build_site()
   setwd(current_wd)
@@ -103,7 +59,7 @@ for (pkg in packages) {
     next
   }
   pkgname <- desc::desc_get("Package", pkg)
-  dest <- file.path(output_dir, strsplit(pkg, "/")[[1]][1], pkgname)
+  dest <- file.path(output_dir, pkgname)
   if (!dir.exists(dest)) {
     dir.create(dest, recursive = TRUE, showWarnings = FALSE)
   }
@@ -126,63 +82,32 @@ for (pkg in packages) {
 logger("Creating index page")
 
 built_pkg_dirs <- list.dirs(output_dir, recursive=FALSE, full.names = FALSE)
-html_header <- c(
+before_text <- c(
   '<!DOCTYPE html>',
   '<html lang="en">',
   '<head>',
   '  <title>Package-specific documentation for the PEcAn R packages</title>',
   '</head>',
   '<body>',
   '<h1>PEcAn package documentation</h1>',
   '<p>Function documentation and articles for each PEcAn package,',
   ' generated from the package source using pkgdown package.</p>',
   '',
-  '<div>'
+  '<ul>'
 )
-content <- character(0)
-for (dir in built_pkg_dirs) {
-  content <- c(content,
-    sprintf('<div class="module">'),
-    sprintf('<h2>%s</h2>', dir)
-  )
-  pkg_dirs <- list.dirs(file.path(output_dir, dir), recursive=FALSE, full.names=FALSE)
-  content <- c(content, '  <ul>')
-  for (pkg in pkg_dirs) {
-    pkg_path <- file.path(dir, pkg, "dev/index.html")
-    content <- c(content,
-      sprintf('    <li><a href="%s">%s</a></li>', pkg_path, pkg)
-    )
-  }
-  content <- c(content,
-    '  </ul>',
-    '</div>'
-  )
-}
-html_footer <- c(
-  '</div>',
-  '</body>',
-  '</html>'
-)
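+# paste0() recycles its scalar pieces across the built_pkg_dirs vector, so a
+# single call yields one <li> entry per package. A small sketch with made-up
+# package names:
+#   paste0('<li>', c("PEcAn.DB", "PEcAn.utils"), '</li>')
+#   #> "<li>PEcAn.DB</li>" "<li>PEcAn.utils</li>"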
+listing_text <- paste0(
+  '  <li><a href="',
+  built_pkg_dirs,
+  '/index.html">',
+  built_pkg_dirs,
+  '</a></li>'
+)
+after_text <- c(
+  '  </ul>',
+  '</body>',
+  '</html>'
+)
 writeLines(
-  text = c(html_header, content, html_footer),
+  text = c(before_text, listing_text, after_text),
   con = file.path(output_dir, "index.html")
 )

From 76146bb2d5a7f84bf0614a5c7dc7adfbdc2fa373 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 19 Apr 2025 00:34:13 +0000
Subject: [PATCH 0079/1193] fix docker and GHA path

---
 .github/workflows/pkgdown.yml | 2 +-
 docker/docs/Dockerfile        | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml
index 9a4da14b6d0..d068b000cba 100644
--- a/.github/workflows/pkgdown.yml
+++ b/.github/workflows/pkgdown.yml
@@ -63,7 +63,7 @@ jobs:
           fi
           cd package-documentation
           mkdir -p $VERSION
-          rsync -a --delete ../package_documentation/pkgdocs/ ${VERSION}/
+          rsync -a --delete ../_pkgdown_docs/ ${VERSION}/
           git add --all *
           git commit -m "Build pkgdown docs from pecan revision ${GITHUB_SHA}" || true
           git push -q origin main
diff --git a/docker/docs/Dockerfile b/docker/docs/Dockerfile
index ee9974ce9c1..d0af39672f2 100644
--- a/docker/docs/Dockerfile
+++ b/docker/docs/Dockerfile
@@ -37,8 +37,7 @@ COPY scripts/build_pkgdown.R /pkgdoc/scripts/build_pkgdown.R
 COPY base /pkgdoc/base/
 COPY modules /pkgdoc/modules/
 COPY models /pkgdoc/models/
-COPY package_documentation /pkgdoc/package_documentation
-RUN make clean && make pkgdocs 
+RUN make clean && make pkgdocs

 # ----------------------------------------------------------------------
 # copy html pages to container
@@ -52,7 +51,7 @@ RUN apt-get update \
 COPY docker/docs/index.html /usr/local/apache2/htdocs/
 COPY --from=pecandocs /src/book_source/_book/ /usr/local/apache2/htdocs/docs/pecan/
-COPY --from=pecandocs /pkgdoc/package_documentation/pkgdocs/ /usr/local/apache2/htdocs/pkgdocs/
+COPY --from=pecandocs /pkgdoc/_pkgdown_docs/ /usr/local/apache2/htdocs/pkgdocs/

 # ----------------------------------------------------------------------
 # PEcAn version information
@@ -66,4 +65,4 @@ ARG PECAN_GIT_DATE="unknown"
 ENV PECAN_VERSION=${PECAN_VERSION} \
     PECAN_GIT_BRANCH=${PECAN_GIT_BRANCH} \
     PECAN_GIT_CHECKSUM=${PECAN_GIT_CHECKSUM} \
-    PECAN_GIT_DATE=${PECAN_GIT_DATE}
+    PECAN_GIT_DATE=${PECAN_GIT_DATE}
\ No newline at end of file

From c6b3b152a31ec646f312a6d71604dc45e4d2eee8 Mon Sep 17 00:00:00 2001
From: divne7022
Date: Sat, 19 Apr 2025 00:37:14 +0000
Subject: [PATCH 0080/1193] Add URL and BugReports fields to DESCRIPTION files

---
 base/all/DESCRIPTION                 | 2 ++
 base/db/DESCRIPTION                  | 2 ++
 base/logger/DESCRIPTION              | 2 +-
 base/qaqc/DESCRIPTION                | 2 ++
 base/remote/DESCRIPTION              | 2 ++
 base/settings/DESCRIPTION            | 2 ++
 base/utils/DESCRIPTION               | 2 ++
 base/visualization/DESCRIPTION       | 2 ++
 base/workflow/DESCRIPTION            | 2 ++
 models/basgra/DESCRIPTION            | 2 ++
 models/biocro/DESCRIPTION            | 2 ++
 models/cable/DESCRIPTION             | 2 ++
 models/clm45/DESCRIPTION             | 2 ++
 models/dalec/DESCRIPTION             | 2 ++
 models/dvmdostem/DESCRIPTION         | 2 ++
 models/ed/DESCRIPTION                | 2 ++
 models/fates/DESCRIPTION             | 2 ++
 models/gday/DESCRIPTION              | 2 ++
 models/jules/DESCRIPTION             | 2 ++
 models/ldndc/DESCRIPTION             | 2 ++
 models/linkages/DESCRIPTION          | 2 ++
 models/lpjguess/DESCRIPTION          | 2 ++
 models/maat/DESCRIPTION              | 2 ++
 models/maespa/DESCRIPTION            | 2 ++
 models/preles/DESCRIPTION            | 2 ++
 models/sibcasa/DESCRIPTION           | 2 ++
 models/sipnet/DESCRIPTION            | 2 ++
 models/stics/DESCRIPTION             | 2 ++
 models/template/DESCRIPTION          | 2 ++
 modules/allometry/DESCRIPTION        | 2 ++
 modules/assim.batch/DESCRIPTION      | 2 ++
 modules/assim.sequential/DESCRIPTION | 2 ++
 modules/benchmark/DESCRIPTION        | 2
++ modules/data.atmosphere/DESCRIPTION | 2 ++ modules/data.land/DESCRIPTION | 2 ++ modules/data.mining/DESCRIPTION | 2 ++ modules/data.remote/DESCRIPTION | 2 ++ modules/emulator/DESCRIPTION | 2 ++ modules/meta.analysis/DESCRIPTION | 2 ++ modules/photosynthesis/DESCRIPTION | 2 ++ modules/priors/DESCRIPTION | 2 ++ modules/rtm/DESCRIPTION | 2 ++ modules/uncertainty/DESCRIPTION | 2 ++ 43 files changed, 85 insertions(+), 1 deletion(-) diff --git a/base/all/DESCRIPTION b/base/all/DESCRIPTION index 2cad68114e2..68ec2401c6d 100644 --- a/base/all/DESCRIPTION +++ b/base/all/DESCRIPTION @@ -43,6 +43,8 @@ Description: The Predictive Ecosystem Carbon Analyzer PEcAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: PEcAn.DB, PEcAn.settings, diff --git a/base/db/DESCRIPTION b/base/db/DESCRIPTION index f88a16982e4..c1ec1b53328 100644 --- a/base/db/DESCRIPTION +++ b/base/db/DESCRIPTION @@ -39,6 +39,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific model parameterization, execution, and analysis. The goal of PECAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: DBI, dbplyr (>= 2.4.0), diff --git a/base/logger/DESCRIPTION b/base/logger/DESCRIPTION index 50bb54d1eee..5eea045dd1a 100644 --- a/base/logger/DESCRIPTION +++ b/base/logger/DESCRIPTION @@ -19,8 +19,8 @@ Description: Convenience functions for logging outputs from 'PEcAn', and lenience when running large batches of simulations that should not be terminated by errors in individual models. It is loosely based on the 'log4j' package. +URL: https://pecanproject.github.io/, https://github.com/PecanProject/pecan BugReports: https://github.com/PecanProject/pecan/issues -URL: https://pecanproject.github.io/ Imports: utils, stringi diff --git a/base/qaqc/DESCRIPTION b/base/qaqc/DESCRIPTION index 041f957d25b..9c5db8a5d2b 100644 --- a/base/qaqc/DESCRIPTION +++ b/base/qaqc/DESCRIPTION @@ -10,6 +10,8 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"), Author: David LeBauer, Tess McCabe Maintainer: David LeBauer Description: PEcAn integration and model skill testing +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: dplyr, graphics, diff --git a/base/remote/DESCRIPTION b/base/remote/DESCRIPTION index 196d97967d2..85bf7d92fc2 100644 --- a/base/remote/DESCRIPTION +++ b/base/remote/DESCRIPTION @@ -15,6 +15,8 @@ Authors@R: c(person("David", "LeBauer", role = c("aut"), person("University of Illinois, NCSA", role = c("cph"))) Description: This package contains utilities for communicating with and executing code on local and remote hosts. In particular, it has PEcAn-specific utilities for starting ecosystem model runs. 
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: dplyr, foreach, diff --git a/base/settings/DESCRIPTION b/base/settings/DESCRIPTION index f7b3e0409b0..5983d386a91 100644 --- a/base/settings/DESCRIPTION +++ b/base/settings/DESCRIPTION @@ -12,6 +12,8 @@ LazyLoad: yes LazyData: FALSE Require: hdf5 Description: Contains functions to read PEcAn settings files. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: methods Imports: diff --git a/base/utils/DESCRIPTION b/base/utils/DESCRIPTION index 72948b3d713..a248c1dddc4 100644 --- a/base/utils/DESCRIPTION +++ b/base/utils/DESCRIPTION @@ -30,6 +30,8 @@ Description: The Predictive Ecosystem Carbon Analyzer PEcAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: abind (>= 1.4.5), curl, diff --git a/base/visualization/DESCRIPTION b/base/visualization/DESCRIPTION index 8b38be4c664..9d1638c18db 100644 --- a/base/visualization/DESCRIPTION +++ b/base/visualization/DESCRIPTION @@ -27,6 +27,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific efficacy of scientific investigation. This module is used to create more complex visualizations from the data generated by PEcAn code, specifically the models. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: data.table, ggplot2, diff --git a/base/workflow/DESCRIPTION b/base/workflow/DESCRIPTION index ae07761cb71..e05d6d9e67b 100644 --- a/base/workflow/DESCRIPTION +++ b/base/workflow/DESCRIPTION @@ -25,6 +25,8 @@ Description: The Predictive Ecosystem Carbon Analyzer models, and to improve the efficacy of scientific investigation. This package provides workhorse functions that can be used to run the major steps of a PEcAn analysis. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues License: BSD_3_clause + file LICENSE Imports: dplyr, diff --git a/models/basgra/DESCRIPTION b/models/basgra/DESCRIPTION index f34e3f69db1..4f103380387 100644 --- a/models/basgra/DESCRIPTION +++ b/models/basgra/DESCRIPTION @@ -6,6 +6,8 @@ Authors@R: c(person("Istem", "Fer", role = c("aut", "cre"), email = "istem.fer@fmi.fi"), person("University of Illinois, NCSA", role = c("cph"))) Description: This module provides functions to link the BASGRA model to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: R (>= 4.0.0) Imports: PEcAn.logger, diff --git a/models/biocro/DESCRIPTION b/models/biocro/DESCRIPTION index 51a2475e4dd..5a8832b0a99 100644 --- a/models/biocro/DESCRIPTION +++ b/models/biocro/DESCRIPTION @@ -12,6 +12,8 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"), Author: David LeBauer, Deepak Jaiswal, Christopher Black Maintainer: David LeBauer Description: This module provides functions to link BioCro to PEcAn. 
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.logger, PEcAn.remote, diff --git a/models/cable/DESCRIPTION b/models/cable/DESCRIPTION index d7de137afa5..cb9a189878c 100644 --- a/models/cable/DESCRIPTION +++ b/models/cable/DESCRIPTION @@ -9,6 +9,8 @@ Authors@R: c(person("Kaitlin", "Ragosta", role = c("aut")), Author: Kaitlin Ragosta Maintainer: Tony Gardella Description: This module provides functions to link the (CABLE) to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.logger, PEcAn.utils (>= 1.4.8) diff --git a/models/clm45/DESCRIPTION b/models/clm45/DESCRIPTION index 363b065e63c..40c55274444 100644 --- a/models/clm45/DESCRIPTION +++ b/models/clm45/DESCRIPTION @@ -11,6 +11,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific streamline the interaction between data and models, and to improve the efficacy of scientific investigation. This package provides functions to link the Community Land Model, version 4.5, to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: PEcAn.logger, PEcAn.utils diff --git a/models/dalec/DESCRIPTION b/models/dalec/DESCRIPTION index a73bd635c4d..f8119f40452 100644 --- a/models/dalec/DESCRIPTION +++ b/models/dalec/DESCRIPTION @@ -10,6 +10,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"), Author: Mike Dietze, Tristain Quaife Maintainer: Mike Dietze Description: This module provides functions to link DALEC to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.logger, PEcAn.remote, diff --git a/models/dvmdostem/DESCRIPTION b/models/dvmdostem/DESCRIPTION index a22b0daa62e..fe94de55fc2 100644 --- a/models/dvmdostem/DESCRIPTION +++ b/models/dvmdostem/DESCRIPTION @@ -11,6 +11,8 @@ Author: Tobey Carman, Shawn Serbin Maintainer: Tobey Carman , Shawn Serbin Description: This module provides functions to link the dvmdostem model to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: lubridate, ncdf4, diff --git a/models/ed/DESCRIPTION b/models/ed/DESCRIPTION index 3e87cfc89f4..26625d9a71c 100644 --- a/models/ed/DESCRIPTION +++ b/models/ed/DESCRIPTION @@ -30,6 +30,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific streamline the interaction between data and models, and to improve the efficacy of scientific investigation. This package provides functions to link the Ecosystem Demography Model, version 2, to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: R (>= 3.5) Imports: diff --git a/models/fates/DESCRIPTION b/models/fates/DESCRIPTION index a44c6011931..93bce032e32 100644 --- a/models/fates/DESCRIPTION +++ b/models/fates/DESCRIPTION @@ -15,6 +15,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific streamline the interaction between data and models, and to improve the efficacy of scientific investigation. This package provides functions to link the FATES model to PEcAn. 
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: stringr, PEcAn.logger, diff --git a/models/gday/DESCRIPTION b/models/gday/DESCRIPTION index 3cda7b49439..3edb2b191a2 100644 --- a/models/gday/DESCRIPTION +++ b/models/gday/DESCRIPTION @@ -10,6 +10,8 @@ Authors@R: c(person("Martin", "De Kauwe", role = c("aut", "cre"), Author: Martin De Kauwe Maintainer: Martin De Kauwe Description: This module provides functions to link the GDAY model to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: PEcAn.utils Imports: diff --git a/models/jules/DESCRIPTION b/models/jules/DESCRIPTION index 3e6210fee3e..6ba95818b5d 100644 --- a/models/jules/DESCRIPTION +++ b/models/jules/DESCRIPTION @@ -6,6 +6,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"), email = "dietze@bu.edu"), person("University of Illinois, NCSA", role = c("cph"))) Description: This module provides functions to link the (JULES) to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.data.atmosphere, PEcAn.logger, diff --git a/models/ldndc/DESCRIPTION b/models/ldndc/DESCRIPTION index 4cb800e9a37..5a3eefededd 100644 --- a/models/ldndc/DESCRIPTION +++ b/models/ldndc/DESCRIPTION @@ -5,6 +5,8 @@ Version: 1.0.0.9000 Authors@R: c(person("Henri", "Kajasilta", role = c("aut", "cre"), email = "henri.kajasilta@fmi.fi")) Description: This module provides functions to link the (LDNDC) to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: dplyr, lubridate, diff --git a/models/linkages/DESCRIPTION b/models/linkages/DESCRIPTION index 79e0b26d0fb..7696b136e56 100644 --- a/models/linkages/DESCRIPTION +++ b/models/linkages/DESCRIPTION @@ -8,6 +8,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut"), email = "araiho@nd.edu"), person("University of Illinois, NCSA", role = c("cph"))) Description: This module provides functions to link the (LINKAGES) to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.data.land, PEcAn.DB, diff --git a/models/lpjguess/DESCRIPTION b/models/lpjguess/DESCRIPTION index da914cf7d71..5c0ce84ddae 100644 --- a/models/lpjguess/DESCRIPTION +++ b/models/lpjguess/DESCRIPTION @@ -8,6 +8,8 @@ Authors@R: c(person("Istem", "Fer", role = c("aut", "cre"), email = "tonygard@bu.edu"), person("University of Illinois, NCSA", role = c("cph"))) Description: This module provides functions to link LPJ-GUESS to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.logger, PEcAn.remote, diff --git a/models/maat/DESCRIPTION b/models/maat/DESCRIPTION index ce1684852b2..8a9dda597f3 100644 --- a/models/maat/DESCRIPTION +++ b/models/maat/DESCRIPTION @@ -6,6 +6,8 @@ Authors@R: c( person("Shawn", "Serbin", role = c("aut", "cre"), email="sserbin@bnl.gov"), person("Anthony", "Walker", role = "aut", email="walkerap@ornl.gov")) Description: This module provides functions to wrap the MAAT model into the PEcAn workflows. 
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.data.atmosphere, PEcAn.logger, diff --git a/models/maespa/DESCRIPTION b/models/maespa/DESCRIPTION index 40d1f78eaa3..1c3c2724564 100644 --- a/models/maespa/DESCRIPTION +++ b/models/maespa/DESCRIPTION @@ -11,6 +11,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific streamline the interaction between data and models, and to improve the efficacy of scientific investigation.This package allows for MAESPA to be run through the PEcAN workflow. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.data.atmosphere, PEcAn.logger, diff --git a/models/preles/DESCRIPTION b/models/preles/DESCRIPTION index 125a9ffddbe..c83ce2863cd 100644 --- a/models/preles/DESCRIPTION +++ b/models/preles/DESCRIPTION @@ -14,6 +14,8 @@ Description: This module provides functions to run the PREdict Light use parameterization,execution, and analysis. The goal of PECAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.logger, lubridate (>= 1.6.0), diff --git a/models/sibcasa/DESCRIPTION b/models/sibcasa/DESCRIPTION index 0c3bae6faed..aaaeae34345 100644 --- a/models/sibcasa/DESCRIPTION +++ b/models/sibcasa/DESCRIPTION @@ -12,6 +12,8 @@ Authors@R: c(person("Rob", "Kooper", role = "cre", person("University of Illinois, NCSA", role = c("cph"))) Description: This module provides functions to link (SiBCASA) to PEcAn. It is a work in progress and is not yet fully functional. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: ncdf4, PEcAn.logger diff --git a/models/sipnet/DESCRIPTION b/models/sipnet/DESCRIPTION index e5f14edc35d..40ee9884497 100644 --- a/models/sipnet/DESCRIPTION +++ b/models/sipnet/DESCRIPTION @@ -10,6 +10,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific model parameterization, execution, and analysis. The goal of PECAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: dplyr, lubridate (>= 1.6.0), diff --git a/models/stics/DESCRIPTION b/models/stics/DESCRIPTION index 2fcce87bfa7..acf8c5b4764 100644 --- a/models/stics/DESCRIPTION +++ b/models/stics/DESCRIPTION @@ -7,6 +7,8 @@ Authors@R: c( email = "istem.fer@fmi.fi", role = c("aut", "cre"))) Description: This module provides functions to link the STICS to PEcAn. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.settings, PEcAn.logger, diff --git a/models/template/DESCRIPTION b/models/template/DESCRIPTION index 7e0aea20f21..52797ff2d8b 100644 --- a/models/template/DESCRIPTION +++ b/models/template/DESCRIPTION @@ -6,6 +6,8 @@ Authors@R: c(person("Jane", "Doe", role = c("aut", "cre"), email = "jdoe@illinois.edu"), person("John", "Doe", role = c("aut"))) Description: This module provides functions to link the (ModelName) to PEcAn. 
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: PEcAn.DB, PEcAn.logger, diff --git a/modules/allometry/DESCRIPTION b/modules/allometry/DESCRIPTION index c031dc02766..d0ab5cb0f1f 100644 --- a/modules/allometry/DESCRIPTION +++ b/modules/allometry/DESCRIPTION @@ -7,6 +7,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"), person("Shashank", "Singh", role = c("ctb")), person("University of Illinois, NCSA", role = c("cph"))) Description: Synthesize allometric equations or fit allometries to data. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: coda (>= 0.18), grDevices, diff --git a/modules/assim.batch/DESCRIPTION b/modules/assim.batch/DESCRIPTION index 6283791ace5..03d0fe4cf6e 100644 --- a/modules/assim.batch/DESCRIPTION +++ b/modules/assim.batch/DESCRIPTION @@ -14,6 +14,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific model parameterization, execution, and analysis. The goal of PECAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues VignetteBuilder: knitr, rmarkdown Imports: abind, diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION index dfa454384cd..5b65d48d31a 100644 --- a/modules/assim.sequential/DESCRIPTION +++ b/modules/assim.sequential/DESCRIPTION @@ -9,6 +9,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific model parameterization, execution, and analysis. The goal of PECAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: coda, dplyr, diff --git a/modules/benchmark/DESCRIPTION b/modules/benchmark/DESCRIPTION index e5a312f0c1d..a867a88537a 100644 --- a/modules/benchmark/DESCRIPTION +++ b/modules/benchmark/DESCRIPTION @@ -21,6 +21,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific efficacy of scientific investigation. The PEcAn.benchmark package provides utilities for comparing models and data, including a suite of statistical metrics and plots. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: dplyr, ggplot2, diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION index ebfc9bbc639..abce32df49a 100644 --- a/modules/data.atmosphere/DESCRIPTION +++ b/modules/data.atmosphere/DESCRIPTION @@ -19,6 +19,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific package converts climate driver data into a standard format for models integrated into PEcAn. As a standalone package, it provides an interface to access diverse climate data sets. 
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: abind (>= 1.4.5), amerifluxr, diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION index 0a86834e441..554305857c7 100644 --- a/modules/data.land/DESCRIPTION +++ b/modules/data.land/DESCRIPTION @@ -21,6 +21,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific model parameterization, execution, and analysis. The goal of PECAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: R (>= 3.5.0) Imports: coda, diff --git a/modules/data.mining/DESCRIPTION b/modules/data.mining/DESCRIPTION index 60229a54ef7..8ab63bf1907 100644 --- a/modules/data.mining/DESCRIPTION +++ b/modules/data.mining/DESCRIPTION @@ -2,6 +2,8 @@ Package: PEcAn.data.mining Type: Package Title: PEcAn Functions Used for Exploring Model Residuals and Structures Description: (Temporary description) PEcAn functions used for exploring model residuals and structures. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Version: 1.7.3.9000 Authors@R: c(person("Mike", "Dietze", role = c("aut", "cre"), email = "dietze@bu.edu"), diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION index abb339e187c..dbd38de0abe 100644 --- a/modules/data.remote/DESCRIPTION +++ b/modules/data.remote/DESCRIPTION @@ -10,6 +10,8 @@ Authors@R: c(person("Mike", "Dietze", role = c("aut"), Author: Mike Dietze, Bailey Morrison Maintainer: Bailey Morrison Description: PEcAn module for processing remote data. Python module requirements: requests, json, re, ast, panads, sys. If any of these modules are missing, install using pip install . +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: curl, DBI, diff --git a/modules/emulator/DESCRIPTION b/modules/emulator/DESCRIPTION index ab62ac706ad..0d3fe2698b8 100644 --- a/modules/emulator/DESCRIPTION +++ b/modules/emulator/DESCRIPTION @@ -13,6 +13,8 @@ Imports: Description: Implementation of a Gaussian Process model (both likelihood and bayesian approaches) for kriging and model emulation. Includes functions for sampling design and prediction. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues License: BSD_3_clause + file LICENSE Encoding: UTF-8 RoxygenNote: 7.3.2 diff --git a/modules/meta.analysis/DESCRIPTION b/modules/meta.analysis/DESCRIPTION index 60ba579a6e4..18270488f79 100644 --- a/modules/meta.analysis/DESCRIPTION +++ b/modules/meta.analysis/DESCRIPTION @@ -24,6 +24,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific streamline the interaction between data and models, and to improve the efficacy of scientific investigation. The PEcAn.MA package contains the functions used in the Bayesian meta-analysis of trait data. 
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: coda (>= 0.18), lattice, diff --git a/modules/photosynthesis/DESCRIPTION b/modules/photosynthesis/DESCRIPTION index b101522ae90..40d79efe89c 100644 --- a/modules/photosynthesis/DESCRIPTION +++ b/modules/photosynthesis/DESCRIPTION @@ -18,6 +18,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific efficacy of scientific investigation. The PEcAn.photosynthesis package contains functions used in the Hierarchical Bayesian calibration of the Farquhar et al 1980 model. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: rjags Imports: diff --git a/modules/priors/DESCRIPTION b/modules/priors/DESCRIPTION index abfc6f2bb56..a3ef7212975 100644 --- a/modules/priors/DESCRIPTION +++ b/modules/priors/DESCRIPTION @@ -6,6 +6,8 @@ Authors@R: c(person("David", "LeBauer", role = c("aut", "cre"), email = "dlebauer@email.arizona.edu"), person("University of Illinois, NCSA", role = c("cph"))) Description: Functions to estimate priors from data. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues License: BSD_3_clause + file LICENSE Copyright: Authors LazyLoad: yes diff --git a/modules/rtm/DESCRIPTION b/modules/rtm/DESCRIPTION index 2be3ff4f0a8..1d903ece731 100644 --- a/modules/rtm/DESCRIPTION +++ b/modules/rtm/DESCRIPTION @@ -13,6 +13,8 @@ Description: Functions for performing forward runs and inversions of radiative transfer models (RTMs). Inversions can be performed using maximum likelihood, or more complex hierarchical Bayesian methods. Underlying numerical analyses are optimized for speed using Fortran code. +URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Depends: R (>= 2.10) Imports: PEcAn.logger, diff --git a/modules/uncertainty/DESCRIPTION b/modules/uncertainty/DESCRIPTION index 6074bd2cc0d..895c561cbdb 100644 --- a/modules/uncertainty/DESCRIPTION +++ b/modules/uncertainty/DESCRIPTION @@ -26,6 +26,8 @@ Description: The Predictive Ecosystem Carbon Analyzer PECAn is to streamline the interaction between data and models, and to improve the efficacy of scientific investigation. 
+URL: https://pecanproject.github.io, https://github.com/PecanProject/pecan +BugReports: https://github.com/PecanProject/pecan/issues Imports: dplyr, ggplot2, From 69526b019d2ea0765737adcf0ec0a329a74ab6e5 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Sat, 19 Apr 2025 04:28:24 +0000 Subject: [PATCH 0081/1193] added 'yaml' and 'desc' dependencies for pkgdown docs build --- .github/workflows/pkgdown.yml | 4 ++-- docker/docs/Dockerfile | 2 +- scripts/build_pkgdown.R | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml index d068b000cba..9297072675f 100644 --- a/.github/workflows/pkgdown.yml +++ b/.github/workflows/pkgdown.yml @@ -24,9 +24,9 @@ jobs: # Checkout source code - uses: actions/checkout@v4 - # Install pkgdown + # Install dependencies - name: Install dependencies - run: Rscript -e 'install.packages("pkgdown")' + run: Rscript -e 'install.packages(c("pkgdown", "yaml", "desc"))' # Generate documentation using Makefile - name: Generate Package Documentation diff --git a/docker/docs/Dockerfile b/docker/docs/Dockerfile index d0af39672f2..19e9b10edac 100644 --- a/docker/docs/Dockerfile +++ b/docker/docs/Dockerfile @@ -15,7 +15,7 @@ RUN apt-get update \ -e 'remotes::install_version("rmarkdown", ">= 2.19", dependencies = TRUE, upgrade = FALSE, repos = repos)' \ -e 'remotes::install_version("knitr", ">= 1.42", dependencies = TRUE, upgrade = FALSE, repos = repos)' \ -e 'remotes::install_version("bookdown", ">= 0.31", dependencies = TRUE, upgrade = FALSE, repos = repos)' \ - -e 'install.packages("pkgdown", repos = repos)' \ + -e 'install.packages(c("pkgdown", "yaml", "desc"), repos = repos)' \ && rm -rf /var/lib/apt/lists/* # ---------------------------------------------------------------------- diff --git a/scripts/build_pkgdown.R b/scripts/build_pkgdown.R index 1bb3f5db9ce..d0cb44bae14 100644 --- a/scripts/build_pkgdown.R +++ b/scripts/build_pkgdown.R @@ -2,6 +2,7 @@ # Build pkgdown documentation for PEcAn packages library(pkgdown) library(yaml) +library(desc) args <- commandArgs(trailingOnly = TRUE) if (length(args) == 0) { stop("No package names provided. Please pass package names as arguments.") From 7b2ee28c55ca0ecb787a94f08f59d77aa15bc447 Mon Sep 17 00:00:00 2001 From: divne7022 Date: Sat, 19 Apr 2025 15:24:52 +0000 Subject: [PATCH 0082/1193] added updated .Rd files after adding URL to DESCRIPTION --- base/db/man/PEcAn.DB-package.Rd | 9 +++++++++ base/utils/man/PEcAn.Rd | 9 +++++++++ modules/emulator/man/PEcAn.emulator-package.Rd | 9 +++++++++ 3 files changed, 27 insertions(+) diff --git a/base/db/man/PEcAn.DB-package.Rd b/base/db/man/PEcAn.DB-package.Rd index cdd80bdc1d9..5a63f5dd82e 100644 --- a/base/db/man/PEcAn.DB-package.Rd +++ b/base/db/man/PEcAn.DB-package.Rd @@ -8,6 +8,15 @@ \description{ This package provides an interface between PEcAn and the BETY database. For usage examples, please see \code{vignette("betydb_access")} +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://pecanproject.github.io} + \item \url{https://github.com/PecanProject/pecan} + \item Report bugs at \url{https://github.com/PecanProject/pecan/issues} +} + } \author{ \strong{Maintainer}: David LeBauer \email{dlebauer@email.arizona.edu} diff --git a/base/utils/man/PEcAn.Rd b/base/utils/man/PEcAn.Rd index 4a6752d93c7..773527bedba 100644 --- a/base/utils/man/PEcAn.Rd +++ b/base/utils/man/PEcAn.Rd @@ -49,6 +49,15 @@ Current development is focused on developing PEcAn into a real-time data assimilation and forecasting system. 
This system will provide a detailed analysis of the past and present ecosystem functioning that seamlessly transitions into forecasts. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://pecanproject.github.io} + \item \url{https://github.com/PecanProject/pecan} + \item Report bugs at \url{https://github.com/PecanProject/pecan/issues} +} + } \author{ \strong{Maintainer}: Rob Kooper \email{kooper@illinois.edu} diff --git a/modules/emulator/man/PEcAn.emulator-package.Rd b/modules/emulator/man/PEcAn.emulator-package.Rd index ba35157964c..14f5d38e38b 100644 --- a/modules/emulator/man/PEcAn.emulator-package.Rd +++ b/modules/emulator/man/PEcAn.emulator-package.Rd @@ -9,6 +9,15 @@ Supports both likelihood and bayesian approaches for kriging and model emulation. Includes functions for sampling design and prediction.} \description{ Implementation of a Gaussian Process model (both likelihood and bayesian approaches) for kriging and model emulation. Includes functions for sampling design and prediction. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://pecanproject.github.io} + \item \url{https://github.com/PecanProject/pecan} + \item Report bugs at \url{https://github.com/PecanProject/pecan/issues} +} + } \author{ \strong{Maintainer}: Mike Dietze \email{dietze@bu.edu} From 775f820e285647930db5db0b0d0a0d866625bc0c Mon Sep 17 00:00:00 2001 From: divne7022 Date: Wed, 23 Apr 2025 23:36:28 +0000 Subject: [PATCH 0083/1193] updated build_pkgdown.R --- scripts/build_pkgdown.R | 49 ++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/scripts/build_pkgdown.R b/scripts/build_pkgdown.R index d0cb44bae14..6327b064503 100644 --- a/scripts/build_pkgdown.R +++ b/scripts/build_pkgdown.R @@ -20,26 +20,6 @@ if (requireNamespace("PEcAn.logger", quietly = TRUE)) { } } -pkg_config <- function() { - - list( - url = "https://pecanproject.github.io/", - template = list( - bootstrap = 5, - includes = list( - before_navbar = paste0( - "\n", - "" - ) - ) - ) - ) -} - logger("Building pkgdown docs for:", paste(packages, collapse = ", ")) for (pkg in packages) { logger("Building pkgdown site for:", pkg) @@ -48,11 +28,30 @@ for (pkg in packages) { if (!dir.exists(pkg)) { stop(paste("Package directory does not exist:", pkg)) } - pkg_config_path <- file.path(pkg, "_pkgdown.yml") - pkg_config_data <- pkg_config() - yaml::write_yaml(pkg_config_data, pkg_config_path) setwd(pkg) - pkgdown::build_site() + pkgdown::build_site( + pkg = ".", + override = list( + repo = list( + url = list( + source = paste0("https://github.com/PecanProject/pecan/blob/develop/", pkg) + ) + ), + template = list( + bootstrap = 5, + includes = list( + before_navbar = paste0( + "\n", + "" + ) + ) + ) + ) + ) setwd(current_wd) source_docs <- file.path(pkg, "docs") if (!dir.exists(source_docs)) { @@ -92,7 +91,7 @@ before_text <- c( '', '

<h1>PEcAn package documentation</h1>',
   '<p>Function documentation and articles for each PEcAn package,',
-  ' generated from the package source using pkgdown package.</p>',
+  ' generated from the package source using pkgdown.</p>',
   '',
   '