diff --git a/modules/data.land/R/look_up_fertilizer_components.R b/modules/data.land/R/look_up_fertilizer_components.R
index 0f07ac47d21..ed6a7818d50 100644
--- a/modules/data.land/R/look_up_fertilizer_components.R
+++ b/modules/data.land/R/look_up_fertilizer_components.R
@@ -104,7 +104,7 @@ look_up_fertilizer_components <- function(
   )
 
   res <- fertilizer_info |>
-    dplyr::select(.data$name, .data$NO3_N, .data$NH4_N, .data$N_org, .data$C_org) |>
-    dplyr::rename(type = .data$name) |>
+    dplyr::select("name", "NO3_N", "NH4_N", "N_org", "C_org") |>
+    dplyr::rename(type = "name") |>
     as.list()
   return(res)
diff --git a/modules/data.land/inst/events_schema_v0.1.0.json b/modules/data.land/inst/events_schema_v0.1.0.json
new file mode 100644
index 00000000000..21a853f8bc3
--- /dev/null
+++ b/modules/data.land/inst/events_schema_v0.1.0.json
@@ -0,0 +1,73 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://pecanproject.org/schema/events-mvp-0-1-0.json",
+  "type": "object",
+  "required": ["pecan_events_version", "site_id", "events"],
+  "properties": {
+    "pecan_events_version": { "type": "string", "const": "0.1.0" },
+    "site_id": { "type": "string", "minLength": 1 },
+    "ensemble_id": { "type": ["string", "null"], "minLength": 1 },
+    "geometry_uri": { "type": ["string", "null"], "format": "uri" },
+    "provenance": { "type": "object", "additionalProperties": true },
+    "events": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "required": ["event_type", "date"],
+        "properties": {
+          "event_type": {
+            "type": "string",
+            "enum": ["planting", "harvest", "irrigation", "fertilization", "tillage"]
+          },
+          "date": { "type": "string", "pattern": "^\\d{4}-\\d{2}-\\d{2}$" },
+          "fraction_area": { "type": "number", "minimum": 0, "maximum": 1, "default": 1.0 },
+          "source": { "type": "string" },
+
+          "leaf_c_kg_m2": { "type": "number", "minimum": 0 },
+          "wood_c_kg_m2": { "type": "number", "minimum": 0 },
+          "fine_root_c_kg_m2": { "type": "number", "minimum": 0 },
+          "coarse_root_c_kg_m2": { "type": "number", "minimum": 0 },
+          "cultivar": { "type": "string" },
+          "crop_code": { "type": "string" },
+          "crop_display": { "type": "string" },
+
+          "frac_above_removed_0to1": { "type": "number", "minimum": 0, "maximum": 1 },
+          "frac_below_removed_0to1": { "type": "number", "minimum": 0, "maximum": 1 },
+          "frac_above_to_litter_0to1": { "type": "number", "minimum": 0, "maximum": 1 },
+          "frac_below_to_litter_0to1": { "type": "number", "minimum": 0, "maximum": 1 },
+
+          "amount_mm": { "type": "number", "minimum": 0 },
+          "method": { "type": "string", "enum": ["soil", "canopy", "flood"] },
+          "immed_evap_frac_0to1": { "type": "number", "minimum": 0, "maximum": 1 },
+
+          "org_c_kg_m2": { "type": "number", "minimum": 0 },
+          "org_n_kg_m2": { "type": "number", "minimum": 0 },
+          "nh4_n_kg_m2": { "type": "number", "minimum": 0 },
+          "no3_n_kg_m2": { "type": "number", "minimum": 0 },
+
+          "tillage_eff_0to1": { "type": "number", "minimum": 0, "maximum": 1 },
+          "intensity_category": { "type": "string" },
+          "depth_m": { "type": "number", "minimum": 0 }
+        },
+        "allOf": [
+          { "if": { "properties": { "event_type": { "const": "planting" } } },
+            "then": { "required": ["leaf_c_kg_m2"] } },
+          { "if": { "properties": { "event_type": { "const": "harvest" } } },
+            "then": { "required": ["frac_above_removed_0to1"] } },
+          { "if": { "properties": { "event_type": { "const": "irrigation" } } },
+            "then": { "required": ["amount_mm", "method"] } },
+          { "if": { "properties": { "event_type": { "const": "fertilization" } } },
+            "then": { "anyOf": [
+              { "required": ["org_c_kg_m2"] },
+              { "required": ["nh4_n_kg_m2"] },
+              { "required": ["no3_n_kg_m2"] }
+            ] } },
+          { "if": { "properties": { "event_type": { "const": "tillage" } } },
+            "then": { "required": ["tillage_eff_0to1"] } }
+        ],
+        "additionalProperties": true
+      }
+    }
+  },
+  "additionalProperties": false
+}
diff --git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R
new file mode 100755
index 00000000000..f44f148821f
--- /dev/null
+++ 
b/modules/data.land/inst/generate_events.R
@@ -0,0 +1,277 @@
+#!/usr/bin/env Rscript
+
+# --- Profiling Start ---
+# Rprof("profiling.out")
+# --- End Profiling Start ---
+
+# Minimal MVP: build events.json from ca_field_attributes.csv
+# - Input: <data_dir>/ca_field_attributes.csv (columns used: site_id, pft, crop)
+# - Output: <data_dir>/events/events.json (plus a gzipped copy and a
+#   pretty-printed sample of the first sites), following
+#   data/pecan_events_schema_v0.1.0.json
+# - Events (minimal):
+#   * planting: annual crops -> every site-year; woody perennials -> first
+#     generated year only
+#   * harvest: all site-years; pruning for woody perennials is coded as harvest
+#   * tillage: annual crops only
+#   * irrigation, fertilization: all site-years
+# Each event includes the schema-required fields for its event_type plus the
+# optional fields that are populated below.
+
+# --- Config ---
+data_dir <- "/projectnb2/dietzelab/ccmmf/data"
+field_attr_csv <- file.path(data_dir, "ca_field_attributes.csv")
+sample_output_json <- file.path(data_dir, "events/mvp_events.json")
+output_json <- file.path(data_dir, "events/events.json")
+
+# if TRUE, only generate for design points
+# TODO: generate full set for all sites to use in site selection and downscaling
+DESIGN_POINTS <- TRUE
+
+PRODUCTION <- FALSE # set TRUE for all sites, not needed if DESIGN_POINTS is TRUE
+if (PRODUCTION) {
+  stop("This could be very slow; consider profiling and writing to db or arrow etc")
+}
+# Fixed seed keeps the random site sample below reproducible
+set.seed(123)
+
+ca_field_attributes <- vroom::vroom(
+  field_attr_csv,
+  show_col_types = FALSE
+)
+
+if (DESIGN_POINTS) {
+  # design_points <- readr::read_csv("https://raw.githubusercontent.com/ccmmf/workflows/refs/heads/main/data/design_points.csv")
+  # d <- update_design_point_site_ids(design_points, ca_field_attributes)
+  # readr::write_csv(d, file.path(data_dir, "design_points.csv"))
+  # readr::write_csv(d, "~/downscaling/data/design_points.csv")
+  # design_points <- readr::read_csv(file.path(data_dir, "design_points.csv"))
+  # use the one under version control
+  design_points <- readr::read_csv("~/downscaling/data/design_points.csv")
+  ca_field_attributes <- ca_field_attributes |>
+    dplyr::filter(site_id %in% design_points$site_id)
+} else if (!PRODUCTION) {
+  ca_field_attributes <- ca_field_attributes |>
+    dplyr::slice_sample(n = 1000)
+}
+
+# One row per site x pft x crop x year. `year` comes from the crossing, so
+# first_year is the first generated year (2016) for every site.
+ca_fields <- ca_field_attributes |>
+  dplyr::select(site_id, pft, crop) |>
+  dplyr::distinct() |>
+  tidyr::crossing(year = 2016:2024) |>
+  dplyr::group_by(site_id) |>
+  dplyr::mutate(first_year = min(year)) |>
+  dplyr::ungroup()
+
+# Planting (annuals)
+planting_annual <- ca_fields |>
+  dplyr::filter(pft == "annual crop") |>
+  dplyr::transmute(
+    event_type = "planting",
+    date = paste0(year, "-03-15"),
+    site_id = site_id,
+    # required for planting
+    leaf_c_kg_m2 = 0.05,
+    crop = crop
+  )
+
+# Planting (woody): first year
+planting_woody <- ca_fields |>
+  dplyr::filter(pft == "woody perennial crop") |>
+  dplyr::filter(year == first_year) |>
+  dplyr::transmute(
+    event_type = "planting",
+    date = paste0(year, "-03-15"),
+    site_id = site_id,
+    leaf_c_kg_m2 = 0.2,
+    crop = crop
+  )
+
+# Fertilization
+fertilization <- ca_fields |>
+  dplyr::transmute(
+    event_type = "fertilization",
+    date = paste0(year, "-02-11"),
+    site_id = site_id,
+    org_n_kg_m2 = 0.0,
+    org_c_kg_m2 = 0.0,
+    nh4_n_kg_m2 = 0.02,
+    no3_n_kg_m2 = 0.03
+  )
+
+# Organic Matter Addition
+organic_matter_addition <- ca_fields |>
+  dplyr::transmute(
+    event_type = "fertilization",
+    date = paste0(year, "-03-11"),
+    site_id = site_id,
+    org_n_kg_m2 = 0.05,
+    org_c_kg_m2 = 0.5,
+    nh4_n_kg_m2 = 0.0,
+    no3_n_kg_m2 = 0.0
+  )
+
+# Harvest
+harvest <- ca_fields |>
+  dplyr::transmute(
+    event_type = "harvest",
+    date = paste0(year, "-10-15"),
+    site_id = site_id,
+    frac_above_removed_0to1 = 0.10,
+    frac_below_removed_0to1 = 0.0,
+    frac_above_to_litter_0to1 = 0.0,
+    frac_below_to_litter_0to1 = 0.0,
+    crop = crop
+  )
+
+# Pruning (woody)
+pruning <- ca_fields |>
+  dplyr::filter(pft == "woody perennial crop") |>
+  dplyr::mutate(offset = year - first_year) |>
+  dplyr::filter(offset %% 4 == 1) |>
+  dplyr::transmute(
+    event_type = "harvest",
+    date = paste0(year, "-12-15"),
+    site_id = site_id,
+    frac_above_removed_0to1 = 0.30,
+    frac_below_removed_0to1 = 0.0,
+    frac_above_to_litter_0to1 = 0.0,
+    frac_below_to_litter_0to1 = 0.0,
+    crop = crop
+  )
+
+# Tillage
+tillage <- ca_fields |>
+  dplyr::filter(pft == "annual crop") |>
+  tidyr::crossing(till_suffix = c("-03-01", "-11-01")) |>
+  dplyr::transmute(
+    event_type = "tillage",
+    date = paste0(year, till_suffix),
+    site_id = site_id,
+    tillage_eff_0to1 = 0.10
+  )
+
+# Irrigation (both pfts): 3 per month for all months
+# TODO: Should annual crops skip irrigation during fallow season?
+months <- sprintf("%02d", 1:12)
+days <- c("05", "15", "25")
+
+irrigation <- ca_fields |>
+  tidyr::crossing(month = months, day = days) |>
+  dplyr::transmute(
+    event_type = "irrigation",
+    date = paste0(year, "-", month, "-", day),
+    site_id = site_id,
+    amount_mm = 40,
+    method = "soil"
+  )
+
+# Combine and order by site/date
+events_all <- dplyr::bind_rows(
+  planting_annual, planting_woody,
+  harvest, pruning,
+  tillage, irrigation,
+  fertilization, organic_matter_addition
+) |>
+  dplyr::arrange(site_id, date)
+
+# --- Build site objects per schema ------------------------------------------
+# Helper: drop NULL/NA fields from a named list
+compact_list <- function(x) {
+  Filter(function(v) !(is.null(v) || (length(v) == 1 && is.atomic(v) && is.na(v))), x)
+}
+
+sites <- unique(events_all$site_id)
+
+site_objs <- purrr::map(sites, function(sid) {
+  evs_df <- events_all |>
+    dplyr::filter(site_id == sid) |>
+    dplyr::arrange(date)
+
+  # Build one list per event holding only the fields relevant to its event_type;
+  # fields that are NA for that row are left out.
+  evs_list <- purrr::pmap(
+    evs_df,
+    function(event_type, date, site_id, leaf_c_kg_m2 = NA_real_, frac_above_removed_0to1 = NA_real_,
+             frac_below_removed_0to1 = NA_real_, frac_above_to_litter_0to1 = NA_real_,
+             frac_below_to_litter_0to1 = NA_real_, amount_mm = NA_real_, method = NA_character_,
+             tillage_eff_0to1 = NA_real_, org_c_kg_m2 = NA_real_, org_n_kg_m2 = NA_real_,
+             nh4_n_kg_m2 = NA_real_, no3_n_kg_m2 = NA_real_,
+             crop = NA_character_, ...) {
+      base <- list(event_type = event_type, date = date)
+
+      # Add required fields per event type
+      if (event_type == "planting" && !is.na(leaf_c_kg_m2)) {
+        base$leaf_c_kg_m2 <- leaf_c_kg_m2
+        if (!is.na(crop)) base$crop <- crop
+      }
+      if (event_type == "harvest" && !is.na(frac_above_removed_0to1)) {
+        base$frac_above_removed_0to1 <- frac_above_removed_0to1
+        if (!is.na(frac_below_removed_0to1)) base$frac_below_removed_0to1 <- frac_below_removed_0to1
+        if (!is.na(frac_above_to_litter_0to1)) base$frac_above_to_litter_0to1 <- frac_above_to_litter_0to1
+        if (!is.na(frac_below_to_litter_0to1)) base$frac_below_to_litter_0to1 <- frac_below_to_litter_0to1
+        if (!is.na(crop)) base$crop <- crop
+      }
+      if (event_type == "irrigation" && !is.na(amount_mm) && !is.na(method)) {
+        base$amount_mm <- amount_mm
+        base$method <- method
+      }
+      if (event_type == "tillage" && !is.na(tillage_eff_0to1)) {
+        base$tillage_eff_0to1 <- tillage_eff_0to1
+      }
+      if (event_type == "fertilization") {
+        # Keep every nutrient pool that is set so the mineral N (NH4/NO3)
+        # amounts are not dropped from the output.
+        if (!is.na(org_c_kg_m2)) base$org_c_kg_m2 <- org_c_kg_m2
+        if (!is.na(org_n_kg_m2)) base$org_n_kg_m2 <- org_n_kg_m2
+        if (!is.na(nh4_n_kg_m2)) base$nh4_n_kg_m2 <- nh4_n_kg_m2
+        if (!is.na(no3_n_kg_m2)) base$no3_n_kg_m2 <- no3_n_kg_m2
+      }
+
+      compact_list(base)
+    }
+  )
+  list(
+    pecan_events_version = "0.1.0",
+    site_id = sid,
+    events = evs_list
+  )
+})
+
+# TODO add PEcAn Schema info
+
+# Validate JSON given schema
+# schema <- "data/pecan_events_schema_v0.1.0.json"
+# validator <- jsonvalidate::json_validator(schema)
+# json_txt_temp <- jsonlite::toJSON(site_objs, auto_unbox = TRUE)
+# if (!validator(json_txt_temp)) {
+#   stop("JSON does not match schema")
+# }
+
+# --- Write JSON --------------------------------------------------------------
+
+# Complete
+jsonlite::write_json(site_objs, path = output_json, pretty = FALSE, auto_unbox = TRUE)
+# Example: first three sites, pretty-printed. head() avoids NULL padding when
+# fewer than three sites exist; the anchored pattern only touches the extension.
+jsonlite::write_json(
+  utils::head(site_objs, 3),
+  path = sub("\\.json$", "_3sites.json", output_json),
+  pretty = TRUE, auto_unbox = TRUE
+)
+# When dealing with full dataset, may need to write to more performant files
+# #Sample
+# jsonlite::write_json(site_objs[1:100], path = sample_output_json, pretty = TRUE, auto_unbox = TRUE)
+
+# Complete - compressed
+output_json_gz <- paste0(output_json, ".gz")
+gz_con <- gzfile(output_json_gz, "w")
+jsonlite::write_json(site_objs, path = gz_con, pretty = FALSE, auto_unbox = TRUE)
+close(gz_con)
+
+# --- Profiling End ---
+# Rprof(NULL)
+# summaryRprof("profiling.out")
+# --- End Profiling End ---