From af486464561448df6fd4f6ce412004695860defb Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Tue, 9 Sep 2025 17:58:26 -0400 Subject: [PATCH 01/12] minimal example events.json for california design points --- .../data.land/data/events_schema_v0.1.0.json | 71 +++++++ modules/data.land/inst/generate_events.R | 187 ++++++++++++++++++ 2 files changed, 258 insertions(+) create mode 100644 modules/data.land/data/events_schema_v0.1.0.json create mode 100644 modules/data.land/inst/generate_events.R diff --git a/modules/data.land/data/events_schema_v0.1.0.json b/modules/data.land/data/events_schema_v0.1.0.json new file mode 100644 index 00000000000..bdaa9390cf9 --- /dev/null +++ b/modules/data.land/data/events_schema_v0.1.0.json @@ -0,0 +1,71 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://pecanproject.org/schema/events-mvp-0-1-0.json", + "type": "object", + "required": ["pecan_events_version", "site_id", "events"], + "properties": { + "pecan_events_version": { "type": "string", "const": "0.1.0" }, + "site_id": { "type": "string", "minLength": 1 }, + "ensemble_id": { "type": ["integer", "null"], "minimum": 0 }, + "geometry_uri": { "type": ["string", "null"], "format": "uri" }, + "provenance": { "type": "object", "additionalProperties": true }, + "events": { + "type": "array", + "items": { + "type": "object", + "required": ["event_type", "date", "site_id"], + "properties": { + "event_type": { + "type": "string", + "enum": ["planting", "harvest", "irrigation", "fertilization", "tillage"] + }, + "date": { "type": "string", "pattern": "^\\d{4}-\\d{2}-\\d{2}$" }, + "site_id": { "type": "string" }, + "fraction_area": { "type": "number", "minimum": 0, "maximum": 1, "default": 1.0 }, + "source": { "type": "string" }, + + "leaf_c_g_m2": { "type": "number", "minimum": 0 }, + "wood_c_g_m2": { "type": "number", "minimum": 0 }, + "fine_root_c_g_m2": { "type": "number", "minimum": 0 }, + "coarse_root_c_g_m2": { "type": "number", "minimum": 0 }, + 
"cultivar": { "type": "string" }, + "crop_code": { "type": "string" }, + "crop_display": { "type": "string" }, + + "frac_above_removed_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + "frac_below_removed_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + "frac_above_to_litter_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + "frac_below_to_litter_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + + "amount_cm": { "type": "number", "minimum": 0 }, + "method": { "type": "string", "enum": ["soil", "canopy", "flood"] }, + "immed_evap_frac_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + + "org_c_g_m2": { "type": "number", "minimum": 0 }, + "org_n_g_m2": { "type": "number", "minimum": 0 }, + "min_n_g_m2": { "type": "number", "minimum": 0 }, + "fertilizer_code": { "type": "string" }, + "fertilizer_display": { "type": "string" }, + + "tillage_eff_0to1": { "type": "number", "minimum": 0 }, + "intensity_category": { "type": "string" }, + "depth_m": { "type": "number", "minimum": 0 } + }, + "allOf": [ + { "if": { "properties": { "event_type": { "const": "planting" } } }, + "then": { "required": ["leaf_c_g_m2"] } }, + { "if": { "properties": { "event_type": { "const": "harvest" } } }, + "then": { "required": ["frac_above_removed_0to1"] } }, + { "if": { "properties": { "event_type": { "const": "irrigation" } } }, + "then": { "required": ["amount_cm", "method"] } }, + { "if": { "properties": { "event_type": { "const": "fertilization" } } }, + "then": { "required": ["org_c_g_m2"] } }, + { "if": { "properties": { "event_type": { "const": "tillage" } } }, + "then": { "required": ["tillage_eff_0to1"] } } + ], + "additionalProperties": true + } + } + }, + "additionalProperties": false +} diff --git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R new file mode 100644 index 00000000000..085e06f33d3 --- /dev/null +++ b/modules/data.land/inst/generate_events.R @@ -0,0 +1,187 @@ +#!/usr/bin/env Rscript + 
+# --- Profiling Start --- +# Rprof("profiling.out") +# --- End Profiling Start --- + +# Minimal MVP: build mvp_events.json from ca_field_attributes.csv +# - Input: data/ca_field_attributes.csv (columns: site_id, year, pft, ...) +# - Output: data/mvp_events.json following data/pecan_events_schema_v0.1.0.json +# - Events (minimal): +# * planting: annual crops -> every site-year; woody perennials -> first observed year only +# * harvest: all site-years +# Each event includes only the schema-required fields per event_type. + +# --- Config --- +data_dir <- "/projectnb2/dietzelab/ccmmf/data" +input_csv <- file.path(data_dir, "ca_field_attributes.csv") +sample_output_json <- file.path(data_dir, "events/mvp_events.json") +output_json <- file.path(data_dir, "events/events.json") + +# if TRUE, only generate for design points +# TODO: generate full set for all sites to use in site selection and downscaling +DESIGN_POINTS <- TRUE + +PRODUCTION <- FALSE # set TRUE for all sites, not needed if DESIGN_POINTS is TRUE +if (PRODUCTION) { + stop("This could be very slow; consider profiling and writing to db or arrow etc") +} +set.seed(123) + +ca_field_attributes <- vroom::vroom( + input_csv +) + +if (DESIGN_POINTS) { + design_points <- readr::read_csv("../downscaling/data/design_points.csv") + ca_field_attributes <- ca_field_attributes |> + dplyr::filter(site_id %in% design_points$site_id) +} else if (!PRODUCTION) { + ca_field_attributes <- ca_field_attributes |> + dplyr::slice_sample(n = 1000) +} + +ca_fields <- ca_field_attributes |> + dplyr::select(site_id, year, pft) |> + dplyr::distinct() |> + dplyr::group_by(site_id) |> + dplyr::mutate(first_year = min(year)) |> + dplyr::ungroup() + +# Planting (annuals) +planting_annual <- ca_fields |> + dplyr::filter(pft == "annual crop") |> + dplyr::transmute( + event_type = "planting", + date = paste0(year, "-03-15"), + site_id = site_id, + # required for planting + leaf_c_g_m2 = 50 + ) + +# Planting (woody): first year +planting_woody <- 
ca_fields |> + dplyr::filter(pft == "woody perennial crop") |> + dplyr::filter(year == first_year) |> + dplyr::transmute( + event_type = "planting", + date = paste0(year, "-03-15"), + site_id = site_id, + leaf_c_g_m2 = 200 + ) + +# Harvest +harvest <- ca_fields |> + dplyr::transmute( + event_type = "harvest", + date = paste0(year, "-10-15"), + site_id = site_id, + frac_above_removed_0to1 = 0.10 + ) + +# Pruning (woody) +pruning <- ca_fields |> + dplyr::filter(pft == "woody perennial crop") |> + dplyr::mutate(offset = year - first_year) |> + dplyr::filter(offset %% 4 == 1) |> + dplyr::transmute( + event_type = "harvest", + date = paste0(year, "-12-15"), + site_id = site_id, + frac_above_removed_0to1 = 0.30, + frac_below_removed_0to1 = 0.0, + frac_above_to_litter_0to1 = 0.0, + frac_below_to_litter_0to1 = 0.0 + ) + +# Tillage +tillage <- ca_fields |> + dplyr::filter(pft == "annual crop") |> + tidyr::crossing(till_suffix = c("-03-01", "-11-01")) |> + dplyr::transmute( + event_type = "tillage", + date = paste0(year, till_suffix), + site_id = site_id, + tillage_eff_0to1 = 0.10 + ) + +# Irrigation (both pfts): 3 per month for all months +months <- sprintf("%02d", 1:12) +days <- c("05", "15", "25") + +irrigation <- ca_fields |> + tidyr::crossing(month = months, day = days) |> + dplyr::transmute( + event_type = "irrigation", + date = paste0(year, "-", month, "-", day), + site_id = site_id, + amount_cm = 4, # (= 40 mm) + method = "soil" + ) + +# Combine and order by site/date +events_all <- dplyr::bind_rows( + planting_annual, planting_woody, + harvest, pruning, + tillage, irrigation +) |> + dplyr::arrange(site_id, date) + +# --- Build site objects per schema ------------------------------------------ +# Helper: drop NULL/NA fields from a named list +compact_list <- function(x) { + Filter(function(v) !(is.null(v) || (length(v) == 1 && is.atomic(v) && is.na(v))), x) +} + +sites <- unique(events_all$site_id) + +site_objs <- purrr::map(sites, function(sid) { + evs_df <- 
events_all |> + dplyr::filter(site_id == sid) |> + dplyr::arrange(date) + + # Only include required fields for each event type + evs_list <- purrr::pmap( + evs_df[, c("event_type", "date", "site_id", "leaf_c_g_m2", "frac_above_removed_0to1")], + function(event_type, date, site_id, leaf_c_g_m2 = NA_real_, frac_above_removed_0to1 = NA_real_) { + base <- list(event_type = event_type, date = date, site_id = site_id) + if (event_type == "planting" && !is.na(leaf_c_g_m2)) base$leaf_c_g_m2 <- leaf_c_g_m2 + if (event_type == "harvest" && !is.na(frac_above_removed_0to1)) base$frac_above_removed_0to1 <- frac_above_removed_0to1 + compact_list(base) + } + ) + + list( + pecan_events_version = "0.1.0", + site_id = sid, + events = evs_list + ) +}) + +# Validate JSON given schema +# schema <- "data/pecan_events_schema_v0.1.0.json" +# validator <- jsonvalidate::json_validator(schema) +# json_txt_temp <- jsonlite::toJSON(site_objs, auto_unbox = TRUE) +# if (!validator(json_txt_temp)) { +# stop("JSON does not match schema") +# } + +# --- Write JSON -------------------------------------------------------------- + +# Complete +jsonlite::write_json(site_objs, path = output_json, pretty = FALSE, auto_unbox = TRUE) + +# When dealing with full dataset, may need to write to more performant files +# #Sample +# jsonlite::write_json(site_objs[1:100], path = sample_output_json, pretty = TRUE, auto_unbox = TRUE) + +# # Complete - compressed +# output_json_gz <- paste0(output_json, ".gz") +# gz_con <- gzfile(output_json_gz, "w") +# jsonlite::write_json(site_objs, path = gz_con, pretty = FALSE, auto_unbox = TRUE) +# close(gz_con) + +# --- Profiling End --- +# Rprof(NULL) +# summaryRprof("profiling.out") +# --- End Profiling End --- From 250d4d530b460dbb3c14bf8cd7e09016f64019ea Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Tue, 9 Sep 2025 18:57:24 -0400 Subject: [PATCH 02/12] updated events schema and script --- .../data.land/data/events_schema_v0.1.0.json | 21 ++++--- 
modules/data.land/inst/generate_events.R | 55 +++++++++++++------ 2 files changed, 49 insertions(+), 27 deletions(-) diff --git a/modules/data.land/data/events_schema_v0.1.0.json b/modules/data.land/data/events_schema_v0.1.0.json index bdaa9390cf9..8a080ec2b91 100644 --- a/modules/data.land/data/events_schema_v0.1.0.json +++ b/modules/data.land/data/events_schema_v0.1.0.json @@ -13,21 +13,20 @@ "type": "array", "items": { "type": "object", - "required": ["event_type", "date", "site_id"], + "required": ["event_type", "date"], "properties": { "event_type": { "type": "string", "enum": ["planting", "harvest", "irrigation", "fertilization", "tillage"] }, "date": { "type": "string", "pattern": "^\\d{4}-\\d{2}-\\d{2}$" }, - "site_id": { "type": "string" }, "fraction_area": { "type": "number", "minimum": 0, "maximum": 1, "default": 1.0 }, "source": { "type": "string" }, - "leaf_c_g_m2": { "type": "number", "minimum": 0 }, - "wood_c_g_m2": { "type": "number", "minimum": 0 }, - "fine_root_c_g_m2": { "type": "number", "minimum": 0 }, - "coarse_root_c_g_m2": { "type": "number", "minimum": 0 }, + "leaf_c_kg_m2": { "type": "number", "minimum": 0 }, + "wood_c_kg_m2": { "type": "number", "minimum": 0 }, + "fine_root_c_kg_m2": { "type": "number", "minimum": 0 }, + "coarse_root_c_kg_m2": { "type": "number", "minimum": 0 }, "cultivar": { "type": "string" }, "crop_code": { "type": "string" }, "crop_display": { "type": "string" }, @@ -37,11 +36,11 @@ "frac_above_to_litter_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, "frac_below_to_litter_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, - "amount_cm": { "type": "number", "minimum": 0 }, + "amount_mm": { "type": "number", "minimum": 0 }, "method": { "type": "string", "enum": ["soil", "canopy", "flood"] }, "immed_evap_frac_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, - "org_c_g_m2": { "type": "number", "minimum": 0 }, + "org_c_kg_m2": { "type": "number", "minimum": 0 }, "org_n_g_m2": { "type": "number", 
"minimum": 0 }, "min_n_g_m2": { "type": "number", "minimum": 0 }, "fertilizer_code": { "type": "string" }, @@ -53,13 +52,13 @@ }, "allOf": [ { "if": { "properties": { "event_type": { "const": "planting" } } }, - "then": { "required": ["leaf_c_g_m2"] } }, + "then": { "required": ["leaf_c_kg_m2"] } }, { "if": { "properties": { "event_type": { "const": "harvest" } } }, "then": { "required": ["frac_above_removed_0to1"] } }, { "if": { "properties": { "event_type": { "const": "irrigation" } } }, - "then": { "required": ["amount_cm", "method"] } }, + "then": { "required": ["amount_mm", "method"] } }, { "if": { "properties": { "event_type": { "const": "fertilization" } } }, - "then": { "required": ["org_c_g_m2"] } }, + "then": { "required": ["org_c_kg_m2"] } }, { "if": { "properties": { "event_type": { "const": "tillage" } } }, "then": { "required": ["tillage_eff_0to1"] } } ], diff --git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R index 085e06f33d3..8cb191849cf 100644 --- a/modules/data.land/inst/generate_events.R +++ b/modules/data.land/inst/generate_events.R @@ -33,7 +33,7 @@ ca_field_attributes <- vroom::vroom( ) if (DESIGN_POINTS) { - design_points <- readr::read_csv("../downscaling/data/design_points.csv") + design_points <- readr::read_csv("~/downscaling/data/design_points.csv") ca_field_attributes <- ca_field_attributes |> dplyr::filter(site_id %in% design_points$site_id) } else if (!PRODUCTION) { @@ -42,8 +42,9 @@ if (DESIGN_POINTS) { } ca_fields <- ca_field_attributes |> - dplyr::select(site_id, year, pft) |> + dplyr::select(site_id, pft) |> dplyr::distinct() |> + tidyr::crossing(year = 2016:2024) |> dplyr::group_by(site_id) |> dplyr::mutate(first_year = min(year)) |> dplyr::ungroup() @@ -56,7 +57,7 @@ planting_annual <- ca_fields |> date = paste0(year, "-03-15"), site_id = site_id, # required for planting - leaf_c_g_m2 = 50 + leaf_c_kg_m2 = 0.5 ) # Planting (woody): first year @@ -67,7 +68,7 @@ planting_woody <- ca_fields 
|> event_type = "planting", date = paste0(year, "-03-15"), site_id = site_id, - leaf_c_g_m2 = 200 + leaf_c_kg_m2 = 0.5 ) # Harvest @@ -115,7 +116,7 @@ irrigation <- ca_fields |> event_type = "irrigation", date = paste0(year, "-", month, "-", day), site_id = site_id, - amount_cm = 4, # (= 40 mm) + amount_mm = 40, method = "soil" ) @@ -142,15 +143,34 @@ site_objs <- purrr::map(sites, function(sid) { # Only include required fields for each event type evs_list <- purrr::pmap( - evs_df[, c("event_type", "date", "site_id", "leaf_c_g_m2", "frac_above_removed_0to1")], - function(event_type, date, site_id, leaf_c_g_m2 = NA_real_, frac_above_removed_0to1 = NA_real_) { - base <- list(event_type = event_type, date = date, site_id = site_id) - if (event_type == "planting" && !is.na(leaf_c_g_m2)) base$leaf_c_g_m2 <- leaf_c_g_m2 - if (event_type == "harvest" && !is.na(frac_above_removed_0to1)) base$frac_above_removed_0to1 <- frac_above_removed_0to1 + evs_df, + function(event_type, date, site_id, leaf_c_kg_m2 = NA_real_, frac_above_removed_0to1 = NA_real_, + frac_below_removed_0to1 = NA_real_, frac_above_to_litter_0to1 = NA_real_, + frac_below_to_litter_0to1 = NA_real_, amount_mm = NA_real_, method = NA_character_, + tillage_eff_0to1 = NA_real_, ...) 
{ + base <- list(event_type = event_type, date = date) + + # Add required fields per event type + if (event_type == "planting" && !is.na(leaf_c_kg_m2)) { + base$leaf_c_kg_m2 <- leaf_c_kg_m2 + } + if (event_type == "harvest" && !is.na(frac_above_removed_0to1)) { + base$frac_above_removed_0to1 <- frac_above_removed_0to1 + if (!is.na(frac_below_removed_0to1)) base$frac_below_removed_0to1 <- frac_below_removed_0to1 + if (!is.na(frac_above_to_litter_0to1)) base$frac_above_to_litter_0to1 <- frac_above_to_litter_0to1 + if (!is.na(frac_below_to_litter_0to1)) base$frac_below_to_litter_0to1 <- frac_below_to_litter_0to1 + } + if (event_type == "irrigation" && !is.na(amount_mm) && !is.na(method)) { + base$amount_mm <- amount_mm + base$method <- method + } + if (event_type == "tillage" && !is.na(tillage_eff_0to1)) { + base$tillage_eff_0to1 <- tillage_eff_0to1 + } + compact_list(base) } ) - list( pecan_events_version = "0.1.0", site_id = sid, @@ -158,6 +178,8 @@ site_objs <- purrr::map(sites, function(sid) { ) }) +# TODO add PEcAn Schema info + # Validate JSON given schema # schema <- "data/pecan_events_schema_v0.1.0.json" # validator <- jsonvalidate::json_validator(schema) @@ -170,16 +192,17 @@ site_objs <- purrr::map(sites, function(sid) { # Complete jsonlite::write_json(site_objs, path = output_json, pretty = FALSE, auto_unbox = TRUE) - +# Single site example +jsonlite::write_json(site_objs[1], path = gsub(".json", "_site1.json", output_json), pretty = TRUE, auto_unbox = TRUE) # When dealing with full dataset, may need to write to more performant files # #Sample # jsonlite::write_json(site_objs[1:100], path = sample_output_json, pretty = TRUE, auto_unbox = TRUE) # # Complete - compressed -# output_json_gz <- paste0(output_json, ".gz") -# gz_con <- gzfile(output_json_gz, "w") -# jsonlite::write_json(site_objs, path = gz_con, pretty = FALSE, auto_unbox = TRUE) -# close(gz_con) +output_json_gz <- paste0(output_json, ".gz") +gz_con <- gzfile(output_json_gz, "w") 
+jsonlite::write_json(site_objs, path = gz_con, pretty = FALSE, auto_unbox = TRUE) +close(gz_con) # --- Profiling End --- # Rprof(NULL) From bdec1f8206633769b5b3043f7124c8f9c5c1c11d Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Tue, 9 Sep 2025 23:55:56 -0400 Subject: [PATCH 03/12] add crop to planting & harvest, add fert and om events add nh4 and no3 to fertilization --- .../data.land/data/events_schema_v0.1.0.json | 13 ++-- modules/data.land/inst/generate_events.R | 60 ++++++++++++++++--- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/modules/data.land/data/events_schema_v0.1.0.json b/modules/data.land/data/events_schema_v0.1.0.json index 8a080ec2b91..a41af5f4928 100644 --- a/modules/data.land/data/events_schema_v0.1.0.json +++ b/modules/data.land/data/events_schema_v0.1.0.json @@ -41,10 +41,9 @@ "immed_evap_frac_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, "org_c_kg_m2": { "type": "number", "minimum": 0 }, - "org_n_g_m2": { "type": "number", "minimum": 0 }, - "min_n_g_m2": { "type": "number", "minimum": 0 }, - "fertilizer_code": { "type": "string" }, - "fertilizer_display": { "type": "string" }, + "org_n_kg_m2": { "type": "number", "minimum": 0 }, + "nh4_n_kg_m2": { "type": "number", "minimum": 0 }, + "no3_n_kg_m2": { "type": "number", "minimum": 0 }, "tillage_eff_0to1": { "type": "number", "minimum": 0 }, "intensity_category": { "type": "string" }, @@ -58,7 +57,11 @@ { "if": { "properties": { "event_type": { "const": "irrigation" } } }, "then": { "required": ["amount_mm", "method"] } }, { "if": { "properties": { "event_type": { "const": "fertilization" } } }, - "then": { "required": ["org_c_kg_m2"] } }, + "then": { "anyOf": [ + { "required": ["org_c_kg_m2"] }, + { "required": ["nh4_n_kg_m2"] }, + { "required": ["no3_n_kg_m2"] } + ] } }, { "if": { "properties": { "event_type": { "const": "tillage" } } }, "then": { "required": ["tillage_eff_0to1"] } } ], diff --git a/modules/data.land/inst/generate_events.R 
b/modules/data.land/inst/generate_events.R index 8cb191849cf..0bec65b4730 100644 --- a/modules/data.land/inst/generate_events.R +++ b/modules/data.land/inst/generate_events.R @@ -29,10 +29,17 @@ if (PRODUCTION) { set.seed(123) ca_field_attributes <- vroom::vroom( - input_csv + input_csv, + show_col_types = FALSE ) if (DESIGN_POINTS) { + # design_points <- readr::read_csv("https://raw.githubusercontent.com/ccmmf/workflows/refs/heads/main/data/design_points.csv") + # d <- update_design_point_site_ids(design_points, ca_field_attributes) + # readr::write_csv(d, file.path(data_dir, "design_points.csv")) + # readr::write_csv(d, "~/downscaling/data/design_points.csv") + # design_points <- readr::read_csv(file.path(data_dir, "design_points.csv")) + # use the one under version control design_points <- readr::read_csv("~/downscaling/data/design_points.csv") ca_field_attributes <- ca_field_attributes |> dplyr::filter(site_id %in% design_points$site_id) @@ -42,7 +49,7 @@ if (DESIGN_POINTS) { } ca_fields <- ca_field_attributes |> - dplyr::select(site_id, pft) |> + dplyr::select(site_id, pft, crop) |> dplyr::distinct() |> tidyr::crossing(year = 2016:2024) |> dplyr::group_by(site_id) |> @@ -57,7 +64,32 @@ planting_annual <- ca_fields |> date = paste0(year, "-03-15"), site_id = site_id, # required for planting - leaf_c_kg_m2 = 0.5 + leaf_c_kg_m2 = 500, + crop = crop + ) + +# Fertilization +fertilization <- ca_fields |> + dplyr::transmute( + event_type = "fertilization", + date = paste0(year, "-02-11"), + site_id = site_id, + org_n_kg_m2 = 0.0, + org_c_kg_m2 = 0.0, + nh4_n_kg_m2 = 0.02, + no3_n_kg_m2 = 0.03 + ) + +# Organic Matter Addition +organic_matter_addition <- ca_fields |> + dplyr::transmute( + event_type = "fertilization", + date = paste0(year, "-03-11"), + site_id = site_id, + org_n_kg_m2 = 0.05, + org_c_kg_m2 = 0.5, + nh4_n_kg_m2 = 0.0, + no3_n_kg_m2 = 0.0 ) # Planting (woody): first year @@ -68,7 +100,8 @@ planting_woody <- ca_fields |> event_type = "planting", date = 
paste0(year, "-03-15"), site_id = site_id, - leaf_c_kg_m2 = 0.5 + leaf_c_kg_m2 = 1, + crop = crop ) # Harvest @@ -77,7 +110,8 @@ harvest <- ca_fields |> event_type = "harvest", date = paste0(year, "-10-15"), site_id = site_id, - frac_above_removed_0to1 = 0.10 + frac_above_removed_0to1 = 0.10, + crop = crop ) # Pruning (woody) @@ -92,7 +126,8 @@ pruning <- ca_fields |> frac_above_removed_0to1 = 0.30, frac_below_removed_0to1 = 0.0, frac_above_to_litter_0to1 = 0.0, - frac_below_to_litter_0to1 = 0.0 + frac_below_to_litter_0to1 = 0.0, + crop = crop ) # Tillage @@ -124,7 +159,8 @@ irrigation <- ca_fields |> events_all <- dplyr::bind_rows( planting_annual, planting_woody, harvest, pruning, - tillage, irrigation + tillage, irrigation, + fertilization, organic_matter_addition ) |> dplyr::arrange(site_id, date) @@ -147,18 +183,22 @@ site_objs <- purrr::map(sites, function(sid) { function(event_type, date, site_id, leaf_c_kg_m2 = NA_real_, frac_above_removed_0to1 = NA_real_, frac_below_removed_0to1 = NA_real_, frac_above_to_litter_0to1 = NA_real_, frac_below_to_litter_0to1 = NA_real_, amount_mm = NA_real_, method = NA_character_, - tillage_eff_0to1 = NA_real_, ...) { + tillage_eff_0to1 = NA_real_, org_c_kg_m2 = NA_real_, org_n_kg_m2 = NA_real_, + nh4_n_kg_m2 = NA_real_, no3_n_kg_m2 = NA_real_, + crop = NA_character_, ...) 
{ base <- list(event_type = event_type, date = date) # Add required fields per event type if (event_type == "planting" && !is.na(leaf_c_kg_m2)) { base$leaf_c_kg_m2 <- leaf_c_kg_m2 + if (!is.na(crop)) base$crop <- crop } if (event_type == "harvest" && !is.na(frac_above_removed_0to1)) { base$frac_above_removed_0to1 <- frac_above_removed_0to1 if (!is.na(frac_below_removed_0to1)) base$frac_below_removed_0to1 <- frac_below_removed_0to1 if (!is.na(frac_above_to_litter_0to1)) base$frac_above_to_litter_0to1 <- frac_above_to_litter_0to1 if (!is.na(frac_below_to_litter_0to1)) base$frac_below_to_litter_0to1 <- frac_below_to_litter_0to1 + if (!is.na(crop)) base$crop <- crop } if (event_type == "irrigation" && !is.na(amount_mm) && !is.na(method)) { base$amount_mm <- amount_mm @@ -167,6 +207,10 @@ site_objs <- purrr::map(sites, function(sid) { if (event_type == "tillage" && !is.na(tillage_eff_0to1)) { base$tillage_eff_0to1 <- tillage_eff_0to1 } + if (event_type == "fertilization") { + base <- c(base, list(org_c_kg_m2 = org_c_kg_m2, org_n_kg_m2 = org_n_kg_m2, + nh4_n_kg_m2 = nh4_n_kg_m2, no3_n_kg_m2 = no3_n_kg_m2)) + } compact_list(base) } From 2ed545242d8ed5de421b42a416e8301334a89e7f Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Wed, 10 Sep 2025 17:04:02 -0400 Subject: [PATCH 04/12] fix magnitudes after conversion from g-->kg --- modules/data.land/inst/generate_events.R | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R index 0bec65b4730..213907d0b33 100644 --- a/modules/data.land/inst/generate_events.R +++ b/modules/data.land/inst/generate_events.R @@ -64,7 +64,19 @@ planting_annual <- ca_fields |> date = paste0(year, "-03-15"), site_id = site_id, # required for planting - leaf_c_kg_m2 = 500, + leaf_c_kg_m2 = 0.05, + crop = crop + ) + +# Planting (woody): first year +planting_woody <- ca_fields |> + dplyr::filter(pft == "woody perennial crop") |> + dplyr::filter(year
== first_year) |> + dplyr::transmute( + event_type = "planting", + date = paste0(year, "-03-15"), + site_id = site_id, + leaf_c_kg_m2 = 0.2, crop = crop ) @@ -92,18 +104,6 @@ organic_matter_addition <- ca_fields |> no3_n_kg_m2 = 0.0 ) -# Planting (woody): first year -planting_woody <- ca_fields |> - dplyr::filter(pft == "woody perennial crop") |> - dplyr::filter(year == first_year) |> - dplyr::transmute( - event_type = "planting", - date = paste0(year, "-03-15"), - site_id = site_id, - leaf_c_kg_m2 = 1, - crop = crop - ) - # Harvest harvest <- ca_fields |> dplyr::transmute( From cde85b21e4fb7c213c4b691ad0176426c55b90f8 Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Wed, 10 Sep 2025 17:04:02 -0400 Subject: [PATCH 05/12] fix magnitudes after conversion from g-->kg write out three sample sites --- modules/data.land/inst/generate_events.R | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) mode change 100644 => 100755 modules/data.land/inst/generate_events.R diff --git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R old mode 100644 new mode 100755 index 0bec65b4730..7d8606342b4 --- a/modules/data.land/inst/generate_events.R +++ b/modules/data.land/inst/generate_events.R @@ -64,7 +64,19 @@ planting_annual <- ca_fields |> date = paste0(year, "-03-15"), site_id = site_id, # required for planting - leaf_c_kg_m2 = 500, + leaf_c_kg_m2 = 0.05, + crop = crop + ) + +# Planting (woody): first year +planting_woody <- ca_fields |> + dplyr::filter(pft == "woody perennial crop") |> + dplyr::filter(year == first_year) |> + dplyr::transmute( + event_type = "planting", + date = paste0(year, "-03-15"), + site_id = site_id, + leaf_c_kg_m2 = 0.2, crop = crop ) @@ -92,18 +104,6 @@ organic_matter_addition <- ca_fields |> no3_n_kg_m2 = 0.0 ) -# Planting (woody): first year -planting_woody <- ca_fields |> - dplyr::filter(pft == "woody perennial crop") |> - dplyr::filter(year == first_year) |> - dplyr::transmute( - 
event_type = "planting", - date = paste0(year, "-03-15"), - site_id = site_id, - leaf_c_kg_m2 = 1, - crop = crop - ) - # Harvest harvest <- ca_fields |> dplyr::transmute( @@ -237,7 +237,7 @@ site_objs <- purrr::map(sites, function(sid) { # Complete jsonlite::write_json(site_objs, path = output_json, pretty = FALSE, auto_unbox = TRUE) # Single site example -jsonlite::write_json(site_objs[1], path = gsub(".json", "_site1.json", output_json), pretty = TRUE, auto_unbox = TRUE) +jsonlite::write_json(utils::head(site_objs, 3), path = sub("\\.json$", "_3sites.json", output_json), pretty = TRUE, auto_unbox = TRUE) # When dealing with full dataset, may need to write to more performant files # #Sample # jsonlite::write_json(site_objs[1:100], path = sample_output_json, pretty = TRUE, auto_unbox = TRUE) From 52bb36f0097c57eee51b695bdc68995d0dbbaa3a Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Thu, 25 Sep 2025 09:38:26 -0700 Subject: [PATCH 06/12] Update modules/data.land/inst/generate_events.R Co-authored-by: Chris Black --- modules/data.land/inst/generate_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R index 7d8606342b4..14b68e7bcf0 100755 --- a/modules/data.land/inst/generate_events.R +++ b/modules/data.land/inst/generate_events.R @@ -14,7 +14,7 @@ # --- Config --- data_dir <- "/projectnb2/dietzelab/ccmmf/data" -input_csv <- file.path(data_dir, "ca_field_attributes.csv") +field_attr_csv <- file.path(data_dir, "ca_field_attributes.csv") sample_output_json <- file.path(data_dir, "events/mvp_events.json") output_json <- file.path(data_dir, "events/events.json") From dc220d818e1e87fd602531ba31748586e13e9c1f Mon Sep 17 00:00:00 2001 From: David LeBauer Date: Thu, 25 Sep 2025 10:39:20 -0700 Subject: [PATCH 07/12] Apply suggestion from @infotroph Co-authored-by: Chris Black --- modules/data.land/inst/generate_events.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff
--git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R index 14b68e7bcf0..efc1d6ecc66 100755 --- a/modules/data.land/inst/generate_events.R +++ b/modules/data.land/inst/generate_events.R @@ -29,7 +29,7 @@ if (PRODUCTION) { set.seed(123) ca_field_attributes <- vroom::vroom( - input_csv, +field_attr_csv, show_col_types = FALSE ) From 2048404103a4bd4ffc12a208dbc6d3ca255ef21c Mon Sep 17 00:00:00 2001 From: Chris Black Date: Tue, 30 Sep 2025 16:30:47 -0700 Subject: [PATCH 08/12] specify all required params for harvest events --- modules/data.land/inst/generate_events.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R index efc1d6ecc66..38470890db8 100755 --- a/modules/data.land/inst/generate_events.R +++ b/modules/data.land/inst/generate_events.R @@ -107,11 +107,14 @@ organic_matter_addition <- ca_fields |> # Harvest harvest <- ca_fields |> dplyr::transmute( - event_type = "harvest", - date = paste0(year, "-10-15"), - site_id = site_id, + event_type = "harvest", + date = paste0(year, "-10-15"), + site_id = site_id, frac_above_removed_0to1 = 0.10, - crop = crop + frac_below_removed_0to1 = 0.0, + frac_above_to_litter_0to1 = 0.0, + frac_below_to_litter_0to1 = 0.0, + crop = crop ) # Pruning (woody) From cf43e529264a9c7dd41e4f787ff4a733bcbc646d Mon Sep 17 00:00:00 2001 From: Chris Black Date: Tue, 30 Sep 2025 16:37:26 -0700 Subject: [PATCH 09/12] package checks complain about json in data/ --- modules/data.land/{data => inst}/events_schema_v0.1.0.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename modules/data.land/{data => inst}/events_schema_v0.1.0.json (100%) diff --git a/modules/data.land/data/events_schema_v0.1.0.json b/modules/data.land/inst/events_schema_v0.1.0.json similarity index 100% rename from modules/data.land/data/events_schema_v0.1.0.json rename to modules/data.land/inst/events_schema_v0.1.0.json 
From 6d32fe4d57090c13d6a142450f41df809666c959 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Tue, 30 Sep 2025 16:58:17 -0700 Subject: [PATCH 10/12] specify ensembleID as char not int --- modules/data.land/inst/events_schema_v0.1.0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data.land/inst/events_schema_v0.1.0.json b/modules/data.land/inst/events_schema_v0.1.0.json index a41af5f4928..21a853f8bc3 100644 --- a/modules/data.land/inst/events_schema_v0.1.0.json +++ b/modules/data.land/inst/events_schema_v0.1.0.json @@ -6,7 +6,7 @@ "properties": { "pecan_events_version": { "type": "string", "const": "0.1.0" }, "site_id": { "type": "string", "minLength": 1 }, - "ensemble_id": { "type": ["integer", "null"], "minimum": 0 }, + "ensemble_id": { "type": ["string", "null"], "minLength": 1 }, "geometry_uri": { "type": ["string", "null"], "format": "uri" }, "provenance": { "type": "object", "additionalProperties": true }, "events": { From 5a479d46ee4b5d18dca11277a3fe06135c02ae06 Mon Sep 17 00:00:00 2001 From: Chris Black Date: Tue, 30 Sep 2025 16:58:53 -0700 Subject: [PATCH 11/12] resolve check complaint about deprecation of .data in tidyselect context --- modules/data.land/R/look_up_fertilizer_components.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data.land/R/look_up_fertilizer_components.R b/modules/data.land/R/look_up_fertilizer_components.R index 0f07ac47d21..ed6a7818d50 100644 --- a/modules/data.land/R/look_up_fertilizer_components.R +++ b/modules/data.land/R/look_up_fertilizer_components.R @@ -104,7 +104,7 @@ look_up_fertilizer_components <- function( ) res <- fertilizer_info |> - dplyr::select(.data$name, .data$NO3_N, .data$NH4_N, .data$N_org, .data$C_org) |> - dplyr::rename(type = .data$name) |> + dplyr::select("name", "NO3_N", "NH4_N", "N_org", "C_org") |> + dplyr::rename(type = "name") |> as.list() return(res) From f311199018db23b5c2992c3eb8287fd8cc633efa Mon Sep 17 00:00:00 2001 From: Chris Black Date: Fri, 3 Oct 2025 15:35:34 -0700
Subject: [PATCH 12/12] Update modules/data.land/inst/generate_events.R --- modules/data.land/inst/generate_events.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/data.land/inst/generate_events.R b/modules/data.land/inst/generate_events.R index 38470890db8..f44f148821f 100755 --- a/modules/data.land/inst/generate_events.R +++ b/modules/data.land/inst/generate_events.R @@ -145,6 +145,7 @@ tillage <- ca_fields |> ) # Irrigation (both pfts): 3 per month for all months +# TODO: Should annual crops skip irrigation during fallow season? months <- sprintf("%02d", 1:12) days <- c("05", "15", "25")