From daeadc698254578e9a0296b0bbf104f3c0d75096 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sat, 11 Oct 2025 20:07:14 -0400 Subject: [PATCH 01/23] add table_id arg --- NAMESPACE | 1 + R/compare.R | 66 +++++++++++++++++++++++++++----- R/helpers.R | 12 +++--- R/slice-diffs.R | 2 +- R/slice-unmatched.R | 6 ++- R/value-diffs.R | 16 ++++---- R/versus.R | 2 +- R/weave-diffs.R | 9 +++-- man/compare.Rd | 11 +++++- tests/testthat/_snaps/compare.md | 48 +++++++++++------------ 10 files changed, 118 insertions(+), 55 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 71fdc37..56968cd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ import(rlang) importFrom(collapse,"%!=%") importFrom(collapse,add_vars) importFrom(collapse,frename) +importFrom(collapse,recode_char) importFrom(collapse,ss) importFrom(collapse,whichNA) importFrom(data.table,copy) diff --git a/R/compare.R b/R/compare.R index 8deeaa9..e73d7f9 100644 --- a/R/compare.R +++ b/R/compare.R @@ -9,7 +9,7 @@ #' @param table_a A data frame #' @param table_b A data frame #' @param by <[`tidy-select`][versus_tidy_select]>. Selection of columns to use when matching rows between -#' \code{.data_a} and \code{.data_b}. Both data frames must be unique on \code{by}. +#' \code{table_a} and \code{table_b}. Both data frames must be unique on \code{by}. #' @param allow_both_NA Logical. If \code{TRUE} a missing value in both data frames is #' considered as equal #' @param coerce Logical. If \code{FALSE} and columns from the input tables have @@ -58,16 +58,23 @@ #' @rdname compare #' @export -compare <- function(table_a, table_b, by, allow_both_NA = TRUE, coerce = TRUE) { +compare <- function( + table_a, + table_b, + by, + allow_both_NA = TRUE, + coerce = TRUE, + table_id = c("a", "b")) { check_required(by) by <- enquo(by) + table_id <- clean_table_id(table_id) table_chr <- names(enquos(table_a, table_b, .named = TRUE)) validate_tables(table_a, table_b, coerce = coerce) by_names <- get_by_names(table_a, table_b, by = by) table_summ <- tibble( - table = c("table_a", "table_b"), + table = table_id, expr = table_chr, nrow = c(nrow(table_a), nrow(table_b)), ncol = c(ncol(table_a), ncol(table_b)) @@ -101,15 +108,16 @@ compare <- function(table_a, table_b, by, allow_both_NA = TRUE, coerce = TRUE) { tbl_contents$compare <- tbl_contents$compare %>% mutate(n_diffs = map_int(diff_rows, nrow), .after = column) - out <- list( + list( tables = table_summ, by = tbl_contents$by, intersection = tbl_contents$compare, unmatched_cols = tbl_contents$unmatched_cols, unmatched_rows = unmatched_rows, - input = store_tables(table_a, table_b) - ) - structure(out, class = "vs_comparison") + input = store_tables(table_a, table_b, table_id) + ) %>% + apply_table_id(table_id) %>% + structure(class = "vs_comparison") } # Methods ----------- @@ -223,9 +231,9 @@ get_contents <- function(table_a, table_b, by) { out } -store_tables <- function(table_a, table_b) { +store_tables <- function(table_a, table_b, table_id) { env <- new_environment() - env$value <- list(a = table_a, b = table_b) + env$value <- list(table_a, table_b) %>% setNames(table_id) dt_copy <- getOption("versus.copy_data_table", default = FALSE) if (dt_copy) { env$value <- env$value %>% @@ -235,6 +243,22 @@ store_tables <- function(table_a, table_b) { env } +apply_table_id <- function(comparison, table_id, call = caller_env()) { + if (identical(table_id, c("a", "b"))) { + return(comparison) + } + comparison$tables$table <- table_id + names(comparison$by)[2:3] <- paste0("class_", table_id) + names(comparison$intersection)[3:4] <- paste0("class_", table_id) + comparison$intersection$diff_rows <- comparison$intersection$diff_rows %>% + lapply(frename, paste0("row_", table_id)) + comparison$unmatched_cols$table <- comparison$unmatched_cols$table %>% + recode_char(a = table_id[1], b = table_id[2]) + comparison$unmatched_rows$table <- comparison$unmatched_rows$table %>% + recode_char(a = table_id[1], b = table_id[2]) + comparison +} + # Error handling ------------- rethrow_match_relationship <- function(table_a, table_b, by) { @@ -257,6 +281,30 @@ rethrow_match_relationship <- function(table_a, table_b, by) { } } +clean_table_id <- function(table_id, call = caller_env()) { + if (identical(table_id, c("a", "b"))) { + return(table_id) + } + if (!is_character(table_id, n = 2)) { + message <- c( + "{.arg table_id} must be a string of length 2", + i = "{.arg table_id} is {.obj_type_friendly type} of length {length(table_id)}" + ) + cli_abort(message, call = call) + } + attributes(table_id) <- NULL + new <- vec_as_names(table_id, repair = "universal", quiet = TRUE) + old <- table_id %|% "" + is_changed <- new != old + if (!any(is_changed)) { + return(table_id) + } + bullets <- paste0("`", old[is_changed], "` -> `", new[is_changed], "`") + message <- c("{.arg table_id} has been adjusted", set_names(bullets, "*")) + cli_inform(message = message) + new +} + validate_tables <- function(table_a, table_b, coerce, call = caller_env()) { assert_data_frame(table_a, call = call) assert_data_frame(table_b, call = call) diff --git a/R/helpers.R b/R/helpers.R index 212e1b8..f7a46c8 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -2,19 +2,21 @@ fsubset <- function(x, i, j, check = FALSE) { ss(x, i, j, check = check) } -assert_table_is_a_or_b <- function(table, call = caller_env()) { +check_table_arg <- function(table, comparison, call = caller_env()) { if (identical(table, quo())) { cli_abort("`table` is absent but must be supplied.", call = call) } + table_id <- names(comparison$input$value) table_expr <- quo_squash(table) table_chr <- shorten(deparse(table_expr), 30) top_msg <- "Problem with argument `table = {table_chr}`" - if (!is_character(table_expr)) { - info <- '`table` must be a single character value: "a" or "b"' + a_or_b <- paste0('"', table_id[1], '" or "', table_id[2], '"') + if (!is_string(table_expr)) { + info <- paste0("`table` must be a single character value: ", a_or_b) cli_abort(c(top_msg, i = info), call = call) } - if (!(identical(table_expr, "a") | identical(table_expr, "b"))) { - info <- '`table` must be either "a" or "b"' + if (!(identical(table_expr, table_id[1]) || identical(table_expr, table_id[2]))) { + info <- paste0("`table` must be either ", a_or_b) cli_abort(c(top_msg, i = info), call = call) } } diff --git a/R/slice-diffs.R b/R/slice-diffs.R index 7bca4c1..8738ebc 100644 --- a/R/slice-diffs.R +++ b/R/slice-diffs.R @@ -20,7 +20,7 @@ #' @export slice_diffs <- function(comparison, table, column = everything()) { assert_is_comparison(enquo(comparison)) - assert_table_is_a_or_b(enquo(table)) + check_table_arg(enquo(table), comparison) slice_diffs_impl(comparison, table, enquo(column)) } diff --git a/R/slice-unmatched.R b/R/slice-unmatched.R index 6964df1..355cfa0 100644 --- a/R/slice-unmatched.R +++ b/R/slice-unmatched.R @@ -23,7 +23,7 @@ #' @export slice_unmatched <- function(comparison, table) { assert_is_comparison(enquo(comparison)) - assert_table_is_a_or_b(enquo(table)) + check_table_arg(enquo(table), comparison) slice_unmatched_impl(comparison, table) } @@ -31,10 +31,12 @@ slice_unmatched <- function(comparison, table) { #' @export slice_unmatched_both <- function(comparison) { assert_is_comparison(enquo(comparison)) + table_id <- names(comparison$input$value) out_cols <- with(comparison, c(by$column, intersection$column)) - c(a = "a", b = "b") %>% + table_id %>% + setNames(table_id) %>% map(slice_unmatched_impl, comparison = comparison, j = out_cols) %>% ensure_ptype_compatible() %>% bind_rows(.id = "table") diff --git a/R/value-diffs.R b/R/value-diffs.R index 8685bea..bea33df 100644 --- a/R/value-diffs.R +++ b/R/value-diffs.R @@ -29,16 +29,17 @@ value_diffs <- function(comparison, column) { column <- enquo(column) column_loc <- get_cols_from_comparison(comparison, column) assert_is_single_column(column_loc) + table_id <- names(comparison$input$value) diff_rows <- fsubset(comparison$intersection, column_loc, "diff_rows") %>% pluck(1, 1) col <- names(column_loc) - a <- comparison$input$value$a %>% - fsubset(diff_rows$row_a, col) %>% - rename("{col}_a" := !!sym(col)) - b <- comparison$input$value$b %>% - fsubset(diff_rows$row_b, c(col, comparison$by$column)) %>% - rename("{col}_b" := !!sym(col)) + a <- comparison$input$value[[1]] %>% + fsubset(diff_rows[[1]], col) %>% + rename("{col}_{table_id[1]}" := !!sym(col)) + b <- comparison$input$value[[2]] %>% + fsubset(diff_rows[[2]], c(col, comparison$by$column)) %>% + rename("{col}_{table_id[2]}" := !!sym(col)) tibble(a, b) } @@ -47,10 +48,11 @@ value_diffs <- function(comparison, column) { value_diffs_stacked <- function(comparison, column = everything()) { assert_is_comparison(enquo(comparison)) column <- enquo(column) + table_id <- names(comparison$input$value) get_value_diff_for_stack <- function(comparison, col_name) { value_diffs(comparison, all_of(col_name)) %>% - frename(c("val_a", "val_b"), cols = 1:2) %>% + frename(paste0("val_", table_id), cols = 1:2) %>% mutate(column = .env$col_name, .before = 1) } diff --git a/R/versus.R b/R/versus.R index 40aabce..799ff2b 100644 --- a/R/versus.R +++ b/R/versus.R @@ -14,6 +14,6 @@ #' @importFrom purrr map imap map_int map_lgl map2_lgl map_chr reduce map_if #' @importFrom purrr pmap pmap_lgl compose pluck #' @importFrom tibble tibble rownames_to_column enframe -#' @importFrom collapse ss add_vars frename whichNA %!=% +#' @importFrom collapse ss add_vars frename whichNA %!=% recode_char #' @importFrom data.table fcoalesce copy "_PACKAGE" diff --git a/R/weave-diffs.R b/R/weave-diffs.R index d5fca8a..38a4adc 100644 --- a/R/weave-diffs.R +++ b/R/weave-diffs.R @@ -42,15 +42,16 @@ weave_diffs_long <- function(comparison, column = everything()) { weave_diffs_wide <- function(comparison, column = everything()) { assert_is_comparison(enquo(comparison)) column <- enquo(column) + table_id <- names(comparison$input$value) out_cols <- with(comparison, c(by$column, intersection$column)) diff_cols <- names(identify_diff_cols(comparison, column)) - slice_a <- slice_diffs_impl(comparison, "a", column, j = out_cols) - slice_b <- slice_diffs_impl(comparison, "b", column, j = diff_cols) + slice_a <- slice_diffs_impl(comparison, table_id[1], column, j = out_cols) + slice_b <- slice_diffs_impl(comparison, table_id[2], column, j = diff_cols) reduce(.init = slice_a, diff_cols, \(x, col) { x %>% - mutate("{col}_b" := slice_b[[col]], .after = !!sym(col)) %>% - rename("{col}_a" := !!sym(col)) + mutate("{col}_{table_id[2]}" := slice_b[[col]], .after = !!sym(col)) %>% + rename("{col}_{table_id[1]}" := !!sym(col)) }) } diff --git a/man/compare.Rd b/man/compare.Rd index 79e4cae..a699844 100644 --- a/man/compare.Rd +++ b/man/compare.Rd @@ -4,7 +4,14 @@ \alias{compare} \title{Compare two data frames} \usage{ -compare(table_a, table_b, by, allow_both_NA = TRUE, coerce = TRUE) +compare( + table_a, + table_b, + by, + allow_both_NA = TRUE, + coerce = TRUE, + table_id = c("a", "b") +) } \arguments{ \item{table_a}{A data frame} @@ -12,7 +19,7 @@ compare(table_a, table_b, by, allow_both_NA = TRUE, coerce = TRUE) \item{table_b}{A data frame} \item{by}{<\code{\link[=versus_tidy_select]{tidy-select}}>. Selection of columns to use when matching rows between -\code{.data_a} and \code{.data_b}. Both data frames must be unique on \code{by}.} +\code{table_a} and \code{table_b}. Both data frames must be unique on \code{by}.} \item{allow_both_NA}{Logical. If \code{TRUE} a missing value in both data frames is considered as equal} diff --git a/tests/testthat/_snaps/compare.md b/tests/testthat/_snaps/compare.md index 54f3ef7..5c28bf0 100644 --- a/tests/testthat/_snaps/compare.md +++ b/tests/testthat/_snaps/compare.md @@ -183,10 +183,10 @@ Output $tables # A tibble: 2 x 4 - table expr nrow ncol - - 1 table_a test_df_a 11 13 - 2 table_b test_df_b 12 12 + table expr nrow ncol + + 1 a test_df_a 11 13 + 2 b test_df_b 12 12 $by # A tibble: 1 x 3 @@ -234,10 +234,10 @@ Output $tables # A tibble: 2 x 4 - table expr nrow ncol - - 1 table_a a 2 2 - 2 table_b b 2 2 + table expr nrow ncol + + 1 a a 2 2 + 2 b b 2 2 $by # A tibble: 1 x 3 @@ -272,10 +272,10 @@ Output $tables # A tibble: 2 x 4 - table expr nrow ncol - - 1 table_a a 4 1 - 2 table_b b 4 1 + table expr nrow ncol + + 1 a a 4 1 + 2 b b 4 1 $by # A tibble: 1 x 3 @@ -307,10 +307,10 @@ Output $tables # A tibble: 2 x 4 - table expr nrow ncol - - 1 table_a a 4 2 - 2 table_b b 4 2 + table expr nrow ncol + + 1 a a 4 2 + 2 b b 4 2 $by # A tibble: 1 x 3 @@ -345,10 +345,10 @@ Output $tables # A tibble: 2 x 4 - table expr nrow ncol - - 1 table_a a 2 1 - 2 table_b b 2 1 + table expr nrow ncol + + 1 a a 2 1 + 2 b b 2 1 $by # A tibble: 1 x 3 @@ -382,10 +382,10 @@ Output $tables # A tibble: 2 x 4 - table expr nrow ncol - - 1 table_a a 2 2 - 2 table_b b 2 2 + table expr nrow ncol + + 1 a a 2 2 + 2 b b 2 2 $by # A tibble: 1 x 3 From d7e8211d1b2e2e6127768ca362aa7220accb493a Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sat, 11 Oct 2025 20:39:06 -0400 Subject: [PATCH 02/23] apply at source --- R/compare.R | 58 ++++++++++++++++++++--------------------------- R/get-diff-rows.R | 4 ++-- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/R/compare.R b/R/compare.R index e73d7f9..cd1def5 100644 --- a/R/compare.R +++ b/R/compare.R @@ -80,7 +80,7 @@ compare <- function( ncol = c(ncol(table_a), ncol(table_b)) ) - tbl_contents <- get_contents(table_a, table_b, by = by_names) + tbl_contents <- get_contents(table_a, table_b, by = by_names, table_id = table_id) matches <- withCallingHandlers( locate_matches(table_a, table_b, by = by_names), @@ -94,7 +94,8 @@ compare <- function( table_a, table_b, by = by_names, - matches = matches + matches = matches, + table_id = table_id ) tbl_contents$compare$diff_rows <- tbl_contents$compare$column %>% @@ -102,7 +103,8 @@ compare <- function( table_a = table_a, table_b = table_b, matches = matches, - allow_both_NA = allow_both_NA + allow_both_NA = allow_both_NA, + table_id = table_id ) tbl_contents$compare <- tbl_contents$compare %>% @@ -116,7 +118,6 @@ compare <- function( unmatched_rows = unmatched_rows, input = store_tables(table_a, table_b, table_id) ) %>% - apply_table_id(table_id) %>% structure(class = "vs_comparison") } @@ -183,18 +184,18 @@ split_matches <- function(matches) { ) } -get_unmatched_rows <- function(table_a, table_b, by, matches) { - unmatched <- list( - a = fsubset(table_a, matches$a, by), - b = fsubset(table_b, matches$b, by) - ) - unmatched %>% +get_unmatched_rows <- function(table_a, table_b, by, matches, table_id) { + list( + fsubset(table_a, matches$a, by), + fsubset(table_b, matches$b, by) + ) %>% + setNames(table_id) %>% bind_rows(.id = "table") %>% mutate(row = with(matches, c(a, b))) %>% as_tibble() } -converge <- function(table_a, table_b, by, matches) { +converge <- function(table_a, table_b, by, matches, table_id) { common_cols <- setdiff(intersect(names(table_a), names(table_b)), by) by_a <- fsubset(table_a, matches$common$a, by) @@ -203,20 +204,25 @@ converge <- function(table_a, table_b, by, matches) { add_vars( by_a, - frename(common_a, \(nm) paste0(nm, "_a")), - frename(common_b, \(nm) paste0(nm, "_b")) + frename(common_a, \(nm) paste0(nm, "_", table_id[1])), + frename(common_b, \(nm) paste0(nm, "_", table_id[2])) ) } -join_split <- function(table_a, table_b, by) { +join_split <- function(table_a, table_b, by, table_id) { matches <- locate_matches(table_a, table_b, by) - intersection <- converge(table_a, table_b, by, matches) - unmatched_rows <- get_unmatched_rows(table_a, table_b, by, matches) + intersection <- converge(table_a, table_b, by, matches, table_id) + unmatched_rows <- get_unmatched_rows(table_a, table_b, by, matches, table_id) list(intersection = intersection, unmatched_rows = unmatched_rows) } -get_contents <- function(table_a, table_b, by) { - tbl_contents <- join_split(contents(table_a), contents(table_b), by = "column") +get_contents <- function(table_a, table_b, by, table_id) { + tbl_contents <- join_split( + contents(table_a), + contents(table_b), + by = "column", + table_id = table_id + ) out <- list() out$by <- tbl_contents$intersection %>% @@ -243,22 +249,6 @@ store_tables <- function(table_a, table_b, table_id) { env } -apply_table_id <- function(comparison, table_id, call = caller_env()) { - if (identical(table_id, c("a", "b"))) { - return(comparison) - } - comparison$tables$table <- table_id - names(comparison$by)[2:3] <- paste0("class_", table_id) - names(comparison$intersection)[3:4] <- paste0("class_", table_id) - comparison$intersection$diff_rows <- comparison$intersection$diff_rows %>% - lapply(frename, paste0("row_", table_id)) - comparison$unmatched_cols$table <- comparison$unmatched_cols$table %>% - recode_char(a = table_id[1], b = table_id[2]) - comparison$unmatched_rows$table <- comparison$unmatched_rows$table %>% - recode_char(a = table_id[1], b = table_id[2]) - comparison -} - # Error handling ------------- rethrow_match_relationship <- function(table_a, table_b, by) { diff --git a/R/get-diff-rows.R b/R/get-diff-rows.R index f2bfa39..325d093 100644 --- a/R/get-diff-rows.R +++ b/R/get-diff-rows.R @@ -1,9 +1,9 @@ -get_diff_rows <- function(col, table_a, table_b, matches, allow_both_NA) { +get_diff_rows <- function(col, table_a, table_b, matches, allow_both_NA, table_id) { col_a <- fsubset(table_a, matches$common$a, col)[[1]] col_b <- fsubset(table_b, matches$common$b, col)[[1]] matches$common %>% fsubset(not_equal(col_a, col_b, allow_both_NA)) %>% - frename(c("row_a", "row_b")) + frename(paste0("row_", table_id)) } not_equal <- function(col_a, col_b, allow_both_NA) { From d59ad93976a306c0cc3040b20cb472dff82096ea Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sat, 11 Oct 2025 20:40:41 -0400 Subject: [PATCH 03/23] remove unused import --- NAMESPACE | 1 - R/versus.R | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 56968cd..71fdc37 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,7 +16,6 @@ import(rlang) importFrom(collapse,"%!=%") importFrom(collapse,add_vars) importFrom(collapse,frename) -importFrom(collapse,recode_char) importFrom(collapse,ss) importFrom(collapse,whichNA) importFrom(data.table,copy) diff --git a/R/versus.R b/R/versus.R index 799ff2b..40aabce 100644 --- a/R/versus.R +++ b/R/versus.R @@ -14,6 +14,6 @@ #' @importFrom purrr map imap map_int map_lgl map2_lgl map_chr reduce map_if #' @importFrom purrr pmap pmap_lgl compose pluck #' @importFrom tibble tibble rownames_to_column enframe -#' @importFrom collapse ss add_vars frename whichNA %!=% recode_char +#' @importFrom collapse ss add_vars frename whichNA %!=% #' @importFrom data.table fcoalesce copy "_PACKAGE" From a5dc749c1c384a92a2eb6041bef07efe08b7b370 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:07:01 -0400 Subject: [PATCH 04/23] add NEWS entry --- NEWS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index 3be1aa4..72d9a22 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # versus (development version) +* New `table_id` argument in `compare()` allows custom table identifiers. + In prior versions, tables are identified as "a" and "b". You can now provide + meaningful names e.g. `compare(..., table_id = c("original", "updated"))`. + @elipousson + # versus 0.3.0 # versus 0.3 From a3c73e284bb10e42ca0f919d0973a4aa4b785242 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:12:23 -0400 Subject: [PATCH 05/23] clarify news item --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 72d9a22..86a6b70 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,8 +2,8 @@ * New `table_id` argument in `compare()` allows custom table identifiers. In prior versions, tables are identified as "a" and "b". You can now provide - meaningful names e.g. `compare(..., table_id = c("original", "updated"))`. - @elipousson + meaningful names e.g. `compare(..., table_id = c("original", "updated"))`, which + are reflected in the output of `compare()` and related functions. @elipousson # versus 0.3.0 From ed3e2357d4f57cf05c78f413fa4653954bd0af3e Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:18:07 -0400 Subject: [PATCH 06/23] document() --- R/compare.R | 3 +++ man/compare.Rd | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/R/compare.R b/R/compare.R index cd1def5..41420c7 100644 --- a/R/compare.R +++ b/R/compare.R @@ -14,6 +14,9 @@ #' considered as equal #' @param coerce Logical. If \code{FALSE} and columns from the input tables have #' differing classes, the function throws an error. +#' @param table_id A character vector of length 2 providing custom identifiers for +#' \code{table_a} and \code{table_b} respectively. These identifiers are used in the +#' output instead of the default "a" and "b". #' #' @return #' \describe{ diff --git a/man/compare.Rd b/man/compare.Rd index a699844..f86c8db 100644 --- a/man/compare.Rd +++ b/man/compare.Rd @@ -26,6 +26,10 @@ considered as equal} \item{coerce}{Logical. If \code{FALSE} and columns from the input tables have differing classes, the function throws an error.} + +\item{table_id}{A character vector of length 2 providing custom identifiers for +\code{table_a} and \code{table_b} respectively. These identifiers are used in the +output instead of the default "a" and "b".} } \value{ \describe{ From a193b4ce2183bcad5a84cfd4d0af6d6fd044b33a Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sat, 11 Oct 2025 23:06:16 -0400 Subject: [PATCH 07/23] show the right object type in table_id error --- R/compare.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/compare.R b/R/compare.R index 41420c7..c76d68f 100644 --- a/R/compare.R +++ b/R/compare.R @@ -275,13 +275,10 @@ rethrow_match_relationship <- function(table_a, table_b, by) { } clean_table_id <- function(table_id, call = caller_env()) { - if (identical(table_id, c("a", "b"))) { - return(table_id) - } if (!is_character(table_id, n = 2)) { message <- c( "{.arg table_id} must be a string of length 2", - i = "{.arg table_id} is {.obj_type_friendly type} of length {length(table_id)}" + i = "{.arg table_id} is {.obj_type_friendly {table_id}} of length {length(table_id)}" ) cli_abort(message, call = call) } From faffb9c4c06c4a15ddcc29fdeede809ae9fc5bdb Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 12 Oct 2025 09:15:00 -0400 Subject: [PATCH 08/23] add tests part 1 --- NEWS.md | 8 +- R/compare.R | 2 +- R/helpers.R | 5 +- R/slice-unmatched.R | 2 +- R/value-diffs.R | 4 +- R/weave-diffs.R | 2 +- tests/testthat/_snaps/compare.md | 129 +++++++++++++++++++++++ tests/testthat/_snaps/slice-diffs.md | 70 +++++++++++- tests/testthat/_snaps/slice-unmatched.md | 25 +++++ tests/testthat/test-compare.R | 20 ++++ tests/testthat/test-slice-diffs.R | 21 +++- tests/testthat/test-slice-unmatched.R | 15 +++ 12 files changed, 292 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 86a6b70..3332a1e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,16 @@ # versus (development version) * New `table_id` argument in `compare()` allows custom table identifiers. - In prior versions, tables are identified as "a" and "b". You can now provide + By default, outputs identify tables as "a" and "b". You can now provide meaningful names e.g. `compare(..., table_id = c("original", "updated"))`, which are reflected in the output of `compare()` and related functions. @elipousson +* In the output of `compare()` with no `table_id` specified, the table summary + `compare()$tables` uses "a" and "b" as identifiers for the `table` column rather + than "table_a" and "table_b". This is for consistency with the case when custom + ids are provided using the new `table_id` argument, so that `compare()$tables$table` + always matches the `table_id` argument. + # versus 0.3.0 # versus 0.3 diff --git a/R/compare.R b/R/compare.R index c76d68f..3f4b9b0 100644 --- a/R/compare.R +++ b/R/compare.R @@ -277,7 +277,7 @@ rethrow_match_relationship <- function(table_a, table_b, by) { clean_table_id <- function(table_id, call = caller_env()) { if (!is_character(table_id, n = 2)) { message <- c( - "{.arg table_id} must be a string of length 2", + "{.arg table_id} must be a character vector of length 2", i = "{.arg table_id} is {.obj_type_friendly {table_id}} of length {length(table_id)}" ) cli_abort(message, call = call) diff --git a/R/helpers.R b/R/helpers.R index f7a46c8..2870cb0 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -6,7 +6,7 @@ check_table_arg <- function(table, comparison, call = caller_env()) { if (identical(table, quo())) { cli_abort("`table` is absent but must be supplied.", call = call) } - table_id <- names(comparison$input$value) + table_id <- comparison$tables$table table_expr <- quo_squash(table) table_chr <- shorten(deparse(table_expr), 30) top_msg <- "Problem with argument `table = {table_chr}`" @@ -47,10 +47,9 @@ is_ptype_compatible <- function(...) { !incompatible } -table_init <- function(comparison, cols = c("intersection", "by"), tbl = c("a", "b")) { +table_init <- function(comparison, cols = c("intersection", "by"), tbl = 1) { # simulate a data frame with the same classes as table_[tbl] cols <- arg_match(cols) - tbl <- arg_match(tbl) fsubset(comparison$input$value[[tbl]], integer(0), comparison[[cols]]$column) } diff --git a/R/slice-unmatched.R b/R/slice-unmatched.R index 355cfa0..4646935 100644 --- a/R/slice-unmatched.R +++ b/R/slice-unmatched.R @@ -31,7 +31,7 @@ slice_unmatched <- function(comparison, table) { #' @export slice_unmatched_both <- function(comparison) { assert_is_comparison(enquo(comparison)) - table_id <- names(comparison$input$value) + table_id <- comparison$tables$table out_cols <- with(comparison, c(by$column, intersection$column)) diff --git a/R/value-diffs.R b/R/value-diffs.R index bea33df..aa32a05 100644 --- a/R/value-diffs.R +++ b/R/value-diffs.R @@ -29,7 +29,7 @@ value_diffs <- function(comparison, column) { column <- enquo(column) column_loc <- get_cols_from_comparison(comparison, column) assert_is_single_column(column_loc) - table_id <- names(comparison$input$value) + table_id <- comparison$tables$table diff_rows <- fsubset(comparison$intersection, column_loc, "diff_rows") %>% pluck(1, 1) @@ -48,7 +48,7 @@ value_diffs <- function(comparison, column) { value_diffs_stacked <- function(comparison, column = everything()) { assert_is_comparison(enquo(comparison)) column <- enquo(column) - table_id <- names(comparison$input$value) + table_id <- comparison$tables$table get_value_diff_for_stack <- function(comparison, col_name) { value_diffs(comparison, all_of(col_name)) %>% diff --git a/R/weave-diffs.R b/R/weave-diffs.R index 38a4adc..0f1ad88 100644 --- a/R/weave-diffs.R +++ b/R/weave-diffs.R @@ -42,7 +42,7 @@ weave_diffs_long <- function(comparison, column = everything()) { weave_diffs_wide <- function(comparison, column = everything()) { assert_is_comparison(enquo(comparison)) column <- enquo(column) - table_id <- names(comparison$input$value) + table_id <- comparison$tables$table out_cols <- with(comparison, c(by$column, intersection$column)) diff_cols <- names(identify_diff_cols(comparison, column)) diff --git a/tests/testthat/_snaps/compare.md b/tests/testthat/_snaps/compare.md index 5c28bf0..92d486d 100644 --- a/tests/testthat/_snaps/compare.md +++ b/tests/testthat/_snaps/compare.md @@ -166,6 +166,24 @@ ! `join_by()` is not supported i provide `by` columns with tidy-select, as in `dplyr::across()` +# Error when table_id is not as expected + + Code + compare(a, b, by = x, table_id = c("a", "b", "c")) + Condition + Error in `compare()`: + ! `table_id` must be a character vector of length 2 + i `table_id` is a character vector of length 3 + +--- + + Code + compare(a, b, by = x, table_id = 1:2) + Condition + Error in `compare()`: + ! `table_id` must be a character vector of length 2 + i `table_id` is an integer vector of length 2 + # Error on different classes with coerce = FALSE Code @@ -227,6 +245,117 @@ 5 b extra_b 12 +# example comparison with custom `table_id` + + Code + compare(test_df_a, test_df_b, by = car, table_id = c("orignal", "updated")) + Output + $tables + # A tibble: 2 x 4 + table expr nrow ncol + + 1 orignal test_df_a 11 13 + 2 updated test_df_b 12 12 + + $by + # A tibble: 1 x 3 + column class_orignal class_updated + + 1 car character character + + $intersection + # A tibble: 11 x 5 + column n_diffs class_orignal class_updated diff_rows + + 1 mpg 2 numeric numeric + 2 cyl 1 numeric numeric + 3 disp 2 numeric numeric + 4 hp 0 numeric numeric + 5 drat 0 numeric numeric + 6 wt 0 numeric character + 7 qsec 0 numeric numeric + 8 vs 0 numeric numeric + 9 am 0 numeric numeric + 10 gear 0 numeric numeric + 11 carb 0 numeric numeric + + $unmatched_cols + # A tibble: 1 x 2 + table column + + 1 orignal extracol_a + + $unmatched_rows + # A tibble: 5 x 3 + table car row + + 1 orignal Mazda RX4 1 + 2 orignal extra_a 11 + 3 updated Merc 280C 10 + 4 updated Merc 450SE 11 + 5 updated extra_b 12 + + +# example comparison with `table_id` which is not a universal name + + Code + comp <- compare(test_df_a, test_df_b, by = car, table_id = c("first result", + "new")) + Message + `table_id` has been adjusted + * `first result` -> `first.result` + +--- + + Code + comp + Output + $tables + # A tibble: 2 x 4 + table expr nrow ncol + + 1 first.result test_df_a 11 13 + 2 new test_df_b 12 12 + + $by + # A tibble: 1 x 3 + column class_first.result class_new + + 1 car character character + + $intersection + # A tibble: 11 x 5 + column n_diffs class_first.result class_new diff_rows + + 1 mpg 2 numeric numeric + 2 cyl 1 numeric numeric + 3 disp 2 numeric numeric + 4 hp 0 numeric numeric + 5 drat 0 numeric numeric + 6 wt 0 numeric character + 7 qsec 0 numeric numeric + 8 vs 0 numeric numeric + 9 am 0 numeric numeric + 10 gear 0 numeric numeric + 11 carb 0 numeric numeric + + $unmatched_cols + # A tibble: 1 x 2 + table column + + 1 first.result extracol_a + + $unmatched_rows + # A tibble: 5 x 3 + table car row + + 1 first.result Mazda RX4 1 + 2 first.result extra_a 11 + 3 new Merc 280C 10 + 4 new Merc 450SE 11 + 5 new extra_b 12 + + # compare() works when no rows are common Code diff --git a/tests/testthat/_snaps/slice-diffs.md b/tests/testthat/_snaps/slice-diffs.md index 1c801b2..0e7a0a0 100644 --- a/tests/testthat/_snaps/slice-diffs.md +++ b/tests/testthat/_snaps/slice-diffs.md @@ -22,6 +22,30 @@ 3 Datsun 710 22.8 NA 109 93 3.85 2.32 1 1 4 Hornet 4 Drive 21.4 6 259 110 3.08 3.22 1 0 +# slice_diffs() works with custom table_id + + Code + slice_diffs(comp, "x", disp) + Output + # A tibble: 2 x 9 + car mpg cyl disp hp drat wt vs am + + 1 Datsun 710 22.8 NA 109 93 3.85 2.32 1 1 + 2 Hornet 4 Drive 21.4 6 259 110 3.08 3.22 1 0 + +--- + + Code + slice_diffs(comp, "x", c(mpg, disp)) + Output + # A tibble: 4 x 9 + car mpg cyl disp hp drat wt vs am + + 1 Duster 360 14.3 8 360 245 3.21 3.57 0 0 + 2 Merc 240D 24.4 4 147. 62 3.69 3.19 1 0 + 3 Datsun 710 22.8 NA 109 93 3.85 2.32 1 1 + 4 Hornet 4 Drive 21.4 6 259 110 3.08 3.22 1 0 + # Error when `comparison` isn't a comparison Code @@ -31,7 +55,7 @@ ! Problem with argument `comparison = example_df_a` i `comparison` must be the output of `versus::compare()` -# Error when `table` isn't 'a' or 'b' +# Error when `table` isn't expected - default table_id Code slice_diffs(comp, a, disp) @@ -66,6 +90,50 @@ Error in `slice_diffs()`: ! `table` is absent but must be supplied. +# Error when `table` isn't expected - custom table_id + + Code + slice_diffs(comp, a, disp) + Condition + Error in `slice_diffs()`: + ! Problem with argument `table = a` + i `table` must be a single character value: "x" or "y" + +--- + + Code + slice_diffs(comp, disp) + Condition + Error in `slice_diffs()`: + ! Problem with argument `table = disp` + i `table` must be a single character value: "x" or "y" + +--- + + Code + slice_diffs(comp, "z") + Condition + Error in `slice_diffs()`: + ! Problem with argument `table = "z"` + i `table` must be either "x" or "y" + +--- + + Code + slice_diffs(comp, "a") + Condition + Error in `slice_diffs()`: + ! Problem with argument `table = "a"` + i `table` must be either "x" or "y" + +--- + + Code + slice_diffs(comp) + Condition + Error in `slice_diffs()`: + ! `table` is absent but must be supplied. + # Error on slice_diffs() with empty selection Code diff --git a/tests/testthat/_snaps/slice-unmatched.md b/tests/testthat/_snaps/slice-unmatched.md index eab9f9a..a886648 100644 --- a/tests/testthat/_snaps/slice-unmatched.md +++ b/tests/testthat/_snaps/slice-unmatched.md @@ -14,6 +14,31 @@ 4 b Merc ~ 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 5 b extra~ 21 6 160 110 3.9 2.875 17.0 0 1 4 4 +# slice_unmatched_both works with custom table_id + + Code + slice_unmatched_both(comp) + Message + i Columns converted to character: wt + Output + # A tibble: 5 x 13 + table car mpg cyl disp hp drat wt qsec vs am gear carb + + 1 x Mazda~ 21 6 160 110 3.9 2.62 16.5 0 1 4 4 + 2 x extra~ 21 6 160 110 3.9 2.62 16.5 0 1 4 4 + 3 y Merc ~ 17.8 6 168. 123 3.92 3.44 18.9 1 0 4 4 + 4 y Merc ~ 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 + 5 y extra~ 21 6 160 110 3.9 2.875 17.0 0 1 4 4 + +# slice_unmatched errors when `table` isn't expected + + Code + slice_unmatched(comp, "a") + Condition + Error in `slice_unmatched()`: + ! Problem with argument `table = "a"` + i `table` must be either "x" or "y" + # unmatched() errors when `comparison` isn't a comparison Code diff --git a/tests/testthat/test-compare.R b/tests/testthat/test-compare.R index 8785f53..63ec488 100644 --- a/tests/testthat/test-compare.R +++ b/tests/testthat/test-compare.R @@ -110,6 +110,13 @@ test_that("Error when `by` uses `join_by`", { expect_snapshot(compare(a, b, by = join_by(x)), error = TRUE) }) +test_that("Error when table_id is not as expected", { + a <- data.frame(x = 1) + b <- data.frame(x = 1) + expect_snapshot(compare(a, b, by = x, table_id = c("a", "b", "c")), error = TRUE) + expect_snapshot(compare(a, b, by = x, table_id = 1:2), error = TRUE) +}) + test_that("Error on different classes with coerce = FALSE", { expect_snapshot( compare(test_df_a, test_df_b, by = car, coerce = FALSE), @@ -128,6 +135,19 @@ test_that("example comparison", { expect_snapshot(comp) }) +test_that("example comparison with custom `table_id`", { + expect_snapshot( + compare(test_df_a, test_df_b, by = car, table_id = c("orignal", "updated")) + ) +}) + +test_that("example comparison with `table_id` which is not a universal name", { + expect_snapshot( + comp <- compare(test_df_a, test_df_b, by = car, table_id = c("first result", "new")) + ) + expect_snapshot(comp) +}) + test_that("allow_bothNA works", { comp <- compare( tibble(x = 1, y = NA), diff --git a/tests/testthat/test-slice-diffs.R b/tests/testthat/test-slice-diffs.R index 58b832f..cd90694 100644 --- a/tests/testthat/test-slice-diffs.R +++ b/tests/testthat/test-slice-diffs.R @@ -8,6 +8,16 @@ test_that("slice_diffs() works", { ) }) +test_that("slice_diffs() works with custom table_id", { + comp <- compare(example_df_a, example_df_b, by = car, table_id = c("x", "y")) + expect_snapshot(slice_diffs(comp, "x", disp)) + expect_snapshot(slice_diffs(comp, "x", c(mpg, disp))) + expect_identical( + slice_diffs(comp, "x", c(wt, disp)), + slice_diffs(comp, "x", disp) + ) +}) + test_that("Error when `comparison` isn't a comparison", { comp <- compare(example_df_a, example_df_b, by = car) expect_snapshot( @@ -16,7 +26,7 @@ test_that("Error when `comparison` isn't a comparison", { ) }) -test_that("Error when `table` isn't 'a' or 'b'", { +test_that("Error when `table` isn't expected - default table_id", { comp <- compare(example_df_a, example_df_b, by = c(car, drat)) expect_snapshot(slice_diffs(comp, a, disp), error = TRUE) expect_snapshot(slice_diffs(comp, disp), error = TRUE) @@ -24,6 +34,15 @@ test_that("Error when `table` isn't 'a' or 'b'", { expect_snapshot(slice_diffs(comp), error = TRUE) }) +test_that("Error when `table` isn't expected - custom table_id", { + comp <- compare(example_df_a, example_df_b, by = c(car, drat), table_id = c("x", "y")) + expect_snapshot(slice_diffs(comp, a, disp), error = TRUE) + expect_snapshot(slice_diffs(comp, disp), error = TRUE) + expect_snapshot(slice_diffs(comp, "z"), error = TRUE) + expect_snapshot(slice_diffs(comp, "a"), error = TRUE) + expect_snapshot(slice_diffs(comp), error = TRUE) +}) + test_that("slice_diffs works when there are no diffs", { # because there are no diff cols df <- rownames_to_column(mtcars, "car") diff --git a/tests/testthat/test-slice-unmatched.R b/tests/testthat/test-slice-unmatched.R index af0bccb..76fdcd3 100644 --- a/tests/testthat/test-slice-unmatched.R +++ b/tests/testthat/test-slice-unmatched.R @@ -3,11 +3,26 @@ test_that("slice_unmatched_both works", { expect_snapshot(slice_unmatched_both(comp)) }) +test_that("slice_unmatched_both works with custom table_id", { + comp <- compare(test_df_a, test_df_b, by = car, table_id = c("x", "y")) + expect_snapshot(slice_unmatched_both(comp)) +}) + test_that("slice_unmatched works", { comp <- compare(example_df_a, example_df_b, by = car) expect_identical(slice_unmatched(comp, "a"), as_tibble(example_df_a[7, ])) }) +test_that("slice_unmatched errors when `table` isn't expected", { + comp <- compare(example_df_a, example_df_b, by = car, table_id = c("x", "y")) + expect_snapshot(slice_unmatched(comp, "a"), error = TRUE) +}) + +test_that("slice_unmatched works with custom table_id", { + comp <- compare(example_df_a, example_df_b, by = car, table_id = c("x", "y")) + expect_identical(slice_unmatched(comp, "x"), as_tibble(example_df_a[7, ])) +}) + test_that("unmatched() errors when `comparison` isn't a comparison", { comp <- compare(example_df_a, example_df_b, by = car) expect_snapshot( From 16febb1c8537929b5ed74043ccb97ed77dd46952 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 12 Oct 2025 09:43:03 -0400 Subject: [PATCH 09/23] add tests --- tests/testthat/_snaps/helpers.md | 44 ++++++++++++++++++++++++++++ tests/testthat/_snaps/value-diffs.md | 24 +++++++++++++++ tests/testthat/_snaps/weave-diffs.md | 42 ++++++++++++++++++++++++++ tests/testthat/test-get-diff-rows.R | 18 ++++++++++++ tests/testthat/test-helpers.R | 21 +++++++++++++ tests/testthat/test-value-diffs.R | 10 +++++++ tests/testthat/test-weave-diffs.R | 11 +++++++ 7 files changed, 170 insertions(+) create mode 100644 tests/testthat/_snaps/helpers.md diff --git a/tests/testthat/_snaps/helpers.md b/tests/testthat/_snaps/helpers.md new file mode 100644 index 0000000..b48b6f0 --- /dev/null +++ b/tests/testthat/_snaps/helpers.md @@ -0,0 +1,44 @@ +# check_table_arg respects custom table_id + + Code + check_table_arg(quo("original"), comp_default) + Condition + Error: + ! Problem with argument `table = "original"` + i `table` must be either "a" or "b" + +--- + + Code + check_table_arg(quo("a"), comp_custom) + Condition + Error: + ! Problem with argument `table = "a"` + i `table` must be either "original" or "updated" + +--- + + Code + check_table_arg(quo("b"), comp_custom) + Condition + Error: + ! Problem with argument `table = "b"` + i `table` must be either "original" or "updated" + +--- + + Code + check_table_arg(quo(c("a", "b")), comp_default) + Condition + Error: + ! Problem with argument `table = c("a", "b")` + i `table` must be a single character value: "a" or "b" + +--- + + Code + check_table_arg(quo(), comp_default) + Condition + Error: + ! `table` is absent but must be supplied. + diff --git a/tests/testthat/_snaps/value-diffs.md b/tests/testthat/_snaps/value-diffs.md index 6d575e4..ceb18a1 100644 --- a/tests/testthat/_snaps/value-diffs.md +++ b/tests/testthat/_snaps/value-diffs.md @@ -148,3 +148,27 @@ 4 disp 109 108 Datsun 710 5 disp 259 258 Hornet 4 Drive +# value_diffs() respects custom table_id + + Code + value_diffs(comp, mpg) + Output + # A tibble: 2 x 3 + mpg_original mpg_updated car + + 1 14.3 16.3 Duster 360 + 2 24.4 26.4 Merc 240D + +# value_diffs_stacked() respects custom table_id + + Code + value_diffs_stacked(comp, c(mpg, disp)) + Output + # A tibble: 4 x 4 + column val_original val_updated car + + 1 mpg 14.3 16.3 Duster 360 + 2 mpg 24.4 26.4 Merc 240D + 3 disp 109 108 Datsun 710 + 4 disp 259 258 Hornet 4 Drive + diff --git a/tests/testthat/_snaps/weave-diffs.md b/tests/testthat/_snaps/weave-diffs.md index 7396083..272eece 100644 --- a/tests/testthat/_snaps/weave-diffs.md +++ b/tests/testthat/_snaps/weave-diffs.md @@ -156,3 +156,45 @@ * Must select columns from `comparison$intersection` i column `bear` is not part of the supplied comparison +# weave_diffs_wide respects custom table_id + + Code + weave_diffs_wide(comp, mpg) + Output + # A tibble: 2 x 13 + car mpg_original mpg_updated cyl disp hp drat wt qsec vs am + + 1 Dust~ 14.3 16.3 8 360 245 3.21 3.57 15.8 0 0 + 2 Merc~ 24.4 26.4 4 147. 62 3.69 3.19 20 1 0 + # i 2 more variables: gear , carb + +--- + + Code + weave_diffs_wide(comp, c(mpg, disp)) + Output + # A tibble: 4 x 14 + car mpg_original mpg_updated cyl disp_original disp_updated hp drat + + 1 Duster ~ 14.3 16.3 8 360 360 245 3.21 + 2 Merc 24~ 24.4 26.4 4 147. 147. 62 3.69 + 3 Datsun ~ 22.8 22.8 NA 109 108 93 3.85 + 4 Hornet ~ 21.4 21.4 6 259 258 110 3.08 + # i 6 more variables: wt , qsec , vs , am , gear , + # carb + +# weave_diffs_long respects custom table_id + + Code + weave_diffs_long(comp, mpg) + Message + i Columns converted to character: wt + Output + # A tibble: 4 x 13 + table car mpg cyl disp hp drat wt qsec vs am gear carb + + 1 origi~ Dust~ 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 + 2 updat~ Dust~ 16.3 8 360 245 3.21 3.57 15.8 0 0 3 4 + 3 origi~ Merc~ 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 + 4 updat~ Merc~ 26.4 4 147. 62 3.69 3.19 20 1 0 4 2 + diff --git a/tests/testthat/test-get-diff-rows.R b/tests/testthat/test-get-diff-rows.R index 57d562a..deb1fee 100644 --- a/tests/testthat/test-get-diff-rows.R +++ b/tests/testthat/test-get-diff-rows.R @@ -39,3 +39,21 @@ test_that("is_simple_class() works", { expect_identical(is_simple_class(FALSE, FALSE), TRUE) expect_identical(is_simple_class(list(1), list(1)), FALSE) }) + +test_that("get_diff_rows respects custom table_id", { + table_a <- data.frame(x = 1:5, y = c(1, 2, 3, 4, 5)) + table_b <- data.frame(x = 1:5, y = c(1, 2, 9, 4, 5)) + matches <- list(common = data.frame(a = 1:5, b = 1:5)) + + # Default table_id + result_default <- get_diff_rows("y", table_a, table_b, matches, TRUE, c("a", "b")) + expect_named(result_default, c("row_a", "row_b")) + expect_equal(result_default$row_a, 3) + expect_equal(result_default$row_b, 3) + + # Custom table_id + result_custom <- get_diff_rows("y", table_a, table_b, matches, TRUE, c("original", "updated")) + expect_named(result_custom, c("row_original", "row_updated")) + expect_equal(result_custom$row_original, 3) + expect_equal(result_custom$row_updated, 3) +}) diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index 4a97d9f..2c0562b 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -65,3 +65,24 @@ test_that("ensure_ptype_compatible() works", { post_coerce ) }) + +test_that("check_table_arg respects custom table_id", { + # Create comparisons with different table_id values + comp_default <- compare(test_df_a, test_df_b, by = car) + comp_custom <- compare(test_df_a, test_df_b, by = car, table_id = c("original", "updated")) + + # Default table_id - accepts "a" and "b" + expect_silent(check_table_arg(quo("a"), comp_default)) + expect_silent(check_table_arg(quo("b"), comp_default)) + expect_snapshot(check_table_arg(quo("original"), comp_default), error = TRUE) + + # Custom table_id - accepts "original" and "updated" + expect_silent(check_table_arg(quo("original"), comp_custom)) + expect_silent(check_table_arg(quo("updated"), comp_custom)) + expect_snapshot(check_table_arg(quo("a"), comp_custom), error = TRUE) + expect_snapshot(check_table_arg(quo("b"), comp_custom), error = TRUE) + + # Invalid inputs work the same regardless of table_id + expect_snapshot(check_table_arg(quo(c("a", "b")), comp_default), error = TRUE) + expect_snapshot(check_table_arg(quo(), comp_default), error = TRUE) +}) diff --git a/tests/testthat/test-value-diffs.R b/tests/testthat/test-value-diffs.R index a5d3706..4a637ab 100644 --- a/tests/testthat/test-value-diffs.R +++ b/tests/testthat/test-value-diffs.R @@ -57,3 +57,13 @@ test_that("value_diffs_stacked() coerces to char on incompatible ptypes", { comp <- compare(test_df_a_char_mpg, test_df_b, by = car) expect_snapshot(value_diffs_stacked(comp)) }) + +test_that("value_diffs() respects custom table_id", { + comp <- compare(test_df_a, test_df_b, by = car, table_id = c("original", "updated")) + expect_snapshot(value_diffs(comp, mpg)) +}) + +test_that("value_diffs_stacked() respects custom table_id", { + comp <- compare(test_df_a, test_df_b, by = car, table_id = c("original", "updated")) + expect_snapshot(value_diffs_stacked(comp, c(mpg, disp))) +}) diff --git a/tests/testthat/test-weave-diffs.R b/tests/testthat/test-weave-diffs.R index 0f0b740..5e02814 100644 --- a/tests/testthat/test-weave-diffs.R +++ b/tests/testthat/test-weave-diffs.R @@ -35,3 +35,14 @@ test_that("Error on value_diffs when column doesn't exist", { expect_snapshot(weave_diffs_long(comp, bear), error = TRUE) expect_snapshot(weave_diffs_wide(comp, bear), error = TRUE) }) + +test_that("weave_diffs_wide respects custom table_id", { + comp <- compare(test_df_a, test_df_b, by = car, table_id = c("original", "updated")) + expect_snapshot(weave_diffs_wide(comp, mpg)) + expect_snapshot(weave_diffs_wide(comp, c(mpg, disp))) +}) + +test_that("weave_diffs_long respects custom table_id", { + comp <- compare(test_df_a, test_df_b, by = car, table_id = c("original", "updated")) + expect_snapshot(weave_diffs_long(comp, mpg)) +}) From 2e6d0d59b4506ae1038fad92a3958060a3dc45fb Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 12 Oct 2025 09:52:00 -0400 Subject: [PATCH 10/23] adjust comment for new setup --- R/helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/helpers.R b/R/helpers.R index 2870cb0..8dc0720 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -48,7 +48,7 @@ is_ptype_compatible <- function(...) { } table_init <- function(comparison, cols = c("intersection", "by"), tbl = 1) { - # simulate a data frame with the same classes as table_[tbl] + # simulate a data frame with the same classes as an input table cols <- arg_match(cols) fsubset(comparison$input$value[[tbl]], integer(0), comparison[[cols]]$column) } From 8289e9c2f258d40e3d98d1c2a4c8208858d45b30 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 12 Oct 2025 10:19:01 -0400 Subject: [PATCH 11/23] more tests --- tests/testthat/test-helpers.R | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index 2c0562b..845fe9f 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -70,19 +70,44 @@ test_that("check_table_arg respects custom table_id", { # Create comparisons with different table_id values comp_default <- compare(test_df_a, test_df_b, by = car) comp_custom <- compare(test_df_a, test_df_b, by = car, table_id = c("original", "updated")) - + # Default table_id - accepts "a" and "b" expect_silent(check_table_arg(quo("a"), comp_default)) expect_silent(check_table_arg(quo("b"), comp_default)) expect_snapshot(check_table_arg(quo("original"), comp_default), error = TRUE) - + # Custom table_id - accepts "original" and "updated" expect_silent(check_table_arg(quo("original"), comp_custom)) expect_silent(check_table_arg(quo("updated"), comp_custom)) expect_snapshot(check_table_arg(quo("a"), comp_custom), error = TRUE) expect_snapshot(check_table_arg(quo("b"), comp_custom), error = TRUE) - + # Invalid inputs work the same regardless of table_id expect_snapshot(check_table_arg(quo(c("a", "b")), comp_default), error = TRUE) expect_snapshot(check_table_arg(quo(), comp_default), error = TRUE) }) + +test_that("clean_table_id strips attributes from named vectors", { + # Named vector - attributes should be stripped + named_vec <- c(first = "table1", second = "table2") + expect_identical( + clean_table_id(named_vec), + c("table1", "table2") + ) + + # Vector with custom attributes + vec_with_attrs <- c("x", "y") + attr(vec_with_attrs, "custom") <- "attribute" + attr(vec_with_attrs, "another") <- 123 + expect_identical( + clean_table_id(vec_with_attrs), + c("x", "y") + ) + + # Plain vector without attributes should remain unchanged + plain_vec <- c("alpha", "beta") + expect_identical( + clean_table_id(plain_vec), + plain_vec + ) +}) From 37568a12baac15fa9218b46a81ded1d6c980e5df Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Sun, 12 Oct 2025 10:36:25 -0400 Subject: [PATCH 12/23] add contributor --- DESCRIPTION | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b1d886b..422c0f7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,11 @@ Package: versus Title: Compare Data Frames Version: 0.3.0.9000 -Authors@R: - person("Ryan", "Dickerson", , "fresh.tent5866@fastmail.com", role = c("aut", "cre", "cph")) +Authors@R: c( + person("Ryan", "Dickerson", , "fresh.tent5866@fastmail.com", role = c("aut", "cre", "cph")), + person("Eli", "Pousson", , "eli.pousson@gmail.com", role = "ctb", + comment = c(ORCID = "0000-0001-8280-1706")) + ) Description: A toolset for interactively exploring the differences between two data frames. License: MIT + file LICENSE Suggests: From 31f72ade4474b23ab60780701d0eac89c5981fce Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Mon, 13 Oct 2025 07:45:26 -0400 Subject: [PATCH 13/23] document() and remove docs for non-existent function --- R/value-diffs.R | 2 -- man/value-diffs.Rd | 2 -- man/versus-package.Rd | 5 +++++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/value-diffs.R b/R/value-diffs.R index aa32a05..9aee39d 100644 --- a/R/value-diffs.R +++ b/R/value-diffs.R @@ -15,8 +15,6 @@ #' the \code{value_diffs()} outputs for the specified columns combined row-wise #' using \code{dplyr::bind_rows()}. If \code{dplyr::bind_rows()} is not possible #' due to incompatible types, values are converted to character first. -#' \code{value_diffs_all()} is the same as \code{value_diffs_stacked()} with -#' \code{column = everything()}} #' @examples #' comp <- compare(example_df_a, example_df_b, by = car) #' value_diffs(comp, disp) diff --git a/man/value-diffs.Rd b/man/value-diffs.Rd index d97f6f4..a9640d6 100644 --- a/man/value-diffs.Rd +++ b/man/value-diffs.Rd @@ -26,8 +26,6 @@ input tables, plus the \code{by} columns. } the \code{value_diffs()} outputs for the specified columns combined row-wise using \code{dplyr::bind_rows()}. If \code{dplyr::bind_rows()} is not possible due to incompatible types, values are converted to character first. -\code{value_diffs_all()} is the same as \code{value_diffs_stacked()} with -\code{column = everything()}} } \description{ Get the differing values from a comparison diff --git a/man/versus-package.Rd b/man/versus-package.Rd index d92a4ae..4f033ce 100644 --- a/man/versus-package.Rd +++ b/man/versus-package.Rd @@ -20,5 +20,10 @@ Useful links: \author{ \strong{Maintainer}: Ryan Dickerson \email{fresh.tent5866@fastmail.com} [copyright holder] +Other contributors: +\itemize{ + \item Eli Pousson \email{eli.pousson@gmail.com} (\href{https://orcid.org/0000-0001-8280-1706}{ORCID}) [contributor] +} + } \keyword{internal} From d8ad94304168cb4568890528e30f8332cbade4d1 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Mon, 13 Oct 2025 07:57:03 -0400 Subject: [PATCH 14/23] adjust documentation for `table` argument --- R/slice-diffs.R | 6 ++++-- R/value-diffs.R | 26 ++++++++++++++------------ man/slice_diffs.Rd | 6 ++++-- man/slice_unmatched.Rd | 6 ++++-- man/value-diffs.Rd | 20 +++++++++++--------- 5 files changed, 37 insertions(+), 27 deletions(-) diff --git a/R/slice-diffs.R b/R/slice-diffs.R index 8738ebc..1adde5f 100644 --- a/R/slice-diffs.R +++ b/R/slice-diffs.R @@ -1,8 +1,10 @@ #' Get rows with differing values #' #' @param comparison The output of \code{compare()} -#' @param table One of \code{"a"} or \code{"b"} indicating which of the tables used to -#' create \code{comparison} should be sliced +#' @param table A string matching one of the identifiers supplied via +#' \code{table_id} when calling \code{compare()} (defaults are \code{"a"} and +#' \code{"b"}). Within the comparison, these identifiers are stored in +#' \code{comparison$tables$table}. #' @param column <[`tidy-select`][versus_tidy_select]>. A row will be in the output if #' the comparison shows differing values for any columns matching this argument #' diff --git a/R/value-diffs.R b/R/value-diffs.R index 9aee39d..d65fc97 100644 --- a/R/value-diffs.R +++ b/R/value-diffs.R @@ -1,20 +1,22 @@ #' Get the differing values from a comparison #' #' @param comparison The output of \code{compare()} -#' @param column <[`tidy-select`][versus_tidy_select]>. The output will show the differing values -#' for the provided columns. +#' @param column <[`tidy-select`][versus_tidy_select]>. The output will show the +#' differing values for the provided columns. #' #' @return -#' \item{\code{value_diffs()}}{A data frame with one row for each element -#' of \code{col} found to be unequal between the input tables ( -#' \code{table_a} and \code{table_b} from the original \code{compare()} output) -#' The output table has the column specified by \code{column} from each of the -#' input tables, plus the \code{by} columns. } -#' -#' \item{\code{value_diffs_stacked()}, \code{value_diffs_all()}}{A data frame containing -#' the \code{value_diffs()} outputs for the specified columns combined row-wise -#' using \code{dplyr::bind_rows()}. If \code{dplyr::bind_rows()} is not possible -#' due to incompatible types, values are converted to character first. +#' \itemize{ +#' \item \code{value_diffs()}: A data frame with one row for each element +#' of \code{col} found to be unequal between the input tables ( +#' \code{table_a} and \code{table_b} from the original \code{compare()} +#' output). The output table has the column specified by \code{column} +#' from each of the input tables, plus the \code{by} columns. +#' \item \code{value_diffs_stacked()}: A data frame containing the +#' \code{value_diffs()} outputs for the specified columns combined +#' row-wise using \code{dplyr::bind_rows()}. If \code{dplyr::bind_rows()} +#' is not possible due to incompatible types, values are converted to +#' character first. +#' } #' @examples #' comp <- compare(example_df_a, example_df_b, by = car) #' value_diffs(comp, disp) diff --git a/man/slice_diffs.Rd b/man/slice_diffs.Rd index 4faffea..db30d6e 100644 --- a/man/slice_diffs.Rd +++ b/man/slice_diffs.Rd @@ -9,8 +9,10 @@ slice_diffs(comparison, table, column = everything()) \arguments{ \item{comparison}{The output of \code{compare()}} -\item{table}{One of \code{"a"} or \code{"b"} indicating which of the tables used to -create \code{comparison} should be sliced} +\item{table}{A string matching one of the identifiers supplied via +\code{table_id} when calling \code{compare()} (defaults are \code{"a"} and +\code{"b"}). Within the comparison, these identifiers are stored in +\code{comparison$tables$table}.} \item{column}{<\code{\link[=versus_tidy_select]{tidy-select}}>. A row will be in the output if the comparison shows differing values for any columns matching this argument} diff --git a/man/slice_unmatched.Rd b/man/slice_unmatched.Rd index bd384bc..aa779c0 100644 --- a/man/slice_unmatched.Rd +++ b/man/slice_unmatched.Rd @@ -12,8 +12,10 @@ slice_unmatched_both(comparison) \arguments{ \item{comparison}{The output of \code{compare()}} -\item{table}{One of \code{"a"} or \code{"b"} indicating which of the tables used to -create \code{comparison} should be sliced} +\item{table}{A string matching one of the identifiers supplied via +\code{table_id} when calling \code{compare()} (defaults are \code{"a"} and +\code{"b"}). Within the comparison, these identifiers are stored in +\code{comparison$tables$table}.} } \value{ \item{\code{slice_unmatched()}}{The table identified by \code{table} is filtered diff --git a/man/value-diffs.Rd b/man/value-diffs.Rd index a9640d6..4605075 100644 --- a/man/value-diffs.Rd +++ b/man/value-diffs.Rd @@ -16,16 +16,18 @@ value_diffs_stacked(comparison, column = everything()) for the provided columns.} } \value{ -\item{\code{value_diffs()}}{A data frame with one row for each element +\itemize{ +\item \code{value_diffs()}: A data frame with one row for each element of \code{col} found to be unequal between the input tables ( -\code{table_a} and \code{table_b} from the original \code{compare()} output) -The output table has the column specified by \code{column} from each of the -input tables, plus the \code{by} columns. } - -\item{\code{value_diffs_stacked()}, \code{value_diffs_all()}}{A data frame containing -the \code{value_diffs()} outputs for the specified columns combined row-wise -using \code{dplyr::bind_rows()}. If \code{dplyr::bind_rows()} is not possible -due to incompatible types, values are converted to character first. +\code{table_a} and \code{table_b} from the original \code{compare()} +output). The output table has the column specified by \code{column} +from each of the input tables, plus the \code{by} columns. +\item \code{value_diffs_stacked()}: A data frame containing the +\code{value_diffs()} outputs for the specified columns combined +row-wise using \code{dplyr::bind_rows()}. If \code{dplyr::bind_rows()} +is not possible due to incompatible types, values are converted to +character first. +} } \description{ Get the differing values from a comparison From b4ff0b585fdacebb593acb39cf3f1b3cd1d78785 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Mon, 13 Oct 2025 08:27:54 -0400 Subject: [PATCH 15/23] document examples with custom table_id --- R/slice-diffs.R | 3 +++ R/slice-unmatched.R | 3 +++ man/slice_diffs.Rd | 3 +++ man/slice_unmatched.Rd | 3 +++ man/value-diffs.Rd | 4 ++-- 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/R/slice-diffs.R b/R/slice-diffs.R index 1adde5f..760b6c3 100644 --- a/R/slice-diffs.R +++ b/R/slice-diffs.R @@ -17,6 +17,9 @@ #' comp |> slice_diffs("a", mpg) #' comp |> slice_diffs("b", mpg) #' comp |> slice_diffs("a", c(mpg, disp)) +#' +#' comp <- compare(example_df_a, example_df_b, by = car, table_id = c("old", "new")) +#' comp |> slice_diffs("old", mpg) #' @rdname slice_diffs #' @export diff --git a/R/slice-unmatched.R b/R/slice-unmatched.R index 4646935..99d8ba2 100644 --- a/R/slice-unmatched.R +++ b/R/slice-unmatched.R @@ -17,6 +17,9 @@ #' # slice_unmatched(comp, "a") output is the same as #' example_df_a |> dplyr::anti_join(example_df_b, by = comp$by$column) #' +#' comp <- compare(example_df_a, example_df_b, by = car, table_id = c("old", "new")) +#' comp |> slice_unmatched("old") +#' #' comp |> slice_unmatched_both() #' @rdname slice_unmatched diff --git a/man/slice_diffs.Rd b/man/slice_diffs.Rd index db30d6e..a4d426f 100644 --- a/man/slice_diffs.Rd +++ b/man/slice_diffs.Rd @@ -29,4 +29,7 @@ comp <- compare(example_df_a, example_df_b, by = car) comp |> slice_diffs("a", mpg) comp |> slice_diffs("b", mpg) comp |> slice_diffs("a", c(mpg, disp)) + +comp <- compare(example_df_a, example_df_b, by = car, table_id = c("old", "new")) +comp |> slice_diffs("old", mpg) } diff --git a/man/slice_unmatched.Rd b/man/slice_unmatched.Rd index aa779c0..0bfdddd 100644 --- a/man/slice_unmatched.Rd +++ b/man/slice_unmatched.Rd @@ -35,5 +35,8 @@ comp |> slice_unmatched("b") # slice_unmatched(comp, "a") output is the same as example_df_a |> dplyr::anti_join(example_df_b, by = comp$by$column) +comp <- compare(example_df_a, example_df_b, by = car, table_id = c("old", "new")) +comp |> slice_unmatched("old") + comp |> slice_unmatched_both() } diff --git a/man/value-diffs.Rd b/man/value-diffs.Rd index 4605075..778164b 100644 --- a/man/value-diffs.Rd +++ b/man/value-diffs.Rd @@ -12,8 +12,8 @@ value_diffs_stacked(comparison, column = everything()) \arguments{ \item{comparison}{The output of \code{compare()}} -\item{column}{<\code{\link[=versus_tidy_select]{tidy-select}}>. The output will show the differing values -for the provided columns.} +\item{column}{<\code{\link[=versus_tidy_select]{tidy-select}}>. The output will show the +differing values for the provided columns.} } \value{ \itemize{ From 8533058a58c829004734065403d7b17ceee24ac7 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Wed, 15 Oct 2025 17:55:10 -0400 Subject: [PATCH 16/23] add suffix arg to weave_diffs_wide --- NEWS.md | 11 ++++++--- R/weave-diffs.R | 35 +++++++++++++++++++++++--- man/weave_diffs.Rd | 8 +++++- tests/testthat/_snaps/weave-diffs.md | 37 ++++++++++++++++++++++++++++ tests/testthat/test-weave-diffs.R | 23 +++++++++++++++++ 5 files changed, 107 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3332a1e..49acfab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,9 +7,14 @@ * In the output of `compare()` with no `table_id` specified, the table summary `compare()$tables` uses "a" and "b" as identifiers for the `table` column rather - than "table_a" and "table_b". This is for consistency with the case when custom - ids are provided using the new `table_id` argument, so that `compare()$tables$table` - always matches the `table_id` argument. + than "table_a" and "table_b" as used in prior versions. This is for consistency + with the case when custom ids are provided using the new `table_id` argument, + so that `compare()$tables$table` always matches the `table_id` argument. + +* `weave_diffs_wide()` gains a `suffix` argument so column names in the wide + output can use custom suffixes instead of `_{table_id}`. The default keeps the + existing behaviour, and input validation ensures suffix vectors are length two, + non-missing, and distinct. @elipousson # versus 0.3.0 diff --git a/R/weave-diffs.R b/R/weave-diffs.R index 0f1ad88..0fe3858 100644 --- a/R/weave-diffs.R +++ b/R/weave-diffs.R @@ -1,6 +1,11 @@ #' Get differences in context #' #' @inheritParams slice_diffs +#' @param suffix A character vector of length 2 providing suffixes appended to +#' the renamed columns in `weave_diffs_wide()`. Set to `NULL` (the default) to +#' use `paste0("_", table_id)`. The first suffix is applied to values from +#' `table_a`, the second to values from `table_b`. Values must not be missing +#' and the two suffixes must differ. #' #' @return #' \item{\code{weave_diffs_wide()}}{The input \code{table_a} filtered to rows where @@ -39,10 +44,11 @@ weave_diffs_long <- function(comparison, column = everything()) { #' @rdname weave_diffs #' @export -weave_diffs_wide <- function(comparison, column = everything()) { +weave_diffs_wide <- function(comparison, column = everything(), suffix = NULL) { assert_is_comparison(enquo(comparison)) column <- enquo(column) table_id <- comparison$tables$table + suffix <- validate_suffix(suffix, table_id) out_cols <- with(comparison, c(by$column, intersection$column)) diff_cols <- names(identify_diff_cols(comparison, column)) @@ -50,8 +56,31 @@ weave_diffs_wide <- function(comparison, column = everything()) { slice_b <- slice_diffs_impl(comparison, table_id[2], column, j = diff_cols) reduce(.init = slice_a, diff_cols, \(x, col) { + col_first <- paste0(col, suffix[1]) + col_second <- paste0(col, suffix[2]) x %>% - mutate("{col}_{table_id[2]}" := slice_b[[col]], .after = !!sym(col)) %>% - rename("{col}_{table_id[1]}" := !!sym(col)) + rename("{col_first}" := !!sym(col)) %>% + mutate("{col_second}" := slice_b[[col]], .after = !!sym(col_first)) }) } + +validate_suffix <- function(suffix, table_id, call = caller_env()) { + if (is.null(suffix)) { + return(paste0("_", table_id)) + } + if (!is_character(suffix, n = 2)) { + message <- c( + "{.arg suffix} must be NULL or a character vector of length 2", + i = "{.arg suffix} is {.obj_type_friendly {suffix}} of length {length(suffix)}" + ) + cli_abort(message, call = call) + } + attributes(suffix) <- NULL + if (anyNA(suffix)) { + cli_abort("{.arg suffix} must not contain missing values.", call = call) + } + if (identical(suffix[1], suffix[2])) { + cli_abort("{.arg suffix} entries must be distinct.", call = call) + } + suffix +} diff --git a/man/weave_diffs.Rd b/man/weave_diffs.Rd index 1e51746..d50abb0 100644 --- a/man/weave_diffs.Rd +++ b/man/weave_diffs.Rd @@ -7,13 +7,19 @@ \usage{ weave_diffs_long(comparison, column = everything()) -weave_diffs_wide(comparison, column = everything()) +weave_diffs_wide(comparison, column = everything(), suffix = NULL) } \arguments{ \item{comparison}{The output of \code{compare()}} \item{column}{<\code{\link[=versus_tidy_select]{tidy-select}}>. A row will be in the output if the comparison shows differing values for any columns matching this argument} + +\item{suffix}{A character vector of length 2 providing suffixes appended to +the renamed columns in \code{weave_diffs_wide()}. Set to \code{NULL} (the default) to +use \code{paste0("_", table_id)}. The first suffix is applied to values from +\code{table_a}, the second to values from \code{table_b}. Values must not be missing +and the two suffixes must differ.} } \value{ \item{\code{weave_diffs_wide()}}{The input \code{table_a} filtered to rows where diff --git a/tests/testthat/_snaps/weave-diffs.md b/tests/testthat/_snaps/weave-diffs.md index 272eece..b389010 100644 --- a/tests/testthat/_snaps/weave-diffs.md +++ b/tests/testthat/_snaps/weave-diffs.md @@ -198,3 +198,40 @@ 3 origi~ Merc~ 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 4 updat~ Merc~ 26.4 4 147. 62 3.69 3.19 20 1 0 4 2 +# weave_diffs_wide applies custom suffix + + Code + out + Output + # A tibble: 2 x 13 + car mpg `mpg (new)` cyl disp hp drat wt qsec vs am gear + + 1 Duste~ 14.3 16.3 8 360 245 3.21 3.57 15.8 0 0 3 + 2 Merc ~ 24.4 26.4 4 147. 62 3.69 3.19 20 1 0 4 + # i 1 more variable: carb + +# weave_diffs_wide validates suffix input + + Code + weave_diffs_wide(comp, mpg, suffix = "oops") + Condition + Error in `weave_diffs_wide()`: + ! `suffix` must be NULL or a character vector of length 2 + i `suffix` is a string of length 1 + +--- + + Code + weave_diffs_wide(comp, mpg, suffix = c("dup", "dup")) + Condition + Error in `weave_diffs_wide()`: + ! `suffix` entries must be distinct. + +--- + + Code + weave_diffs_wide(comp, mpg, suffix = c("old", NA)) + Condition + Error in `weave_diffs_wide()`: + ! `suffix` must not contain missing values. + diff --git a/tests/testthat/test-weave-diffs.R b/tests/testthat/test-weave-diffs.R index 5e02814..31d6caa 100644 --- a/tests/testthat/test-weave-diffs.R +++ b/tests/testthat/test-weave-diffs.R @@ -46,3 +46,26 @@ test_that("weave_diffs_long respects custom table_id", { comp <- compare(test_df_a, test_df_b, by = car, table_id = c("original", "updated")) expect_snapshot(weave_diffs_long(comp, mpg)) }) + +test_that("weave_diffs_wide applies custom suffix", { + comp <- compare(test_df_a, test_df_b, by = car) + out <- weave_diffs_wide(comp, mpg, suffix = c("", " (new)")) + expect_identical(names(out)[1:3], c("car", "mpg", "mpg (new)")) + expect_snapshot(out) +}) + +test_that("weave_diffs_wide validates suffix input", { + comp <- compare(test_df_a, test_df_b, by = car) + expect_snapshot( + weave_diffs_wide(comp, mpg, suffix = "oops"), + error = TRUE + ) + expect_snapshot( + weave_diffs_wide(comp, mpg, suffix = c("dup", "dup")), + error = TRUE + ) + expect_snapshot( + weave_diffs_wide(comp, mpg, suffix = c("old", NA)), + error = TRUE + ) +}) From 74d99832b5136698a61a16a2cbf71c012c29208d Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Wed, 15 Oct 2025 18:03:09 -0400 Subject: [PATCH 17/23] adjust function name --- R/weave-diffs.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/weave-diffs.R b/R/weave-diffs.R index 0fe3858..6a47faa 100644 --- a/R/weave-diffs.R +++ b/R/weave-diffs.R @@ -48,7 +48,7 @@ weave_diffs_wide <- function(comparison, column = everything(), suffix = NULL) { assert_is_comparison(enquo(comparison)) column <- enquo(column) table_id <- comparison$tables$table - suffix <- validate_suffix(suffix, table_id) + suffix <- clean_suffix(suffix, table_id) out_cols <- with(comparison, c(by$column, intersection$column)) diff_cols <- names(identify_diff_cols(comparison, column)) @@ -64,7 +64,7 @@ weave_diffs_wide <- function(comparison, column = everything(), suffix = NULL) { }) } -validate_suffix <- function(suffix, table_id, call = caller_env()) { +clean_suffix <- function(suffix, table_id, call = caller_env()) { if (is.null(suffix)) { return(paste0("_", table_id)) } From 60e2f6f17f1d2b70d0277fdc0e012c1b0ec7c6a1 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Wed, 15 Oct 2025 18:12:57 -0400 Subject: [PATCH 18/23] don't mention extremely rare edge cases in user docs --- R/weave-diffs.R | 3 +-- man/weave_diffs.Rd | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/weave-diffs.R b/R/weave-diffs.R index 6a47faa..a6e777a 100644 --- a/R/weave-diffs.R +++ b/R/weave-diffs.R @@ -4,8 +4,7 @@ #' @param suffix A character vector of length 2 providing suffixes appended to #' the renamed columns in `weave_diffs_wide()`. Set to `NULL` (the default) to #' use `paste0("_", table_id)`. The first suffix is applied to values from -#' `table_a`, the second to values from `table_b`. Values must not be missing -#' and the two suffixes must differ. +#' `table_a`, the second to values from `table_b`. #' #' @return #' \item{\code{weave_diffs_wide()}}{The input \code{table_a} filtered to rows where diff --git a/man/weave_diffs.Rd b/man/weave_diffs.Rd index d50abb0..cf0d380 100644 --- a/man/weave_diffs.Rd +++ b/man/weave_diffs.Rd @@ -18,8 +18,7 @@ the comparison shows differing values for any columns matching this argument} \item{suffix}{A character vector of length 2 providing suffixes appended to the renamed columns in \code{weave_diffs_wide()}. Set to \code{NULL} (the default) to use \code{paste0("_", table_id)}. The first suffix is applied to values from -\code{table_a}, the second to values from \code{table_b}. Values must not be missing -and the two suffixes must differ.} +\code{table_a}, the second to values from \code{table_b}.} } \value{ \item{\code{weave_diffs_wide()}}{The input \code{table_a} filtered to rows where From c6a685bed865ae5c3a83f511e2198a534d09d3de Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Wed, 15 Oct 2025 18:17:14 -0400 Subject: [PATCH 19/23] add example using suffix --- R/weave-diffs.R | 1 + man/weave_diffs.Rd | 1 + 2 files changed, 2 insertions(+) diff --git a/R/weave-diffs.R b/R/weave-diffs.R index a6e777a..29d9bd5 100644 --- a/R/weave-diffs.R +++ b/R/weave-diffs.R @@ -20,6 +20,7 @@ #' comp <- compare(example_df_a, example_df_b, by = car) #' comp |> weave_diffs_wide(disp) #' comp |> weave_diffs_wide(c(mpg, disp)) +#' comp |> weave_diffs_wide(c(mpg, disp), suffix = c("", "_new")) #' comp |> weave_diffs_long(disp) #' comp |> weave_diffs_long(c(mpg, disp)) diff --git a/man/weave_diffs.Rd b/man/weave_diffs.Rd index cf0d380..af09b95 100644 --- a/man/weave_diffs.Rd +++ b/man/weave_diffs.Rd @@ -37,6 +37,7 @@ Get differences in context comp <- compare(example_df_a, example_df_b, by = car) comp |> weave_diffs_wide(disp) comp |> weave_diffs_wide(c(mpg, disp)) +comp |> weave_diffs_wide(c(mpg, disp), suffix = c("", "_new")) comp |> weave_diffs_long(disp) comp |> weave_diffs_long(c(mpg, disp)) } From c179bcc846ffcc4532ba0e2ced2774113ec2b38f Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Wed, 15 Oct 2025 18:19:16 -0400 Subject: [PATCH 20/23] styler::style_pkg() --- R/value-diffs.R | 2 +- tests/testthat/test-get-diff-rows.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/value-diffs.R b/R/value-diffs.R index d65fc97..711dea7 100644 --- a/R/value-diffs.R +++ b/R/value-diffs.R @@ -1,7 +1,7 @@ #' Get the differing values from a comparison #' #' @param comparison The output of \code{compare()} -#' @param column <[`tidy-select`][versus_tidy_select]>. The output will show the +#' @param column <[`tidy-select`][versus_tidy_select]>. The output will show the #' differing values for the provided columns. #' #' @return diff --git a/tests/testthat/test-get-diff-rows.R b/tests/testthat/test-get-diff-rows.R index deb1fee..047bd35 100644 --- a/tests/testthat/test-get-diff-rows.R +++ b/tests/testthat/test-get-diff-rows.R @@ -44,13 +44,13 @@ test_that("get_diff_rows respects custom table_id", { table_a <- data.frame(x = 1:5, y = c(1, 2, 3, 4, 5)) table_b <- data.frame(x = 1:5, y = c(1, 2, 9, 4, 5)) matches <- list(common = data.frame(a = 1:5, b = 1:5)) - + # Default table_id result_default <- get_diff_rows("y", table_a, table_b, matches, TRUE, c("a", "b")) expect_named(result_default, c("row_a", "row_b")) expect_equal(result_default$row_a, 3) expect_equal(result_default$row_b, 3) - + # Custom table_id result_custom <- get_diff_rows("y", table_a, table_b, matches, TRUE, c("original", "updated")) expect_named(result_custom, c("row_original", "row_updated")) From 66a6a017f4a9053d65cae3aafd936d2a679ede67 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Wed, 15 Oct 2025 18:47:26 -0400 Subject: [PATCH 21/23] update readme and handle vctrs #1013 --- R/compare.R | 5 ++++- README.Rmd | 2 +- README.md | 18 +++++++++--------- tests/testthat/_snaps/helpers.md | 11 +++++++++++ tests/testthat/test-helpers.R | 4 ++++ 5 files changed, 29 insertions(+), 11 deletions(-) diff --git a/R/compare.R b/R/compare.R index 3f4b9b0..3f0a491 100644 --- a/R/compare.R +++ b/R/compare.R @@ -283,7 +283,10 @@ clean_table_id <- function(table_id, call = caller_env()) { cli_abort(message, call = call) } attributes(table_id) <- NULL - new <- vec_as_names(table_id, repair = "universal", quiet = TRUE) + new <- table_id %>% + vec_as_names(repair = "universal", quiet = TRUE) %>% + # second vec_as_names() is needed due to vctrs issue #1013 + vec_as_names(repair = "unique", quiet = TRUE) old <- table_id %|% "" is_changed <- new != old if (!any(is_changed)) { diff --git a/README.Rmd b/README.Rmd index 6282ad8..35a1519 100644 --- a/README.Rmd +++ b/README.Rmd @@ -83,7 +83,7 @@ comparison |> slice_diffs("a", mpg) ``` -Use `slice_unmatched()` to get the rows unmatched rows from one or both tables. +Use `slice_unmatched()` to get the unmatched rows from one or both tables. ```{r} comparison |> diff --git a/README.md b/README.md index 66dfff7..fe839fb 100644 --- a/README.md +++ b/README.md @@ -66,20 +66,20 @@ Use `compare()` to create a comparison of two tables. A comparison contains: -- `compare()$intersection`: columns in both tables and rows with - differing values -- `compare()$unmatched_cols`: columns in only one table -- `compare()$unmatched_rows`: rows in only one table +- `compare()$intersection`: columns in both tables and rows with + differing values +- `compare()$unmatched_cols`: columns in only one table +- `compare()$unmatched_rows`: rows in only one table ``` r comparison <- compare(example_df_a, example_df_b, by = car) comparison #> $tables #> # A tibble: 2 × 4 -#> table expr nrow ncol -#> -#> 1 table_a example_df_a 9 9 -#> 2 table_b example_df_b 10 9 +#> table expr nrow ncol +#> +#> 1 a example_df_a 9 9 +#> 2 b example_df_b 10 9 #> #> $by #> # A tibble: 1 × 3 @@ -179,7 +179,7 @@ comparison |> #> 2 Merc 240D 24.4 4 147. 62 3.69 3.19 1 0 ``` -Use `slice_unmatched()` to get the unmatched rows from one or both +Use `slice_unmatched()` to get the rows unmatched rows from one or both tables. ``` r diff --git a/tests/testthat/_snaps/helpers.md b/tests/testthat/_snaps/helpers.md index b48b6f0..60dad5c 100644 --- a/tests/testthat/_snaps/helpers.md +++ b/tests/testthat/_snaps/helpers.md @@ -42,3 +42,14 @@ Error: ! `table` is absent but must be supplied. +# clean_table_id is unaffected by vctrs issue #1013 + + Code + clean_table_id(c("", "1")) + Message + `table_id` has been adjusted + * `` -> `...1` + * `1` -> `...2` + Output + [1] "...1" "...2" + diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index 845fe9f..8f94bd5 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -111,3 +111,7 @@ test_that("clean_table_id strips attributes from named vectors", { plain_vec ) }) + +test_that("clean_table_id is unaffected by vctrs issue #1013", { + expect_snapshot(clean_table_id(c("", "1"))) +}) From 7ae66c0891dc8de1aeae430dfa9611f3edff3fb7 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Wed, 15 Oct 2025 19:21:47 -0400 Subject: [PATCH 22/23] build_readme() --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fe839fb..f8acc00 100644 --- a/README.md +++ b/README.md @@ -66,10 +66,10 @@ Use `compare()` to create a comparison of two tables. A comparison contains: -- `compare()$intersection`: columns in both tables and rows with - differing values -- `compare()$unmatched_cols`: columns in only one table -- `compare()$unmatched_rows`: rows in only one table +- `compare()$intersection`: columns in both tables and rows with + differing values +- `compare()$unmatched_cols`: columns in only one table +- `compare()$unmatched_rows`: rows in only one table ``` r comparison <- compare(example_df_a, example_df_b, by = car) @@ -179,7 +179,7 @@ comparison |> #> 2 Merc 240D 24.4 4 147. 62 3.69 3.19 1 0 ``` -Use `slice_unmatched()` to get the rows unmatched rows from one or both +Use `slice_unmatched()` to get the unmatched rows from one or both tables. ``` r From 8321cea96ab9db88d86871c0d5c08781e0f20651 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:47:45 -0400 Subject: [PATCH 23/23] remove technical details from NEWS --- NEWS.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 49acfab..8f55e82 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,8 +13,7 @@ * `weave_diffs_wide()` gains a `suffix` argument so column names in the wide output can use custom suffixes instead of `_{table_id}`. The default keeps the - existing behaviour, and input validation ensures suffix vectors are length two, - non-missing, and distinct. @elipousson + behaviour of prior versions. @elipousson # versus 0.3.0