From 5fd27dc435ecd9f8e738f893c6fbe0adce391b4f Mon Sep 17 00:00:00 2001 From: Jesse Connell Date: Tue, 10 May 2022 14:04:58 -0400 Subject: [PATCH 1/3] For #78: more report_genotypes testing first --- tests/testthat/test_report.R | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/testthat/test_report.R b/tests/testthat/test_report.R index 21370de..333817f 100644 --- a/tests/testthat/test_report.R +++ b/tests/testthat/test_report.R @@ -206,6 +206,7 @@ with(test_data, { # test report_genotypes --------------------------------------------------- test_that("report_genotypes produces expected data frame", { + # Basic test # Largely just a wrapper around tabulate_allele_names, but with a few # additional features like NA handling for specific kinds of columns tbl_known <- data.frame( @@ -237,6 +238,7 @@ with(test_data, { }) test_that("report_genotypes handles replicates including NA", { + # Test for na.replicates argument results <- results_summary_data$results # Explicitly label Sample 1 with a replicate, which will make that column # show up in the output @@ -248,4 +250,32 @@ with(test_data, { expect_identical(tbl$Replicate, c("1", "X", "X")) }) + test_that("report_genotypes uses text for absent sample/locus combos", { + # Test for na.alleles argument + # remove one tested combo from the results + results <- results_summary_data$results + results$summary <- subset(results$summary, ! (Sample == 3 & Locus == 2)) + results$files <- results$files[results$summary$Filename] + results$samples <- results$samples[rownames(results$summary)] + # by default, an empty string is shown for missing info, indistinguishable + # from blank results. Locus 1 should be unaffected, but we should see a + # blank for sample 3 in Locus 2's first column. + tbl <- report_genotypes(results) + expect_equal(tbl[["1_2"]], c("280-74dd46", "284-2b3fab", "280-74dd46")) + expect_equal(tbl[["2_1"]], c("250-5dacee", "266-2aa675", "")) + # If we give an na.alleles argument we should be able to get different + # placeholder text there. + tbl <- report_genotypes(results, na.alleles = "X") + expect_equal(tbl[["1_2"]], c("280-74dd46", "284-2b3fab", "280-74dd46")) + expect_equal(tbl[["2_1"]], c("250-5dacee", "266-2aa675", "X")) + # That placeholder text should only be applied to allele columns, + # not elsewhere like Replicate or known ID info columns + results$summary$Replicate <- rep(1, nrow(results$summary)) + results$summary$Replicate[results$summary$Sample == 3] <- NA + tbl <- report_genotypes(results) + expect_equal(tbl$Replicate, c("1", "1", "")) + tbl <- report_genotypes(results, na.alleles = "X") + expect_equal(tbl$Replicate, c("1", "1", "")) + }) + }) From a2b1332dd7517192a0a15c19c73d79813146c098 Mon Sep 17 00:00:00 2001 From: Jesse Connell Date: Tue, 10 May 2022 14:11:06 -0400 Subject: [PATCH 2/3] For #78: failing test for report_genotypes bug --- tests/testthat/test_report.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/testthat/test_report.R b/tests/testthat/test_report.R index 333817f..b20b5ef 100644 --- a/tests/testthat/test_report.R +++ b/tests/testthat/test_report.R @@ -276,6 +276,19 @@ with(test_data, { expect_equal(tbl$Replicate, c("1", "1", "")) tbl <- report_genotypes(results, na.alleles = "X") expect_equal(tbl$Replicate, c("1", "1", "")) + # That's somewhat a special case, though, since Replicate has some + # NA-handling logic of its own. How about the identity columns, if present? + # (Faking the output from find_closest_matches here: nobody has a close + # match except for sample 3, which matches Bob perfectly) + closest <- lapply(rownames(tbl), function(entryname) numeric()) + names(closest) <- rownames(tbl) + closest[["3"]] <- c(Bob = 0) + tbl <- report_genotypes(results, closest = closest) + expect_equal(tbl[["Distance"]], c("", "", "0")) + expect_equal(tbl[["Name"]], c("", "", "Bob")) + tbl <- report_genotypes(results, closest = closest, na.alleles = "X") + expect_equal(tbl[["Distance"]], c("", "", "0")) + expect_equal(tbl[["Name"]], c("", "", "Bob")) }) }) From 9593787b46015c4ceddf4ebc919b19bbf5c2716c Mon Sep 17 00:00:00 2001 From: Jesse Connell Date: Tue, 10 May 2022 14:24:24 -0400 Subject: [PATCH 3/3] For #78: only apply na.alleles to allele columns --- R/report.R | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/R/report.R b/R/report.R index 272df1d..3e21fac 100644 --- a/R/report.R +++ b/R/report.R @@ -76,7 +76,9 @@ tabulate_allele_names <- function(data, extra_cols=NULL) { #' Report the genotypes present in a processed dataset in a concise data frame. #' This will arrange the allele names into a wide-format table with unique #' samples on rows and loci on columns, do some automatic cleanup on the -#' columns, and show closest-matching individuals per entry, if given. +#' columns, and show closest-matching individuals per entry, if given. All NA +#' entries are replaced with blank strings or optionally (for NA Replicates or +#' untested sample/locus combinations) other custom placeholder text. #' #' @param results list of results data as produced by \code{analyze_dataset}. #' @param na.replicates text to replace NA entries with for the Replicates @@ -108,13 +110,29 @@ report_genotypes <- function(results, tbl <- cbind(tbl, idents) } - # If we have no replicates drop that column + # If we have no replicates drop that column. Otherwise put placeholder text + # for any NA replicate entries. if (all(is.na(tbl$Replicate))) tbl <- tbl[, -2] else tbl$Replicate[is.na(tbl$Replicate)] <- na.replicates + + # Put placeholder text for any untested sample/locus combinations + # (This is a clumsy way of handling different columns differently, and is + # probably a hint that more logic handled in the long-format data frames would + # be better, but this can be a stopgap before some reorganization at some + # point.) + locus_cols <- do.call( + paste0, + expand.grid(unique(results$summary$Locus), c("_1", "_2"))) + for (colnm in colnames(tbl)) { + if (colnm %in% locus_cols) { + tbl[[colnm]][is.na(tbl[[colnm]])] <- na.alleles + } + } + # Blank out any remaining NA values - tbl[is.na(tbl)] <- na.alleles + tbl[is.na(tbl)] <- "" tbl }