ShawHahnLab · ressy · May 10, 2022 · May 10, 2022 · May 10, 2022 · May 10, 2022
diff --git a/R/report.R b/R/report.R
@@ -76,7 +76,9 @@ tabulate_allele_names <- function(data, extra_cols=NULL) {
 #' Report the genotypes present in a processed dataset in a concise data frame.
 #' This will arrange the allele names into a wide-format table with unique
 #' samples on rows and loci on columns, do some automatic cleanup on the
-#' columns, and show closest-matching individuals per entry, if given.
+#' columns, and show closest-matching individuals per entry, if given.  All NA
+#' entries are replaced with blank strings or optionally (for NA Replicates or
+#' untested sample/locus combinations) other custom placeholder text.
 #'
 #' @param results list of results data as produced by \code{analyze_dataset}.
 #' @param na.replicates text to replace NA entries with for the Replicates
@@ -108,13 +110,29 @@ report_genotypes <- function(results,
     tbl <- cbind(tbl, idents)
   }
 
-  # If we have no replicates drop that column
+  # If we have no replicates drop that column.  Otherwise put placeholder text
+  # for any NA replicate entries.
   if (all(is.na(tbl$Replicate)))
     tbl <- tbl[, -2]
   else
     tbl$Replicate[is.na(tbl$Replicate)] <- na.replicates
+
+  # Put placeholder text for any untested sample/locus combinations
+  # (This is a clumsy way of handling different columns differently, and is
+  # probably a hint that more logic handled in the long-format data frames would
+  # be better, but this can be a stopgap before some reorganization at some
+  # point.)
+  locus_cols <- do.call(
+    paste0,
+    expand.grid(unique(results$summary$Locus), c("_1", "_2")))
+  for (colnm in colnames(tbl)) {
+      if (colnm %in% locus_cols) {
+        tbl[[colnm]][is.na(tbl[[colnm]])] <- na.alleles
+      }
+  }
+
   # Blank out any remaining NA values
-  tbl[is.na(tbl)] <- na.alleles
+  tbl[is.na(tbl)] <- ""
 
   tbl
 }

diff --git a/tests/testthat/test_report.R b/tests/testthat/test_report.R
@@ -206,6 +206,7 @@ with(test_data, {
 # test report_genotypes ---------------------------------------------------
 
   test_that("report_genotypes produces expected data frame", {
+    # Basic test
     # Largely just a wrapper around tabulate_allele_names, but with a few
     # additional features like NA handling for specific kinds of columns
     tbl_known <- data.frame(
@@ -237,6 +238,7 @@ with(test_data, {
   })
 
   test_that("report_genotypes handles replicates including NA", {
+    # Test for na.replicates argument
     results <- results_summary_data$results
     # Explicitly label Sample 1 with a replicate, which will make that column
     # show up in the output
@@ -248,4 +250,45 @@ with(test_data, {
     expect_identical(tbl$Replicate, c("1", "X", "X"))
   })
 
+  test_that("report_genotypes uses text for absent sample/locus combos", {
+    # Test for na.alleles argument
+    # remove one tested combo from the results
+    results <- results_summary_data$results
+    results$summary <- subset(results$summary, ! (Sample == 3 & Locus == 2))
+    results$files <- results$files[results$summary$Filename]
+    results$samples <- results$samples[rownames(results$summary)]
+    # by default, an empty string is shown for missing info, indistinguishable
+    # from blank results.  Locus 1 should be unaffected, but we should see a
+    # blank for sample 3 in Locus 2's first column.
+    tbl <- report_genotypes(results)
+    expect_equal(tbl[["1_2"]], c("280-74dd46", "284-2b3fab", "280-74dd46"))
+    expect_equal(tbl[["2_1"]], c("250-5dacee", "266-2aa675", ""))
+    # If we give an na.alleles argument we should be able to get different
+    # placeholder text there.
+    tbl <- report_genotypes(results, na.alleles = "X")
+    expect_equal(tbl[["1_2"]], c("280-74dd46", "284-2b3fab", "280-74dd46"))
+    expect_equal(tbl[["2_1"]], c("250-5dacee", "266-2aa675", "X"))
+    # That placeholder text should only be applied to allele columns,
+    # not elsewhere like Replicate or known ID info columns
+    results$summary$Replicate <- rep(1, nrow(results$summary))
+    results$summary$Replicate[results$summary$Sample == 3] <- NA
+    tbl <- report_genotypes(results)
+    expect_equal(tbl$Replicate, c("1", "1", ""))
+    tbl <- report_genotypes(results, na.alleles = "X")
+    expect_equal(tbl$Replicate, c("1", "1", ""))
+    # That's somewhat a special case, though, since Replicate has some
+    # NA-handling logic of its own.  How about the identity columns, if present?
+    # (Faking the output from find_closest_matches here: nobody has a close
+    # match except for sample 3, which matches Bob perfectly)
+    closest <- lapply(rownames(tbl), function(entryname) numeric())
+    names(closest) <- rownames(tbl)
+    closest[["3"]] <- c(Bob = 0)
+    tbl <- report_genotypes(results, closest = closest)
+    expect_equal(tbl[["Distance"]], c("", "", "0"))
+    expect_equal(tbl[["Name"]], c("", "", "Bob"))
+    tbl <- report_genotypes(results, closest = closest, na.alleles = "X")
+    expect_equal(tbl[["Distance"]], c("", "", "0"))
+    expect_equal(tbl[["Name"]], c("", "", "Bob"))
+  })
+
 })