From 5fd27dc435ecd9f8e738f893c6fbe0adce391b4f Mon Sep 17 00:00:00 2001
From: Jesse Connell <ancon@upenn.edu>
Date: Tue, 10 May 2022 14:04:58 -0400
Subject: [PATCH 1/3] For #78: more report_genotypes testing first

---
 tests/testthat/test_report.R | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/testthat/test_report.R b/tests/testthat/test_report.R
index 21370de..333817f 100644
--- a/tests/testthat/test_report.R
+++ b/tests/testthat/test_report.R
@@ -206,6 +206,7 @@ with(test_data, {
 # test report_genotypes ---------------------------------------------------
 
   test_that("report_genotypes produces expected data frame", {
+    # Basic test
     # Largely just a wrapper around tabulate_allele_names, but with a few
     # additional features like NA handling for specific kinds of columns
     tbl_known <- data.frame(
@@ -237,6 +238,7 @@ with(test_data, {
   })
 
   test_that("report_genotypes handles replicates including NA", {
+    # Test for na.replicates argument
     results <- results_summary_data$results
     # Explicitly label Sample 1 with a replicate, which will make that column
     # show up in the output
@@ -248,4 +250,32 @@ with(test_data, {
     expect_identical(tbl$Replicate, c("1", "X", "X"))
   })
 
+  test_that("report_genotypes uses text for absent sample/locus combos", {
+    # Test for na.alleles argument
+    # remove one tested combo from the results
+    results <- results_summary_data$results
+    results$summary <- subset(results$summary, ! (Sample == 3 & Locus == 2))
+    results$files <- results$files[results$summary$Filename]
+    results$samples <- results$samples[rownames(results$summary)]
+    # by default, an empty string is shown for missing info, indistinguishable
+    # from blank results.  Locus 1 should be unaffected, but we should see a
+    # blank for sample 3 in Locus 2's first column.
+    tbl <- report_genotypes(results)
+    expect_equal(tbl[["1_2"]], c("280-74dd46", "284-2b3fab", "280-74dd46"))
+    expect_equal(tbl[["2_1"]], c("250-5dacee", "266-2aa675", ""))
+    # If we give an na.alleles argument we should be able to get different
+    # placeholder text there.
+    tbl <- report_genotypes(results, na.alleles = "X")
+    expect_equal(tbl[["1_2"]], c("280-74dd46", "284-2b3fab", "280-74dd46"))
+    expect_equal(tbl[["2_1"]], c("250-5dacee", "266-2aa675", "X"))
+    # That placeholder text should only be applied to allele columns,
+    # not elsewhere like Replicate or known ID info columns
+    results$summary$Replicate <- rep(1, nrow(results$summary))
+    results$summary$Replicate[results$summary$Sample == 3] <- NA
+    tbl <- report_genotypes(results)
+    expect_equal(tbl$Replicate, c("1", "1", ""))
+    tbl <- report_genotypes(results, na.alleles = "X")
+    expect_equal(tbl$Replicate, c("1", "1", ""))
+  })
+
 })

From a2b1332dd7517192a0a15c19c73d79813146c098 Mon Sep 17 00:00:00 2001
From: Jesse Connell <ancon@upenn.edu>
Date: Tue, 10 May 2022 14:11:06 -0400
Subject: [PATCH 2/3] For #78: failing test for report_genotypes bug

---
 tests/testthat/test_report.R | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/testthat/test_report.R b/tests/testthat/test_report.R
index 333817f..b20b5ef 100644
--- a/tests/testthat/test_report.R
+++ b/tests/testthat/test_report.R
@@ -276,6 +276,19 @@ with(test_data, {
     expect_equal(tbl$Replicate, c("1", "1", ""))
     tbl <- report_genotypes(results, na.alleles = "X")
     expect_equal(tbl$Replicate, c("1", "1", ""))
+    # That's somewhat of a special case, though, since Replicate has some
+    # NA-handling logic of its own.  How about the identity columns, if present?
+    # (Faking the output from find_closest_matches here: nobody has a close
+    # match except for sample 3, which matches Bob perfectly)
+    closest <- lapply(rownames(tbl), function(entryname) numeric())
+    names(closest) <- rownames(tbl)
+    closest[["3"]] <- c(Bob = 0)
+    tbl <- report_genotypes(results, closest = closest)
+    expect_equal(tbl[["Distance"]], c("", "", "0"))
+    expect_equal(tbl[["Name"]], c("", "", "Bob"))
+    tbl <- report_genotypes(results, closest = closest, na.alleles = "X")
+    expect_equal(tbl[["Distance"]], c("", "", "0"))
+    expect_equal(tbl[["Name"]], c("", "", "Bob"))
   })
 
 })

From 9593787b46015c4ceddf4ebc919b19bbf5c2716c Mon Sep 17 00:00:00 2001
From: Jesse Connell <ancon@upenn.edu>
Date: Tue, 10 May 2022 14:24:24 -0400
Subject: [PATCH 3/3] For #78: only apply na.alleles to allele columns

---
 R/report.R | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/R/report.R b/R/report.R
index 272df1d..3e21fac 100644
--- a/R/report.R
+++ b/R/report.R
@@ -76,7 +76,9 @@ tabulate_allele_names <- function(data, extra_cols=NULL) {
 #' Report the genotypes present in a processed dataset in a concise data frame.
 #' This will arrange the allele names into a wide-format table with unique
 #' samples on rows and loci on columns, do some automatic cleanup on the
-#' columns, and show closest-matching individuals per entry, if given.
+#' columns, and show closest-matching individuals per entry, if given.  NA
+#' Replicate entries and untested sample/locus combinations can be given custom
+#' placeholder text; any other NA entries are replaced with blank strings.
 #'
 #' @param results list of results data as produced by \code{analyze_dataset}.
 #' @param na.replicates text to replace NA entries with for the Replicates
@@ -108,13 +110,29 @@ report_genotypes <- function(results,
     tbl <- cbind(tbl, idents)
   }
 
-  # If we have no replicates drop that column
+  # If we have no replicates drop that column.  Otherwise put placeholder text
+  # for any NA replicate entries.
   if (all(is.na(tbl$Replicate)))
     tbl <- tbl[, -2]
   else
     tbl$Replicate[is.na(tbl$Replicate)] <- na.replicates
+
+  # Put placeholder text for any untested sample/locus combinations
+  # (Handling columns differently by name like this is clumsy, and is probably
+  # a hint that this logic belongs in the long-format data frames instead.
+  # Treat it as a stopgap until that code is reorganized; for now, only the
+  # allele columns, named <Locus>_1 and <Locus>_2, get the na.alleles text.)
+  locus_cols <- do.call(
+    paste0,
+    expand.grid(unique(results$summary$Locus), c("_1", "_2")))
+  for (colnm in colnames(tbl)) {
+      if (colnm %in% locus_cols) {
+        tbl[[colnm]][is.na(tbl[[colnm]])] <- na.alleles
+      }
+  }
+
   # Blank out any remaining NA values
-  tbl[is.na(tbl)] <- na.alleles
+  tbl[is.na(tbl)] <- ""
 
   tbl
 }