Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions R/report.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,9 @@ tabulate_allele_names <- function(data, extra_cols=NULL) {
#' Report the genotypes present in a processed dataset in a concise data frame.
#' This will arrange the allele names into a wide-format table with unique
#' samples on rows and loci on columns, do some automatic cleanup on the
#' columns, and show closest-matching individuals per entry, if given.
#' columns, and show closest-matching individuals per entry, if given. All NA
#' entries are replaced with blank strings or optionally (for NA Replicates or
#' untested sample/locus combinations) other custom placeholder text.
#'
#' @param results list of results data as produced by \code{analyze_dataset}.
#' @param na.replicates text to replace NA entries with for the Replicates
Expand Down Expand Up @@ -108,13 +110,29 @@ report_genotypes <- function(results,
tbl <- cbind(tbl, idents)
}

# If we have no replicates drop that column
# If we have no replicates drop that column. Otherwise put placeholder text
# for any NA replicate entries.
if (all(is.na(tbl$Replicate)))
tbl <- tbl[, -2]
else
tbl$Replicate[is.na(tbl$Replicate)] <- na.replicates

# Put placeholder text for any untested sample/locus combinations
# (This is a clumsy way of handling different columns differently, and is
# probably a hint that more logic handled in the long-format data frames would
# be better, but this can be a stopgap before some reorganization at some
# point.)
locus_cols <- do.call(
paste0,
expand.grid(unique(results$summary$Locus), c("_1", "_2")))
for (colnm in colnames(tbl)) {
if (colnm %in% locus_cols) {
tbl[[colnm]][is.na(tbl[[colnm]])] <- na.alleles
}
}

# Blank out any remaining NA values
tbl[is.na(tbl)] <- na.alleles
tbl[is.na(tbl)] <- ""

tbl
}
Expand Down
43 changes: 43 additions & 0 deletions tests/testthat/test_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ with(test_data, {
# test report_genotypes ---------------------------------------------------

test_that("report_genotypes produces expected data frame", {
# Basic test
# Largely just a wrapper around tabulate_allele_names, but with a few
# additional features like NA handling for specific kinds of columns
tbl_known <- data.frame(
Expand Down Expand Up @@ -237,6 +238,7 @@ with(test_data, {
})

test_that("report_genotypes handles replicates including NA", {
# Test for na.replicates argument
results <- results_summary_data$results
# Explicitly label Sample 1 with a replicate, which will make that column
# show up in the output
Expand All @@ -248,4 +250,45 @@ with(test_data, {
expect_identical(tbl$Replicate, c("1", "X", "X"))
})

test_that("report_genotypes uses text for absent sample/locus combos", {
# Test for na.alleles argument
# remove one tested combo from the results
results <- results_summary_data$results
results$summary <- subset(results$summary, ! (Sample == 3 & Locus == 2))
results$files <- results$files[results$summary$Filename]
results$samples <- results$samples[rownames(results$summary)]
# by default, an empty string is shown for missing info, indistinguishable
# from blank results. Locus 1 should be unaffected, but we should see a
# blank for sample 3 in Locus 2's first column.
tbl <- report_genotypes(results)
expect_equal(tbl[["1_2"]], c("280-74dd46", "284-2b3fab", "280-74dd46"))
expect_equal(tbl[["2_1"]], c("250-5dacee", "266-2aa675", ""))
# If we give an na.alleles argument we should be able to get different
# placeholder text there.
tbl <- report_genotypes(results, na.alleles = "X")
expect_equal(tbl[["1_2"]], c("280-74dd46", "284-2b3fab", "280-74dd46"))
expect_equal(tbl[["2_1"]], c("250-5dacee", "266-2aa675", "X"))
# That placeholder text should only be applied to allele columns,
# not elsewhere like Replicate or known ID info columns
results$summary$Replicate <- rep(1, nrow(results$summary))
results$summary$Replicate[results$summary$Sample == 3] <- NA
tbl <- report_genotypes(results)
expect_equal(tbl$Replicate, c("1", "1", ""))
tbl <- report_genotypes(results, na.alleles = "X")
expect_equal(tbl$Replicate, c("1", "1", ""))
# That's somewhat a special case, though, since Replicate has some
# NA-handling logic of its own. How about the identity columns, if present?
# (Faking the output from find_closest_matches here: nobody has a close
# match except for sample 3, which matches Bob perfectly)
closest <- lapply(rownames(tbl), function(entryname) numeric())
names(closest) <- rownames(tbl)
closest[["3"]] <- c(Bob = 0)
tbl <- report_genotypes(results, closest = closest)
expect_equal(tbl[["Distance"]], c("", "", "0"))
expect_equal(tbl[["Name"]], c("", "", "Bob"))
tbl <- report_genotypes(results, closest = closest, na.alleles = "X")
expect_equal(tbl[["Distance"]], c("", "", "0"))
expect_equal(tbl[["Name"]], c("", "", "Bob"))
})

})