From 228931165a3e6a4d5eb83933cdfd662431ce683d Mon Sep 17 00:00:00 2001
From: Hao Ye <lhopitalified@gmail.com>
Date: Mon, 20 Apr 2020 16:27:46 -0400
Subject: [PATCH 1/8] add function to check for equitimed data add tests

---
 NAMESPACE                                   |  2 +
 R/utils-data-processing.R                   | 61 +++++++++++++++++++++
 man/is_equitimed.Rd                         | 25 +++++++++
 tests/testthat/test-02-dataset-processing.R | 17 +++++-
 4 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 R/utils-data-processing.R
 create mode 100644 man/is_equitimed.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 8ee186a..7d7ac3e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -44,6 +44,8 @@ export(import_retriever_data)
 export(install_retriever_data)
 export(interpolate_obs)
 export(invoke)
+export(is_equitimed)
+export(is_evenly_sampled)
 export(normalize_effort)
 export(normalize_obs)
 export(normalize_times)
diff --git a/R/utils-data-processing.R b/R/utils-data-processing.R
new file mode 100644
index 0000000..0b4e9ce
--- /dev/null
+++ b/R/utils-data-processing.R
@@ -0,0 +1,61 @@
+#' @title Check that the times of a dataset are evenly sampled
+#' @aliases is_evenly_sampled
+#' 
+#' @param data dataset to check
+#' @param period period to check the times against (if `NULL`, first check to 
+#'   see if there is a known `period` set in the metadata, otherwise assumes 1)
+#' @param tol tolerance for the period 
+#' @param return_if_time_missing default value to return if times are missing
+#' 
+#' @return TRUE or FALSE, invisibly
+#' 
+#' @export
+is_equitimed <- function(data, period = NULL, tol = 1e-06, 
+                         return_if_time_missing = TRUE)
+{
+    stopifnot(check_data_format(data))
+    
+    times <- get_times_from_data(data)
+    if (is.null(times))
+    {
+        message("No time index found. Returning ", return_if_time_missing)
+        return(return_if_time_missing)
+    }
+    period <- get_period(data$metadata$period, period)
+
+    full_times <- tryCatch(tidyr::full_seq(times, period, tol), 
+                           error = function(e) {
+                               message(e)
+                               return(NULL)
+                           })
+    invisible(isTRUE(all.equal(times, full_times)))
+}
+
+#' @export
+is_evenly_sampled <- is_equitimed
+
+#' extract the period, given the value from the metadata field, and a value 
+#' specified by the user. The flowchart is:
+#'   (1) if user has supplied non-null `period`, use that
+#'   (2) if metadata period is non-null, use that
+#'   (3) use a default value of 1 and print a message
+#' 
+#' @noRd
+get_period <- function(metadata_period, period = NULL)
+{
+    if (is.null(period))
+    {
+        period <- metadata_period
+        if (is.null(period))
+        {
+            message("No time period found. Assuming period = 1.")
+            period <- 1
+        }
+    }
+    return(period)
+}
+
+
+
+
+
diff --git a/man/is_equitimed.Rd b/man/is_equitimed.Rd
new file mode 100644
index 0000000..86f25ea
--- /dev/null
+++ b/man/is_equitimed.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-data-processing.R
+\name{is_equitimed}
+\alias{is_equitimed}
+\alias{is_evenly_sampled}
+\title{Check that the times of a dataset are evenly sampled}
+\usage{
+is_equitimed(data, period = NULL, tol = 1e-06, return_if_time_missing = TRUE)
+}
+\arguments{
+\item{data}{dataset to check}
+
+\item{period}{period to check the times against (if \code{NULL}, first check to
+see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
+
+\item{tol}{tolerance for the period}
+
+\item{return_if_time_missing}{default value to return if times are missing}
+}
+\value{
+TRUE or FALSE, invisibly
+}
+\description{
+Check that the times of a dataset are evenly sampled
+}
diff --git a/tests/testthat/test-02-dataset-processing.R b/tests/testthat/test-02-dataset-processing.R
index 9f2c56e..288c4d4 100644
--- a/tests/testthat/test-02-dataset-processing.R
+++ b/tests/testthat/test-02-dataset-processing.R
@@ -1,7 +1,6 @@
 context("Check Dataset Processing Code")
 
 test_that("BBS data processing works", {
-    
     species_table <- data.frame(aou = c(1, 3, 33, 44), 
                                 spanish_common_name = c("a x", "c x", "c x / yy", "d x zz"))
     
@@ -14,3 +13,19 @@ test_that("BBS data processing works", {
     expect_equal(out$species_id, c(1, 2, 3, 44))
     expect_equal(out$abundance, c(2, 2, 4, 2))
 })
+
+test_that("is_equitimed works", {
+    m <- capture_error(expect_false(is_equitimed(dragons)))
+    expect_match(as.character(m), "Error: `x` is not a regular sequence.", fixed = TRUE)
+    
+    path <- system.file("extdata", "subsampled",
+                        package = "MATSS", mustWork = TRUE)
+    dat <- get_mtquad_data()
+    expect_true(is_equitimed(dat))
+})
+
+
+
+
+
+test_that("make_integer_times")
\ No newline at end of file

From dd20bfd5319768fb2886bbf5c436a967a0ef2c09 Mon Sep 17 00:00:00 2001
From: Hao Ye <lhopitalified@gmail.com>
Date: Mon, 20 Apr 2020 21:32:56 -0400
Subject: [PATCH 2/8] add function to produce equitimed data

---
 DESCRIPTION                                 |   3 +-
 NAMESPACE                                   |   2 +
 R/utils-data-processing.R                   | 140 +++++++++++++++++---
 man/is_equitimed.Rd                         |   4 +-
 man/make_equitimed.Rd                       |  50 +++++++
 tests/testthat/test-02-dataset-processing.R |   5 +-
 6 files changed, 181 insertions(+), 23 deletions(-)
 create mode 100644 man/make_equitimed.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 401b376..9d7371f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -64,7 +64,8 @@ Imports:
     stringr,
     tibble,
     tidyr,
-    usethis
+    usethis, 
+    vctrs
 Suggests:
     covr,
     knitr,
diff --git a/NAMESPACE b/NAMESPACE
index 7d7ac3e..a9e4d09 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -46,6 +46,8 @@ export(interpolate_obs)
 export(invoke)
 export(is_equitimed)
 export(is_evenly_sampled)
+export(make_equitimed)
+export(make_evenly_sampled)
 export(normalize_effort)
 export(normalize_obs)
 export(normalize_times)
diff --git a/R/utils-data-processing.R b/R/utils-data-processing.R
index 0b4e9ce..033f662 100644
--- a/R/utils-data-processing.R
+++ b/R/utils-data-processing.R
@@ -5,35 +5,40 @@
 #' @param period period to check the times against (if `NULL`, first check to 
 #'   see if there is a known `period` set in the metadata, otherwise assumes 1)
 #' @param tol tolerance for the period 
-#' @param return_if_time_missing default value to return if times are missing
 #' 
 #' @return TRUE or FALSE, invisibly
 #' 
 #' @export
-is_equitimed <- function(data, period = NULL, tol = 1e-06, 
-                         return_if_time_missing = TRUE)
+is_equitimed <- function(data, period = NULL, tol = 1e-06)
 {
     stopifnot(check_data_format(data))
     
+    fulL_times <- get_full_times(data = data, period = period, tol = tol)
     times <- get_times_from_data(data)
-    if (is.null(times))
-    {
-        message("No time index found. Returning ", return_if_time_missing)
-        return(return_if_time_missing)
-    }
-    period <- get_period(data$metadata$period, period)
-
-    full_times <- tryCatch(tidyr::full_seq(times, period, tol), 
-                           error = function(e) {
-                               message(e)
-                               return(NULL)
-                           })
     invisible(isTRUE(all.equal(times, full_times)))
 }
 
 #' @export
 is_evenly_sampled <- is_equitimed
 
+#' get the complete time index, filling in gaps where necessary, and using the 
+#' period to establish the sampling frequency
+#' 
+#' @noRd
+get_full_times <- function(data, period = NULL, tol = 1e-06, 
+                           convert_error_to_message = TRUE)
+{
+    times <- get_times_from_data(data)
+    period <- get_period_from_data(data, period)
+    
+    full_times <- tryCatch(tidyr::full_seq(times, period, tol), 
+             error = function(e) {
+                 message(e$message);
+                 return(NULL)
+             })
+    return(full_times)
+}
+
 #' extract the period, given the value from the metadata field, and a value 
 #' specified by the user. The flowchart is:
 #'   (1) if user has supplied non-null `period`, use that
@@ -41,11 +46,11 @@ is_evenly_sampled <- is_equitimed
 #'   (3) use a default value of 1 and print a message
 #' 
 #' @noRd
-get_period <- function(metadata_period, period = NULL)
+get_period_from_data <- function(data, period = NULL)
 {
     if (is.null(period))
     {
-        period <- metadata_period
+        period <- data$metadata$period
         if (is.null(period))
         {
             message("No time period found. Assuming period = 1.")
@@ -54,8 +59,109 @@ get_period <- function(metadata_period, period = NULL)
     }
     return(period)
 }
+#' @title Insert rows if necessary so that time series are evenly sampled
+#' @aliases make_evenly_sampled
+#' 
+#' @param data dataset to modify
+#' @inheritParams is_equitimed
+#' @param method one of `c("mean", "method", "closest")` that determines how 
+#'   the rows of the original data will get coerced into the output here.
+#' @inheritParams base::mean
+#' 
+#' @return the dataset, with rows coerced according to the equitimed time 
+#'   indices, and additional empty rows inserted if needed
+#'
+#' @details First, `get_full_times()` computes the sequence of time index values 
+#'   at a regular sampling interval of period. These will be the final time 
+#'   index values for the output. *Some* set of rows of the original dataset 
+#'   will map to each of these time indices.
+#'   
+#'   The `method` argument determines how these rows get coerced:
+#'   \describe{
+#'     \item{mean}{the values in the rows are averaged together using `mean`}
+#'     \item{median}{the values in the rows are averaged together using `median`}
+#'     \item{closest}{the values in the row that is closest in time to the 
+#'     desired time index are used.}
+#'   }
+#' 
+#' @export
+make_equitimed <- function(data, period = NULL, tol = 1e-06, 
+                           method = c("mean", "method", "closest"), 
+                           na.rm = TRUE)
+{
+    stopifnot(check_data_format(data))
+    
+    full_times <- get_full_times(data = data, period = period, tol = tol)
+    if (is.null(full_times))
+    {
+        stop("Unable to construct an equitimed time index.")
+    }
+    
+    times <- get_times_from_data(data)
+    if (isTRUE(all.equal(times, full_times)))
+    {
+        message("Dataset is already equitimed (evenly sampled in time).")
+        return(invisible(data))
+    }
+    
+    # generate empty matrices to hold final abundance and covariates
+    abundance <- matrix(NA, nrow = length(full_times), ncol = NCOL(data$abundance))
+    covariates <- data$covariates[0,]
 
+    # compute separation between times and full_times
+    times_dist <- outer(times, full_times, function(a, b) {abs(b - a)})
+    
+    # fill abundance and covariates
+    method <- match.arg(method)
+    switch(method, 
+           mean = {
+               idx <- times_dist <= tol
+               for (i in seq_along(full_times))
+               {
+                   abundance[i, ] <- colMeans(data$abundance[idx[, i], , drop = FALSE], na.rm = na.rm)
+                   covariates[i, ] <- purrr::map_dfc(yy$covariates[idx[, i], , drop = FALSE], mean, na.rm = TRUE)
+               }
+           }, 
+           median = {
+               idx <- times_dist <= tol
+               for (i in seq_along(full_times))
+               {
+                   abundance[i, ] <- apply(data$abundance[idx[, i], , drop = FALSE], 2, median, na.rm = na.rm)
+                   covariates[i, ] <- purrr::map_dfc(yy$covariates[idx[, i], , drop = FALSE], median, na.rm = TRUE)
+               }
+           }, 
+           closest = {
+               idx <- apply(times_dist, 2, which.min)
+               abundance <- data$abundance[idx,]
+               covariates <- data$covariates[idx,]
+           })
+    
+    # restore column names and convert to tibbles
+    colnames(abundance) <- colnames(data$abundance)
+    abundance <- tibble::as_tibble(abundance)
+    covariates <- tibble::as_tibble(covariates)
+    
+    # make sure times column is properly filled
+    time_var <- resolve_covariate_variable(data, "timename")
+    if (is.null(time_var))
+    {
+        # make sure timename variable is unique
+        new_col_names <- vctrs::vec_as_names(c(colnames(covariates), "time"), repair = "unique")
+        time_var <- tail(new_col_names, 1)
+        data$metadata$timename <- time_var
+    }
+    covariates[time_var] <- full_times
 
+    # assemble data to return
+    out <- list(abundance = abundance, 
+                covariates = covariates, 
+                metadata = data$metadata)
+    attr(out, "class") <- "matssdata"
+    
+    return(out)
+}
 
+#' @export
+make_evenly_sampled <- make_equitimed
 
 
diff --git a/man/is_equitimed.Rd b/man/is_equitimed.Rd
index 86f25ea..141a8e6 100644
--- a/man/is_equitimed.Rd
+++ b/man/is_equitimed.Rd
@@ -5,7 +5,7 @@
 \alias{is_evenly_sampled}
 \title{Check that the times of a dataset are evenly sampled}
 \usage{
-is_equitimed(data, period = NULL, tol = 1e-06, return_if_time_missing = TRUE)
+is_equitimed(data, period = NULL, tol = 1e-06)
 }
 \arguments{
 \item{data}{dataset to check}
@@ -14,8 +14,6 @@ is_equitimed(data, period = NULL, tol = 1e-06, return_if_time_missing = TRUE)
 see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
 
 \item{tol}{tolerance for the period}
-
-\item{return_if_time_missing}{default value to return if times are missing}
 }
 \value{
 TRUE or FALSE, invisibly
diff --git a/man/make_equitimed.Rd b/man/make_equitimed.Rd
new file mode 100644
index 0000000..f07d32c
--- /dev/null
+++ b/man/make_equitimed.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-data-processing.R
+\name{make_equitimed}
+\alias{make_equitimed}
+\alias{make_evenly_sampled}
+\title{Insert rows if necessary so that time series are evenly sampled}
+\usage{
+make_equitimed(
+  data,
+  period = NULL,
+  tol = 1e-06,
+  method = c("mean", "method", "closest"),
+  na.rm = TRUE
+)
+}
+\arguments{
+\item{data}{dataset to modify}
+
+\item{period}{period to check the times against (if \code{NULL}, first check to
+see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
+
+\item{tol}{tolerance for the period}
+
+\item{method}{one of \code{c("mean", "method", "closest")} that determines how
+the rows of the original data will get coerced into the output here.}
+
+\item{na.rm}{a logical value indicating whether \code{NA}
+    values should be stripped before the computation proceeds.}
+}
+\value{
+the dataset, with rows coerced according to the equitimed time
+indices, and additional empty rows inserted if needed
+}
+\description{
+Insert rows if necessary so that time series are evenly sampled
+}
+\details{
+First, \code{get_full_times()} computes the sequence of time index values
+at a regular sampling interval of period. These will be the final time
+index values for the output. \emph{Some} set of rows of the original dataset
+will map to each of these time indices.
+
+The \code{method} argument determines how these rows get coerced:
+\describe{
+\item{mean}{the values in the rows are averaged together using \code{mean}}
+\item{median}{the values in the rows are averaged together using \code{median}}
+\item{closest}{the values in the row that is closest in time to the
+desired time index are used.}
+}
+}
diff --git a/tests/testthat/test-02-dataset-processing.R b/tests/testthat/test-02-dataset-processing.R
index 288c4d4..3c0e83d 100644
--- a/tests/testthat/test-02-dataset-processing.R
+++ b/tests/testthat/test-02-dataset-processing.R
@@ -20,8 +20,9 @@ test_that("is_equitimed works", {
     
     path <- system.file("extdata", "subsampled",
                         package = "MATSS", mustWork = TRUE)
-    dat <- get_mtquad_data()
-    expect_true(is_equitimed(dat))
+    dat <- get_mtquad_data(path = file.path(path, "mapped-plant-quads-mt"))
+    m <- capture_messages(expect_true(is_equitimed(dat)))
+    expect_match(m, "No time period found. Assuming period = 1.", fixed = TRUE)
 })
 
 

From 6dbc113207135c42200efeab29e37b7ae0af05eb Mon Sep 17 00:00:00 2001
From: Hao Ye <lhopitalified@gmail.com>
Date: Mon, 20 Apr 2020 23:48:11 -0400
Subject: [PATCH 3/8] fix make_equitimed and add tests

---
 R/utils-data-processing.R                   | 17 +++++-----
 tests/testthat/test-02-dataset-processing.R | 37 +++++++++++++++++++--
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/R/utils-data-processing.R b/R/utils-data-processing.R
index 033f662..e1f72d8 100644
--- a/R/utils-data-processing.R
+++ b/R/utils-data-processing.R
@@ -13,7 +13,7 @@ is_equitimed <- function(data, period = NULL, tol = 1e-06)
 {
     stopifnot(check_data_format(data))
     
-    fulL_times <- get_full_times(data = data, period = period, tol = tol)
+    full_times <- get_full_times(data = data, period = period, tol = tol)
     times <- get_times_from_data(data)
     invisible(isTRUE(all.equal(times, full_times)))
 }
@@ -25,15 +25,14 @@ is_evenly_sampled <- is_equitimed
 #' period to establish the sampling frequency
 #' 
 #' @noRd
-get_full_times <- function(data, period = NULL, tol = 1e-06, 
-                           convert_error_to_message = TRUE)
+get_full_times <- function(data, period = NULL, tol = 1e-06)
 {
     times <- get_times_from_data(data)
     period <- get_period_from_data(data, period)
     
     full_times <- tryCatch(tidyr::full_seq(times, period, tol), 
              error = function(e) {
-                 message(e$message);
+                 message(e$message)
                  return(NULL)
              })
     return(full_times)
@@ -94,19 +93,19 @@ make_equitimed <- function(data, period = NULL, tol = 1e-06,
     full_times <- get_full_times(data = data, period = period, tol = tol)
     if (is.null(full_times))
     {
-        stop("Unable to construct an equitimed time index.")
+        stop("Unable to construct an evenly spaced time index.")
     }
     
     times <- get_times_from_data(data)
     if (isTRUE(all.equal(times, full_times)))
     {
-        message("Dataset is already equitimed (evenly sampled in time).")
+        message("Dataset is already evenly sampled in time.")
         return(invisible(data))
     }
     
     # generate empty matrices to hold final abundance and covariates
     abundance <- matrix(NA, nrow = length(full_times), ncol = NCOL(data$abundance))
-    covariates <- data$covariates[0,]
+    covariates <- data$covariates[0, , drop = FALSE]
 
     # compute separation between times and full_times
     times_dist <- outer(times, full_times, function(a, b) {abs(b - a)})
@@ -119,7 +118,7 @@ make_equitimed <- function(data, period = NULL, tol = 1e-06,
                for (i in seq_along(full_times))
                {
                    abundance[i, ] <- colMeans(data$abundance[idx[, i], , drop = FALSE], na.rm = na.rm)
-                   covariates[i, ] <- purrr::map_dfc(yy$covariates[idx[, i], , drop = FALSE], mean, na.rm = TRUE)
+                   covariates[i, ] <- purrr::map_dfc(data$covariates[idx[, i], , drop = FALSE], mean, na.rm = TRUE)
                }
            }, 
            median = {
@@ -127,7 +126,7 @@ make_equitimed <- function(data, period = NULL, tol = 1e-06,
                for (i in seq_along(full_times))
                {
                    abundance[i, ] <- apply(data$abundance[idx[, i], , drop = FALSE], 2, median, na.rm = na.rm)
-                   covariates[i, ] <- purrr::map_dfc(yy$covariates[idx[, i], , drop = FALSE], median, na.rm = TRUE)
+                   covariates[i, ] <- purrr::map_dfc(data$covariates[idx[, i], , drop = FALSE], median, na.rm = TRUE)
                }
            }, 
            closest = {
diff --git a/tests/testthat/test-02-dataset-processing.R b/tests/testthat/test-02-dataset-processing.R
index 3c0e83d..e43af7f 100644
--- a/tests/testthat/test-02-dataset-processing.R
+++ b/tests/testthat/test-02-dataset-processing.R
@@ -15,8 +15,8 @@ test_that("BBS data processing works", {
 })
 
 test_that("is_equitimed works", {
-    m <- capture_error(expect_false(is_equitimed(dragons)))
-    expect_match(as.character(m), "Error: `x` is not a regular sequence.", fixed = TRUE)
+    m <- capture_messages(expect_false(is_equitimed(dragons)))
+    expect_match(m, "`x` is not a regular sequence.", fixed = TRUE)
     
     path <- system.file("extdata", "subsampled",
                         package = "MATSS", mustWork = TRUE)
@@ -25,8 +25,39 @@ test_that("is_equitimed works", {
     expect_match(m, "No time period found. Assuming period = 1.", fixed = TRUE)
 })
 
+test_that("make_equitimed works", {
+    expect_error(make_equitimed(dragons), 
+                 "Unable to construct an evenly spaced time index.", 
+                 fixed = TRUE)
 
+    path <- system.file("extdata", "subsampled",
+                        package = "MATSS", mustWork = TRUE)
+    dat <- get_mtquad_data(path = file.path(path, "mapped-plant-quads-mt"))
+    expect_error(m <- capture_messages(out <- make_equitimed(dat)), NA)
+    expect_match(m, "No time period found. Assuming period = 1.", fixed = TRUE, all = FALSE)
+    expect_match(m, "Dataset is already evenly sampled in time.", fixed = TRUE, all = FALSE)
+    expect_equal(out, dat)
+    
+    dat <- list(
+        abundance = data.frame(a = c(1, 2, 3, 5, 6, 9, 10, 11, 12, 13), 
+                               b = c(1, 1, 1, 1, 1, 1, 1,  1,  1,  1)), 
+        covariates = data.frame(time = c(0, 0.5, 1, 2, 2.5, 4, 4.5, 5, 5.5, 6)), 
+        metadata = list(timename = "time", 
+                        period = 0.5, 
+                        is_community = FALSE, 
+                        citation = "NA")
+    )
+    attr(dat, "class") <- "matssdata"
+    expect_error(out <- make_equitimed(dat), NA)
+    expect_true(is_equitimed(out))
+    expect_true(check_data_format(out))
+    expect_equal(dim(out$abundance), c(13, 2))
+    expect_true(all(is.na(out$abundance[c(4, 7, 8), ])))
+    expect_true(!any(is.na(out$abundance[-c(4, 7, 8), ])))
+})
 
 
 
-test_that("make_integer_times")
\ No newline at end of file
+test_that("make_integer_times", {
+    
+})
\ No newline at end of file

From c2ea2177fc6b9564cd7422405e30e41de8511823 Mon Sep 17 00:00:00 2001
From: Hao Ye <lhopitalified@gmail.com>
Date: Tue, 21 Apr 2020 21:30:34 -0400
Subject: [PATCH 4/8] add `make_integer_times`

---
 DESCRIPTION                                 |  5 +-
 NAMESPACE                                   |  1 +
 R/create_MATSS_compendium.R                 |  3 +-
 R/utils-data-processing.R                   | 84 ++++++++++++++++++++-
 man/make_integer_times.Rd                   | 29 +++++++
 tests/testthat/test-02-dataset-processing.R | 46 ++++++++---
 6 files changed, 151 insertions(+), 17 deletions(-)
 create mode 100644 man/make_integer_times.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 9d7371f..27d6f4e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -57,7 +57,7 @@ Imports:
     portalr (>= 0.3.2),
     purrr,
     rdataretriever (>= 2.0.0),
-    reticulate (>= 1.14.9002),
+    reticulate (>= 1.15),
     rlang,
     RSQLite,
     rstudioapi,
@@ -77,9 +77,6 @@ Suggests:
     visNetwork
 VignetteBuilder: 
     knitr
-Remotes: 
-    rstudio/reticulate,
-    weecology/portalr
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
index a9e4d09..f87cd03 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -48,6 +48,7 @@ export(is_equitimed)
 export(is_evenly_sampled)
 export(make_equitimed)
 export(make_evenly_sampled)
+export(make_integer_times)
 export(normalize_effort)
 export(normalize_obs)
 export(normalize_times)
diff --git a/R/create_MATSS_compendium.R b/R/create_MATSS_compendium.R
index 7b31cbb..ee0a7d6 100644
--- a/R/create_MATSS_compendium.R
+++ b/R/create_MATSS_compendium.R
@@ -56,7 +56,8 @@ create_MATSS_compendium <- function(path,
                                       matss_ref_key = matss_citation$key), 
                           package = "MATSS")
     if (DEPLOY)
-        preamble_text <- ":rotating_light: **THIS IS AN AUTO-GENERATED EXAMPLE COMPENDIUM** :rotating_light:\n"
+        preamble_text <- c(":rotating_light: **THIS IS AN AUTO-GENERATED EXAMPLE COMPENDIUM** :rotating_light:\n", 
+                           "*last updated: ", Sys.Date(), "*\n")
     else
         preamble_text <- ""
     
diff --git a/R/utils-data-processing.R b/R/utils-data-processing.R
index e1f72d8..d5b270d 100644
--- a/R/utils-data-processing.R
+++ b/R/utils-data-processing.R
@@ -28,6 +28,12 @@ is_evenly_sampled <- is_equitimed
 get_full_times <- function(data, period = NULL, tol = 1e-06)
 {
     times <- get_times_from_data(data)
+    if (is.null(times))
+    {
+        stop("Dataset does not appear to have a times variable.\n", 
+             "Check", usethis::ui_code("covariates"), " and ", 
+               usethis::ui_code("metadata$timename"), ".\n")
+    }
     period <- get_period_from_data(data, period)
     
     full_times <- tryCatch(tidyr::full_seq(times, period, tol), 
@@ -58,6 +64,7 @@ get_period_from_data <- function(data, period = NULL)
     }
     return(period)
 }
+
 #' @title Insert rows if necessary so that time series are evenly sampled
 #' @aliases make_evenly_sampled
 #' 
@@ -145,7 +152,8 @@ make_equitimed <- function(data, period = NULL, tol = 1e-06,
     if (is.null(time_var))
     {
         # make sure timename variable is unique
-        new_col_names <- vctrs::vec_as_names(c(colnames(covariates), "time"), repair = "unique")
+        new_col_names <- vctrs::vec_as_names(c(colnames(covariates), "time"), 
+                                             repair = "unique", quiet = TRUE)
         time_var <- tail(new_col_names, 1)
         data$metadata$timename <- time_var
     }
@@ -163,4 +171,78 @@ make_equitimed <- function(data, period = NULL, tol = 1e-06,
 #' @export
 make_evenly_sampled <- make_equitimed
 
+#' @title Add a time variable with integer values for evenly sampled data
+#' 
+#' @param data dataset to modify
+#' @inheritParams is_equitimed
+#' @inheritParams base::mean
+#' 
+#' @return the dataset, with integer times
+#'
+#' @details First, check if the data are evenly sampled in time. If not, we 
+#'   exit early. Next, if the times are already integer or Date, we don't do 
+#'   anything. If the times are numeric, but roundable to integer, we round. 
+#'   Otherwise, we add a new variable to `covariates` from 1:n and designate 
+#'   this variable as the `timename`.
+#' 
+#' @export
+make_integer_times <- function(data, period = NULL, tol = 1e-06, 
+                               confirm = interactive())
+{
+    is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5)
+    {
+        abs(x - round(x)) < tol
+    }
+    
+    # check for existence of times
+    times <- get_times_from_data(data)
+    
+    # do checks based on existing times
+    if (!is.null(times))
+    {    
+        # check for equitimed
+        if (!is_equitimed(data = data, period = period, tol = tol))
+        {
+            stop(c("Dataset is not evenly sampled in time.\n",
+                   "Perhaps you want to call ", usethis::ui_code("make_equitimed()"), " first.\n"))
+        }
+        
+        # check for integer times
+        if (is.integer(times))
+        {
+            message("Dataset appears evenly sampled in time with integer times already.")
+            return(invisible(data))
+        } else if (inherits(times, "Date"))
+        {
+            message("Dataset appears evenly sampled in time with `Date` formatted times already.")
+            return(invisible(data))
+        }
+        if (all(is.wholenumber(times)))
+        {
+            message("Dataset appears evenly sampled in time with (close to) integer times already.")
+            message("Rounding times to integer and replacing them...")
+            time_var <- data$metadata$timename
+            data$covariates[time_var] <- as.integer(round(times))
+            return(invisible(data))
+        }
+    }
+    
+    # add time
+    times <- seq_len(NROW(data$abundance))
+    if (is.null(data$covariates)) # create covariates
+    {
+        time_var <- "time"
+        data$covariates <- tibble::tibble(time_var = times)
+    } else {
+        new_col_names <- vctrs::vec_as_names(c(colnames(data$covariates), "time"), 
+                                             repair = "unique", quiet = TRUE)
+        time_var <- tail(new_col_names, 1)
+        data$covariates[time_var] <- times
+    }
+    data$metadata$timename <- time_var
+    message("Integer times created in variable ", usethis::ui_code(time_var), ".")
+    return(invisible(data))
+}
+
+
 
diff --git a/man/make_integer_times.Rd b/man/make_integer_times.Rd
new file mode 100644
index 0000000..f74dcdc
--- /dev/null
+++ b/man/make_integer_times.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-data-processing.R
+\name{make_integer_times}
+\alias{make_integer_times}
+\title{Add a time variable with integer values for evenly sampled data}
+\usage{
+make_integer_times(data, period = NULL, tol = 1e-06, confirm = interactive())
+}
+\arguments{
+\item{data}{dataset to modify}
+
+\item{period}{period to check the times against (if \code{NULL}, first check to
+see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
+
+\item{tol}{tolerance for the period}
+}
+\value{
+the dataset, with integer times
+}
+\description{
+Add a time variable with integer values for evenly sampled data
+}
+\details{
+First, check if the data are evenly sampled in time. If not, we
+exit early. Next, if the times are already integer or Date, we don't do
+anything. If the times are numeric, but roundable to integer, we round.
+Otherwise, we add a new variable to \code{covariates} from 1:n and designate
+this variable as the \code{timename}.
+}
diff --git a/tests/testthat/test-02-dataset-processing.R b/tests/testthat/test-02-dataset-processing.R
index e43af7f..cf03f65 100644
--- a/tests/testthat/test-02-dataset-processing.R
+++ b/tests/testthat/test-02-dataset-processing.R
@@ -14,6 +14,26 @@ test_that("BBS data processing works", {
     expect_equal(out$abundance, c(2, 2, 4, 2))
 })
 
+sample_dat <- list(
+    abundance = data.frame(a = c(1, 2, 3, 5, 6, 9, 10, 11, 12, 13), 
+                           b = c(1, 1, 1, 1, 1, 1, 1,  1,  1,  1)), 
+    covariates = data.frame(time = c(0, 0.5, 1, 2, 2.5, 4, 4.5, 5, 5.5, 6)), 
+    metadata = list(timename = "time", 
+                    period = 0.5, 
+                    is_community = FALSE, 
+                    citation = "NA")
+)
+attr(sample_dat, "class") <- "matssdata"
+
+test_that("get_full_times works", {
+    expect_error(full_times <- get_full_times(sample_dat), NA)
+    expect_equal(full_times, seq(0.0, 6.0, by = 0.5))
+    
+    error_dat <- sample_dat
+    error_dat$metadata$timename <- NULL
+    expect_error(get_full_times(error_dat))
+})
+
 test_that("is_equitimed works", {
     m <- capture_messages(expect_false(is_equitimed(dragons)))
     expect_match(m, "`x` is not a regular sequence.", fixed = TRUE)
@@ -38,17 +58,7 @@ test_that("make_equitimed works", {
     expect_match(m, "Dataset is already evenly sampled in time.", fixed = TRUE, all = FALSE)
     expect_equal(out, dat)
     
-    dat <- list(
-        abundance = data.frame(a = c(1, 2, 3, 5, 6, 9, 10, 11, 12, 13), 
-                               b = c(1, 1, 1, 1, 1, 1, 1,  1,  1,  1)), 
-        covariates = data.frame(time = c(0, 0.5, 1, 2, 2.5, 4, 4.5, 5, 5.5, 6)), 
-        metadata = list(timename = "time", 
-                        period = 0.5, 
-                        is_community = FALSE, 
-                        citation = "NA")
-    )
-    attr(dat, "class") <- "matssdata"
-    expect_error(out <- make_equitimed(dat), NA)
+    expect_error(out <- make_equitimed(sample_dat), NA)
     expect_true(is_equitimed(out))
     expect_true(check_data_format(out))
     expect_equal(dim(out$abundance), c(13, 2))
@@ -59,5 +69,19 @@ test_that("make_equitimed works", {
 
 
 test_that("make_integer_times", {
+    expect_error(out <- make_integer_times(sample_dat), 
+                 "Dataset is not evenly sampled in time.")
     
+    expect_error(out <- make_integer_times(make_equitimed(sample_dat)), NA)
+    expect_true(check_data_format(out))
+    expect_equal(dim(out$covariates), c(13, 2))
+    expect_equal(out$covariates[[2]], 1:13)
+
+    path <- system.file("extdata", "subsampled",
+                        package = "MATSS", mustWork = TRUE)
+    dat <- get_mtquad_data(path = file.path(path, "mapped-plant-quads-mt"))
+    m <- capture_messages(out <- make_integer_times(dat))
+    expect_match(m, "No time period found. Assuming period = 1.", fixed = TRUE, all = FALSE)
+    expect_match(m, "Dataset appears evenly sampled in time with integer times already.", fixed = TRUE, all = FALSE)
+    expect_equal(out, dat)
 })
\ No newline at end of file

From b2538f43bc62b797fab91afb4d00aa0426ab5236 Mon Sep 17 00:00:00 2001
From: Hao Ye <lhopitalified@gmail.com>
Date: Tue, 21 Apr 2020 22:19:27 -0400
Subject: [PATCH 5/8] add function to check for integer times

---
 NAMESPACE                 |   1 +
 R/utils-data-processing.R | 154 +++++++++++++++++++++++---------------
 man/has_integer_times.Rd  |  22 ++++++
 man/make_integer_times.Rd |  16 ++--
 4 files changed, 120 insertions(+), 73 deletions(-)
 create mode 100644 man/has_integer_times.Rd

diff --git a/NAMESPACE b/NAMESPACE
index f87cd03..9e40c80 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -40,6 +40,7 @@ export(get_portal_rodents)
 export(get_sdl_data)
 export(get_sgs_data)
 export(get_times_from_data)
+export(has_integer_times)
 export(import_retriever_data)
 export(install_retriever_data)
 export(interpolate_obs)
diff --git a/R/utils-data-processing.R b/R/utils-data-processing.R
index d5b270d..918b847 100644
--- a/R/utils-data-processing.R
+++ b/R/utils-data-processing.R
@@ -21,50 +21,6 @@ is_equitimed <- function(data, period = NULL, tol = 1e-06)
 #' @export
 is_evenly_sampled <- is_equitimed
 
-#' get the complete time index, filling in gaps where necessary, and using the 
-#' period to establish the sampling frequency
-#' 
-#' @noRd
-get_full_times <- function(data, period = NULL, tol = 1e-06)
-{
-    times <- get_times_from_data(data)
-    if (is.null(times))
-    {
-        stop("Dataset does not appear to have a times variable.\n", 
-             "Check", usethis::ui_code("covariates"), " and ", 
-               usethis::ui_code("metadata$timename"), ".\n")
-    }
-    period <- get_period_from_data(data, period)
-    
-    full_times <- tryCatch(tidyr::full_seq(times, period, tol), 
-             error = function(e) {
-                 message(e$message)
-                 return(NULL)
-             })
-    return(full_times)
-}
-
-#' extract the period, given the value from the metadata field, and a value 
-#' specified by the user. The flowchart is:
-#'   (1) if user has supplied non-null `period`, use that
-#'   (2) if metadata period is non-null, use that
-#'   (3) use a default value of 1 and print a message
-#' 
-#' @noRd
-get_period_from_data <- function(data, period = NULL)
-{
-    if (is.null(period))
-    {
-        period <- data$metadata$period
-        if (is.null(period))
-        {
-            message("No time period found. Assuming period = 1.")
-            period <- 1
-        }
-    }
-    return(period)
-}
-
 #' @title Insert rows if necessary so that time series are evenly sampled
 #' @aliases make_evenly_sampled
 #' 
@@ -171,11 +127,45 @@ make_equitimed <- function(data, period = NULL, tol = 1e-06,
 #' @export
 make_evenly_sampled <- make_equitimed
 
+#' @title Check if a dataset has integer times
+#' 
+#' @param data dataset to check
+#' 
+#' @return TRUE or FALSE
+#'
+#' @details If the times are already integer or Date, true. Otherwise FALSE, 
+#'   with a message if times are missing, or if times could potentially be 
+#'   rounded.
+#' 
+#' @export
+has_integer_times <- function(data)
+{
+    # check for existence of times
+    times <- get_times_from_data(data)
+    if (is.null(times))
+    {
+        message("Dataset is missing times.")
+        return(FALSE)
+    }
+    
+    # check for integer times
+    if (is.integer(times) || inherits(times, "Date"))
+    {
+        return(TRUE)
+    } else if (all(is.wholenumber(times))) {
+        message("Dataset has close to integer times, but they need to be rounded.\n", 
+                "Perhaps you want to call ", usethis::ui_code("make_integer_times()"), ".\n")
+        return(FALSE)
+    }
+    
+    # otherwise
+    return(FALSE)
+}
+
 #' @title Add a time variable with integer values for evenly sampled data
 #' 
 #' @param data dataset to modify
 #' @inheritParams is_equitimed
-#' @inheritParams base::mean
 #' 
 #' @return the dataset, with integer times
 #'
@@ -186,20 +176,13 @@ make_evenly_sampled <- make_equitimed
 #'   this variable as the `timename`.
 #' 
 #' @export
-make_integer_times <- function(data, period = NULL, tol = 1e-06, 
-                               confirm = interactive())
+make_integer_times <- function(data, period = NULL, tol = 1e-06)
 {
-    is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5)
-    {
-        abs(x - round(x)) < tol
-    }
-    
-    # check for existence of times
     times <- get_times_from_data(data)
     
     # do checks based on existing times
     if (!is.null(times))
-    {    
+    {
         # check for equitimed
         if (!is_equitimed(data = data, period = period, tol = tol))
         {
@@ -210,16 +193,13 @@ make_integer_times <- function(data, period = NULL, tol = 1e-06,
         # check for integer times
         if (is.integer(times))
         {
-            message("Dataset appears evenly sampled in time with integer times already.")
+            message("Dataset is evenly sampled with integer times already.")
             return(invisible(data))
-        } else if (inherits(times, "Date"))
-        {
-            message("Dataset appears evenly sampled in time with `Date` formatted times already.")
+        } else if (inherits(times, "Date")) {
+            message("Dataset is evenly sampled with `Date` formatted times already.")
             return(invisible(data))
-        }
-        if (all(is.wholenumber(times)))
-        {
-            message("Dataset appears evenly sampled in time with (close to) integer times already.")
+        } else if (all(is.wholenumber(times))) {
+            message("Dataset is evenly sampled with (close to) integer times already.")
             message("Rounding times to integer and replacing them...")
             time_var <- data$metadata$timename
             data$covariates[time_var] <- as.integer(round(times))
@@ -245,4 +225,54 @@ make_integer_times <- function(data, period = NULL, tol = 1e-06,
 }
 
 
+#' get the complete time index, filling in gaps where necessary, and using the 
+#' period to establish the sampling frequency
+#' 
+#' @noRd
+get_full_times <- function(data, period = NULL, tol = 1e-06)
+{
+    times <- get_times_from_data(data)
+    if (is.null(times))
+    {
+        stop("Dataset does not appear to have a times variable.\n", 
+             "Check", usethis::ui_code("covariates"), " and ", 
+             usethis::ui_code("metadata$timename"), ".\n")
+    }
+    period <- get_period_from_data(data, period)
+    
+    full_times <- tryCatch(tidyr::full_seq(times, period, tol), 
+                           error = function(e) {
+                               message(e$message)
+                               return(NULL)
+                           })
+    return(full_times)
+}
+
+#' extract the period, given the value from the metadata field, and a value 
+#' specified by the user. The flowchart is:
+#'   (1) if user has supplied non-null `period`, use that
+#'   (2) if metadata period is non-null, use that
+#'   (3) use a default value of 1 and print a message
+#' 
+#' @noRd
+get_period_from_data <- function(data, period = NULL)
+{
+    if (is.null(period))
+    {
+        period <- data$metadata$period
+        if (is.null(period))
+        {
+            message("No time period found. Assuming period = 1.")
+            period <- 1
+        }
+    }
+    return(period)
+}
+
+#' @noRd
+is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5)
+{
+    abs(x - round(x)) < tol
+}
+
 
diff --git a/man/has_integer_times.Rd b/man/has_integer_times.Rd
new file mode 100644
index 0000000..ad400a7
--- /dev/null
+++ b/man/has_integer_times.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-data-processing.R
+\name{has_integer_times}
+\alias{has_integer_times}
+\title{Check if a dataset has integer times}
+\usage{
+has_integer_times(data)
+}
+\arguments{
+\item{data}{dataset to check}
+}
+\value{
+TRUE or FALSE
+}
+\description{
+Check if a dataset has integer times
+}
+\details{
+If the times are already integer or Date, true. Otherwise FALSE,
+with a message if times are missing, or if times could potentially be
+rounded.
+}
diff --git a/man/make_integer_times.Rd b/man/make_integer_times.Rd
index f74dcdc..48399f9 100644
--- a/man/make_integer_times.Rd
+++ b/man/make_integer_times.Rd
@@ -4,15 +4,10 @@
 \alias{make_integer_times}
 \title{Add a time variable with integer values for evenly sampled data}
 \usage{
-make_integer_times(data, period = NULL, tol = 1e-06, confirm = interactive())
+make_integer_times(data)
 }
 \arguments{
 \item{data}{dataset to modify}
-
-\item{period}{period to check the times against (if \code{NULL}, first check to
-see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
-
-\item{tol}{tolerance for the period}
 }
 \value{
 the dataset, with integer times
@@ -21,9 +16,8 @@ the dataset, with integer times
 Add a time variable with integer values for evenly sampled data
 }
 \details{
-First, check if the data are evenly sampled in time. If not, we
-exit early. Next, if the times are already integer or Date, we don't do
-anything. If the times are numeric, but roundable to integer, we round.
-Otherwise, we add a new variable to \code{covariates} from 1:n and designate
-this variable as the \code{timename}.
+If the times are already integer or Date, we don't do anything. If
+the times are numeric, but roundable to integer, we round. Otherwise, we
+add a new variable to \code{covariates} from 1:n and designate this variable as
+the \code{timename}.
 }

From 6cee72101779847c56c7337ba667fe53fa46be97 Mon Sep 17 00:00:00 2001
From: Hao Ye <lhopitalified@gmail.com>
Date: Wed, 22 Apr 2020 12:44:32 -0400
Subject: [PATCH 6/8] Add check for missing values in abundance and covariates

---
 NAMESPACE                                   |  2 +
 R/utils-data-processing.R                   | 67 ++++++++++++++++++---
 man/has_integer_times.Rd                    |  2 +-
 man/has_missing_samples.Rd                  | 30 +++++++++
 man/is_equitimed.Rd                         |  2 +-
 man/make_integer_times.Rd                   | 16 +++--
 tests/testthat/setup.R                      |  5 ++
 tests/testthat/teardown.R                   |  1 +
 tests/testthat/test-02-dataset-processing.R | 40 ++++++++++--
 9 files changed, 146 insertions(+), 19 deletions(-)
 create mode 100644 man/has_missing_samples.Rd
 create mode 100644 tests/testthat/setup.R
 create mode 100644 tests/testthat/teardown.R

diff --git a/NAMESPACE b/NAMESPACE
index 9e40c80..f1171b0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -41,12 +41,14 @@ export(get_sdl_data)
 export(get_sgs_data)
 export(get_times_from_data)
 export(has_integer_times)
+export(has_missing_samples)
 export(import_retriever_data)
 export(install_retriever_data)
 export(interpolate_obs)
 export(invoke)
 export(is_equitimed)
 export(is_evenly_sampled)
+export(is_fully_sampled)
 export(make_equitimed)
 export(make_evenly_sampled)
 export(make_integer_times)
diff --git a/R/utils-data-processing.R b/R/utils-data-processing.R
index 918b847..8a79bfa 100644
--- a/R/utils-data-processing.R
+++ b/R/utils-data-processing.R
@@ -6,16 +6,16 @@
 #'   see if there is a known `period` set in the metadata, otherwise assumes 1)
 #' @param tol tolerance for the period 
 #' 
-#' @return TRUE or FALSE, invisibly
+#' @return `TRUE` or `FALSE`
 #' 
 #' @export
 is_equitimed <- function(data, period = NULL, tol = 1e-06)
 {
     stopifnot(check_data_format(data))
     
-    full_times <- get_full_times(data = data, period = period, tol = tol)
+    full_times <- get_full_times(data, period, tol)
     times <- get_times_from_data(data)
-    invisible(isTRUE(all.equal(times, full_times)))
+    isTRUE(all.equal(times, full_times))
 }
 
 #' @export
@@ -53,7 +53,7 @@ make_equitimed <- function(data, period = NULL, tol = 1e-06,
 {
     stopifnot(check_data_format(data))
     
-    full_times <- get_full_times(data = data, period = period, tol = tol)
+    full_times <- get_full_times(data, period, tol)
     if (is.null(full_times))
     {
         stop("Unable to construct an evenly spaced time index.")
@@ -131,7 +131,7 @@ make_evenly_sampled <- make_equitimed
 #' 
 #' @param data dataset to check
 #' 
-#' @return TRUE or FALSE
+#' @return `TRUE` or `FALSE`
 #'
 #' @details If the times are already integer or Date, true. Otherwise FALSE, 
 #'   with a message if times are missing, or if times could potentially be 
@@ -184,7 +184,7 @@ make_integer_times <- function(data, period = NULL, tol = 1e-06)
     if (!is.null(times))
     {
         # check for equitimed
-        if (!is_equitimed(data = data, period = period, tol = tol))
+        if (!is_equitimed(data, period, tol))
         {
             stop(c("Dataset is not evenly sampled in time.\n",
                    "Perhaps you want to call ", usethis::ui_code("make_equitimed()"), " first.\n"))
@@ -224,6 +224,55 @@ make_integer_times <- function(data, period = NULL, tol = 1e-06)
     return(invisible(data))
 }
 
+#' Check for missing samples
+#' @aliases is_fully_sampled
+#'
+#' @description Some analyses may require evenly sampled data without missing 
+#'   values. `has_missing_samples` checks that the dataset is equitimed, and 
+#'   then for missing values within `abundance` (and optionally, `covariates`)
+#'   
+#'   `is_full_sampled()` does the same check, but returns `TRUE` if there are 
+#'   NO missing samples.
+#' 
+#' @inheritParams is_equitimed
+#' @param check_covariates `TRUE` or `FALSE` (whether to check covariates, too)
+#' 
+#' @return `TRUE` or `FALSE`
+#' 
+#' @export
+has_missing_samples <- function(data, period = NULL, tol = 1e-06, 
+                                check_covariates = FALSE)
+{
+    if (!is_equitimed(data, period, tol))
+    {
+        message(c("Dataset is not evenly sampled in time.\n",
+               "Perhaps you want to call ", usethis::ui_code("make_equitimed()"), " first.\n"))
+        return(TRUE)
+    }
+    
+    # check abundance
+    if (!all(is.finite(data$abundance)))
+    {
+        message("Dataset has NA (or Inf) values in ", usethis::ui_code("abundance"), ".")
+        return(TRUE)
+    }
+    
+    # check covariates
+    if (check_covariates && !all(is.finite(data$covariates)))
+    {
+        message("Dataset has NA (or Inf) values in ", usethis::ui_code("covariates"), ".")
+        return(TRUE)
+    }
+    
+    return(FALSE)
+}
+
+#' @export
+is_fully_sampled <- function(data, period = NULL, tol = 1e-06, 
+                             check_covariates = FALSE)
+{
+    return(!has_missing_samples(data, period, tol, check_covariates))
+}
 
 #' get the complete time index, filling in gaps where necessary, and using the 
 #' period to establish the sampling frequency
@@ -275,4 +324,8 @@ is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5)
     abs(x - round(x)) < tol
 }
 
-
+#' @noRd
+is.finite.data.frame <- function(df)
+{
+    do.call(cbind, lapply(df, is.finite))
+}
diff --git a/man/has_integer_times.Rd b/man/has_integer_times.Rd
index ad400a7..c324189 100644
--- a/man/has_integer_times.Rd
+++ b/man/has_integer_times.Rd
@@ -10,7 +10,7 @@ has_integer_times(data)
 \item{data}{dataset to check}
 }
 \value{
-TRUE or FALSE
+\code{TRUE} or \code{FALSE}
 }
 \description{
 Check if a dataset has integer times
diff --git a/man/has_missing_samples.Rd b/man/has_missing_samples.Rd
new file mode 100644
index 0000000..87d6f9d
--- /dev/null
+++ b/man/has_missing_samples.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-data-processing.R
+\name{has_missing_samples}
+\alias{has_missing_samples}
+\alias{is_fully_sampled}
+\title{Check for missing samples}
+\usage{
+has_missing_samples(data, period = NULL, tol = 1e-06, check_covariates = FALSE)
+}
+\arguments{
+\item{data}{dataset to check}
+
+\item{period}{period to check the times against (if \code{NULL}, first check to
+see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
+
+\item{tol}{tolerance for the period}
+
+\item{check_covariates}{\code{TRUE} or \code{FALSE} (whether to check covariates, too)}
+}
+\value{
+\code{TRUE} or \code{FALSE}
+}
+\description{
+Some analyses may require evenly sampled data without missing
+values. \code{has_missing_samples} checks that the dataset is equitimed, and
+then for missing values within \code{abundance} (and optionally, \code{covariates})
+
+\code{is_full_sampled()} does the same check, but returns \code{TRUE} if there are
+NO missing samples.
+}
diff --git a/man/is_equitimed.Rd b/man/is_equitimed.Rd
index 141a8e6..9afd089 100644
--- a/man/is_equitimed.Rd
+++ b/man/is_equitimed.Rd
@@ -16,7 +16,7 @@ see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
 \item{tol}{tolerance for the period}
 }
 \value{
-TRUE or FALSE, invisibly
+\code{TRUE} or \code{FALSE}
 }
 \description{
 Check that the times of a dataset are evenly sampled
diff --git a/man/make_integer_times.Rd b/man/make_integer_times.Rd
index 48399f9..c4f5b66 100644
--- a/man/make_integer_times.Rd
+++ b/man/make_integer_times.Rd
@@ -4,10 +4,15 @@
 \alias{make_integer_times}
 \title{Add a time variable with integer values for evenly sampled data}
 \usage{
-make_integer_times(data)
+make_integer_times(data, period = NULL, tol = 1e-06)
 }
 \arguments{
 \item{data}{dataset to modify}
+
+\item{period}{period to check the times against (if \code{NULL}, first check to
+see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
+
+\item{tol}{tolerance for the period}
 }
 \value{
 the dataset, with integer times
@@ -16,8 +21,9 @@ the dataset, with integer times
 Add a time variable with integer values for evenly sampled data
 }
 \details{
-If the times are already integer or Date, we don't do anything. If
-the times are numeric, but roundable to integer, we round. Otherwise, we
-add a new variable to \code{covariates} from 1:n and designate this variable as
-the \code{timename}.
+First, check if the data are evenly sampled in time. If not, we
+exit early. Next, if the times are already integer or Date, we don't do
+anything. If the times are numeric, but roundable to integer, we round.
+Otherwise, we add a new variable to \code{covariates} from 1:n and designate
+this variable as the \code{timename}.
 }
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
new file mode 100644
index 0000000..f0f90c7
--- /dev/null
+++ b/tests/testthat/setup.R
@@ -0,0 +1,5 @@
+pre_test_options <- options(
+    ## make ui_*() output easier to test against
+    ## just say no to ANSI escape codes
+    "crayon.enabled" = FALSE
+)
diff --git a/tests/testthat/teardown.R b/tests/testthat/teardown.R
new file mode 100644
index 0000000..6c7ef9f
--- /dev/null
+++ b/tests/testthat/teardown.R
@@ -0,0 +1 @@
+options(pre_test_options)
diff --git a/tests/testthat/test-02-dataset-processing.R b/tests/testthat/test-02-dataset-processing.R
index cf03f65..3b4ae31 100644
--- a/tests/testthat/test-02-dataset-processing.R
+++ b/tests/testthat/test-02-dataset-processing.R
@@ -66,10 +66,18 @@ test_that("make_equitimed works", {
     expect_true(!any(is.na(out$abundance[-c(4, 7, 8), ])))
 })
 
+test_that("has_integer_times works", {
+    expect_true(has_integer_times(dragons))
+    expect_false(has_integer_times(sample_dat))
+    
+    path <- system.file("extdata", "subsampled",
+                        package = "MATSS", mustWork = TRUE)
+    dat <- get_mtquad_data(path = file.path(path, "mapped-plant-quads-mt"))
+    expect_true(has_integer_times(dat))
+})
 
-
-test_that("make_integer_times", {
-    expect_error(out <- make_integer_times(sample_dat), 
+test_that("make_integer_times works", {
+    expect_error(make_integer_times(sample_dat), 
                  "Dataset is not evenly sampled in time.")
     
     expect_error(out <- make_integer_times(make_equitimed(sample_dat)), NA)
@@ -82,6 +90,28 @@ test_that("make_integer_times", {
     dat <- get_mtquad_data(path = file.path(path, "mapped-plant-quads-mt"))
     m <- capture_messages(out <- make_integer_times(dat))
     expect_match(m, "No time period found. Assuming period = 1.", fixed = TRUE, all = FALSE)
-    expect_match(m, "Dataset appears evenly sampled in time with integer times already.", fixed = TRUE, all = FALSE)
+    expect_match(m, "Dataset is evenly sampled with integer times already.", fixed = TRUE, all = FALSE)
     expect_equal(out, dat)
-})
\ No newline at end of file
+})
+
+test_that("is_fully_sampled works", {
+    m <- capture_messages(expect_false(is_fully_sampled(sample_dat)))
+    expect_match(m, "Dataset is not evenly sampled in time.")
+    
+    sample_dat$covariates["temp"] <- rnorm(NROW(sample_dat$covariates))
+    
+    out <- make_equitimed(sample_dat)
+    m <- capture_messages(expect_false(is_fully_sampled(out)))
+    expect_match(m, "Dataset has NA (or Inf) values in `abundance`", fixed = TRUE, all = FALSE)
+    idx <- is.na(out$abundance)
+    out$abundance[idx] <- Inf
+    m <- capture_messages(expect_false(is_fully_sampled(out)))
+    expect_match(m, "Dataset has NA (or Inf) values in `abundance`", fixed = TRUE, all = FALSE)
+    out$abundance[idx] <- -999
+    expect_true(is_fully_sampled(out))
+    
+    m <- capture_messages(expect_false(is_fully_sampled(out, check_covariates = TRUE)))
+    expect_match(m, "Dataset has NA (or Inf) values in `covariates`", fixed = TRUE, all = FALSE)
+    out$covariates[is.na(out$covariates)] <- -999
+    expect_true(is_fully_sampled(out, check_covariates = TRUE))
+})

From 2f93525e0978e6be2143858a52563bda2a3a4d35 Mon Sep 17 00:00:00 2001
From: Hao Ye <lhopitalified@gmail.com>
Date: Wed, 22 Apr 2020 19:47:19 -0400
Subject: [PATCH 7/8] add function to interpolate missing vals

---
 NAMESPACE                                   |  1 +
 R/utils-data-processing.R                   | 61 +++++++++++++++++----
 man/interpolate_missing_samples.Rd          | 34 ++++++++++++
 tests/testthat/test-02-dataset-processing.R | 19 +++++--
 4 files changed, 100 insertions(+), 15 deletions(-)
 create mode 100644 man/interpolate_missing_samples.Rd

diff --git a/NAMESPACE b/NAMESPACE
index f1171b0..37dbf35 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -44,6 +44,7 @@ export(has_integer_times)
 export(has_missing_samples)
 export(import_retriever_data)
 export(install_retriever_data)
+export(interpolate_missing_samples)
 export(interpolate_obs)
 export(invoke)
 export(is_equitimed)
diff --git a/R/utils-data-processing.R b/R/utils-data-processing.R
index 8a79bfa..d73128e 100644
--- a/R/utils-data-processing.R
+++ b/R/utils-data-processing.R
@@ -251,16 +251,16 @@ has_missing_samples <- function(data, period = NULL, tol = 1e-06,
     }
     
     # check abundance
-    if (!all(is.finite(data$abundance)))
+    if (any(is.na(data$abundance)))
     {
-        message("Dataset has NA (or Inf) values in ", usethis::ui_code("abundance"), ".")
+        message("Dataset has NA values in ", usethis::ui_code("abundance"), ".")
         return(TRUE)
     }
     
     # check covariates
-    if (check_covariates && !all(is.finite(data$covariates)))
+    if (check_covariates && any(is.na(data$covariates)))
     {
-        message("Dataset has NA (or Inf) values in ", usethis::ui_code("covariates"), ".")
+        message("Dataset has NA values in ", usethis::ui_code("covariates"), ".")
         return(TRUE)
     }
     
@@ -274,6 +274,53 @@ is_fully_sampled <- function(data, period = NULL, tol = 1e-06,
     return(!has_missing_samples(data, period, tol, check_covariates))
 }
 
+#' @title Impute missing samples using linear interpolation
+#' 
+#' @param data dataset to modify
+#' @inheritParams is_equitimed
+#' @param interpolate_covariates `TRUE` or `FALSE` (whether to do covariates, too)
+#' 
+#' @return the dataset, with interpolated samples
+#'
+#' @details First, check if the data are evenly sampled in time. If not, we 
+#'   exit early. Next, apply `forecast::na.interp()` to each variable that has 
+#'   non-finite values. 
+#' 
+#' @export
+interpolate_missing_samples <- function(data, period = NULL, tol = 1e-06, 
+                                        interpolate_covariates = FALSE)
+{
+    interpolate_tbl <- function(df)
+    {
+        finite_cols_idx <- apply(is.na(df), 2, all)
+        # replace all non finite values with NA
+        
+        for (j in which(!finite_cols_idx))
+        {
+            x <- df[[j]]
+            x[!is.finite(x)] <- NA
+            interpolated <- forecast::na.interp(x)
+            df[[j]] <- as.numeric(interpolated)
+        }
+        return(df)
+    }
+    
+    if (!is_equitimed(data, period, tol))
+    {
+        stop(c("Dataset is not evenly sampled in time.\n",
+               "Perhaps you want to call ", usethis::ui_code("make_equitimed()"), " first.\n"))
+    }
+    
+    data$abundance <- interpolate_tbl(data$abundance)
+    
+    if (interpolate_covariates)
+    {
+        data$covariates <- interpolate_tbl(data$covariates)
+    }
+    
+    return(invisible(data))
+}
+
 #' get the complete time index, filling in gaps where necessary, and using the 
 #' period to establish the sampling frequency
 #' 
@@ -323,9 +370,3 @@ is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5)
 {
     abs(x - round(x)) < tol
 }
-
-#' @noRd
-is.finite.data.frame <- function(df)
-{
-    do.call(cbind, lapply(df, is.finite))
-}
diff --git a/man/interpolate_missing_samples.Rd b/man/interpolate_missing_samples.Rd
new file mode 100644
index 0000000..5ba58e7
--- /dev/null
+++ b/man/interpolate_missing_samples.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-data-processing.R
+\name{interpolate_missing_samples}
+\alias{interpolate_missing_samples}
+\title{Impute missing samples using linear interpolation}
+\usage{
+interpolate_missing_samples(
+  data,
+  period = NULL,
+  tol = 1e-06,
+  interpolate_covariates = FALSE
+)
+}
+\arguments{
+\item{data}{dataset to modify}
+
+\item{period}{period to check the times against (if \code{NULL}, first check to
+see if there is a known \code{period} set in the metadata, otherwise assumes 1)}
+
+\item{tol}{tolerance for the period}
+
+\item{interpolate_covariates}{\code{TRUE} or \code{FALSE} (whether to do covariates, too)}
+}
+\value{
+the dataset, with interpolated samples
+}
+\description{
+Impute missing samples using linear interpolation
+}
+\details{
+First, check if the data are evenly sampled in time. If not, we
+exit early. Next, apply \code{forecast::na.interp()} to each variable that has
+non-finite values.
+}
diff --git a/tests/testthat/test-02-dataset-processing.R b/tests/testthat/test-02-dataset-processing.R
index 3b4ae31..1418c94 100644
--- a/tests/testthat/test-02-dataset-processing.R
+++ b/tests/testthat/test-02-dataset-processing.R
@@ -102,16 +102,25 @@ test_that("is_fully_sampled works", {
     
     out <- make_equitimed(sample_dat)
     m <- capture_messages(expect_false(is_fully_sampled(out)))
-    expect_match(m, "Dataset has NA (or Inf) values in `abundance`", fixed = TRUE, all = FALSE)
+    expect_match(m, "Dataset has NA values in `abundance`", fixed = TRUE, all = FALSE)
     idx <- is.na(out$abundance)
-    out$abundance[idx] <- Inf
-    m <- capture_messages(expect_false(is_fully_sampled(out)))
-    expect_match(m, "Dataset has NA (or Inf) values in `abundance`", fixed = TRUE, all = FALSE)
     out$abundance[idx] <- -999
     expect_true(is_fully_sampled(out))
     
     m <- capture_messages(expect_false(is_fully_sampled(out, check_covariates = TRUE)))
-    expect_match(m, "Dataset has NA (or Inf) values in `covariates`", fixed = TRUE, all = FALSE)
+    expect_match(m, "Dataset has NA values in `covariates`", fixed = TRUE, all = FALSE)
     out$covariates[is.na(out$covariates)] <- -999
     expect_true(is_fully_sampled(out, check_covariates = TRUE))
 })
+
+test_that("interpolate_missing_samples works", {
+    sample_dat$covariates["temp"] <- rnorm(NROW(sample_dat$covariates))
+    
+    dat <- make_equitimed(sample_dat)
+    interpolated_dat <- interpolate_missing_samples(dat)
+    expect_true(is_fully_sampled(interpolated_dat))
+    expect_false(is_fully_sampled(interpolated_dat, check_covariates = TRUE))
+    expect_true(is_fully_sampled(interpolate_missing_samples(dat, interpolate_covariates = TRUE), 
+                                 check_covariates = TRUE))
+})
+    
\ No newline at end of file

From d3123003351d1cf922b08707cf124095d44a024e Mon Sep 17 00:00:00 2001
From: Hao Ye <lhopitalified@gmail.com>
Date: Wed, 22 Apr 2020 23:16:16 -0400
Subject: [PATCH 8/8] preserve class when interpolating

---
 R/utils-data-processing.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/utils-data-processing.R b/R/utils-data-processing.R
index d73128e..43466e7 100644
--- a/R/utils-data-processing.R
+++ b/R/utils-data-processing.R
@@ -301,6 +301,7 @@ interpolate_missing_samples <- function(data, period = NULL, tol = 1e-06,
             x[!is.finite(x)] <- NA
             interpolated <- forecast::na.interp(x)
             df[[j]] <- as.numeric(interpolated)
+            class(df[[j]]) <- class(x)
         }
         return(df)
     }