diff --git a/DESCRIPTION b/DESCRIPTION index 1691b4d..e332b2e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,11 +14,12 @@ Description: A set of utilites for working with the Arctic Data Center Depends: R (>= 3.2.3) Imports: - digest, dataone, datapack, + digest, EML, httr, + magrittr, methods, stringr, stringi, diff --git a/NAMESPACE b/NAMESPACE index 869a3fa..16dada6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(eml_contact) export(eml_creator) export(eml_individual_name) export(eml_metadata_provider) +export(eml_otherEntity_to_dataTable) export(eml_party) export(eml_personnel) export(eml_project) @@ -56,6 +57,7 @@ export(sysmeta_to_eml_physical) export(update_object) export(update_resource_map) export(view_profile) +export(which_in_eml) import(EML) import(XML) import(dataone) diff --git a/R/eml.R b/R/eml.R index 212138a..cf2e850 100644 --- a/R/eml.R +++ b/R/eml.R @@ -979,3 +979,146 @@ eml_add_entities <- function(doc, doc } + +#' Convert otherEntities to dataTables +#' +#' Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +#' otherEntity objectas currently constructed - it does not add a physical or add attributes. +#' However, if these are already in their respective slots, they will be retained. +#' +#' @param eml (S4) An EML S4 object +#' @param otherEntity (S4 / integer) Either an EML otherEntity object or the index +#' of an otherEntity within a ListOfotherEntity. Integer input is recommended. +#' @param validate_eml (logical) Optional. Specify whether or not to validate the eml after +#' completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to +#' \code{FALSE} reduces execution time by ~ 50 percent. +#' +#' @author Dominic Mullen dmullen17@@gmail.com +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) +#' +#' # The following two calls are equivalent: +#' eml <- eml_otherEntity_to_dataTable(eml, eml@@dataset@@otherEntity[[1]]) +#' eml <- eml_otherEntity_to_dataTable(eml, 1) +#' +#' # Integer input is recommended: +#' eml <- eml_otherEntity_to_dataTable(eml, 1) +#' } +eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) { + ## Argument checks + stopifnot(methods::is(eml, "eml")) + stopifnot(any(is.integer(otherEntity), methods::is(otherEntity, "otherEntity"))) + stopifnot(is.logical(validate_eml)) + + ## Handle different inputs for 'otherEntity' + if (is.numeric(otherEntity)) { + index <- otherEntity + otherEntity <- eml@dataset@otherEntity[[index]] + } else { + index <- which_in_eml(eml@dataset@otherEntity, + "entityName", + otherEntity@entityName) + if (length(index) > 1) { + stop("Duplicate 'entityName' found in 'eml@dataset@otherEntity', please use a numeric index (1, 2, etc.) to specify which 'otherEntity' you would like to convert.") + } + } + + ## convert otherEntity to dataTable + dt <- utils::capture.output(otherEntity) %>% + stringr::str_trim() %>% + stringr::str_replace_all("otherEntity", "dataTable") %>% + paste(sep = "", collapse = "") %>% + EML::read_eml() + + ## Add dt to bottom of dt list + type <- "dataTable" + slot(eml@dataset, type) <- new(paste0("ListOf", type), c(slot(eml@dataset, type), + new(paste0("ListOf", type), list(dt)))) + + ## delete otherEntity from list + eml@dataset@otherEntity[[index]] <- NULL + + ## return eml + if (validate_eml == TRUE) { + eml_validate(eml) + } + return(eml) +} + + +#' Search through EMLs +#' +#' This function returns indices within an EML list that contain an instance where \code{test == TRUE}. See examples for more information. +#' +#' @import EML +#' @param eml_list (S4/List) an EML list object +#' @param element (character) element to evaluate +#' @param test (function/character) A function to evaluate (see examples). If test is a character, will evaluate if \code{element == test} (see example 1). +#' +#' @keywords eml +#' +#' @author Mitchell Maier mitchell.maier@@gmail.com +#' +#' @examples +#' \dontrun{ +#' # Question: Which creators have a surName "Smith"? +#' n <- which_in_eml(eml@@dataset@@creator, "surName", "Smith") +#' # Answer: eml@@dataset@@creator[n] +#' +#' # Question: Which dataTables have an entityName that begins with "2016" +#' n <- which_in_eml(eml@@dataset@@dataTable, "entityName", function(x) {grepl("^2016", x)}) +#' # Answer: eml@@dataset@@dataTable[n] +#' +#' # Question: Which attributes in dataTable[[1]] have a numberType "natural"? +#' n <- which_in_eml(eml@@dataset@@dataTable[[1]]@@attributeList@@attribute, "numberType", "natural") +#' # Answer: eml@@dataset@@dataTable[[1]]@@attributeList@@attribute[n] +#' +#' #' # Question: Which dataTables have at least one attribute with a numberType "natural"? +#' n <- which_in_eml(eml@@dataset@@dataTable, "numberType", function(x) {"natural" %in% x}) +#' # Answer: eml@@dataset@@dataTable[n] +#' } +#' @export +#' +which_in_eml <- function(eml_list, element, test) { + + stopifnot(isS4(eml_list)) + stopifnot(methods::is(eml_list,"list")) + stopifnot(is.character(element)) + + if (is.character(test)) { + value = test + test = function(x) {x == value} + + } else { + stopifnot(is.function(test)) + } + + # Find location + location <- unlist(lapply(seq_along(eml_list), function(i) { + elements_test <- unlist(EML::eml_get(eml_list[[i]], element)) + + if (is.null(elements_test)) { + out <- NULL + + } else { + result <- test(elements_test) + + if (length(result) > 1) { + stop("Test must only return one value.") + + } else if (result == TRUE) { + out <- i + + } else { + out <- NULL + } + } + return(out) + })) + + return(location) +} diff --git a/man/create_dummy_enumeratedDomain_dataframe.Rd b/man/create_dummy_enumeratedDomain_dataframe.Rd index d65bc43..9cc8411 100644 --- a/man/create_dummy_enumeratedDomain_dataframe.Rd +++ b/man/create_dummy_enumeratedDomain_dataframe.Rd @@ -17,7 +17,7 @@ Create dummy enumeratedDomain data frame } \examples{ \dontrun{ -# Create dummy dataframe of 2 factors/enumerated domains +# Create dummy dataframe of 2 factors/enumerated domains attributes <- create_dummy_enumeratedDomain_dataframe(c("Factor1", "Factor2")) } } diff --git a/man/eml_otherEntity_to_dataTable.Rd b/man/eml_otherEntity_to_dataTable.Rd new file mode 100644 index 0000000..83a1164 --- /dev/null +++ b/man/eml_otherEntity_to_dataTable.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{eml_otherEntity_to_dataTable} +\alias{eml_otherEntity_to_dataTable} +\title{Convert otherEntities to dataTables} +\usage{ +eml_otherEntity_to_dataTable(eml, otherEntity, validate_eml = TRUE) +} +\arguments{ +\item{eml}{(S4) An EML S4 object} + +\item{otherEntity}{(S4 / integer) Either an EML otherEntity object or the index +of an otherEntity within a ListOfotherEntity. Integer input is recommended.} + +\item{validate_eml}{(logical) Optional. Specify whether or not to validate the eml after +completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to +\code{FALSE} reduces execution time by ~ 50 percent.} +} +\description{ +Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an +otherEntity objectas currently constructed - it does not add a physical or add attributes. +However, if these are already in their respective slots, they will be retained. +} +\examples{ +\dontrun{ +eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + +# The following two calls are equivalent: +eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]]) +eml <- eml_otherEntity_to_dataTable(eml, 1) + +# Integer input is recommended: +eml <- eml_otherEntity_to_dataTable(eml, 1) +} +} +\author{ +Dominic Mullen dmullen17@gmail.com +} diff --git a/man/which_in_eml.Rd b/man/which_in_eml.Rd new file mode 100644 index 0000000..4e56266 --- /dev/null +++ b/man/which_in_eml.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml.R +\name{which_in_eml} +\alias{which_in_eml} +\title{Search through EMLs} +\usage{ +which_in_eml(eml_list, element, test) +} +\arguments{ +\item{eml_list}{(S4/List) an EML list object} + +\item{element}{(character) element to evaluate} + +\item{test}{(function/character) A function to evaluate (see examples). If test is a character, will evaluate if \code{element == test} (see example 1).} +} +\description{ +This function returns indices within an EML list that contain an instance where \code{test == TRUE}. See examples for more information. +} +\examples{ +\dontrun{ +# Question: Which creators have a surName "Smith"? +n <- which_in_eml(eml@dataset@creator, "surName", "Smith") +# Answer: eml@dataset@creator[n] + +# Question: Which dataTables have an entityName that begins with "2016" +n <- which_in_eml(eml@dataset@dataTable, "entityName", function(x) {grepl("^2016", x)}) +# Answer: eml@dataset@dataTable[n] + +# Question: Which attributes in dataTable[[1]] have a numberType "natural"? +n <- which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "numberType", "natural") +# Answer: eml@dataset@dataTable[[1]]@attributeList@attribute[n] + +#' # Question: Which dataTables have at least one attribute with a numberType "natural"? +n <- which_in_eml(eml@dataset@dataTable, "numberType", function(x) {"natural" \%in\% x}) +# Answer: eml@dataset@dataTable[n] +} +} +\author{ +Mitchell Maier mitchell.maier@gmail.com +} +\keyword{eml} diff --git a/tests/testthat/test_eml.R b/tests/testthat/test_eml.R index 9967a54..06e39b8 100644 --- a/tests/testthat/test_eml.R +++ b/tests/testthat/test_eml.R @@ -146,5 +146,122 @@ test_that("a dataTable and otherEntity can be added from a pid", { unlink(data_path) }) +test_that("eml_otherEntity_to_dataTable fails gracefully", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + + # incorrect inputs + expect_error(eml_otherEntity_to_dataTable("dummy input")) + expect_error(eml_otherEntity_to_dataTable(eml, "1")) + + # subscripts out of bounds + expect_error(eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[2]])) + expect_error(eml_otherEntity_to_dataTable(eml, 2)) + + # Duplicate entityNames found + eml@dataset@otherEntity[[2]] <- eml@dataset@otherEntity[[1]] + expect_error(eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]])) + +}) + +test_that("eml_otherEntity_to_dataTable fails gracefully", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils")) + otherEntity <- eml@dataset@otherEntity[[1]] + eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]]) + + # test that otherEntity was removed + expect_length(eml@dataset@otherEntity, 0) + + # test that dataTable was added + expect_equal(otherEntity@entityName, eml@dataset@dataTable[[1]]@entityName) + expect_equivalent(otherEntity@physical, eml@dataset@dataTable[[1]]@physical) +}) +test_that("which_in_eml Returns correct locations", { + if (!is_token_set(mn)) { + skip("No token set. Skipping test.") + } + + attributes <- + data.frame( + attributeName = c( + "length_1", + "time_2", + "length_3"), + attributeDefinition = c( + "def 1", + "def 2", + "def 3"), + formatString = c( + NA, + NA, + NA), + measurementScale = c( + "ratio", + "ratio", + "ratio"), + domain = c( + "numericDomain", + "numericDomain", + "numericDomain"), + definition = c( + NA, + NA, + NA), + unit = c( + "meter", + "second", + "meter"), + numberType = c( + "real", + "real", + "real"), + stringsAsFactors = FALSE + ) + + attributeList <- EML::set_attributes(attributes) + + dataTable_1 <- new("dataTable", + entityName = "2016_data.csv", + entityDescription = "2016 data", + attributeList = attributeList) + + dataTable_2 <- dataTable_1 + + dataTable_3 <- new("dataTable", + entityName = "2015_data.csv", + entityDescription = "2016 data", + attributeList = attributeList) + + creator_1 <- new("creator", + individualName = new("individualName", + surName = "LAST", + givenName = "FIRST")) + creator_2 <- new("creator", + individualName = new("individualName", + surName = "LAST", + givenName = "FIRST_2")) + creator_3 <- creator_2 + + title <- "Title" + + dataset <- new("dataset", + title = title, + creator = c(creator_1, creator_2, creator_3), + dataTable = c(dataTable_1, dataTable_2, dataTable_3)) + + eml <- new("eml", + dataset = dataset) + + expect_equal(c(2,3), which_in_eml(eml@dataset@creator, "givenName", "FIRST_2")) + expect_error(which_in_eml(eml@dataset@dataTable, "attributeName", "length_3")) + expect_equal(c(1,3), which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "attributeName", function(x) {grepl("^length", x)})) +})