Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ Description: A set of utilites for working with the Arctic Data Center
Depends:
R (>= 3.2.3)
Imports:
digest,
dataone,
datapack,
digest,
EML,
httr,
magrittr,
methods,
stringr,
stringi,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(eml_contact)
export(eml_creator)
export(eml_individual_name)
export(eml_metadata_provider)
export(eml_otherEntity_to_dataTable)
export(eml_party)
export(eml_personnel)
export(eml_project)
Expand Down Expand Up @@ -56,6 +57,7 @@ export(sysmeta_to_eml_physical)
export(update_object)
export(update_resource_map)
export(view_profile)
export(which_in_eml)
import(EML)
import(XML)
import(dataone)
Expand Down
143 changes: 143 additions & 0 deletions R/eml.R
Original file line number Diff line number Diff line change
Expand Up @@ -979,3 +979,146 @@ eml_add_entities <- function(doc,
doc
}


#' Convert otherEntities to dataTables
#'
#' Convert an EML 'otherEntity' object to a 'dataTable' object. This will convert an
#' otherEntity objectas currently constructed - it does not add a physical or add attributes.
#' However, if these are already in their respective slots, they will be retained.
#'
#' @param eml (S4) An EML S4 object
#' @param otherEntity (S4 / integer) Either an EML otherEntity object or the index
#' of an otherEntity within a ListOfotherEntity. Integer input is recommended.
#' @param validate_eml (logical) Optional. Specify whether or not to validate the eml after
#' completion. Defaults to \code{TRUE}. Recommended setting is \code{TRUE}. Setting this to
#' \code{FALSE} reduces execution time by ~ 50 percent.
#'
#' @author Dominic Mullen dmullen17@@gmail.com
#'
#' @export
#'
#' @examples
#' \dontrun{
#' eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils"))
#'
#' # The following two calls are equivalent:
#' eml <- eml_otherEntity_to_dataTable(eml, eml@@dataset@@otherEntity[[1]])
#' eml <- eml_otherEntity_to_dataTable(eml, 1)
#'
#' # Integer input is recommended:
#' eml <- eml_otherEntity_to_dataTable(eml, 1)
#' }
eml_otherEntity_to_dataTable <- function(eml, otherEntity, validate_eml = TRUE) {
## Argument checks
stopifnot(methods::is(eml, "eml"))
stopifnot(any(is.integer(otherEntity), methods::is(otherEntity, "otherEntity")))
stopifnot(is.logical(validate_eml))

## Handle different inputs for 'otherEntity'
if (is.numeric(otherEntity)) {
index <- otherEntity
otherEntity <- eml@dataset@otherEntity[[index]]
} else {
index <- which_in_eml(eml@dataset@otherEntity,
"entityName",
otherEntity@entityName)
if (length(index) > 1) {
stop("Duplicate 'entityName' found in 'eml@dataset@otherEntity', please use a numeric index (1, 2, etc.) to specify which 'otherEntity' you would like to convert.")
}
}

## convert otherEntity to dataTable
dt <- utils::capture.output(otherEntity) %>%
stringr::str_trim() %>%
stringr::str_replace_all("otherEntity", "dataTable") %>%
paste(sep = "", collapse = "") %>%
EML::read_eml()

## Add dt to bottom of dt list
type <- "dataTable"
slot(eml@dataset, type) <- new(paste0("ListOf", type), c(slot(eml@dataset, type),
new(paste0("ListOf", type), list(dt))))

## delete otherEntity from list
eml@dataset@otherEntity[[index]] <- NULL

## return eml
if (validate_eml == TRUE) {
eml_validate(eml)
}
return(eml)
}


#' Search through EMLs
#'
#' This function returns indices within an EML list that contain an instance where \code{test == TRUE}. See examples for more information.
#'
#' @import EML
#' @param eml_list (S4/List) an EML list object
#' @param element (character) element to evaluate
#' @param test (function/character) A function to evaluate (see examples). If test is a character, will evaluate if \code{element == test} (see example 1).
#'
#' @keywords eml
#'
#' @author Mitchell Maier mitchell.maier@@gmail.com
#'
#' @examples
#' \dontrun{
#' # Question: Which creators have a surName "Smith"?
#' n <- which_in_eml(eml@@dataset@@creator, "surName", "Smith")
#' # Answer: eml@@dataset@@creator[n]
#'
#' # Question: Which dataTables have an entityName that begins with "2016"
#' n <- which_in_eml(eml@@dataset@@dataTable, "entityName", function(x) {grepl("^2016", x)})
#' # Answer: eml@@dataset@@dataTable[n]
#'
#' # Question: Which attributes in dataTable[[1]] have a numberType "natural"?
#' n <- which_in_eml(eml@@dataset@@dataTable[[1]]@@attributeList@@attribute, "numberType", "natural")
#' # Answer: eml@@dataset@@dataTable[[1]]@@attributeList@@attribute[n]
#'
#' #' # Question: Which dataTables have at least one attribute with a numberType "natural"?
#' n <- which_in_eml(eml@@dataset@@dataTable, "numberType", function(x) {"natural" %in% x})
#' # Answer: eml@@dataset@@dataTable[n]
#' }
#' @export
#'
which_in_eml <- function(eml_list, element, test) {

stopifnot(isS4(eml_list))
stopifnot(methods::is(eml_list,"list"))
stopifnot(is.character(element))

if (is.character(test)) {
value = test
test = function(x) {x == value}

} else {
stopifnot(is.function(test))
}

# Find location
location <- unlist(lapply(seq_along(eml_list), function(i) {
elements_test <- unlist(EML::eml_get(eml_list[[i]], element))

if (is.null(elements_test)) {
out <- NULL

} else {
result <- test(elements_test)

if (length(result) > 1) {
stop("Test must only return one value.")

} else if (result == TRUE) {
out <- i

} else {
out <- NULL
}
}
return(out)
}))

return(location)
}
2 changes: 1 addition & 1 deletion man/create_dummy_enumeratedDomain_dataframe.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/eml_otherEntity_to_dataTable.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions man/which_in_eml.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

117 changes: 117 additions & 0 deletions tests/testthat/test_eml.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,122 @@ test_that("a dataTable and otherEntity can be added from a pid", {
unlink(data_path)
})

test_that("eml_otherEntity_to_dataTable fails gracefully", {
if (!is_token_set(mn)) {
skip("No token set. Skipping test.")
}

eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils"))

# incorrect inputs
expect_error(eml_otherEntity_to_dataTable("dummy input"))
expect_error(eml_otherEntity_to_dataTable(eml, "1"))

# subscripts out of bounds
expect_error(eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[2]]))
expect_error(eml_otherEntity_to_dataTable(eml, 2))

# Duplicate entityNames found
eml@dataset@otherEntity[[2]] <- eml@dataset@otherEntity[[1]]
expect_error(eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]]))

})

test_that("eml_otherEntity_to_dataTable fails gracefully", {
if (!is_token_set(mn)) {
skip("No token set. Skipping test.")
}

eml <- read_eml(system.file("example-eml.xml", package = "arcticdatautils"))
otherEntity <- eml@dataset@otherEntity[[1]]

eml <- eml_otherEntity_to_dataTable(eml, eml@dataset@otherEntity[[1]])

# test that otherEntity was removed
expect_length(eml@dataset@otherEntity, 0)

# test that dataTable was added
expect_equal(otherEntity@entityName, eml@dataset@dataTable[[1]]@entityName)
expect_equivalent(otherEntity@physical, eml@dataset@dataTable[[1]]@physical)
})

test_that("which_in_eml Returns correct locations", {
if (!is_token_set(mn)) {
skip("No token set. Skipping test.")
}

attributes <-
data.frame(
attributeName = c(
"length_1",
"time_2",
"length_3"),
attributeDefinition = c(
"def 1",
"def 2",
"def 3"),
formatString = c(
NA,
NA,
NA),
measurementScale = c(
"ratio",
"ratio",
"ratio"),
domain = c(
"numericDomain",
"numericDomain",
"numericDomain"),
definition = c(
NA,
NA,
NA),
unit = c(
"meter",
"second",
"meter"),
numberType = c(
"real",
"real",
"real"),
stringsAsFactors = FALSE
)

attributeList <- EML::set_attributes(attributes)

dataTable_1 <- new("dataTable",
entityName = "2016_data.csv",
entityDescription = "2016 data",
attributeList = attributeList)

dataTable_2 <- dataTable_1

dataTable_3 <- new("dataTable",
entityName = "2015_data.csv",
entityDescription = "2016 data",
attributeList = attributeList)

creator_1 <- new("creator",
individualName = new("individualName",
surName = "LAST",
givenName = "FIRST"))
creator_2 <- new("creator",
individualName = new("individualName",
surName = "LAST",
givenName = "FIRST_2"))
creator_3 <- creator_2

title <- "Title"

dataset <- new("dataset",
title = title,
creator = c(creator_1, creator_2, creator_3),
dataTable = c(dataTable_1, dataTable_2, dataTable_3))

eml <- new("eml",
dataset = dataset)

expect_equal(c(2,3), which_in_eml(eml@dataset@creator, "givenName", "FIRST_2"))
expect_error(which_in_eml(eml@dataset@dataTable, "attributeName", "length_3"))
expect_equal(c(1,3), which_in_eml(eml@dataset@dataTable[[1]]@attributeList@attribute, "attributeName", function(x) {grepl("^length", x)}))
})