Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SpatialData.data
Title: SpatialData datasets
Depends: R (>= 4.4), SpatialData
Version: 0.99.2
Depends: R (>= 4.4)
Version: 0.99.3
Description: Makes 'scverse' data examples available through the NSF OSN,
and accessible from within R, using 'BiocFileCache'. Furthermore,
provides an interface to Python's 'spatialdata-io' for reading and
Expand All @@ -18,15 +18,16 @@ Authors@R: c(
Imports:
basilisk,
BiocFileCache,
reticulate
reticulate,
SpatialData
Suggests:
BiocStyle,
knitr,
magick,
paws,
Rgraphviz,
testthat,
DT
DT,
paws
Remotes:
HelenaLC/SpatialData
biocViews:
Expand Down
14 changes: 4 additions & 10 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,11 @@ export(LungAdenocarcinomaMCMICRO)
export(MouseBrainMERFISH)
export(MouseIntestineVisHD)
export(MulticancerSteinbock)
export(available_10x_xen_zips)
export(availableOSN)
export(available_sdio)
export(available_spd_zarr_zips)
export(get_demo_SD)
export(merfish_demo_path)
export(path_to_10x_xen_demo)
export(spd_demo_cached_path)
export(spdzPath)
export(unzip_merfish_demo)
export(unzip_spd_demo)
export(loadFromOSN)
export(use_sdio)
import(BiocFileCache)
importFrom(basilisk,BasiliskEnvironment)
importClassesFrom(SpatialData,SpatialData)
importFrom(SpatialData,readSpatialData)
importFrom(utils,unzip)
23 changes: 23 additions & 0 deletions R/availableOSN.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#' List zipped zarr archives available in the Bioconductor OSN bucket
#'
#' Use 'paws::s3' to interrogate an NSF Open Storage Network
#' bucket for zipped zarr archives for various platforms.
#'
#' @return character vector holding the basenames of all object keys in
#'   the bucket that contain \code{BiocSpatialData/}; length zero if no
#'   key matches.
#' @examples
#' if (requireNamespace("paws")) {
#'   availableOSN()
#' }
#' @export
availableOSN <- function() {
    # 'paws' is only in Suggests, so fail with an actionable message
    if (!requireNamespace("paws", quietly=TRUE))
        stop("install 'paws' to use this function; without it",
            " we can't check existence of data in OSN bucket")
    message("checking Bioconductor OSN bucket...")
    s3 <- paws::s3(
        credentials=list(anonymous=TRUE),
        endpoint="https://mghp.osn.xsede.org")
    bucket <- "bir190004-bucket01"
    # S3 listings are paginated (at most 1000 keys per response), so keep
    # requesting pages until the service reports the listing is complete;
    # a single call would silently miss keys beyond the first page.
    keys <- character(0)
    marker <- NULL
    repeat {
        page <- s3$list_objects(Bucket=bucket, Marker=marker)
        page_keys <- vapply(page$Contents, "[[", character(1), "Key")
        keys <- c(keys, page_keys)
        # stop when complete; the emptiness check guards against looping
        # forever on a truncated-but-empty response
        if (!isTRUE(page$IsTruncated) || length(page_keys) == 0L)
            break
        # without a delimiter, the next page starts after the last key seen
        marker <- page_keys[length(page_keys)]
    }
    basename(grep("BiocSpatialData\\/", keys, value=TRUE))
}

170 changes: 91 additions & 79 deletions R/get_demo_SD.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,66 +11,21 @@
#' The individual functions in this package give similarly detailed references.
"demo_spatialdata"

#' @title retrieve scverse-curated `SpatialData` .zarr archive
#' @aliases MouseIntestineVisHD
#'
#' @description
#' This function consolidates the retrieval and caching and transformation
#' of scverse-curated Zarr archives and 10x-curated Xenium archives.
#'
#' all logic for finding, caching, loading an OSN-based dataset, hidden
#' @importFrom SpatialData readSpatialData
#' @importClassesFrom SpatialData SpatialData
#' @param patt character(1) sufficient to identify an OSN resource
#' @param cache like `BiocFileCache`
#' @param target character(1), defaults to tempfile(); use a different
#' value if you wish to retain the unzipped .zarr store persistently.
#'
#' @details
#' \describe{
#' \item{
#' \code{MouseIntestineVisHD()}}{
#' Visium HD 3.0.0 (10x Genomics) dataset of mouse intestine; source:
#' \emph{https://www.10xgenomics.com/datasets/visium-hd-cytassist-gene-expression-libraries-of-mouse-intestine}}
#' \item{
#' \code{LungAdenocarcinomaMCMICRO()}}{
#' MCMICRO dataset of human small cell lung adenocarcinoma}
#' \item{
#' \code{MouseBrainMERFISH()}}{
#' MERFISH dataset of mouse brain tissue}
#' \item{
#' \code{MulticancerSteinbock()}}{
#' imaging mass cytometry dataset of four cancers; source:
#' \emph{https://www.nature.com/articles/s41596-023-00881-0}}
#' \item{
#' \code{ColorectalCarcinomaMIBITOF()}}{
#' MIBI-TOF dataset of colorectal carcinoma}
#' \item{
#' \code{JanesickBreastVisiumEnh()}}{
#' Visium (10x Genomics) dataset of breast cancer; source:
#' \emph{https://www.nature.com/articles/s41467-023-43458-x}}
#' \item{
#' \code{JanesickBreastXeniumRep1/2()}}{
#' two Xenium (10x Genomics) sections associated with
#' the above Visium section from Janesick \emph{et al.}}
#' \item{
#' \code{Breast2fov_10x()}}{
#' Xenium (10x Genomics) data on breast cancer, trimmed to 2 FOVs; source:
#' \emph{https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/resources/xenium-example-data}}
#' \item{
#' \code{Lung2fov_10x()}}{
#' Xenium (10x Genomics) data on lung cancer, trimmed to 2 FOVs; source:
#' \emph{https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/resources/xenium-example-data}}
#' \item{
#' \code{HumanLungMulti_10x()}}{
#' Xenium (10x Genomics) data on lung cancer;
#' source: \emph{https://www.10xgenomics.com/datasets/preview-data-ffpe-human-lung-cancer-with-xenium-multimodal-cell-segmentation-1-standard}}
#' }
#'
#' @examples
#' # the following are equivalent:
#' get_demo_SD("merfish")
#' MouseBrainMERFISH()
#'
#' @export
get_demo_SD <- function(patt,
#' @note This function checks for a stale element in the cache and uses
#' `bfcupdate` to refresh it before retrieving the resource from the cache.
# @examples
# # the following are equivalent:
# get_demo_SD("merfish")
# MouseBrainMERFISH()
#
.get_demo_SD <- function(patt,
cache=BiocFileCache::BiocFileCache(),
target=tempfile()) {

Expand Down Expand Up @@ -136,82 +91,139 @@ get_demo_SD <- function(patt,
}
# so a single pattern has hit, and it is in cache
if (chkdf[ind,]$rname %in% xdzips) { # it is a Xenium 10x output resource
# check if update needed
stale = BiocFileCache::bfcneedsupdate(cache, chkdf[ind,]$rid)
if (stale) BiocFileCache::bfcupdate(cache, chkdf[ind,]$rid, fpath=chkdf[ind,]$fpath, rtype="web")
preloc <- chkdf[ind,]$rpath
td <- tempfile() # can't use target
dir.create(td)
unzip(preloc, exdir=td) # manufacturer output
use_sdio("xenium", srcdir=td, dest=target) # zarr in target
return(SpatialData::readSpatialData(target))
}
# check again, this one is not in xdzips
stale = BiocFileCache::bfcneedsupdate(cache, chkdf[ind,]$rid)
if (stale) BiocFileCache::bfcupdate(cache, chkdf[ind,]$rid, fpath=chkdf[ind,]$fpath, rtype="web")
loc <- chkdf[ind,]$rpath
td <- target
dir.create(td)
unzip(loc, exdir=td)
SpatialData::readSpatialData(dir(td, full.names=TRUE))
}

#' @rdname get_demo_SD
#' @title retrieve scverse-curated `SpatialData` .zarr archive
#' @aliases MouseIntestineVisHD
#'
#' @description
#' This function consolidates the retrieval and caching and transformation
#' of scverse-curated Zarr archives and 10x-curated Xenium archives.
#'
#' @param target character(1), defaults to tempfile(); use a different
#' value if you wish to retain the unzipped .zarr store persistently.
#'
#' @details
#' \describe{
#' \item{
#' \code{MouseIntestineVisHD()}}{
#' Visium HD 3.0.0 (10x Genomics) dataset of mouse intestine; source:
#' \emph{https://www.10xgenomics.com/datasets/visium-hd-cytassist-gene-expression-libraries-of-mouse-intestine}}
#' \item{
#' \code{LungAdenocarcinomaMCMICRO()}}{
#' MCMICRO dataset of human small cell lung adenocarcinoma}
#' \item{
#' \code{MouseBrainMERFISH()}}{
#' MERFISH dataset of mouse brain tissue}
#' \item{
#' \code{MulticancerSteinbock()}}{
#' imaging mass cytometry dataset of four cancers; source:
#' \emph{https://www.nature.com/articles/s41596-023-00881-0}}
#' \item{
#' \code{ColorectalCarcinomaMIBITOF()}}{
#' MIBI-TOF dataset of colorectal carcinoma}
#' \item{
#' \code{JanesickBreastVisiumEnh()}}{
#' Visium (10x Genomics) dataset of breast cancer; source:
#' \emph{https://www.nature.com/articles/s41467-023-43458-x}}
#' \item{
#' \code{JanesickBreastXeniumRep1/2()}}{
#' two Xenium (10x Genomics) sections associated with
#' the above Visium section from Janesick \emph{et al.}}
#' \item{
#' \code{Breast2fov_10x()}}{
#' Xenium (10x Genomics) data on breast cancer, trimmed to 2 FOVs; source:
#' \emph{https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/resources/xenium-example-data}}
#' \item{
#' \code{Lung2fov_10x()}}{
#' Xenium (10x Genomics) data on lung cancer, trimmed to 2 FOVs; source:
#' \emph{https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/resources/xenium-example-data}}
#' \item{
#' \code{HumanLungMulti_10x()}}{
#' Xenium (10x Genomics) data on lung cancer;
#' source: \emph{https://www.10xgenomics.com/datasets/preview-data-ffpe-human-lung-cancer-with-xenium-multimodal-cell-segmentation-1-standard}}
#' }

#' @rdname MouseIntestineVisHD
#' @export
MouseIntestineVisHD <- function(target=tempfile()) {
get_demo_SD("visium_hd_3.0.0", target=target)
.get_demo_SD("visium_hd_3.0.0", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
LungAdenocarcinomaMCMICRO <- function(target=tempfile()) {
get_demo_SD("mcmicro_io", target=target)
.get_demo_SD("mcmicro_io", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
MouseBrainMERFISH = function(target=tempfile()) {
get_demo_SD("merfish", target=target)
.get_demo_SD("merfish", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
MulticancerSteinbock <- function(target=tempfile()) {
get_demo_SD("steinbock_io", target=target)
.get_demo_SD("steinbock_io", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
ColorectalCarcinomaMIBITOF <- function(target=tempfile()) {
get_demo_SD("mibitof", target=target)
.get_demo_SD("mibitof", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
JanesickBreastVisiumEnh <- function(target=tempfile()) {
get_demo_SD("visium_associated_xenium_io", target=target)
.get_demo_SD("visium_associated_xenium_io", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
JanesickBreastXeniumRep1 <- function(target=tempfile()) {
get_demo_SD("xenium_rep1_io", target=target)
.get_demo_SD("xenium_rep1_io", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
JanesickBreastXeniumRep2 <- function(target=tempfile()) {
get_demo_SD("xenium_rep2_io", target=target)
.get_demo_SD("xenium_rep2_io", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
Breast2fov_10x <- function(target=tempfile()) {
get_demo_SD("human_Breast_2fov", target=target)
.get_demo_SD("human_Breast_2fov", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
Lung2fov_10x <- function(target=tempfile()) {
get_demo_SD("human_Lung_2fov", target=target)
.get_demo_SD("human_Lung_2fov", target=target)
}

#' @rdname get_demo_SD
#' @rdname MouseIntestineVisHD
#' @export
HumanLungMulti_10x <- function(target=tempfile()) {
get_demo_SD("HuLungXenmulti", target=target)
}
.get_demo_SD("HuLungXenmulti", target=target)
}
26 changes: 26 additions & 0 deletions R/loadFromOSN.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#' Load an OSN-hosted dataset identified by a (partial) name
#'
#' Use a string to identify a resource in SpatialData format and
#' ingest via SpatialData::readSpatialData.
#'
#' The stub is matched, using grep(), against the names of the exported
#' dataset-loading functions of this package; the exported helpers
#' (\code{availableOSN}, \code{available_sdio}, \code{loadFromOSN},
#' \code{use_sdio}) are never candidates.
#'
#' @param stub character(1) a string (interpreted by grep() as a regular
#'   expression) that identifies a resource
#' @return an instance of SpatialData, or NULL if the stub does not
#' uniquely match (using grep()) the name of any resource
#' @examples
#' lu = loadFromOSN("Lung2")
#' lu
#' @export
loadFromOSN <- function(stub) {
    stopifnot(is.character(stub), length(stub) == 1L, !is.na(stub))
    # query the namespace rather than "package:SpatialData.data" so that
    # this also works when the package is loaded but not attached
    # (e.g., SpatialData.data::loadFromOSN("Lung2"))
    ns <- asNamespace("SpatialData.data")
    # exported helpers that are not dataset loaders; matching these would
    # call them without arguments (or recurse into this function).
    # NOTE(review): keep in sync with NAMESPACE when exports change
    helpers <- c("availableOSN", "available_sdio", "loadFromOSN", "use_sdio")
    opts <- setdiff(getNamespaceExports(ns), helpers)
    # only functions can be invoked below
    is_fun <- vapply(opts, function(nm)
        is.function(get(nm, envir=ns)), logical(1))
    opts <- opts[is_fun]
    hit <- grep(stub, opts, value=TRUE)
    if (length(hit) == 1L)
        return(get(hit, envir=ns)())
    if (length(hit) == 0L) {
        message("stub provided has no match in OSN resources")
    } else {
        message("stub does not uniquely match an OSN resource")
        message("matched: ", paste(hit, collapse=", "))
    }
    message("returning NULL")
    NULL
}

Loading