From 92bb44c849d756b0e577fd09f19903a7dbbabfd5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 7 Jun 2022 14:33:36 -0700 Subject: [PATCH 1/2] make tutorial downloads cleaner check for existence of each file individually rather than the download directory. Add new .downloadFiles() function to handle file download and checking to see if files downloaded properly. --- R/InputData.R | 113 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 36 deletions(-) diff --git a/R/InputData.R b/R/InputData.R index e25e743e..a13860f1 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -11,57 +11,98 @@ getTutorialData <- function( tutorial = "hematopoiesis", threads = getArchRThreads() ){ - + #Validate .validInput(input = tutorial, name = "tutorial", valid = "character") .validInput(input = threads, name = "threads", valid = c("integer")) ######### - + #Make Sure URL doesnt timeout oldTimeout <- getOption('timeout') options(timeout=100000) - + if(tolower(tutorial) %in% c("heme","hematopoiesis")){ - if(!dir.exists("HemeFragments")){ - - filesUrl <- c( - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" - ) - - dir.create("HemeFragments", showWarnings = FALSE) - - downloadFiles <- .safelapply(seq_along(filesUrl), function(x){ - download.file( - url = filesUrl[x], - destfile = file.path("HemeFragments", basename(filesUrl[x])) - ) - }, threads = min(threads, length(filesUrl))) - - #check for success of file download - if(!all(unlist(downloadFiles) == 0)) { - stop("Error! Some tutorial files did not download successfully. Please try again.") - } - } - pathFragments <- "HemeFragments" - - }else{ - + pathDownload <- "HemeFragments" + + filesUrl <- c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" + ) + + dir.create(pathDownload, showWarnings = FALSE) + + downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + + inputFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) + names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] + + }else if(tolower(tutorial) %in% c("multiome")){ + + filesUrl <- c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.filtered_feature_bc_matrix.h5", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.filtered_feature_bc_matrix.h5" + ) + + pathDownload <- "Multiome" + + dir.create(pathDownload, showWarnings = FALSE) + + downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + + fragFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) + names(fragFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + fragFiles <- fragFiles[!grepl(".tbi", fragFiles)] + geneFiles <- list.files(pathDownload, pattern = "\\.h5$", full.names = TRUE) + names(geneFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + + inputFiles <- c(fragFiles, geneFiles) + + } else{ + stop("There is no tutorial data for : ", tutorial) - + } - + #Set back URL Options options(timeout=oldTimeout) - - #Return Fragment Files - inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) - names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) - inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] + inputFiles + +} +#helper for file downloads +.downloadFiles <- function(filesUrl = NULL, pathDownload = NULL, threads = 1){ + if(is.null(filesUrl)) { + stop("No value supplied to filesUrl in .downloadFiles()!") + } + if(is.null(pathDownload)) { + stop("No value supplied to pathDownload in .downloadFiles()!") + } + message(paste0("Downloading files to ",pathDownload,"...")) + downloadFiles <- .safelapply(seq_along(filesUrl), function(x){ + if(!file.exists(file.path(pathDownload, basename(filesUrl[x])))){ + message(paste0("Downloading file ", basename(filesUrl[x]),"...")) + download.file( + url = filesUrl[x], + destfile = file.path(pathDownload, basename(filesUrl[x])) + ) + } else { + message(paste0("File exists! Skipping file ", basename(filesUrl[x]),"...")) + } + }, threads = min(threads, length(filesUrl))) + + #check for success of file download + if(!all(unlist(downloadFiles) == 0)) { + stop("Some tutorial files did not download successfully. Please try again.") + } + + downloadFiles + } #' Get PBMC Small Test Fragments From cea4782584a0ea621b1d653782d988200a74ffae Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 7 Jun 2022 14:44:22 -0700 Subject: [PATCH 2/2] fix typo --- R/InputData.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/InputData.R b/R/InputData.R index a13860f1..ba492bdf 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -33,7 +33,7 @@ getTutorialData <- function( dir.create(pathDownload, showWarnings = FALSE) - downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + downloadFiles <- .downloadFiles(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) inputFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) @@ -52,7 +52,7 @@ getTutorialData <- function( dir.create(pathDownload, showWarnings = FALSE) - downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + downloadFiles <- .downloadFiles(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) fragFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) names(fragFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$"))