From ecfe12a6b4df80beec1bf16be3b398433a7742bd Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 21 Feb 2021 21:13:28 -0800 Subject: [PATCH 001/184] Release 1.0.1 (#542) * bugfix remove sys.time for weird error in R 4.0 * bug fix new uwot save and loading features * deprecated * version number * handle error where spurious fragments seem to be 1 tile above max * update bugfixes * let quantCut in plotTrajectory be null * grepExclude in plotTrajectoryHeatmap * bugfix integrative analysis coaccessibility and peak2genelinks * change file locking location groupcoverages * throw error if no peakset for adding annotations * make it so predictionScore is not needed for peak2gene links * update addPeakSet * updated description * bugfix named list * update error handling plotEnrichHeatmap * update error messages tilematrix * handle error with bsgenomes * width is end - start + 1 * bugfix checkCairo in iterativeLSI * update archr verbosity and logging for markerfeatures * bugfix + documentation * update subsetting to metadata * handling case n = 1 for deviations matrix * bugfix ordering of scTrack * handle error where no barcodes passing on a small chromosome/scaffold * bugfix validBSgenome * fix missing paren * fix missing paren * add feature for supplying custom gene list for rna integration * add sanity check to partial matrix * update cellsInArrow in case user overrides sample in ArchRProj * Add null option for genesUse to validInput * bugfix NA in combined vars * add checks for genes symbol to be not a list Co-authored-by: Ryan Corces --- .DS_Store | Bin 14340 -> 14340 bytes DESCRIPTION | 6 +-- NAMESPACE | 2 + R/AllClasses.R | 4 +- R/AnnotationGenome.R | 9 +++- R/AnnotationPeaks.R | 27 ++++++++++-- R/ArchRBrowser.R | 6 +++ R/ArrowRead.R | 22 ++++++++- R/ArrowUtils.R | 12 ++++- R/CreateArrow.R | 76 +++++++++++++++++++++----------- R/Embedding.R | 22 ++++++++- R/FilterCells.R | 2 +- R/GgplotUtils.R | 2 + R/GlobalDefaults.R | 24 +++++++++- R/GroupCoverages.R | 14 +++--- 
R/GroupExport.R | 6 ++- R/IntegrativeAnalysis.R | 15 +++++-- R/IterativeLSI.R | 5 +-- R/LoggerUtils.R | 43 +++++++++++++++--- R/MarkerFeatures.R | 4 ++ R/MatrixDeviations.R | 6 +++ R/MatrixGeneScores.R | 8 ++++ R/ProjectMethods.R | 38 +++++++++++++++- R/RNAIntegration.R | 7 ++- R/ReproduciblePeakSet.R | 11 ++++- R/Trajectory.R | 17 ++++++- R/VisualizeData.R | 32 +++++++------- man/ArchRPalettes.Rd | 4 +- man/addArchRVerbose.Rd | 14 ++++++ man/addGeneIntegrationMatrix.Rd | 3 ++ man/addPeakAnnotations.Rd | 2 +- man/addPeakSet.Rd | 10 ++++- man/getArchRVerbose.Rd | 11 +++++ 33 files changed, 379 insertions(+), 85 deletions(-) create mode 100644 man/addArchRVerbose.Rd create mode 100644 man/getArchRVerbose.Rd diff --git a/.DS_Store b/.DS_Store index 81914b4f67252247a22d169e8ec55185b6100a30..2b21082f6917b33f76b7b6087a8e67597128a1da 100644 GIT binary patch delta 1940 zcmd5-TWl0n7(QQF%FHmdvxUy?4!iA^Ubk#%ciV0)MOY{T6_i^mC1CBgGsuK?Te{_1 zO3~Ip6cWwx22rCJjfxkNB1XmVK#V{LFRn3OAZko_gsPYrAM~HIKtvJ~-`Hd~`Tu`% zX8-SY#`?$lj~~o7&Pp9-V$fQVNDZ#+x005b)^c*E>3)4N@9^oM1ZJB&x zPC36`k!O9!F0sq(3cJQ8*stt201Bo;g%?Gbg%Xsa61Av93=Nov`>+Jd(1Mj{$9inQ zMwsYD5?e8VA&lV(?8gDTfLHMvj^bUshm$yi3%n;($j7Es@kOrHDq{@vrH?dq+%4&@ z@iH~vg>(G0zAxk*8mg_W4F@9)!{OjiBpQu{gAI+b;bB?wlt%7t>FnLLduY$_-oyNd za3PNHYeOs5kb(=l7ODmS7%;N<{P}V+_ZUaqBi~^%Xk~TPCHV;_z zYl>sa$_=$O<-9y0$)Ynmtg5YB?4+rsd~q8e>IiLT97k7mk}c&a|88tb-O11Y;N zsV%?JoZF-t?cII7y;gEC z6vt~aa5ISoSd=YhD_AERU_dyySuC)i2$5j(>!ur&LY{mibC7Dx%~4`h)NrXdHp zqy`^m!jBN5LJe`!z=K$Y)mTF+=tLK~g%VQOhV9saeWZdX@e~f?xiY*W#PGV1!v{Ey z(>RNBIFB^G!ev~+HC)HTO72gSyeUW&Ki~F|BK^!_9gp?F#Hn?*@WM8;iM3%(TD|zqZy0ILn{fnwP-~f zItaZ4;kQNjs1G}_i=4Cvdxeu63=iRXa@64x9KlO?18?Fjyp3ZxLDu>hpWsuR!sob% zukk%Daos2!AJg~CTnYMklQx!LA8QB)YwPNTogxvkRxDb>*Lmvs)nLe_FIl>bp9<~Z zN6QzCkdcmEJxREfk{^ba#t(5BJ4gtaynIkHdumieqMdnr2 zO2lEsam8AzC)2A_h^giBO;==^jY^Z`IN_EpGVS>~@ph*p?!w?Zz~hwIqj9!K8lT6jD+`ro*`1XqYmAg$kqL_1 zLXS_(DdzU<{4VwvCQ}r6C<^=tpaA6*2eT1I6YJPF8`tLRF_AQFC{sxA` B)42cu delta 1818 
zcmdUvTTEMZ9LN9PQrJ0%1Duv#P7j41ZY`Gr?YQ3t1;b>xX$%u^7z4I*6)1BsHbLw* z*%p;V-Ctee4Wse42X*UoF>y0qAEp7PO>}C!B=Vrq#TeZl=G6Z=Z36h<+aBygbI$LV zbNcW1`}uz7cFyg5Z9Xk!gDlg4!(x;&voh%M5+!3WI5ZjBJRS^h4~K$uuS1Q-$eZLn zvPhQ5r{pv81^J%*M1Cf}kPGAzj7Y;ea4;bkd2pcw6{vy_el(#C0d$}f*WpI=Vk>UK zt+*Xw+<_5HVix`s zvTqUD28lJ79y4(^W6UjDjkHjmvsO3wy=t}JKkHRz{JxqxuUb=GN8eIwDQEJshwJGc zQxCPuswLCz%r7phs%hNN8PAeT=4^{7(qs9hQ7rMiK9L=Y(!YB+eCO`)#R9PZfpnnk7mw(+sCP-Jq#1TPzFhoWzr zWrudb&d|ki1KJq^!C)xJ6}CqR{neZsl|VR>OWdS^w3A-4os5(HWS%ULljJlxL(Vd9 zzac*`S1*#wDIo1Q}_@|_yp(h6~4wYF5nXW#1(oXyF2aL?oB97tqEzyDP49Ql&`NG`&_(vZPa zQ}itOP=|JOF|{_K2b);}`f)S1VGyBJnoVI3_Uc)f!$GFpLrgh}NAMUH@C=^Cb4LUqdWYQCA) z+PJ7#7FGz?CZ%P9utK;tsjL#@B;gw1+NiolkkvN0zwZs%v5rwr7tpeza`78)qkV; z&Wh$OEZYN&(?Q(A8hZz8>=BG2qEkAJ8J6!ujMDpXKj!f$j= 2) LinkingTo: Rcpp LazyData: TRUE -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.1 Encoding: UTF-8 Imports: Rcpp (>= 0.12.16), diff --git a/NAMESPACE b/NAMESPACE index 398bf483..ce5679a8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(addArchRDebugging) export(addArchRGenome) export(addArchRLogging) export(addArchRThreads) +export(addArchRVerbose) export(addBgdPeaks) export(addCellColData) export(addClusters) @@ -68,6 +69,7 @@ export(getArchRDebugging) export(getArchRGenome) export(getArchRLogging) export(getArchRThreads) +export(getArchRVerbose) export(getArrowFiles) export(getAvailableMatrices) export(getBgdPeaks) diff --git a/R/AllClasses.R b/R/AllClasses.R index 9ba68568..f87ff98c 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -130,7 +130,7 @@ ArchRProject <- function( message("Getting SampleNames...") sampleNames <- unlist(.safelapply(seq_along(ArrowFiles), function(x){ - message(x, " ", appendLF = FALSE) + if(getArchRVerbose()) message(x, " ", appendLF = FALSE) .sampleName(ArrowFiles[x]) }, threads = threads)) message("") @@ -159,7 +159,7 @@ ArchRProject <- function( #Cell Information message("Getting Cell Metadata...") metadataList <- .safelapply(seq_along(ArrowFiles), function(x){ - 
message(x, " ", appendLF = FALSE) + if(getArchRVerbose()) message(x, " ", appendLF = FALSE) .getMetadata(ArrowFiles[x]) }, threads = threads) message("") diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index d35fa9ba..3d75ac3a 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -41,7 +41,14 @@ createGenomeAnnotation <- function( ################## message("Getting blacklist..") - blacklist <- .getBlacklist(genome = bsg@provider_version) + + genomeName <- tryCatch({ + bsg@provider_version + }, error = function(e){ + strsplit(bsg@pkgname,"\\.")[[1]][4] + }) + + blacklist <- .getBlacklist(genome = genomeName) }else{ diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 60b30642..13df1f22 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -98,7 +98,7 @@ getMatches <- function(ArchRProj = NULL, name = NULL, annoName = NULL){ #' binary value is stored indicating whether each region is observed within the peak region. #' #' @param ArchRProj An `ArchRProject` object. -#' @param regions A `list` of `GRanges` that are to be overlapped with the `peakSet` in the `ArchRProject`. +#' @param regions A named `list` of `GRanges` that are to be overlapped with the `peakSet` in the `ArchRProject`. #' @param name The name of `peakAnnotation` object to be stored as in `ArchRProject`. #' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `name` to be overwritten #' if it already exists in the given `ArchRProject`. 
@@ -136,6 +136,10 @@ addPeakAnnotations <- function( }else{ + if(is.null(names(regions))){ + names(regions) <- paste0("Region_", seq_along(regions)) + } + regionPositions <- lapply(seq_along(regions), function(x){ .logThis(regions[[x]], paste0("regions[[x]]-", x), logFile = logFile) @@ -184,6 +188,9 @@ addPeakAnnotations <- function( # Peak Overlap Matrix ############################################################# peakSet <- getPeakSet(ArchRProj) + if(is.null(peakSet)){ + .logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile) + } allPositions <- unlist(regionPositions) .logDiffTime("Creating Peak Overlap Matrix", t1 = tstart, verbose = TRUE, logFile = logFile) @@ -431,6 +438,9 @@ addMotifAnnotations <- function( ############################################################# .logDiffTime("Finding Motif Positions with motifmatchr!", t1 = tstart, verbose = TRUE, logFile = logFile) peakSet <- ArchRProj@peakSet + if(is.null(peakSet)){ + .logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile) + } motifPositions <- motifmatchr::matchMotifs( pwms = motifs, subject = peakSet, @@ -606,7 +616,11 @@ addArchRAnnotations <- function( } } - genome <- tolower(validBSgenome(getGenome(ArchRProj))@provider_version) + genome <- tolower(tryCatch({ + validBSgenome(getGenome(ArchRProj))$provider_version + }, error = function(e){ + strsplit(validBSgenome(getGenome(ArchRProj))@pkgname,"\\.")[[1]][4] + })) annoPath <- file.path(find.package("ArchR", NULL, quiet = TRUE), "data", "Annotations") dir.create(annoPath, showWarnings = FALSE) @@ -690,6 +704,9 @@ addArchRAnnotations <- function( # Peak Overlap Matrix ############################################################# peakSet <- getPeakSet(ArchRProj) + if(is.null(peakSet)){ + .logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! 
See addReproduciblePeakSet!", logFile = logFile) + } chr <- paste0(unique(seqnames(peakSet))) .logMessage("Annotating Chromosomes", verbose = TRUE, logFile = logFile) @@ -1061,6 +1078,10 @@ plotEnrichHeatmap <- function( mat <- mat[keep, ,drop = FALSE] .logThis(mat, "mat-mlog10Padj-Filter", logFile = logFile) + if(nrow(mat)==0){ + stop("No enrichments found for your cutoff!") + } + passMat <- lapply(seq_len(nrow(mat)), function(x){ (mat[x, ] >= 0.9*max(mat[x, ])) * 1 }) %>% Reduce("rbind", .) %>% data.frame @@ -1070,7 +1091,7 @@ plotEnrichHeatmap <- function( mat[mat > pMax] <- pMax if(nrow(mat)==0){ - stop("No enrichments found!") + stop("No enrichments found for your cutoff!") } mat <- .rowScale(as.matrix(mat), min = 0) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 27a01fbd..c496cc9f 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1749,6 +1749,12 @@ plotBrowserTrack <- function( title <- paste0(as.character(seqnames(region)),":", start(region)-1, "-", end(region), " ", title) + #Re-Order + groupDF$group2 <- factor( + paste0(groupDF$group2), + levels = gtools::mixedsort(unique(paste0(groupDF$group2))) + ) + p <- ggplot(groupDF, aes(x=bp, y=y, width = tileSize, fill = group2, color = group2)) + geom_tile(size = scTileSize) + facet_grid(group2 ~ ., scales="free_y") + diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 0fa26ae1..20bbdb6c 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -783,6 +783,11 @@ getMatrixFromArrow <- function( matFiles <- lapply(mat, function(x) x[[2]]) %>% Reduce("c", .) mat <- lapply(mat, function(x) x[[1]]) %>% Reduce("cbind", .) 
+ if(!all(cellNames %in% colnames(mat))){ + .logThis(sampledCellNames, "cellNames supplied", logFile = logFile) + .logThis(colnames(mat), "cellNames from matrix", logFile = logFile) + stop("Error not all cellNames found in partialMatrix") + } mat <- mat[,sampledCellNames, drop = FALSE] mat <- .checkSparseMatrix(mat, length(sampledCellNames)) @@ -793,6 +798,11 @@ getMatrixFromArrow <- function( }else{ mat <- Reduce("cbind", mat) + if(!all(cellNames %in% colnames(mat))){ + .logThis(cellNames, "cellNames supplied", logFile = logFile) + .logThis(colnames(mat), "cellNames from matrix", logFile = logFile) + stop("Error not all cellNames found in partialMatrix") + } mat <- mat[,cellNames, drop = FALSE] mat <- .checkSparseMatrix(mat, length(cellNames)) @@ -902,6 +912,16 @@ getMatrixFromArrow <- function( stop("Means Variances and Ns lengths not identical") } + #Check if samples have NAs due to N = 1 sample or some other weird thing. + #Set it to min non NA variance + dfVars <- lapply(seq_len(nrow(dfVars)), function(x){ + vx <- dfVars[x, ] + if(any(is.na(vx))){ + vx[is.na(vx)] <- min(vx[!is.na(vx)]) + } + vx + }) %>% Reduce("rbind", .) 
+ combinedMeans <- rowSums(t(t(dfMeans) * ns)) / sum(ns) summedVars <- rowSums(t(t(dfVars) * (ns - 1)) + t(t(dfMeans^2) * ns)) combinedVars <- (summedVars - sum(ns)*combinedMeans^2)/(sum(ns)-1) @@ -925,8 +945,6 @@ getMatrixFromArrow <- function( length(.availableCells(ArrowFiles[y], useMatrix)) }) %>% unlist - - #Compute RowVars summaryDF <- .safelapply(seq_along(featureDF), function(x){ diff --git a/R/ArrowUtils.R b/R/ArrowUtils.R index 7fa584b5..f8e4af75 100644 --- a/R/ArrowUtils.R +++ b/R/ArrowUtils.R @@ -379,10 +379,20 @@ o <- h5createGroup(outArrow, groupName) mData <- ArrowInfo[[groupName]] + cellNames <- .h5read(inArrow, "Metadata/CellNames") + idx <- which(cellNames %in% stringr::str_split(cellsKeep, pattern="#", simplify=TRUE)[,2]) + if(length(idx)==0){ + stop("No cells matching in arrow file!") + } + for(i in seq_len(nrow(mData))){ h5name <- paste0(groupName, "/", mData$name[i]) - h5write(.h5read(inArrow, h5name), file = outArrow, name = h5name) + mDatai <- .h5read(inArrow, h5name) + if(length(mDatai)==length(cellNames)){ + mDatai <- mDatai[idx] + } + h5write(mDatai, file = outArrow, name = h5name) } #2. 
scATAC-Fragments diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 10db9ec3..d793bc69 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -473,7 +473,7 @@ createArrowFiles <- function( .logDiffTime("Continuing through after error ggplot for Fragment Size Distribution", t1 = tstart, logFile = logFile) #print(x) - message("\n") + if(getArchRVerbose()) message("\n") }) gc() @@ -537,7 +537,7 @@ createArrowFiles <- function( .logDiffTime("Continuing through after error ggplot for TSS by Frags", t1 = tstart, logFile = logFile) #message(x) - message("\n") + if(getArchRVerbose()) message("\n") }) @@ -1119,7 +1119,7 @@ createArrowFiles <- function( TRUE }, error = function(x){ tryCatch({ - message("Attempting to index ", file," as tabix..") + if(getArchRVerbose()) message("Attempting to index ", file," as tabix..") indexTabix(file, format = "bed") TRUE }, error = function(y){ @@ -1138,7 +1138,7 @@ createArrowFiles <- function( } }, error = function(x){ tryCatch({ - message("Attempting to index ", file," as bam...") + if(getArchRVerbose()) message("Attempting to index ", file," as bam...") indexBam(file) TRUE }, error = function(y){ @@ -1278,7 +1278,7 @@ createArrowFiles <- function( o <- .suppressAll(h5createDataset(tmpFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) o <- .suppressAll(h5createDataset(tmpFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1)) - o <- h5write(obj = cbind(dt$V2,dt$V3-dt$V2), file = tmpFile, name = chrPos) + o <- h5write(obj = cbind(dt$V2,dt$V3 - dt$V2 + 1), file = tmpFile, name = chrPos) o <- h5write(obj = RG@lengths, file = tmpFile, name = chrRGLengths) o <- h5write(obj = RG@values, file = tmpFile, name = chrRGValues) @@ -1314,7 +1314,7 @@ createArrowFiles <- function( o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1)) - o <- h5write(obj = 
cbind(dt$V2,dt$V3-dt$V2), file = tmpChrFile, name = chrPos) + o <- h5write(obj = cbind(dt$V2,dt$V3 - dt$V2 + 1), file = tmpChrFile, name = chrPos) o <- h5write(obj = RG@lengths, file = tmpChrFile, name = chrRGLengths) o <- h5write(obj = RG@values, file = tmpChrFile, name = chrRGValues) @@ -1650,7 +1650,7 @@ createArrowFiles <- function( o <- .suppressAll(h5createDataset(tmpFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1)) - o <- h5write(obj = cbind(dt$start,dt$end-dt$start), file = tmpFile, name = chrPos) + o <- h5write(obj = cbind(dt$start, dt$end - dt$start + 1), file = tmpFile, name = chrPos) o <- h5write(obj = RG@lengths, file = tmpFile, name = chrRGLengths) o <- h5write(obj = RG@values, file = tmpFile, name = chrRGValues) @@ -1686,7 +1686,7 @@ createArrowFiles <- function( o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1)) - o <- h5write(obj = cbind(dt$start,dt$end-dt$start), file = tmpChrFile, name = chrPos) + o <- h5write(obj = cbind(dt$start, dt$end - dt$start + 1), file = tmpChrFile, name = chrPos) o <- h5write(obj = RG@lengths, file = tmpChrFile, name = chrRGLengths) o <- h5write(obj = RG@values, file = tmpChrFile, name = chrRGValues) @@ -1930,14 +1930,29 @@ createArrowFiles <- function( chrPos <- paste0("Fragments/",chr,"/Ranges") chrRGLengths <- paste0("Fragments/",chr,"/RGLengths") chrRGValues <- paste0("Fragments/",chr,"/RGValues") - o <- h5createGroup(outArrow, paste0("Fragments/",chr)) - o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, - size = 
max(nchar(mcols(fragments)$RG@values)) + 1)) - o <- h5write(obj = cbind(start(fragments),width(fragments)), file = outArrow, name = chrPos) - o <- h5write(obj = mcols(fragments)$RG@lengths, file = outArrow, name = chrRGLengths) - o <- h5write(obj = mcols(fragments)$RG@values, file = outArrow, name = chrRGValues) + + if(lengthRG == 0){ + + .logMessage(msg = paste0(prefix, " detected 0 Fragments in cells passing filtering threshold for ", chr), logFile = logFile) + + o <- h5createGroup(outArrow, paste0("Fragments/",chr)) + o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0)) + o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0)) + o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0, + size = 10)) + + }else{ + + o <- h5createGroup(outArrow, paste0("Fragments/",chr)) + o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0)) + o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) + o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, + size = max(nchar(mcols(fragments)$RG@values)) + 1)) + + o <- h5write(obj = cbind(start(fragments),width(fragments)), file = outArrow, name = chrPos) + o <- h5write(obj = mcols(fragments)$RG@lengths, file = outArrow, name = chrRGLengths) + o <- h5write(obj = mcols(fragments)$RG@values, file = outArrow, name = chrRGValues) + } #Free Some Memory! 
rm(fragments) @@ -1995,15 +2010,26 @@ createArrowFiles <- function( chrRGLengths <- paste0(chr, "._.RGLengths") chrRGValues <- paste0(chr, "._.RGValues") - #HDF5 Write - o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, - size = max(nchar(mcols(fragments)$RG@values)) + 1)) - - o <- h5write(obj = cbind(start(fragments),width(fragments)), file = tmpChrFile, name = chrPos) - o <- h5write(obj = mcols(fragments)$RG@lengths, file = tmpChrFile, name = chrRGLengths) - o <- h5write(obj = mcols(fragments)$RG@values, file = tmpChrFile, name = chrRGValues) + if(lengthRG == 0){ + + #HDF5 Write + o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0)) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0)) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0, + size = 10)) + + }else{ + + #HDF5 Write + o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0)) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, + size = max(nchar(mcols(fragments)$RG@values)) + 1)) + + o <- h5write(obj = cbind(start(fragments),width(fragments)), file = tmpChrFile, name = chrPos) + o <- h5write(obj = mcols(fragments)$RG@lengths, file = tmpChrFile, name = chrRGLengths) + o <- h5write(obj = mcols(fragments)$RG@values, file = tmpChrFile, name = chrRGValues) + } 
#Free Some Memory! rm(fragments) diff --git a/R/Embedding.R b/R/Embedding.R index ba59be35..e0ead19c 100644 --- a/R/Embedding.R +++ b/R/Embedding.R @@ -207,9 +207,18 @@ addUMAP <- function( } +#New Save UWOT +.saveUWOT <- function(model, file){ + tryCatch({ + uwot::save_uwot(model = model, file = file, verbose = TRUE) + }, error = function(e){ + .saveUWOT_Deprecated(model = model, file = file) #backwards to previous version + }) +} + #save_uwot does not work because tarring doesnt work for some reason on Stanford's compute server #Adapted from save_uwot -.saveUWOT <- function(model, file){ +.saveUWOT_Deprecated <- function(model, file){ file <- file.path(normalizePath(dirname(file)), basename(file)) wd <- getwd() mod_dir <- tempfile(pattern = "dir") @@ -243,8 +252,17 @@ addUMAP <- function( return(o) } +#New Save UWOT +.loadUWOT <- function(file){ + tryCatch({ + uwot::load_uwot(file = file, verbose = TRUE) + }, error = function(e){ + .loadUWOT_Deprecated(file = file, nDim = nDim) #backwards to previous version + }) +} + #Adapted from load_uwot -.loadUWOT <- function(file, nDim = NULL){ +.loadUWOT_Deprecated <- function(file, nDim = NULL){ model <- NULL tryCatch({ mod_dir <- tempfile(pattern = "dir") diff --git a/R/FilterCells.R b/R/FilterCells.R index 6ef51514..54a89ac1 100644 --- a/R/FilterCells.R +++ b/R/FilterCells.R @@ -99,7 +99,7 @@ filterDoublets <- function(ArchRProj = NULL, cutEnrich = 1, cutScore = -Inf, fil } ArchRProj <- addProjectSummary(ArchRProj = ArchRProj, name = "filterDoublets", - summary = c("Date" = Sys.time(), cutEnrich = cutEnrich, cutScore = cutScore, filterRatio = filterRatio)) + summary = c(cutEnrich = cutEnrich, cutScore = cutScore, filterRatio = filterRatio)) ArchRProj diff --git a/R/GgplotUtils.R b/R/GgplotUtils.R index c0339437..609caa08 100644 --- a/R/GgplotUtils.R +++ b/R/GgplotUtils.R @@ -952,8 +952,10 @@ theme_ArchR <- function( .checkCairo <- function(){ tryCatch({ + tmp <- dev.cur() Cairo::Cairo(type='raster') dev.off() + 
dev.set(tmp) TRUE }, error = function(e){ FALSE diff --git a/R/GlobalDefaults.R b/R/GlobalDefaults.R index 26854c57..aae3753b 100644 --- a/R/GlobalDefaults.R +++ b/R/GlobalDefaults.R @@ -8,7 +8,8 @@ ArchRDefaults <- list( ArchR.logging = TRUE, ArchR.genome = NA, ArchR.chrPrefix = TRUE, - ArchR.debugging = FALSE + ArchR.debugging = FALSE, + ArchR.verbose = TRUE ) .onAttach <- function(libname, pkgname){ @@ -27,9 +28,30 @@ ArchRDefaults <- list( if(!.checkCairo()){ packageStartupMessage("WARNING : Cairo check shows Cairo is not functional.\n ggplot2 rasterization will not be available without Cario.\n This may cause issues editing plots with many thousands of points from single cells.") } + if(.checkJupyter()){ + packageStartupMessage("Detected Jupyer Notebook session. Disabling Log Messages!\n\tIf this is undesired use `addArchRVerbose(TRUE)`") + addArchRVerbose(verbose = FALSE) + } invisible() } +#Check Jupyer Status +.checkJupyter <- function(){ + tryCatch({ + sysID <- Sys.getenv("JPY_PARENT_PID") + if(!is.character(sysID)){ + return(FALSE) + } + if(sysID == ""){ + FALSE + }else{ + TRUE + } + },error= function(e){ + FALSE + }) +} + #' Install extra packages used in ArchR that are not installed by default #' #' This function will install extra packages used in ArchR that are not installed by default. 
diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index 549ec840..63859218 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -195,7 +195,11 @@ addGroupCoverages <- function( args$ArrowFiles <- getArrowFiles(ArchRProj) args$availableChr <- .availableSeqnames(getArrowFiles(ArchRProj)) args$chromLengths <- getChromLengths(ArchRProj) - args$cellsInArrow <- split(rownames(getCellColData(ArchRProj)), getCellColData(ArchRProj)$Sample) + #args$cellsInArrow <- split(rownames(getCellColData(ArchRProj)), getCellColData(ArchRProj)$Sample) + args$cellsInArrow <- cellsInArrow <- split( + rownames(getCellColData(ArchRProj)), + stringr::str_split(rownames(getCellColData(ArchRProj)), pattern="\\#", simplify=TRUE)[,1] + ) args$covDir <- file.path(getOutputDirectory(ArchRProj), "GroupCoverages", groupBy) args$parallelParam <- parallelParam args$threads <- threads @@ -218,9 +222,6 @@ addGroupCoverages <- function( nCells <- lapply(seq_along(batchOut),function(x) batchOut[[x]]$nCells) %>% unlist nFragments <- lapply(seq_along(batchOut),function(x) batchOut[[x]]$nFragments) %>% unlist - #Enable Hdf5 File Locking - h5enableFileLocking() - #Add To Project coverageMetadata <- DataFrame( Group = stringr::str_split(names(unlistGroups), pattern = "\\._.", simplify=TRUE)[,1], @@ -245,6 +246,9 @@ addGroupCoverages <- function( ArchRProj@projectMetadata$GroupCoverages[[groupBy]] <- SimpleList(Params = Params, coverageMetadata = coverageMetadata) + #Enable Hdf5 File Locking + h5enableFileLocking() + .logDiffTime(sprintf("Finished Creation of Coverage Files!"), tstart, addHeader = FALSE) .endLogging(logFile = logFile) @@ -671,7 +675,7 @@ addGroupCoverages <- function( .getCoverageMetadata <- function(ArchRProj = NULL, groupBy = NULL, useGroups = NULL, minCells = NULL){ coverageMetadata <- ArchRProj@projectMetadata$GroupCoverages[[groupBy]]$coverageMetadata if(is.null(coverageMetadata)){ - stop("No Coverage Metadata found for : ", groupBy) + stop("No Coverage Metadata found for : ", 
groupBy, ". Please run addGroupCoverages!") } if(!is.null(useGroups)){ if(sum(coverageMetadata[,1] %in% useGroups) == 0){ diff --git a/R/GroupExport.R b/R/GroupExport.R index b13b5a1e..87bf3719 100644 --- a/R/GroupExport.R +++ b/R/GroupExport.R @@ -222,7 +222,11 @@ getGroupBW <- function( o <- suppressWarnings(file.remove(list.files(bwDir2, full.names = TRUE))) - cellsInArrow <- split(rownames(getCellColData(ArchRProj)), getCellColData(ArchRProj)$Sample) + #cellsInArrow <- split(rownames(getCellColData(ArchRProj)), getCellColData(ArchRProj)$Sample) + cellsInArrow <- split( + rownames(getCellColData(ArchRProj)), + stringr::str_split(rownames(getCellColData(ArchRProj)), pattern="\\#", simplify=TRUE)[,1] + ) availableChr <- .availableSeqnames(head(getArrowFiles(ArchRProj))) chromLengths <- getChromLengths(ArchRProj) chromSizes <- getChromSizes(ArchRProj) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 909e0f30..543d1eff 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -821,7 +821,7 @@ addCoAccessibility <- function( o$idx2 <- NULL o <- o[!is.na(o$correlation),] - o$TStat <- (o$correlation / sqrt((max(1-o$correlation^2, 0.00000000000000001))/(length(knnObj)-2))) #T-statistic P-value + o$TStat <- (o$correlation / sqrt((pmax(1-o$correlation^2, 0.00000000000000001, na.rm = TRUE))/(length(knnObj)-2))) #T-statistic P-value o$Pval <- 2*pt(-abs(o$TStat), length(knnObj) - 2) o$FDR <- p.adjust(o$Pval, method = "fdr") o$VarQuantile1 <- .getQuantiles(o$Variability1) @@ -1028,9 +1028,16 @@ addPeak2GeneLinks <- function( tstart <- Sys.time() dfAll <- .safelapply(seq_along(ArrowFiles), function(x){ + cNx <- paste0(names(ArrowFiles)[x], "#", h5read(ArrowFiles[x], paste0(useMatrix, "/Info/CellNames"))) + pSx <- tryCatch({ + h5read(ArrowFiles[x], paste0(useMatrix, "/Info/predictionScore")) + }, error = function(e){ + if(getArchRVerbose()) message("No predictionScore found. 
Continuing without predictionScore!") + rep(9999999, length(cNx)) + }) DataFrame( - cellNames = paste0(names(ArrowFiles)[x], "#", h5read(ArrowFiles[x], paste0(useMatrix, "/Info/CellNames"))), - predictionScore = h5read(ArrowFiles[x], paste0(useMatrix, "/Info/predictionScore")) + cellNames = cNx, + predictionScore = pSx ) }, threads = threads) %>% Reduce("rbind", .) @@ -1173,7 +1180,7 @@ addPeak2GeneLinks <- function( o$Correlation <- rowCorCpp(as.integer(o$A), as.integer(o$B), assay(seATAC), assay(seRNA)) o$VarAssayA <- .getQuantiles(matrixStats::rowVars(assay(seATAC)))[o$A] o$VarAssayB <- .getQuantiles(matrixStats::rowVars(assay(seRNA)))[o$B] - o$TStat <- (o$Correlation / sqrt((max(1-o$Correlation^2, 0.00000000000000001))/(ncol(seATAC)-2))) #T-statistic P-value + o$TStat <- (o$Correlation / sqrt((pmax(1-o$Correlation^2, 0.00000000000000001, na.rm = TRUE))/(ncol(seATAC)-2))) #T-statistic P-value o$Pval <- 2*pt(-abs(o$TStat), ncol(seATAC) - 2) o$FDR <- p.adjust(o$Pval, method = "fdr") out <- o[, c("A", "B", "Correlation", "FDR", "VarAssayA", "VarAssayB")] diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 97f1dcac..57300734 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -803,9 +803,7 @@ addIterativeLSI <- function( UMAPParams$n_threads <- floor(threads / 2) uwotUmap <- do.call(uwot::umap, UMAPParams) - #Plot - pdf(file.path(outDir, paste0("Save-LSI-Iteration-",j,".pdf")), width = 6, height = 6) - + #Plot p1 <- ggPoint( uwotUmap[,1], uwotUmap[,2], @@ -831,6 +829,7 @@ addIterativeLSI <- function( theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank()) + pdf(file.path(outDir, paste0("Save-LSI-Iteration-",j,".pdf")), width = 6, height = 6) .fixPlotSize(p1, plotWidth = 6, plotHeight = 6) grid::grid.newpage() .fixPlotSize(p2, plotWidth = 6, plotHeight = 6) diff --git a/R/LoggerUtils.R b/R/LoggerUtils.R index f704cf94..bc498e08 100644 --- a/R/LoggerUtils.R +++ b/R/LoggerUtils.R @@ 
-56,6 +56,33 @@ getArchRDebugging <- function(){ ArchRDebugging } +#' Set ArchR Verbosity for Log Messaging +#' +#' This function will set ArchR logging verbosity. +#' +#' @param verbose A boolean describing whether to printMessages in addition to logging with ArchR. +#' @export +addArchRVerbose <- function(verbose = TRUE){ + .validInput(input = verbose, name = "verbose", valid = "boolean") + message("Setting addArchRVerbose = ", verbose) + options(ArchR.verbose = verbose) + return(invisible(0)) +} + +#' Set ArchR Verbosity for Log Messaging +#' +#' This function will get ArchR logging verbosity. +#' +#' @export +getArchRVerbose <- function(){ + ArchRVerbose <- options()[["ArchR.verbose"]] + if(!is.logical(ArchRVerbose)){ + options(ArchR.verbose = TRUE) + return(TRUE) + } + ArchRVerbose +} + #' Create a Log File for ArchR #' #' This function will create a log file for ArchR functions. If ArchRLogging is not TRUE @@ -113,9 +140,9 @@ createLogFile <- function( }else{ msg <- sprintf("%s : %s, %s %s %s", Sys.time(), main, dt, units, tail) } - message(msg) + if(getArchRVerbose()) message(msg) }, error = function(x){ - message("Time Error : ", x) + if(getArchRVerbose()) message("Time Error : ", x) }) } @@ -172,7 +199,7 @@ createLogFile <- function( } } - message("ArchR logging to : ", logFile, + if(getArchRVerbose()) message("ArchR logging to : ", logFile, "\nIf there is an issue, please report to github with logFile!") #Begin With @@ -220,8 +247,12 @@ createLogFile <- function( useLogs = getArchRLogging() ){ - msg <- utils::capture.output(message(...), type = "message") - msg <- paste0(msg, collapse = "\n") + if(getArchRVerbose()){ + msg <- utils::capture.output(message(...), type = "message") + msg <- paste0(msg, collapse = "\n") + }else{ + msg <- "SuppressedMessaged due to getArchRVerbose() is FALSE!" 
+ } if(is.null(msg)){ stop("Message must be provided when logging!") @@ -556,7 +587,7 @@ createLogFile <- function( cat(paste0("Elapsed Time Minutes = ", mn), file = logFile, append = TRUE) cat(paste0("\nElapsed Time Hours = ", hr), file = logFile, append = TRUE) cat("\n\n-------\n\n\n\n", file = logFile, append = TRUE) - message("ArchR logging successful to : ", logFile) + if(getArchRVerbose()) message("ArchR logging successful to : ", logFile) }, error = function(x){ }) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 0cf5bf56..0ed03a5b 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -410,6 +410,10 @@ getMarkerFeatures <- function( } + .logThis(o, paste0(group, "_", seqnames[y], "_diffResult"), logFile = logFile) + + o + }) %>% Reduce("rbind", .) idxFilter <- rowSums(pairwiseDF[,c("mean1","mean2")]) != 0 diff --git a/R/MatrixDeviations.R b/R/MatrixDeviations.R index 6fd0d467..fd9de175 100644 --- a/R/MatrixDeviations.R +++ b/R/MatrixDeviations.R @@ -378,11 +378,17 @@ addDeviationsMatrix <- function( if("z" %in% tolower(out)){ z <- t(vapply(results, function(x) x[["z"]], rep(0, length(cn)))) + if(length(cn)==1){ + z <- matrix(z, ncol=length(cn)) + } }else{ z <- matrix(0, nrow = ncol(annotationsMatrix), ncol = length(cn)) } if("deviations" %in% tolower(out)){ dev <- t(vapply(results, function(x) x[["dev"]], rep(0, length(cn)))) + if(length(cn)==1){ + dev <- matrix(dev, ncol=length(cn)) + } }else{ dev <- matrix(0, nrow = ncol(annotationsMatrix), ncol = length(cn)) } diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index 1325d4ce..ef34822d 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -100,6 +100,10 @@ addGeneScoreMatrix <- function( stop("Error Input Arrow Files do not all exist!") } + if(inherits(mcols(genes)$symbol, "list") | inherits(mcols(genes)$symbol, "SimpleList")){ + stop("Found a list in genes symbol! This is an incorrect format. 
Please correct your genes!") + } + .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "addGeneScoreMatrix Input-Parameters", logFile = logFile) @@ -191,6 +195,10 @@ addGeneScoreMatrix <- function( .validInput(input = force, name = "force", valid = c("boolean")) .validInput(input = tmpFile, name = "tmpFile", valid = c("character", "null")) + if(inherits(mcols(genes)$symbol, "list") | inherits(mcols(genes)$symbol, "SimpleList")){ + stop("Found a list in genes symbol! This is an incorrect format. Please correct your genes!") + } + ArrowFile <- ArrowFiles[i] sampleName <- .sampleName(ArrowFile) diff --git a/R/ProjectMethods.R b/R/ProjectMethods.R index 5609cf0f..1cb18b7d 100644 --- a/R/ProjectMethods.R +++ b/R/ProjectMethods.R @@ -363,24 +363,56 @@ getPeakSet <- function(ArchRProj = NULL){ #' #' @param ArchRProj An `ArchRProject` object. #' @param peakSet A `GRanges` object containing the set of regions that define all peaks in the desired peak set. +#' @param genomeAnnotation The genomeAnnotation (see `createGenomeAnnotation()`) to be used for generating peak metadata such as nucleotide +#' information (GC content) or chromosome sizes. #' @param force If a `peakSet` object has already been added to the given `ArchRProject`, the value of `force` determines #' whether or not to overwrite this `peakSet`. 
#' @export -addPeakSet <- function(ArchRProj = NULL, peakSet = NULL, force = FALSE){ +addPeakSet <- function( + ArchRProj = NULL, + peakSet = NULL, + genomeAnnotation = getGenomeAnnotation(ArchRProj), + force = FALSE + ){ + .validInput(input = ArchRProj, name = "ArchRProj", valid = "ArchRProject") .validInput(input = peakSet, name = "peakSet", valid = c("GRanges")) .validInput(input = force, name = "force", valid = c("boolean")) + genomeAnnotation <- .validGenomeAnnotation(genomeAnnotation) + if(is.null(ArchRProj@peakSet) | force){ + #Index The Peak Set peakSet <- lapply(split(peakSet, seqnames(peakSet)), function(x){ mcols(x)$idx <- seq_along(x) x }) %>% Reduce("c", .) %>% sortSeqlevels %>% sort + + #Get NucleoTide Content + peakSet <- tryCatch({ + .requirePackage(genomeAnnotation$genome) + .requirePackage("Biostrings",source="bioc") + BSgenome <- eval(parse(text = genomeAnnotation$genome)) + BSgenome <- validBSgenome(BSgenome) + nucFreq <- BSgenome::alphabetFrequency(getSeq(BSgenome, peakSet)) + mcols(peakSet)$GC <- round(rowSums(nucFreq[,c("G","C")]) / rowSums(nucFreq),4) + mcols(peakSet)$N <- round(nucFreq[,c("N")] / rowSums(nucFreq),4) + peakSet + }, error = function(e){ + peakSet + }) + + #Add PeakSet ArchRProj@peakSet <- peakSet + }else{ + stop("Error peakSet exists! Set force=TRUE to override!") + } + return(ArchRProj) + } ########################################################################################## @@ -571,6 +603,10 @@ getGenes <- function(ArchRProj = NULL, symbols = NULL){ genes <- genes[which(tolower(genes$symbol) %in% tolower(symbols))] } + if(inherits(mcols(genes)$symbol, "list") | inherits(mcols(genes)$symbol, "SimpleList")){ + stop("Found a list in genes symbol! This is an incorrect format. 
Please correct your genes!") + } + genes } diff --git a/R/RNAIntegration.R b/R/RNAIntegration.R index 53a367f6..8e11b27e 100644 --- a/R/RNAIntegration.R +++ b/R/RNAIntegration.R @@ -43,6 +43,7 @@ #' @param reduction The Seurat reduction method to use for integrating modalities. See `Seurat::FindTransferAnchors()` for possible reduction methods. #' @param addToArrow A boolean value indicating whether to add the log2-normalized transcript counts from the integrated matched RNA to the Arrow files. #' @param scaleTo Each column in the integrated RNA matrix will be normalized to a column sum designated by `scaleTo` prior to adding to Arrow files. +#' @param genesUse If desired a character vector of gene names to use for integration instead of determined ones from Seurat::variableGenes. #' @param nameCell A column name to add to `cellColData` for the predicted scRNA-seq cell in the specified `ArchRProject`. This is useful for identifying which cell was closest to the scATAC-seq cell. #' @param nameGroup A column name to add to `cellColData` for the predicted scRNA-seq group in the specified `ArchRProject`. See `groupRNA` for more details. #' @param nameScore A column name to add to `cellColData` for the predicted scRNA-seq score in the specified `ArchRProject`. 
These scores represent @@ -77,6 +78,7 @@ addGeneIntegrationMatrix <- function( reduction = "cca", addToArrow = TRUE, scaleTo = 10000, + genesUse = NULL, nameCell = "predictedCell", nameGroup = "predictedGroup", nameScore = "predictedScore", @@ -110,6 +112,7 @@ addGeneIntegrationMatrix <- function( .validInput(input = reduction, name = "reduction", valid = c("character")) .validInput(input = addToArrow, name = "addToArrow", valid = c("boolean")) .validInput(input = scaleTo, name = "scaleTo", valid = c("numeric")) + .validInput(input = genesUse, name = "genesUse", valid = c("character", "null")) .validInput(input = nameCell, name = "nameCell", valid = c("character")) .validInput(input = nameGroup, name = "nameGroup", valid = c("character")) .validInput(input = nameScore, name = "nameScore", valid = c("character")) @@ -385,7 +388,9 @@ addGeneIntegrationMatrix <- function( .logDiffTime(sprintf("%s Identifying Variable Genes", prefix), tstart, verbose = verbose, logFile = logFile) subRNA <- FindVariableFeatures(object = subRNA, nfeatures = nGenes, verbose = FALSE) subRNA <- ScaleData(object = subRNA, verbose = FALSE) - genesUse <- VariableFeatures(object = subRNA) + if(is.null(genesUse)){ + genesUse <- VariableFeatures(object = subRNA) + } ############################################################################################## #2. Get Gene Score Matrix and Create Seurat ATAC diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index fd6393df..8699e748 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -262,7 +262,7 @@ addReproduciblePeakSet <- function( }else if(tolower(peakMethod) == "tiles"){ - .logMessage("Calling Peaks with TileMatrix", logFile = logFile) + .logMessage("Calling Peaks with TileMatrix. 
We recommend using the Macs2 Version.\nThis method is still under development.", logFile = logFile) useMatrix <- "TileMatrix" @@ -325,11 +325,13 @@ addReproduciblePeakSet <- function( #Compute Row Sums Across All Samples .logDiffTime("Computing Total Accessibility Across All Features", tstart, addHeader = FALSE, verbose = verbose) totalAcc <- .getRowSums(ArrowFiles = ArrowFiles, useMatrix = useMatrix, seqnames = chrToRun) + .logThis(totalAcc, "PeakCallTiles-totalAcc", logFile=logFile) nTiles <- nrow(totalAcc) gc() #Pre-Filter 0s topFeatures <- totalAcc[which(totalAcc$rowSums != 0), ] + .logThis(topFeatures, "PeakCallTiles-topFeatures", logFile=logFile) #Group Matrix #Consider reading in group-wise if this is getting too large? @@ -344,9 +346,12 @@ addReproduciblePeakSet <- function( asSparse = TRUE, verbose = FALSE ) - + .logThis(groupMat, "PeakCallTiles-groupMat", logFile=logFile) + .logDiffTime(sprintf("Created Pseudo-Grouped Tile Matrix (%s GB)", round(object.size(groupMat) / 10^9, 3)), tstart, addHeader = FALSE, verbose = verbose) expectation <- Matrix::colSums(groupMat) / nTiles + .logMessage(paste0("colSums = ", Matrix::colSums(groupMat)), logFile = logFile) + .logMessage(paste0("nTiles = ", nTiles), logFile = logFile) .logMessage(paste0("Expectation = ", expectation), logFile = logFile) ##################################################### @@ -401,6 +406,8 @@ addReproduciblePeakSet <- function( }, threads = threads) %>% Reduce("rbind", .) 
+ .logThis(groupPeaks, "PeakCallTiles-groupPeaks", logFile=logFile) + groupPeaks <- groupPeaks[order(groupPeaks$normmlogp, decreasing=TRUE), ] ##################################################### diff --git a/R/Trajectory.R b/R/Trajectory.R index c1931e30..ceee126d 100644 --- a/R/Trajectory.R +++ b/R/Trajectory.R @@ -72,7 +72,9 @@ addTrajectory <- function( } if(sum(unique(groupDF[,1]) %in% trajectory) < 3){ - .logStop("trajectory must span at least 3 groups in groupBy!", logFile = logFile) + if(!force){ + .logStop("trajectory must span at least 3 groups in groupBy!", logFile = logFile) + } } if(is.null(embedding)){ @@ -475,6 +477,13 @@ plotTrajectoryHeatmap <- function( } mat <- assay(seTrajectory) + + if(!is.null(grepExclude)){ + idxExclude <- grep(grepExclude, rownames(mat)) + if(length(idxExclude) > 0){ + mat <- mat[-grep(grepExclude, rownames(mat)), , drop = FALSE] + } + } #Rows with NA rSNA <- rowSums(is.na(mat)) @@ -676,7 +685,7 @@ plotTrajectory <- function( .validInput(input = pal, name = "pal", valid = c("character", "null")) .validInput(input = size, name = "size", valid = c("numeric")) .validInput(input = rastr, name = "rastr", valid = c("boolean")) - .validInput(input = quantCut, name = "quantCut", valid = c("numeric")) + .validInput(input = quantCut, name = "quantCut", valid = c("numeric", "null")) .validInput(input = quantHex, name = "quantHex", valid = c("numeric")) .validInput(input = discreteSet, name = "discreteSet", valid = c("character", "null")) .validInput(input = continuousSet, name = "continuousSet", valid = c("character", "null")) @@ -693,6 +702,10 @@ plotTrajectory <- function( .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "Input-Parameters", logFile=logFile) + if(is.null(quantCut)){ + quantCut <- c(0, 1) + } + #Make Sure ColorBy is valid! 
if(length(colorBy) > 1){ stop("colorBy must be of length 1!") diff --git a/R/VisualizeData.R b/R/VisualizeData.R index 6f2daa8d..d675abd4 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -95,12 +95,12 @@ plotPDF <- function( if(inherits(plotList[[i]], "patchwork")){ - message("Plotting Patchwork!") + if(getArchRVerbose()) message("Plotting Patchwork!") print(plotList[[i]]) }else{ - message("Plotting Ggplot!") + if(getArchRVerbose()) message("Plotting Ggplot!") if(!is.null(attr(plotList[[i]], "ratioYX"))){ .fixPlotSize(plotList[[i]], plotWidth = width, plotHeight = height, height = attr(plotList[[i]], "ratioYX"), newPage = FALSE) @@ -116,7 +116,7 @@ plotPDF <- function( }else if(inherits(plotList[[i]], "gtable")){ - message("Plotting Gtable!") + if(getArchRVerbose()) message("Plotting Gtable!") print(grid::grid.draw(plotList[[i]])) if(i != length(plotList)){ @@ -124,7 +124,7 @@ plotPDF <- function( } }else if(inherits(plotList[[i]], "HeatmapList") | inherits(plotList[[i]], "Heatmap") ){ - message("Plotting ComplexHeatmap!") + if(getArchRVerbose()) message("Plotting ComplexHeatmap!") padding <- 15 draw(plotList[[i]], @@ -135,7 +135,7 @@ plotPDF <- function( }else{ - message("Plotting Other") + if(getArchRVerbose()) message("Plotting Other") print(plotList[[i]]) @@ -147,7 +147,7 @@ plotPDF <- function( }, error = function(x){ - message(x) + if(getArchRVerbose()) message(x) }) @@ -320,7 +320,7 @@ plotEmbedding <- function( } if(!is.null(imputeWeights)){ - message("Imputing Matrix") + if(getArchRVerbose()) message("Imputing Matrix") colorMat <- matrix(colorParams$color, nrow=1) colnames(colorMat) <- rownames(df) colorMat <- imputeMatrix(mat = colorMat, imputeWeights = imputeWeights, logFile = logFile) @@ -367,7 +367,7 @@ plotEmbedding <- function( .logThis(colorMat, "colorMat-Before-Impute", logFile = logFile) if(!is.null(imputeWeights)){ - message("Imputing Matrix") + if(getArchRVerbose()) message("Imputing Matrix") colorMat <- imputeMatrix(mat = 
as.matrix(colorMat), imputeWeights = imputeWeights, logFile = logFile) if(!inherits(colorMat, "matrix")){ colorMat <- matrix(colorMat, ncol = nrow(df)) @@ -401,11 +401,11 @@ plotEmbedding <- function( } - message("Plotting Embedding") + if(getArchRVerbose()) message("Plotting Embedding") ggList <- lapply(seq_along(colorList), function(x){ - message(x, " ", appendLF = FALSE) + if(getArchRVerbose()) message(x, " ", appendLF = FALSE) plotParamsx <- .mergeParams(colorList[[x]], plotParams) @@ -483,7 +483,7 @@ plotEmbedding <- function( }) names(ggList) <- name - message("") + if(getArchRVerbose()) message("") if(length(ggList) == 1){ ggList <- ggList[[1]] @@ -663,7 +663,7 @@ plotGroups <- function( pl <- lapply(seq_along(colorList), function(x){ - message(paste0(x, " "), appendLF = FALSE) + if(getArchRVerbose()) message(paste0(x, " "), appendLF = FALSE) if(is.null(ylim)){ ylim <- range(colorList[[x]]$color,na.rm=TRUE) %>% extendrange(f = 0.05) @@ -692,7 +692,7 @@ plotGroups <- function( }) names(pl) <- name - message("") + if(getArchRVerbose()) message("") if(length(name)==1){ pl[[1]] @@ -764,7 +764,7 @@ plotGroups <- function( cellNamesList <- split(rownames(getCellColData(ArchRProj)), getCellColData(ArchRProj)$Sample) values <- .safelapply(seq_along(cellNamesList), function(x){ - message(x, " ", appendLF = FALSE) + if(getArchRVerbose()) message(x, " ", appendLF = FALSE) valuesx <- tryCatch({ o <- h5closeAll() ArrowFile <- getSampleColData(ArchRProj)[names(cellNamesList)[x],"ArrowFiles"] @@ -792,7 +792,7 @@ plotGroups <- function( valuesx }, threads = threads) %>% Reduce("cbind", .) 
values <- values[, ArchRProj$cellNames, drop = FALSE] - message("") + if(getArchRVerbose()) message("") gc() .logThis(values, "Feature-Matrix", logFile = logFile) @@ -804,7 +804,7 @@ plotGroups <- function( #Values Summary if(!is.null(log2Norm)){ if(log2Norm){ - message("Log2 Normalizing...") + if(getArchRVerbose()) message("Log2 Normalizing...") values <- log2(values + 1) } } diff --git a/man/ArchRPalettes.Rd b/man/ArchRPalettes.Rd index 61cc1e60..c74ad672 100644 --- a/man/ArchRPalettes.Rd +++ b/man/ArchRPalettes.Rd @@ -4,7 +4,9 @@ \name{ArchRPalettes} \alias{ArchRPalettes} \title{List of color palettes that can be used in plots} -\format{An object of class \code{list} of length 30.} +\format{ +An object of class \code{list} of length 30. +} \usage{ ArchRPalettes } diff --git a/man/addArchRVerbose.Rd b/man/addArchRVerbose.Rd new file mode 100644 index 00000000..dadc3508 --- /dev/null +++ b/man/addArchRVerbose.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LoggerUtils.R +\name{addArchRVerbose} +\alias{addArchRVerbose} +\title{Set ArchR Verbosity for Log Messaging} +\usage{ +addArchRVerbose(verbose = TRUE) +} +\arguments{ +\item{verbose}{A boolean describing whether to printMessages in addition to logging with ArchR.} +} +\description{ +This function will set ArchR logging verbosity. 
+} diff --git a/man/addGeneIntegrationMatrix.Rd b/man/addGeneIntegrationMatrix.Rd index 9b89ffdb..dba7957d 100644 --- a/man/addGeneIntegrationMatrix.Rd +++ b/man/addGeneIntegrationMatrix.Rd @@ -28,6 +28,7 @@ addGeneIntegrationMatrix( reduction = "cca", addToArrow = TRUE, scaleTo = 10000, + genesUse = NULL, nameCell = "predictedCell", nameGroup = "predictedGroup", nameScore = "predictedScore", @@ -98,6 +99,8 @@ correlation to sequencing depth that is greater than the \code{corCutOff}, it wi \item{scaleTo}{Each column in the integrated RNA matrix will be normalized to a column sum designated by \code{scaleTo} prior to adding to Arrow files.} +\item{genesUse}{If desired a character vector of gene names to use for integration instead of determined ones from Seurat::variableGenes.} + \item{nameCell}{A column name to add to \code{cellColData} for the predicted scRNA-seq cell in the specified \code{ArchRProject}. This is useful for identifying which cell was closest to the scATAC-seq cell.} \item{nameGroup}{A column name to add to \code{cellColData} for the predicted scRNA-seq group in the specified \code{ArchRProject}. 
See \code{groupRNA} for more details.} diff --git a/man/addPeakAnnotations.Rd b/man/addPeakAnnotations.Rd index d96db49d..72775a9f 100644 --- a/man/addPeakAnnotations.Rd +++ b/man/addPeakAnnotations.Rd @@ -15,7 +15,7 @@ addPeakAnnotations( \arguments{ \item{ArchRProj}{An \code{ArchRProject} object.} -\item{regions}{A \code{list} of \code{GRanges} that are to be overlapped with the \code{peakSet} in the \code{ArchRProject}.} +\item{regions}{A named \code{list} of \code{GRanges} that are to be overlapped with the \code{peakSet} in the \code{ArchRProject}.} \item{name}{The name of \code{peakAnnotation} object to be stored as in \code{ArchRProject}.} diff --git a/man/addPeakSet.Rd b/man/addPeakSet.Rd index 26b597d7..81dcd3e6 100644 --- a/man/addPeakSet.Rd +++ b/man/addPeakSet.Rd @@ -4,13 +4,21 @@ \alias{addPeakSet} \title{Add a peak set to an ArchRProject} \usage{ -addPeakSet(ArchRProj = NULL, peakSet = NULL, force = FALSE) +addPeakSet( + ArchRProj = NULL, + peakSet = NULL, + genomeAnnotation = getGenomeAnnotation(ArchRProj), + force = FALSE +) } \arguments{ \item{ArchRProj}{An \code{ArchRProject} object.} \item{peakSet}{A \code{GRanges} object containing the set of regions that define all peaks in the desired peak set.} +\item{genomeAnnotation}{The genomeAnnotation (see \code{createGenomeAnnotation()}) to be used for generating peak metadata such as nucleotide +information (GC content) or chromosome sizes.} + \item{force}{If a \code{peakSet} object has already been added to the given \code{ArchRProject}, the value of \code{force} determines whether or not to overwrite this \code{peakSet}.} } diff --git a/man/getArchRVerbose.Rd b/man/getArchRVerbose.Rd new file mode 100644 index 00000000..0ed46322 --- /dev/null +++ b/man/getArchRVerbose.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LoggerUtils.R +\name{getArchRVerbose} +\alias{getArchRVerbose} +\title{Set ArchR Verbosity for Log Messaging} +\usage{ 
+getArchRVerbose() +} +\description{ +This function will get ArchR logging verbosity. +} From cecd8ca89ba5004715ee656d7416cc2cdcc03da4 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 21 Feb 2021 21:22:22 -0800 Subject: [PATCH 002/184] bugfix useHdf5=FALSE addImputeWeights --- .DS_Store | Bin 14340 -> 14340 bytes R/Imputation.R | 6 ++---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.DS_Store b/.DS_Store index 2b21082f6917b33f76b7b6087a8e67597128a1da..6465b807d491c2119319c1fc58709491def719fd 100644 GIT binary patch delta 964 zcmaLVTSyd97zgn0zp0(^q@Fc(bxcRow9Il%i!P*=CU(yihAg!$6kbPL^U(J2thRu=R4nbIN$l;H+{jr;EgP2 z{vB`7r-#CB_ggqdB_t+|W>PZeDRR@orbUaF7}w=YNTlvPyEs;Q5r31|A)49<ERwx#8@<4IMaB( zTp)=-wqEswwWR@7-=b@(eIn<}#f+`9>h+zPxvF-Xl2Vzsgv^J9hs7FCF6X>utL|4t zDE^eYlFO@o99YH z(1|d*(2WTCu@C!k07r2K=Wreua0OR!4cGAyPcejNc!en5<0FRg!x*Sg+&|4hK`VWs z?x5Ef@De$W{as(N^(XyXKU0$4q{hfIvs?ue%BEJ$oIBr`SG=5==DFkYxMG|rE{Wvk z$l1zxrI?8!CRo%;d8snV(igB(rBs>1m~cd`dlhbli&3_+l~zyZibxcyNo1w7W^*nQ zY;h~CsWnfNY*)K)18F+K8fpBI>cA3q%j~&FluzYx6K?&N<)t_@DXxdpmkN z26LPkHr~42A{!?!uU%2c>W#hb@kYbUSw@w6f$`PjDr(W&V$mjjmnK2%w|kVL$^Me^ zibcVcLvm&_H_I`$dA-REx~fGx^)0#<7oE0!VskVU(Nfux)8WmP<(96nzO75|+$s44 zAw5_i7mCt#VUrr_)aqlZIjLG5xj>#I2pi3MFs|{pYF$%2MP|(jQ$|P-t;+Nnj5Tl7 zqpFk;m!?alWlUb%9tvyik}3+;sk!r*ysle|C-hiUs>>7?Rx-JzEf$Gz%?T%F@Upp$ zaW~tWG(Wh88bMfZYFMnQnkx7Lp;j^g$(Ig9)~2E_P?{+Sluxs%npRRP#i@r5 z({bvjA-X{~={`NC6uqSP^oc&x7y3#+U_%zh!43z!$VFZf3Z`K;=Aaw_)S>~au?CIU zh*pHr1{Dz`(1mVn$6oBm0UX339KmUv#W|eE6kj#Ecf7Eh|IXzepW`WMvNYDZXi1PUiA+0+e-TXn7s5yMl-_ZC zKS3DF(YZLbB2KIX6{zCGf~ZCfR&i!cXvSJ>=G;P@+*ZyliWqjFawmGQ8+*`~!FL!( yaSSJL5~pwh7jX%f(T^eAz%AUyFz(<1p5QrN;k7Z~nt%`2x^w!(YgvUqJih_r|Jz&u diff --git a/R/Imputation.R b/R/Imputation.R index 863e292a..de20a719 100644 --- a/R/Imputation.R +++ b/R/Imputation.R @@ -109,10 +109,9 @@ addImputeWeights <- function( }else{ weightFiles <- file.path(getOutputDirectory(ArchRProj), "ImputeWeights", paste0("Impute-Weights-Rep-", seq_len(nRep))) } + o <- 
suppressWarnings(file.remove(weightFiles)) } - o <- suppressWarnings(file.remove(weightFiles)) - weightList <- .safelapply(seq_len(nRep), function(y){ .logDiffTime(sprintf("Computing Partial Diffusion Matrix with Magic (%s of %s)", y, nRep), t1 = tstart, verbose = FALSE, logFile = logFile) @@ -124,9 +123,8 @@ addImputeWeights <- function( blocks <- list(rownames(matDR)) } - weightFile <- weightFiles[y] - if(useHdf5){ + weightFile <- weightFiles[y] o <- h5createFile(weightFile) } From 716abd879efb8b4948328b0f089f6d8bca6e1be0 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 21 Feb 2021 21:31:06 -0800 Subject: [PATCH 003/184] bugfix imputation --- R/Imputation.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Imputation.R b/R/Imputation.R index de20a719..fad0239c 100644 --- a/R/Imputation.R +++ b/R/Imputation.R @@ -170,8 +170,8 @@ addImputeWeights <- function( for(i in seq_len(td)){ Wt <- Wt %*% W } - rownames(Wt) <- rownames(matDR)[ix] - colnames(Wt) <- rownames(matDR)[ix] + rownames(Wt) <- ix + colnames(Wt) <- ix rm(knnIdx) rm(knnDist) From 35a89aa9931f8acfdefa40ee160bf98a427448bd Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Mon, 22 Feb 2021 22:23:19 -0800 Subject: [PATCH 004/184] trim edge cases tssFeatures --- .DS_Store | Bin 14340 -> 14340 bytes R/CreateArrow.R | 3 +++ 2 files changed, 3 insertions(+) diff --git a/.DS_Store b/.DS_Store index 6465b807d491c2119319c1fc58709491def719fd..dfc4f20f6fd40dc1e8150396079621480fd782b3 100644 GIT binary patch delta 901 zcma*lT}TvB6bJD0-%4iK>pH8euH*QXskvIFsbP{@YnEwNreqo6t~0@+?y9Sn87U+f zeLyhuNkkS!QK6tIWz<6iCeceq6cHGGJSJ2R5>$6~_aS;}9_F5V&b^m2^SguXgYBa+ zR!r%0Q8tl<%bP2=R8=#5z!t67tXQd++MN37jA)xBR!K>7{VS| z+E$-x>{J64IZ<9F2#rSG*{Sij8aO6BLuPe;&mm2631T3VmCaaPi^r=`|GJQNV=>$2?q36w|WR839PNquyR&eJfB(*#Y@6M9aw^p-x- zSDL49^qm%9f*IkkAOd#8Asz}ck%!gDM;R(ngYDRXS~Q{wZZtzh8~o@-4-Q}e$8a1c za1v*58N;}WYq*2E)fmAj9%CBMFoRjV!h3wdJbvI;uul_$Bh`e)(171O0M)kod)nQt zJ~xr+N(f)sWa$5kZ?n9y82={979A6toR+E2$q9OsV~xOpDq-oA=OwH*EqVx 
z!pa1$D1`+M+TdhNBICM=e+ilYpJ;|&(g)7-6NE5M6UA9lI7tqQP|8W1C`Sdhag?2? zLp}C!7#F8$;V@qKa1g!d!(kl3P>AOg&f**{;36*JI&R=5ZrO1g XHQwQqJ`$CL&tp9?L*ke@c_H=>@|n&< delta 929 zcmaLVTSyd97zgn0zg{}iDLw0Lbu35oZkAhGWRds6QZvQKtSqKTUf%vBO+%pX*TC`uYMY5jkF)u;pv}td&|=T&zu!hNEor(lYZ3 zii%4r1JS}76C205Hk6jy-fow#SEMr*h!NIko+778VvKE5+kDQt zPSv&B~iW~)ohcME%PR?Yrj*pv0&qc`J6ZHadoJ|8~T-f370py z+Z;}}Pzxny-dnbu%bR18>=-5-Oem)V0=0b(?t>{LPPsg*p`OULOPU7@=)M&mR=&*?S2 zqZ#@@AL$c)rY`_yV-Dsb3N|D{Zb33~umnp{h*DIe4)xfG2DD%Y+Tg%0xX}e4y3qqa z25=O|a2#iF8CP))*D;J+xQ!7!#xqP}3a=2rG-mJ--?hO4#r{Jd6x8VU_IMobP6v_Y z?7z(w(>(WY^LR;m6BUGxPq3wC<}EH_3`nOBM~(8-ueM);sn3=G diff --git a/R/CreateArrow.R b/R/CreateArrow.R index d793bc69..35b9e4ea 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -854,6 +854,9 @@ createArrowFiles <- function( ) tssFlank$type <- "flank" tssFeatures <- c(tssWindow, tssFlank) + + #Trim In Case Extending beyond Chromosomes + tssFeatures <- GenomicRanges::trim(tssFeatures) #.logThis(tssFeatures, paste0(prefix, " tssFeatures"), logFile = logFile) #Counting From 3187af46f6586c496c4448c040f66fd189f0caa0 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Tue, 23 Feb 2021 22:37:41 -0800 Subject: [PATCH 005/184] bugfix handling low cells when subsetting keep 1 range regardless if its from subsetted cells because it will keep stability --- .DS_Store | Bin 14340 -> 14340 bytes R/ArrowUtils.R | 21 ++++++++++----------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.DS_Store b/.DS_Store index dfc4f20f6fd40dc1e8150396079621480fd782b3..509c71c009ad455434a55497701d856e1daa513a 100644 GIT binary patch delta 878 zcmZ{iOH5Ni6o%(7uX|;HX@M3l1&ZYrAW}h~fDbAL5hNM|0>XxRFXl?2NDC-YD^kP8 zMDhCAm|!#}%0f{PR$w&h!i7pCF(SIsg&Q^+5@UP}Z5J+N7Bgqg$;_O5-}rm|y^}_L z>ZG&RrFy(J+Y)qP;SrHhg18}?#z?h$YU`*rW3p?5db_4eDo%(snKQET3b&MRuU?E} zdP95yrP@SBo^M1H1Bs+$NowhLs>k|O?+IoVghd0>$I&#&BC0g3z{>ukKU$EC;U1&XH)NM;%gFhY-9tzkt%lF11Hyp3qaa%~Wb~bvP9llZ!C0?-b0Nbw668wkYz;{ ztRyt7Rk^{av8SV35gCX;Qb{(klPc0kI*FSMlk;SpOp`e>PhOI@&&(Nq&<* 
zAP`|d0uo_FIxN@-D{Lr71uC%{`_WW~X0)IcP8@-Xqv(PcLpX_3IE_(Uz!)w;!*$%o z9o)qPrZ9~e%;E)J;|&(@0n7M;6@0^Y?OI{FZI!nRa=_&o=ykf>PC|57{_oZtaGR~o zn#w|Qi{jtM29+inO_r>@O~v+YJ2gk9pjnoDSzz+{m{1gjNvtH`CX@L+WZbqHJyiwstG(Hc>&8xa{<7dwaUTc7#@ zSt4J_Ds+fK45uw|(z%>+DXLJzDc7MM4LHa#A3_`2q5Ok-0E0M=VVs2@=Q!|-9Jq$d zxQcPyz)jrZ;P2r+9+cuCCqIXIJi${u!*jgCJ1hr3hYq3dvNyDb7JQuY8%M>lGsdZC H%Ma6EEcL~Q delta 918 zcmaKqT}V@57{{OgOgpE&>Y2?sk4rZ+OHKU9EVFEDWtpaZEV2)x&2xf7b6aj&W@M0H zR9+qWBqEEVs7TO^GUy_LlIW&IAkjtiaamAZNKn1o3<;w5;ylm!Kj(eV^Sr-*U#KrM zWHIHA`9gjz94RlKfFWr{a>`6WoMmLG^4f-VjZI9?bGY?V^Af$wVK=8`*&K6=%c^SX z+|hK&l)=o*q9fDVyIoV%Kt$W4sU4!pa8&dJyzOcJnS;uSepE)awrG2Pvv@~09%rf{CjJYnsiDtQGlu9Pfb%(GVu zC{kGLO-SVxOy1)6`c%K9h(i3UMb%8++NE}cwO~MMoD!GTGP$)a*xt^24x6xm%du%> zr@T^-o(j8SKX3AeBP}5{Fx^(YAZ&|OT#BM9LcVhm#aF&_J_(78NMBxJ?VUrFw2GQ& zH+4`C4bWK{q!AjWd-Rx|Q zHewTYU^jedgMxO1(TOhX#}OREah$+OoWey6;xew_7H(^}gCRV^Q#`{sqIiY3_=GR` zj-QE+4M;?4o{Ccezjy-LpEn=>t0_PIr+ zm5Z0TbXP%(K52IK8VhZV!xp1C*|N2`&}Nei+y~qlIjqc9A#$WrQvB6c#i|9)PD)Nx zYgnzw@k%oi)jH-BI7BHWe$YxcV-m&OCjLz$K2H9h**pP#b&2#JDC6{!-f Date: Tue, 23 Feb 2021 22:41:26 -0800 Subject: [PATCH 006/184] update --- .DS_Store | Bin 14340 -> 14340 bytes DESCRIPTION | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.DS_Store b/.DS_Store index 509c71c009ad455434a55497701d856e1daa513a..6c7721e28182e25bebe48ac72f527b5f8b934978 100644 GIT binary patch delta 14 VcmZoEXeromRDjW9^DzNCbpS5}1(g5* delta 14 VcmZoEXeromRDjWZ^DzNCbpS5@1(W~) diff --git a/DESCRIPTION b/DESCRIPTION index 40311f0c..0f411743 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: ArchR Type: Package -Date: 2020-11-23 +Date: 2021-02-23 Title: Analyzing single-cell regulatory chromatin in R. 
-Version: 1.0.1 +Version: 1.0.2 Authors@R: c( person("Jeffrey", "Granja", email = "jgranja.stanford@gmail.com", role = c("aut","cre")), person("Ryan", "Corces", role = "aut")) From a3c444db870aefa07b7f172e22d5610237b09bf6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 4 Mar 2021 07:28:02 -0800 Subject: [PATCH 007/184] Update issue templates Remove issue reports for feature requests and documentation. These have been moved to discussions --- .github/ISSUE_TEMPLATE/bug-report.md | 10 +++++-- .../ISSUE_TEMPLATE/documentation-request.md | 30 ------------------- .github/ISSUE_TEMPLATE/feature-request.md | 24 --------------- 3 files changed, 7 insertions(+), 57 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/documentation-request.md delete mode 100644 .github/ISSUE_TEMPLATE/feature-request.md diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index adc1b1c9..3dd48e19 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -7,7 +7,11 @@ assignees: '' --- -Before you submit this issue please update ArchR to the latest version and make sure that this issue has not already been fixed in the latest release. ArchR is in *beta* and we will fix problems as they arise. To update ArchR: +This is an issue template made by the developers of ArchR. You MUST follow these instructions. + +Questions related to how to use ArchR or requests for new features should be posted in the Discussions forum (https://github.com/GreenleafLab/ArchR/discussions). + +Before you submit this Bug Report please update ArchR to the latest stable version and make sure that this issue has not already been fixed in the latest release. ArchR is still in active development and we will fix problems as they arise. To update ArchR: devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) @@ -22,13 +26,13 @@ ArchR has a built-in logging functionality for all complex functions. 
You MUST a A clear and concise description of what the bug is. **To Reproduce** -To help us optimally address your issue, please try to reproduce this issue using the tutorial hematopoiesis dataset and provide us the command(s) to reproduce your bug. +To help us optimally address your issue, please try to reproduce this issue using the tutorial hematopoiesis dataset and provide us the command(s) to reproduce your bug. Our first question to you will be "can you reproduce this with the tutorial dataset" so please do this. **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** -If applicable, add screenshots to help explain your problem. +If applicable, add screenshots to help explain your problem. Do not screenshot code or text but embed this in markdown using triple-backticks. **Session Info** If you do not have a log file because the function that caused the error does not produce one, please paste the output of "sessionInfo()" here. diff --git a/.github/ISSUE_TEMPLATE/documentation-request.md b/.github/ISSUE_TEMPLATE/documentation-request.md deleted file mode 100644 index 31511d14..00000000 --- a/.github/ISSUE_TEMPLATE/documentation-request.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -name: Documentation Request -about: Help us improve ArchR's documentation -title: '' -labels: documentation -assignees: '' - ---- - -Before you submit this issue, go to the ArchR user manual (https://www.archrproject.com/bookdown/index.html) and use the search function (magnifying glass in the top navbar) to search the manual for the content you are looking for! - -PLEASE FILL OUT THE RELEVANT INFORMATION AND DELETE THE UNUSED PORTIONS OF THIS ISSUE TEMPLATE. - -### If this is an issue with an existing explanation: - -**Where is the problematic documentation?** -For example: The explanation of [...] 
in Section 4.3 of the user manual - -**Describe what is unclear or confusing** -A concise and clear explanation of what documentation could be improved - -### If this is an issue with documentation that is absent/missing: - -**Describe what material you feel should be explained** -A concise and clear explanation of what documentation could be improved - -**Where do you think this documentation would belong?** -For example: This would belong in Section 4.3 of the user manual. -OR -You should create a new section in the user manual in Chapter 4. diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md deleted file mode 100644 index c414496e..00000000 --- a/.github/ISSUE_TEMPLATE/feature-request.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -name: Feature Request -about: Suggest an idea to enhance ArchR -title: '' -labels: enhancement -assignees: '' - ---- - -Do not use this form to report a bug in ArchR! Instead, use the "Bug report" option. - -PLEASE FILL OUT THE RELEVANT INFORMATION AND DELETE THE UNUSED PORTIONS OF THIS ISSUE TEMPLATE. - -**Describe the problem that your feature request would address.** -A clear and concise description of what the problem is. Ex. A common analysis that is performed but not currently supported is [...] - -**Describe the solution you'd like** -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** -Add any other context or screenshots about the feature request here. From af56a4924314bbf43d2fb06dda35cda56d2b4eca Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 7 Mar 2021 15:47:33 -0800 Subject: [PATCH 008/184] maxDist coAccessibilty is 1/2 the size it should be. 
--- .DS_Store | Bin 14340 -> 14340 bytes R/IntegrativeAnalysis.R | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 6c7721e28182e25bebe48ac72f527b5f8b934978..9dba1b5a5ff21095dee9588709f821d9a0e5aa02 100644 GIT binary patch delta 865 zcma))OH30{7=`ZvfjeOWQ=l;AHI^1j4WI%Mhytxec@;4N5rv?I8O;Rxpe;V|Q4J)< zD65GE(Va0q>cT|C_+A(YF~)_|g)Uqe6E_-7bYooTofZ?8-o+&Uf9}mW`OZuQrUG{t z1k`Ga!l5*=wXLG};d^^LArreMh8bNE~h zMlrHal@(uDJ*X-{&X6?8_4qtqC4NOTi3L=!(7y2@b!=P>AB$xPhRmX3kspvfVWrbA z>#txwI=u&+#M!pSDuIl)7FLM%#RQ_fd%LWt`bQGF3P;kAjcrh?4% zj(Uca(Zp&qUq^)wZqH#Q7*hSd#BvMYKn*D8`iA{pFAE-uTQ@mGaZtB*dqUx^fZ}7Z zmYBaDxS7S;t7}eK)i-EG(OhV^SrQzRhdv58kw5P zI!mbATudeUPsOc*uH7_d{3r33WSM+qw10yz#_7mrl+BEC1!@^#Cz`PxJJ5w)jBz)5 z8C5 zW0rSB%T@cTs+lorbsI&N8e_$pW{I^bjx5*a?M1sP=Hs}@9B*OF7<0ISLy{C0l5Dcv z)Yqn;=+gtIc?PBPW^RgOsdAdcpVC&<6VPgXs@|?^YS@ue0W?PVhP?AEN z#7#;`J!vOCGDt3w5i&uh$TWFP-jgrnD_JDVnAU%v0v_ks_Yd=on6b5k~!?+;eUJ-B&T*nQJU=+7;M?ii6+e19Y Z69IV&(|C>-c!?Rj!3QjiA4r^Z`~ll4!7=~< diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 543d1eff..a34ae4f0 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -760,7 +760,7 @@ addCoAccessibility <- function( #Create Ranges peakSummits <- resize(peakSet, 1, "center") - peakWindows <- resize(peakSummits, maxDist, "center") + peakWindows <- resize(peakSummits, 2*maxDist + 1, "center") #Create Pairwise Things to Test o <- DataFrame(findOverlaps(peakSummits, peakWindows, ignore.strand = TRUE)) From fe4810162e293d98101c277f4cb5ff434280829f Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 7 Mar 2021 18:46:03 -0800 Subject: [PATCH 009/184] fix NA's in bam/tabix and max/minFragSize --- .DS_Store | Bin 14340 -> 14340 bytes R/CreateArrow.R | 47 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/.DS_Store b/.DS_Store index 9dba1b5a5ff21095dee9588709f821d9a0e5aa02..3d51ef3aac9b36926a8d98f5adec91e6f926f1c5 100644 GIT binary patch delta 878 
zcmaKpOGs2v7{|}wGS2O!p7Aw$y*{SV85zxImRk83OU!5GMD{X9?+u1@W^_hDDQ2P2 z!)SA(q6n)+kwyijR8)%=rnCrZ2!dL)aTByDD%#kYJ5nvGvpAgp_vd`y$Nx9(ANSua zFi9*x%TMMCvsB)%aU^>K0^y=;Sk*DK`;6+h#-bA zj1Z3FBu-%hXK@)Uwz^oXUZU4rc zB1v!Kqij=B%{h6FqVmcuPQ5<2g9^AAN+!$JCmps>M!KA4S*s{Kk|7O8q|L4{4#8P6 zWPx3{F~W=dV`mrt24o%`{C}|hkb0J z5K*0m!akrwM3fzZ%)aXog+z47?NEmf5p*#M>JlC5ommfFo5L{A^PiXJncr`wgVVt~ z>n#FXEw#0`buf=E+ueF?MxDNtWwrNfBLRPpHmQmbW>d3r?M0=PHT8||Sej@_XBjM0 zzgj9q_h^dhk7)ZfH6&OPri5O<*Qc7VNH!^liVoU8F|3VGXpzIQbkUNQmoKqF#T!w( z0*Y};5${pKObYTPX~0O_A(h`1O5>B9Qi&jXX>XXX67Q6;3PB7Dy`!2xs>Ek)WYttu zguXHFusRlRZDMs)>=bq#P(xuY;Ey*qvj%EGnb1EH@cH=SVRLkoOOl3+(H?I&(j8R& ze60mO-&nYXueICAnia)ZiaN;hD6wmM+3wVb`#PA-U@D)q=rvJ6c`YWrG zJC|GATH9IYO6T%Q=O0oH(bkMLnOSR#O3JHhw>0a`h22!LW#{DPF= start(tileChromSizes[x]),] @@ -1622,13 +1636,18 @@ createArrowFiles <- function( .logThis(unique(dt$V4), name = paste0(prefix, " .bamToTmp Barcodes-Chunk-(",x," of ",length(tileChromSizes),")-", tileChromSizes[x]), logFile = logFile) } + #No NAs + dt <- dt[!is.na(dt$RG), , drop=FALSE] + dt <- dt[!is.na(dt$start), , drop=FALSE] + dt <- dt[!is.na(dt$end), , drop=FALSE] + #Care for Break Points - dt <- dt[dt$start >= start(tileChromSizes[x]),] - dt <- dt[dt$end - dt$start >= 10, ] #Minimum Fragment Size + dt <- dt[dt$start >= start(tileChromSizes[x]),, drop=FALSE] + dt <- dt[dt$end - dt$start >= 10, , drop=FALSE] #Minimum Fragment Size #Check for valid barcodes if(!is.null(validBC)){ - dt <- dt[dt$RG %in% validBC, ] + dt <- dt[dt$RG %in% validBC, , drop=FALSE] } if(all(!is.null(dt), nrow(dt) > 0)){ @@ -1790,8 +1809,10 @@ createArrowFiles <- function( outArrow = NULL, genome = NULL, chromSizes = NULL, - minFrags = 500, - maxFrags = 100000, + minFrags = 1000, + maxFrags = 100000, + minFragSize = 10, + maxFragSize = 2000, sampleName = NULL, verbose = TRUE, tstart = NULL, @@ -1922,6 +1943,12 @@ createArrowFiles <- function( #Order RG RLE based on bcPass fragments <- fragments[BiocGenerics::which(mcols(fragments)$RG %bcin% bcPass)] 
fragments <- fragments[order(S4Vectors::match(mcols(fragments)$RG, bcPass))] + + #Check if Fragments are greater than minFragSize and smaller than maxFragSize + fragments <- fragments[width(fragments) >= minFragSize] + fragments <- fragments[width(fragments) <= maxFragSize] + + #Length of BC lengthRG <- length(mcols(fragments)$RG@lengths) if(x == 1){ @@ -2002,6 +2029,12 @@ createArrowFiles <- function( #Order RG RLE based on bcPass fragments <- fragments[BiocGenerics::which(mcols(fragments)$RG %bcin% bcPass)] fragments <- fragments[order(S4Vectors::match(mcols(fragments)$RG, bcPass))] + + #Check if Fragments are greater than minFragSize and smaller than maxFragSize + fragments <- fragments[width(fragments) >= minFragSize] + fragments <- fragments[width(fragments) <= maxFragSize] + + #Length of BC lengthRG <- length(mcols(fragments)$RG@lengths) if(x == 1){ From 49f87b77731545ef7f3e8b196d5088f096e15ae9 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 7 Mar 2021 18:47:03 -0800 Subject: [PATCH 010/184] Update createArrowFiles.Rd --- man/createArrowFiles.Rd | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/man/createArrowFiles.Rd b/man/createArrowFiles.Rd index 14648b84..e06f9ff5 100644 --- a/man/createArrowFiles.Rd +++ b/man/createArrowFiles.Rd @@ -14,6 +14,8 @@ createArrowFiles( minTSS = 4, minFrags = 1000, maxFrags = 1e+05, + minFragSize = 10, + maxFragSize = 2000, QCDir = "QualityControl", nucLength = 147, promoterRegion = c(2000, 100), @@ -69,6 +71,10 @@ Cells containing greater than or equal to \code{minFrags} total fragments wll be \item{maxFrags}{The maximum number of mapped ATAC-seq fragments required per cell to pass filtering for use in downstream analyses. Cells containing greater than or equal to \code{maxFrags} total fragments wll be retained.} +\item{minFragSize}{The minimum fragment size to be included into Arrow File. Fragments lower than this number are discarded. 
Must be less than maxFragSize.} + +\item{maxFragSize}{The maximum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize.} + \item{QCDir}{The relative path to the output directory for QC-level information and plots for each sample/ArrowFile.} \item{nucLength}{The length in basepairs that wraps around a nucleosome. This number is used for identifying fragments as From c08c9131b2e818764a463e7f3a83a203480cc02c Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 7 Mar 2021 18:47:39 -0800 Subject: [PATCH 011/184] documentation --- R/CreateArrow.R | 4 ++-- man/createArrowFiles.Rd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index e15beb59..f5c637bf 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -22,7 +22,7 @@ #' @param maxFrags The maximum number of mapped ATAC-seq fragments required per cell to pass filtering for use in downstream analyses. #' Cells containing greater than or equal to `maxFrags` total fragments wll be retained. #' @param minFragSize The minimum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize. -#' @param maxFragSize The maximum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize. +#' @param maxFragSize The maximum fragment size to be included into Arrow File. Fragments above than this number are discarded. Must be greater than maxFragSize. #' @param QCDir The relative path to the output directory for QC-level information and plots for each sample/ArrowFile. #' @param nucLength The length in basepairs that wraps around a nucleosome. This number is used for identifying fragments as #' sub-nucleosome-spanning, mono-nucleosome-spanning, or multi-nucleosome-spanning. 
@@ -1261,7 +1261,7 @@ createArrowFiles <- function( dt <- dt[!is.na(dt$RG), , drop=FALSE] dt <- dt[!is.na(dt$start), , drop=FALSE] dt <- dt[!is.na(dt$end), , drop=FALSE] - + #Care for Break Points dt <- dt[dt$V2 >= start(tileChromSizes[x]),] diff --git a/man/createArrowFiles.Rd b/man/createArrowFiles.Rd index e06f9ff5..4b31cfa2 100644 --- a/man/createArrowFiles.Rd +++ b/man/createArrowFiles.Rd @@ -73,7 +73,7 @@ Cells containing greater than or equal to \code{maxFrags} total fragments wll be \item{minFragSize}{The minimum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize.} -\item{maxFragSize}{The maximum fragment size to be included into Arrow File. Fragments lower than this number are discarded. Must be less than maxFragSize.} +\item{maxFragSize}{The maximum fragment size to be included into Arrow File. Fragments above than this number are discarded. Must be greater than maxFragSize.} \item{QCDir}{The relative path to the output directory for QC-level information and plots for each sample/ArrowFile.} From 968e4421ce7187a8ac7ea1cf6077412126876d5f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 8 Mar 2021 07:24:15 -0800 Subject: [PATCH 012/184] Update issue templates --- .github/ISSUE_TEMPLATE/bug-report.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index 3dd48e19..d978275e 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -1,6 +1,7 @@ --- name: Bug Report -about: Create a bug report to help us improve ArchR +about: Create a bug report to help us improve ArchR. NOT to be used for questions + or feature requests! 
title: '' labels: bug assignees: '' From 9f008ad2da9fd0a273ac5927c41eb4b0db4ff32a Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 10 Mar 2021 14:29:01 -0800 Subject: [PATCH 013/184] improve offsetPlus and offsetMinus description offsetPlus and offsetMinus only apply when input is a bam file. --- R/CreateArrow.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index f5c637bf..fae3f845 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -45,8 +45,10 @@ #' @param bamFlag A vector of bam flags to be used for reading in fragments from input bam files. Should be in the format of a #' `scanBamFlag` passed to `ScanBam` in Rsamtools. #' @param offsetPlus The numeric offset to apply to a "+" stranded Tn5 insertion to account for the precise Tn5 binding site. +#' This parameter only applies to bam file input and it is assumed that fragment files have already been offset which is the standard from 10x output. #' See Buenrostro et al. Nature Methods 2013. #' @param offsetMinus The numeric offset to apply to a "-" stranded Tn5 insertion to account for the precise Tn5 binding site. +#' This parameter only applies to bam file input and it is assumed that fragment files have already been offset which is the standard from 10x output. #' See Buenrostro et al. Nature Methods 2013. #' @param addTileMat A boolean value indicating whether to add a "Tile Matrix" to each ArrowFile. A Tile Matrix is a counts matrix that, #' instead of using peaks, uses a fixed-width sliding window of bins across the whole genome. This matrix can be used in many downstream ArchR operations. 
From 921d8dbfc986cf8838f7219af383758bc20d74a4 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 28 Mar 2021 21:04:30 -0700 Subject: [PATCH 014/184] check for peakset matched in ArchRProj for addBgdPeaks --- .DS_Store | Bin 14340 -> 14340 bytes R/MatrixDeviations.R | 16 ++++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/.DS_Store b/.DS_Store index 3d51ef3aac9b36926a8d98f5adec91e6f926f1c5..22ad7da9874ee5b18c6c49e75e81f542cc5dd9d8 100644 GIT binary patch delta 955 zcmaKqTSyd97{|Z=nl_V19d*@p9CO9e(iL~j3ubv~%kDQ@$u5Mu%>+a1uDNQNl|4vB zB?LoPqO#~lMS*2>p&lYAiC)@>A|j&8V?y;1L3L(#A9|3^!}-4R|IRsl=luQy9RnRV zY9-@h=76=*S?REsx^_D3eJ)ou&t1-Pqdu$30c!S#dfYyL&_`ss=j-t6p)RNM4J>gJ z;u9tc;-o~DBrjOFXz>zeogqCKkQT?pnYB!A-56|d=RJo~;o!3Q8skBlt=LR>4^4uw#_XX{ zRW(&8XlQ#+<|t^G7Kv0qWXvwH_2*JK&7~#OMxE40gLI6B=q8QOZF)q{C`@nYBYmYY z`bOXBCoD)r5|WVu8?uoD1=CTE8K^)57GfDzU?rNd4sGyZBUH2_gl_a;D|TZa_TvB! z;xJBO2xoB)S8+|pb=<&1Ji$|pB8-=Khc6hz5B!eyX+bp7vRI4-{NYK|2OTjVoEhH- z%6!uZ|N6k1{yN$GrgUpYW|n>G^wRR`S&c^Hl;r}4oulNkJno4?BQGPpKe13@9J3hC z$(C)!g^D6sxC6{>tW0r<9HtZ(St=E*O5oI__-M6;)ruUfG$C58V+{hwCnZEKn&W0n zBGYH$_-LY!^aPA~CDMNojnWJH!14S7A%UYw=UDPNk}}kwo+EK%E}F2MLsh9S9pt0 U#-Hpwd>#&D?h%I*?LV{r0@{|^SO5S3 delta 989 zcmaLVZA?>F7zgm@Uj*;*D4wFwdZ|#tn_{Cao!}cNbR)buCbCTD%WS!KNwd;cT8MzP zB=|a(CCi<EyXxSX{cYZ3{j*vdV1EeCoA z28RwOj(lm{bQZhEhzQgfiuL;fp-6zpG9HMAg0Z;UeGitb1=%?Z6}E`;C2Dg^b8B0> zF|Ottf7x8dA*;_QDOqM?>|(CXQBq#D?v0K0zSL4-&0DsdbEDN>l^kI#ov^P^)m?po z;K9CNd_a^cN-9rSm+~Uj$;70xSM$g9?GY{bVNlo76}3n$Qbib|Dt z#e##n@GIuUYh0XneH099B9{4-+r!m&L;ips5}KDO>2vFAxcXkd-Wv->!lEsc+_agi zySgJiJ$g7Ev%)3Qb4?k4=Gl{T3)w@HqU@gQp+VDhO(~U=Ngu)jwy-R3rT z0;JGNa#JI<(te840FBZaI#0jQBu&v3x%P4C$dq~I-0RQ{r}dEv~|Vn*7=Ox%{77Z3vABvsj*d5(n_8^dv^pr zG$yjm>L$L0Nlmf9tZwBEiZl~BX7x?p#H^MPCTKp7q$x5V$28rdel1yR>m+&io!)08>-$ Date: Sun, 28 Mar 2021 21:47:38 -0700 Subject: [PATCH 015/184] add check for default assay in seurat object. 
--- .DS_Store | Bin 14340 -> 14340 bytes R/RNAIntegration.R | 6 ++++++ 2 files changed, 6 insertions(+) diff --git a/.DS_Store b/.DS_Store index 22ad7da9874ee5b18c6c49e75e81f542cc5dd9d8..e10ad6b0d5facb516d5c7c93bb0ef0c107544f6c 100644 GIT binary patch delta 739 zcmZvZTSyd90EYM9b=Py+i9MsG$6azltxYu7QpyrvCoVasWs(8OIOBx`*}3i3+IB4p6er?uQkwNyiyK?le5 zgleDgO;R0UDhi9RjU*!cPwA#_NsvScD3KEFD#mI!3__5;8=$s zHlPxl(Tok%{L+#T)0b;!;zrT;jVn~ha|0kZZ$5F4>oKx;z>c48O$u}5Ir zi?EF?f+)ri6Y!EaifJ6j37o_^oW})$?~=ebFYw*KP29m<+{1l5z+*hcGdveSUz(Rg a#d7Z-V7VtgFPtNQ#magSmCa6@K8HA0Y#29lCk}6=1LR!u%WcHZsQG*F>drZ~$>YAE%csXBANS~$qhc)q5Q`R-^6`TeP z{gB3jL{2N7YDxn`dQ@eGoKd{Bly`@NAuY^QnWPGn;@rA=%6mq&VMC8aS(~K_v9XEr z-oaQTqD2#i1Jy#dLUA8&CTy1UTR(3P8i~%h7X25ig^=Brvq@DomAHHZpC#*Y`BtW< z;eMa4-w^05#FaH14tP*90ASc^LN(25T1z)p0b4+98c z5Go=tFoIDWz%iV}DV)X`oWm7d#|_-XeLTQJOyW6a@d~ez!aIDzH+;tr{K}-TVJ=Na zW;Xl{hu`80Qdig*jfcXqkR;iz{l8RYi`DA9QeT)?tK7-5Qtn*3Oi`GHilnU7?pCNM zQ%)~cZsFO4BbqOxHA<~4Dq~scRjH%(M3lpFGHwHHl0|(iJL7JkJ|aqDx#^3V{gg7v zVe6LVd9>~GaGUErP661vuM#zAz(xVnkIiU7r@*-j0d%8ZAPowrLjq|OF&xAg4&w-p z;=~--vpA0nxQI)*j9a*kJGhH`c!Vc-ifPQ?8D8QI-s2;_OzGDjewO?J DuB@`r diff --git a/R/RNAIntegration.R b/R/RNAIntegration.R index 8e11b27e..fee11668 100644 --- a/R/RNAIntegration.R +++ b/R/RNAIntegration.R @@ -203,6 +203,12 @@ addGeneIntegrationMatrix <- function( seuratRNA$Group <- paste0(seRNA@meta.data[,groupRNA]) rm(seRNA) } + + if("RNA" %in% names(seuratRNA@assays)){ + DefaultAssay(seuratRNA) <- "RNA" + }else{ + stop("'RNA' is not present in Seurat Object's Assays! Please make sure that this assay is present!") + } gc() if(!is.null(groupRNA)){ From 66d89887e297d9ce4bdf127816d4d3265a088bc7 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 28 Mar 2021 21:57:18 -0700 Subject: [PATCH 016/184] update check in iterativeLSI to 50 cells to bug handle. Additionally print checks if the error actually occurs. 
--- .DS_Store | Bin 14340 -> 14340 bytes R/IterativeLSI.R | 8 +++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index e10ad6b0d5facb516d5c7c93bb0ef0c107544f6c..df3c3db35c59395b878b3cb1c962fe9933022de8 100644 GIT binary patch delta 792 zcmaKpOGs2v9LCRQ56^{@o*7@`@%k7aV~t{_spHtgN0@113nw+D7`xZFsWaw`R#J&! zviRb z=Uy5aoR`m7bGPBscwlK&;W{QfJ>E{ehiiKj8tyA8Wuj$J?++M$A8%L^-4#rb_vWf_y25juMrkn)2s#cWC)`Zy?yzr~Ce%RZD4G$f?paT{FL0ZNmx&p(K?!h>KK` zX3|CcWQ3d|<7A4=kXiDAydfXSC$c~m$xrf&{04ynE8>xWM5H4FnaDyR%Hc)@wqgf7 zXhAF5(20F8upiwBVhBfY6vuE9r*RfzFmVyra2+?&Fo8)-;Q^-c6tD0ab9j#r_>2X7 z!}l?fZa#}!6Rn10bC^w{Y$>;5YfL1ogv6wb9A`nXYh$%pmEA;TSSe~MOE<@zj^UJK zVY6qdDi1NC^P$p_t1=Fmn-VVba-|z3y-*)3RErdu4qp~7)e=@lxu)fsmZB21 zl4m^{(1@Lq=q|LQ1NuKy2XPRGFoNS4#R;fTCQTY&LM%m?|ha`H+?WKnvBIqS3s2-x1&g}HkQ}b}(f4+0h_woB?CNvYe z?=q)RPp7A=TbQL*ZF`kVb6Fa+NqdpSl$~!ctEjGPY;KEZh~`YeBG|O+6*RV2m6bqL zJ*X;S-kjv+`vX3|k~l(hXdWk)a03$~>iC2jJsi&z%^3xSR2Y(dQKdI1>$ha_A;%|_ z;zCLXb)_w=uvO+b!^uINyy)ZlBkHvAuuv(ucrn8FkE(%~YFgZfblQ}X^7ReHML|&3NBu?Iw zPvk54M!u6D3?3Ih!kKudcT*I0))TU;prn1Tm zbxRvliT+RVjJY;%wf+b6YqCl{GRVI{7}|7XF}QXHw+i(PtQ+mvif!n_4u*CYya}{E zj9?UF3~&%rn8r~Y!*QI(8JuN^&ojhx4DnT5!%f`A9o#kH9u~2L$9RHeJk>5#XFnLW H-Ln4y-ulH? 
diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 57300734..f4b98137 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -1096,7 +1096,7 @@ addIterativeLSI <- function( #.safeSaveRDS(mat, "temp.rds", compress = FALSE) matO <- mat[, idxOutlier, drop = FALSE] mat <- mat[, -idxOutlier, drop = FALSE] - mat2 <- mat[, head(seq_len(ncol(mat)), 10), drop = FALSE] # A 2nd Matrix to Check Projection is Working + mat2 <- mat[, head(seq_len(ncol(mat)), 50), drop = FALSE] # A 2nd Matrix to Check Projection is Working colSm <- colSm[-idxOutlier] filterOutliers <- 1 } @@ -1196,6 +1196,12 @@ addIterativeLSI <- function( cor(pCheck[,x], pCheck2[,x]) }) %>% unlist if(min(pCheck3) < 0.95){ + print("Check1 :") + print(head(pCheck)) + print("\nCheck2 :") + print(head(pCheck2)) + print("\nCheck3 :") + print(pCheck3) stop("Error with LSI-projection! Cor less than 0.95 of re-projection. Please report bug to github!") } #Project LSI Outliers From 1823c9925ceacc69ef0d809536c5c1ae7856ff33 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 28 Mar 2021 22:02:43 -0700 Subject: [PATCH 017/184] Update IterativeLSI.R --- R/IterativeLSI.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index f4b98137..eebe6b51 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -1196,13 +1196,10 @@ addIterativeLSI <- function( cor(pCheck[,x], pCheck2[,x]) }) %>% unlist if(min(pCheck3) < 0.95){ - print("Check1 :") - print(head(pCheck)) - print("\nCheck2 :") - print(head(pCheck2)) - print("\nCheck3 :") - print(pCheck3) - stop("Error with LSI-projection! Cor less than 0.95 of re-projection. Please report bug to github!") + .logThis(pCheck, "pCheck", logFile=logFile) + .logThis(pCheck2, "pCheck2", logFile=logFile) + .logThis(pCheck3, "pCheck3", logFile=logFile) + warning("Warning with LSI-projection! Cor less than 0.95 of re-projection. 
Please report this to github with logFile!") } #Project LSI Outliers out$outliers <- colnames(matO) From 803fba670a689ed4d6c1afb9b7c281a587206421 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 28 Mar 2021 23:02:10 -0700 Subject: [PATCH 018/184] checking fragments for tile matrix creation --- R/MatrixTiles.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index fa22896f..0c6ecb1a 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -204,6 +204,29 @@ addTileMatrix <- function( .logThis(min(matchID), paste0("MinCell_TileMatrix_",z,"_",chr), logFile = logFile) .logThis(max(matchID), paste0("MaxCell_TileMatrix_",z,"_",chr), logFile = logFile) + #Check Fragments for validity in case + nf1 <- length(fragments) + + #Check 1 + fragmentsBad1 <- fragments[!(start(fragments) >= 1)] + fragments <- fragments[start(fragments) >= 1] + + #Check 2 + fragmentsBad2 <- fragments[!(end(fragments) < chromLengths[z])] + fragments <- fragments[end(fragments) < chromLengths[z]] + + #Check N + nf2 <- length(fragments) + if(nf2 < nf1) + warning("Skipping over fragments not within chromosome range on Chr:", chr) + .logThis(fragmentsBad1, "fragmentsBad1", logFile = logFile) + print("Bad1 (Start not greater than 0): ") + print(fragmentsBad1) + print("Bad2 (End greater than chromsome length): ") + .logThis(fragmentsBad2, "fragmentsBad2", logFile = logFile) + print(fragmentsBad2) + } + #Create Sparse Matrix mat <- Matrix::sparseMatrix( i = c(trunc(start(fragments) / tileSize), trunc(end(fragments) / tileSize)) + 1, From faf6debd16fe7a92b48a13425e8719caa38dbbcc Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 4 Apr 2021 20:18:59 -0700 Subject: [PATCH 019/184] bugfix inputFiles improperly being called instead of ArrowFiles --- .DS_Store | Bin 14340 -> 14340 bytes R/MatrixGeneScores.R | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 
df3c3db35c59395b878b3cb1c962fe9933022de8..ff9ed4861ffa9f704e0e0d4294960fedcc5e653e 100644 GIT binary patch delta 792 zcma*kO-PhM7zgm@?^o~ZWX^88uC5!dNhW3W?xwb>X}MOWlA=a-NZZv7UR-w9-d#b> z>>yDY9egVz73)%zeSs-ObqV^S@SxHmymY8TK^L*WOAWh1mx$&tFwe|9%>4cXp@GoD z5{~zJDL-w>?Co_pTm|{|EzTlmU%tK9<#g#Y+=5Yk#-8OKBO*|PU+Z#v{Xs8L+yigO zuWDgM`G~mqgv6v(lAO#~inZ!MRdr2mE9Vi-*WmMe!fKbV(i8S+L#E?$v)UF6G^#zm zp>(rFmM7L&Y}p$&Z7bMW=AKXG2IE>2WBNBko}Dktaq*1mS*Xb5M8EUNJKJ>!qJXAY=ui0mZJi0>=lmnXh0*Ha1^ak$u{^AKoB8x zqX#F^hyJMNATHn{hH-U0u3-dYxQSc1EoARv3e$LuCwPiyc!?Rz>2+yYSeOi^UC#au DE|g?L4NY8r{P*f4D6W4D)tXNuIprOlye;kHtcfF5a|MR@X3b z*VNjla*#rW8lnlBq$zqu@8}bKrUhE0pY)49}Es!KF$6-%mRV!+&CV0JO&b_r*$$0q(~{BVWfh79dChY Date: Sun, 4 Apr 2021 20:25:54 -0700 Subject: [PATCH 020/184] bugfix missing logFile in addIterativeLSI --- R/ArrowRead.R | 3 ++- R/IterativeLSI.R | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 20bbdb6c..35ac08f8 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -719,7 +719,8 @@ getMatrixFromArrow <- function( tmpPath = .tempfile(pattern = paste0("tmp-partial-mat")), useIndex = FALSE, tstart = NULL, - verbose = TRUE + verbose = TRUE, + logFile = NULL ){ ######################################### diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index eebe6b51..0fc60880 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -544,7 +544,8 @@ addIterativeLSI <- function( cellNames = cellNames, doSampleCells = FALSE, threads = threads, - verbose = FALSE + verbose = FALSE, + logFile = logFile ) #Compute LSI @@ -591,7 +592,8 @@ addIterativeLSI <- function( cellNames = sampledCellNames, doSampleCells = FALSE, threads = threads, - verbose = FALSE + verbose = FALSE, + logFile = logFile ) #Compute LSI @@ -628,7 +630,8 @@ addIterativeLSI <- function( tmpPath = tmpPath, useIndex = useIndex, threads = threads, - verbose = FALSE + verbose = FALSE, + logFile = logFile ) gc() From 46dfca98c688421f4cc37eef0f92ec3a32aca396 Mon Sep 17 00:00:00 2001 
From: jgranja24 Date: Sun, 4 Apr 2021 21:03:10 -0700 Subject: [PATCH 021/184] bugfix check cells in project for ColSums --- .DS_Store | Bin 14340 -> 14340 bytes R/MarkerFeatures.R | 5 +++++ 2 files changed, 5 insertions(+) diff --git a/.DS_Store b/.DS_Store index ff9ed4861ffa9f704e0e0d4294960fedcc5e653e..15f43769510d600e17ec3a9dafa1c541b5e1c4ff 100644 GIT binary patch delta 38 ucmZoEXeroWEilqYvW-B4j&yakp`nF|j)Ja{nNh8dLbZjFg^q%ufsuJ_EhmS#s-dlC eLT+VMbxm#EoXP&;@{Dsgvj{TtZ{}9`DGmTXO%?P2 diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 0ed03a5b..e812beb0 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -191,11 +191,16 @@ getMarkerFeatures <- function( ##################################################### # Pairwise Test Per Seqnames ##################################################### + #ColSums mColSums <- tryCatch({ suppressMessages(.getColSums(ArrowFiles, seqnames = featureDF$seqnames@values, useMatrix = useMatrix, threads = threads)) }, error = function(x){ rep(1, nCells(ArchRProj)) }) + + #Subset By Cells in ArchRProj + mColSums <- mColSums[ArchRProj$cellNames] + if(all(mColSums==1) & is.null(normBy)){ normBy <- "none" } From d9eda0a004eee65050771219c0e181ebf2f3318f Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 4 Apr 2021 21:04:27 -0700 Subject: [PATCH 022/184] bugfix getColSums check cellNames --- R/IntegrativeAnalysis.R | 2 ++ R/IterativeLSI.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index a34ae4f0..dd2fe151 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -774,6 +774,8 @@ addCoAccessibility <- function( #Peak Matrix ColSums cS <- .getColSums(getArrowFiles(ArchRProj), chri, verbose = FALSE, useMatrix = "PeakMatrix") + cS <- cS[ArchRProj$cellNames] + gS <- unlist(lapply(seq_along(knnObj), function(x) sum(cS[knnObj[[x]]], na.rm=TRUE))) for(x in seq_along(chri)){ diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 0fc60880..8a341c5b 
100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -285,7 +285,7 @@ addIterativeLSI <- function( v }, error = function(e){ tryCatch({ - .getColSums(ArrowFiles = ArrowFiles, useMatrix = useMatrix, seqnames = chrToRun) + .getColSums(ArrowFiles = ArrowFiles, useMatrix = useMatrix, seqnames = chrToRun)[ArchRProj$cellNames] }, error = function(y){ stop("Could not determine depth from depthCol or colSums!") }) From 1a62519734323edb43fb36c49242afcef2aedb00 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 4 Apr 2021 21:06:55 -0700 Subject: [PATCH 023/184] bugfix --- R/MatrixTiles.R | 2 +- man/createArrowFiles.Rd | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index 0c6ecb1a..451384eb 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -217,7 +217,7 @@ addTileMatrix <- function( #Check N nf2 <- length(fragments) - if(nf2 < nf1) + if(nf2 < nf1){ warning("Skipping over fragments not within chromosome range on Chr:", chr) .logThis(fragmentsBad1, "fragmentsBad1", logFile = logFile) print("Bad1 (Start not greater than 0): ") diff --git a/man/createArrowFiles.Rd b/man/createArrowFiles.Rd index 4b31cfa2..f1bd3018 100644 --- a/man/createArrowFiles.Rd +++ b/man/createArrowFiles.Rd @@ -106,9 +106,11 @@ gsubExpression would be ":.*". This would retrieve the string after the colon as \code{scanBamFlag} passed to \code{ScanBam} in Rsamtools.} \item{offsetPlus}{The numeric offset to apply to a "+" stranded Tn5 insertion to account for the precise Tn5 binding site. +This parameter only applies to bam file input and it is assumed that fragment files have already been offset which is the standard from 10x output. See Buenrostro et al. Nature Methods 2013.} \item{offsetMinus}{The numeric offset to apply to a "-" stranded Tn5 insertion to account for the precise Tn5 binding site. +This parameter only applies to bam file input and it is assumed that fragment files have already been offset which is the standard from 10x output. 
See Buenrostro et al. Nature Methods 2013.} \item{addTileMat}{A boolean value indicating whether to add a "Tile Matrix" to each ArrowFile. A Tile Matrix is a counts matrix that, From 73287e62b2340ec9e3c9ed62c76c16820e417826 Mon Sep 17 00:00:00 2001 From: jgranja24 Date: Sun, 11 Apr 2021 21:29:26 -0700 Subject: [PATCH 024/184] bug fix sampledCellNames arrowRead --- .DS_Store | Bin 14340 -> 14340 bytes R/ArrowRead.R | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 15f43769510d600e17ec3a9dafa1c541b5e1c4ff..c201598da24f47b1678239b13fdff6ae6e5eb315 100644 GIT binary patch delta 16 XcmZoEXerpRUx3-d)M)b|0Vj0;IJpJ* delta 16 XcmZoEXerpRUx3-n(qQu;0Vj0;IJpJ* diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 35ac08f8..eaed91ef 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -784,7 +784,7 @@ getMatrixFromArrow <- function( matFiles <- lapply(mat, function(x) x[[2]]) %>% Reduce("c", .) mat <- lapply(mat, function(x) x[[1]]) %>% Reduce("cbind", .) - if(!all(cellNames %in% colnames(mat))){ + if(!all(sampledCellNames %in% colnames(mat))){ .logThis(sampledCellNames, "cellNames supplied", logFile = logFile) .logThis(colnames(mat), "cellNames from matrix", logFile = logFile) stop("Error not all cellNames found in partialMatrix") From 0291b6fee341a8c910d016a09be432a022fa01d8 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 21 Apr 2021 20:26:24 -0700 Subject: [PATCH 025/184] fix fast H5Fopen to be read only --- R/ArrowRead.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index eaed91ef..9b8f9bdf 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -1021,7 +1021,7 @@ getMatrixFromArrow <- function( ){ if(tolower(method) == "fast" & is.null(index) & is.null(start) & is.null(block) & is.null(count)){ - fid <- H5Fopen(file) + fid <- H5Fopen(file, "H5F_ACC_RDONLY")) dapl <- H5Pcreate("H5P_DATASET_ACCESS") did <- .Call("_H5Dopen", fid@ID, name, dapl@ID, PACKAGE='rhdf5') res <- .Call("_H5Dread", 
did, NULL, NULL, NULL, TRUE, 0L, FALSE, fid@native, PACKAGE='rhdf5') From 400ecf9357cbc16a80079dab789af3e3a994dd16 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 28 Apr 2021 07:24:59 -0700 Subject: [PATCH 026/184] remove extra paren --- R/ArrowRead.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 9b8f9bdf..13111d05 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -1021,7 +1021,7 @@ getMatrixFromArrow <- function( ){ if(tolower(method) == "fast" & is.null(index) & is.null(start) & is.null(block) & is.null(count)){ - fid <- H5Fopen(file, "H5F_ACC_RDONLY")) + fid <- H5Fopen(file, "H5F_ACC_RDONLY") dapl <- H5Pcreate("H5P_DATASET_ACCESS") did <- .Call("_H5Dopen", fid@ID, name, dapl@ID, PACKAGE='rhdf5') res <- .Call("_H5Dread", did, NULL, NULL, NULL, TRUE, 0L, FALSE, fid@native, PACKAGE='rhdf5') From 7ff7e4d2306d06304d0a1a73bd6bb2a718a3a724 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 28 Apr 2021 20:20:17 -0700 Subject: [PATCH 027/184] Add requirement for genomeSize argument when using non-standard genomes. --- R/ReproduciblePeakSet.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 8699e748..6c50c0cf 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -20,7 +20,7 @@ #' This is important to allow for exclusion of pseudo-bulk replicates derived from very low cell numbers. #' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from peak calling. #' @param pathToMacs2 The full path to the MACS2 executable. -#' @param genomeSize The genome size to be used for MACS2 peak calling (see MACS2 documentation). +#' @param genomeSize The genome size to be used for MACS2 peak calling (see MACS2 documentation). This is required if genome is not hg19, hg38, mm9, or mm10. #' @param shift The number of basepairs to shift each Tn5 insertion. 
When combined with `extsize` this allows you to create proper fragments, #' centered at the Tn5 insertion site, for use with MACS2 (see MACS2 documentation). #' @param extsize The number of basepairs to extend the MACS2 fragment after `shift` has been applied. When combined with `extsize` this @@ -166,6 +166,8 @@ addReproduciblePeakSet <- function( genomeSize <- 2.7e9 }else if(grepl("mm9|mm10", getGenome(ArchRProj), ignore.case = TRUE)){ genomeSize <- 1.87e9 + }else { + stop("Non-standard genome detected. Argument genomeSize is required!") } } From bac49aef6ac74c1d1d049d4c12dc6ea1bf8cd25d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Apr 2021 07:32:17 -0700 Subject: [PATCH 028/184] add default nDim for .loadUWOT Fixes backwards compatibility problem with .loadUWOT --- R/Embedding.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Embedding.R b/R/Embedding.R index e0ead19c..b4e9747a 100644 --- a/R/Embedding.R +++ b/R/Embedding.R @@ -253,7 +253,7 @@ addUMAP <- function( } #New Save UWOT -.loadUWOT <- function(file){ +.loadUWOT <- function(file, nDim = NULL){ tryCatch({ uwot::load_uwot(file = file, verbose = TRUE) }, error = function(e){ From 05d66f20abe6865a330c1409374d4ea33fdecdf5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 30 Apr 2021 07:23:52 -0700 Subject: [PATCH 029/184] Add check for existence of matrix passed to useMatrix --- R/RNAIntegration.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/RNAIntegration.R b/R/RNAIntegration.R index fee11668..d1137e79 100644 --- a/R/RNAIntegration.R +++ b/R/RNAIntegration.R @@ -143,6 +143,11 @@ addGeneIntegrationMatrix <- function( ######################################################################################### .logDiffTime("Checking ATAC Input", tstart, verbose = verbose, logFile = logFile) + if (useMatrix %ni% getAvailableMatrices(ArchRProj)) { + .logMessage(paste0("Matrix ", useMatrix, " does not exist in the provided ArchRProject. 
See available matrix names from getAvailableMatrices()!"), logFile = logFile) + stop("Matrix name provided to useMatrix does not exist in ArchRProject!") + } + if(!is.null(groupATAC)){ dfATAC <- getCellColData(ArchRProj = ArchRProj, select = groupATAC, drop = FALSE) } From eec8474ddf924149e38a9469784942b5f456cb1f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 30 Apr 2021 14:11:04 -0700 Subject: [PATCH 030/184] update normMethod param documentation --- R/GroupExport.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/GroupExport.R b/R/GroupExport.R index 87bf3719..9102c6f3 100644 --- a/R/GroupExport.R +++ b/R/GroupExport.R @@ -140,7 +140,8 @@ getGroupSE <- function( #' user-supplied `cellColData` metadata columns (for example, "Clusters"). Cells with the same value annotated in this metadata #' column will be grouped together and the average signal will be plotted. #' @param normMethod The name of the column in `cellColData` by which normalization should be performed. The recommended and default value -#' is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. +#' is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. Accepted values are +#' "None", "ReadsInTSS", "nCells", "ReadsInPromoter", or "nFrags". #' @param tileSize The numeric width of the tile/bin in basepairs for plotting ATAC-seq signal tracks. All insertions in a single bin will be summed. #' @param maxCells Maximum number of cells used for each bigwig. #' @param ceiling Maximum contribution of accessibility per cell in each tile. 
From 980198829169c326f57afbed8a1c738c4f260837 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 10 May 2021 08:02:39 -0700 Subject: [PATCH 031/184] Check for PeakMatrix in addBgdPeaks --- R/MatrixDeviations.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/MatrixDeviations.R b/R/MatrixDeviations.R index 50721fc5..904c049f 100644 --- a/R/MatrixDeviations.R +++ b/R/MatrixDeviations.R @@ -626,6 +626,11 @@ addBgdPeaks <- function( .validInput(input = outFile, name = "outFile", valid = c("character")) .validInput(input = force, name = "force", valid = c("boolean")) + if ("PeakMatrix" %ni% getAvailableMatrices(ArchRProj)) { + .logMessage(paste0("PeakMatrix does not exist in the provided ArchRProject. Add a peak matrix using addPeakMatrix(). See available matrix names from getAvailableMatrices()!"), logFile = logFile) + stop("PeakMatrix does not exist in the provided ArchRProject. Add a peak matrix using addPeakMatrix(). See available matrix names from getAvailableMatrices()!") + } + if(!is.null(metadata(getPeakSet(ArchRProj))$bgdPeaks) & !force){ if(file.exists(metadata(getPeakSet(ArchRProj))$bgdPeaks)){ From 28a17f7ee90e406fc061f1c1f7e0611a55ba8a3c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 28 May 2021 20:41:31 -0700 Subject: [PATCH 032/184] Update issue templates --- ...s---feature-requests--use-discussions--.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug-report---no-questions---feature-requests--use-discussions--.md diff --git a/.github/ISSUE_TEMPLATE/bug-report---no-questions---feature-requests--use-discussions--.md b/.github/ISSUE_TEMPLATE/bug-report---no-questions---feature-requests--use-discussions--.md new file mode 100644 index 00000000..e89438b4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report---no-questions---feature-requests--use-discussions--.md @@ -0,0 +1,42 @@ +--- +name: Bug Report - NO QUESTIONS / FEATURE REQUESTS (Use Discussions!) 
+about: Create a bug report to help us improve ArchR. NOT to be used for questions + or feature requests! +title: '' +labels: bug +assignees: '' + +--- + +This is an issue template made by the developers of ArchR. You MUST follow these instructions. + +Questions related to how to use ArchR or requests for new features should be posted in the Discussions forum (https://github.com/GreenleafLab/ArchR/discussions). + +Before you submit this Bug Report please update ArchR to the latest stable version and make sure that this issue has not already been fixed in the latest release. ArchR is still in active development and we will fix problems as they arise. To update ArchR: + +devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) + +If your issue persists, then please submit this bug report. + +PLEASE FILL OUT THE RELEVANT INFORMATION AND DELETE THE UNUSED PORTIONS OF THIS ISSUE TEMPLATE. + +**Attach your log file** +ArchR has a built-in logging functionality for all complex functions. You MUST attach your log file (indicated in the console output) to this issue. Just drag and drop it here. + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +To help us optimally address your issue, please try to reproduce this issue using the tutorial hematopoiesis dataset and provide us the command(s) to reproduce your bug. Our first question to you will be "can you reproduce this with the tutorial dataset" so please do this. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. Do not screenshot code or text but embed this in markdown using triple-backticks. + +**Session Info** +If you do not have a log file because the function that caused the error does not produce one, please paste the output of "sessionInfo()" here. 
+ +**Additional context** +Add any other context about the problem here. From e1482547873b6739e2c7c194062c5ea8b5924901 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 28 May 2021 20:42:45 -0700 Subject: [PATCH 033/184] Update issue templates --- .github/ISSUE_TEMPLATE/bug-report.md | 42 ---------------------------- 1 file changed, 42 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/bug-report.md diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md deleted file mode 100644 index d978275e..00000000 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -name: Bug Report -about: Create a bug report to help us improve ArchR. NOT to be used for questions - or feature requests! -title: '' -labels: bug -assignees: '' - ---- - -This is an issue template made by the developers of ArchR. You MUST follow these instructions. - -Questions related to how to use ArchR or requests for new features should be posted in the Discussions forum (https://github.com/GreenleafLab/ArchR/discussions). - -Before you submit this Bug Report please update ArchR to the latest stable version and make sure that this issue has not already been fixed in the latest release. ArchR is still in active development and we will fix problems as they arise. To update ArchR: - -devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) - -If your issue persists, then please submit this bug report. - -PLEASE FILL OUT THE RELEVANT INFORMATION AND DELETE THE UNUSED PORTIONS OF THIS ISSUE TEMPLATE. - -**Attach your log file** -ArchR has a built-in logging functionality for all complex functions. You MUST attach your log file (indicated in the console output) to this issue. Just drag and drop it here. - -**Describe the bug** -A clear and concise description of what the bug is. 
- -**To Reproduce** -To help us optimally address your issue, please try to reproduce this issue using the tutorial hematopoiesis dataset and provide us the command(s) to reproduce your bug. Our first question to you will be "can you reproduce this with the tutorial dataset" so please do this. - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. Do not screenshot code or text but embed this in markdown using triple-backticks. - -**Session Info** -If you do not have a log file because the function that caused the error does not produce one, please paste the output of "sessionInfo()" here. - -**Additional context** -Add any other context about the problem here. From d4dd9aef9483febcf6758b311aff58a36f3bf541 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 2 Jun 2021 20:50:58 -0700 Subject: [PATCH 034/184] add force parameter where missing --- R/BulkProjection.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/BulkProjection.R b/R/BulkProjection.R index b02d027e..04b2f1d4 100644 --- a/R/BulkProjection.R +++ b/R/BulkProjection.R @@ -9,6 +9,7 @@ #' @param n An integer specifying the number of subsampled "pseudo single cells" per bulk sample. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param threads The number of threads used for parallel execution +#' @param force A boolean value indicating whether to force the projection of bulk ATAC data even if fewer than 25% of the features are present in the bulk ATAC data set. #' @param logFile The path to a file to be used for logging ArchR output. 
#' @export #' @@ -20,6 +21,7 @@ projectBulkATAC <- function( n = 250, verbose = TRUE, threads = getArchRThreads(), + force = FALSE, logFile = createLogFile("projectBulkATAC") ){ @@ -30,8 +32,9 @@ projectBulkATAC <- function( .validInput(input = n, name = "n", valid = c("integer")) .validInput(input = verbose, name = "verbose", valid = c("boolean")) .validInput(input = threads, name = "threads", valid = c("integer")) + .validInput(input = force, name = "force", valid = c("boolean")) .validInput(input = logFile, name = "logFile", valid = c("character")) - + tstart <- Sys.time() .startLogging(logFile = logFile) From 0d77da238cbcf6f4fa0c8daac4cec7df34c63a44 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 2 Jun 2021 21:19:00 -0700 Subject: [PATCH 035/184] create error when genes lacks symbol column --- R/MatrixGeneScores.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index b7f5548a..afbd14f3 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -103,6 +103,9 @@ addGeneScoreMatrix <- function( if(inherits(mcols(genes)$symbol, "list") | inherits(mcols(genes)$symbol, "SimpleList")){ stop("Found a list in genes symbol! This is an incorrect format. Please correct your genes!") } + if(!any(colnames(mcols(genes)) == "symbol")) { + stop("No symbol column in genes! 
A column named symbol is exected in the GRanges object passed to the genes parameter!") + } .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "addGeneScoreMatrix Input-Parameters", logFile = logFile) From b0a48c86c77f9af66ac7254d782dde917e817284 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 1 Jul 2021 13:10:25 -0700 Subject: [PATCH 036/184] bugfix for tabix dt column names --- R/CreateArrow.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index fae3f845..55e35670 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -1260,9 +1260,9 @@ createArrowFiles <- function( } #No NAs - dt <- dt[!is.na(dt$RG), , drop=FALSE] - dt <- dt[!is.na(dt$start), , drop=FALSE] - dt <- dt[!is.na(dt$end), , drop=FALSE] + dt <- dt[!is.na(dt$V2), , drop=FALSE] + dt <- dt[!is.na(dt$V3), , drop=FALSE] + dt <- dt[!is.na(dt$V4), , drop=FALSE] #Care for Break Points dt <- dt[dt$V2 >= start(tileChromSizes[x]),] From f98354ce07f72c86213982e5fbcd9a5bd70d743b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 3 Jul 2021 05:45:52 -0700 Subject: [PATCH 037/184] Fix mColSums Suggested by @andyyhchen --- R/MarkerFeatures.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index e812beb0..b439771d 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -193,14 +193,12 @@ getMarkerFeatures <- function( ##################################################### #ColSums mColSums <- tryCatch({ - suppressMessages(.getColSums(ArrowFiles, seqnames = featureDF$seqnames@values, useMatrix = useMatrix, threads = threads)) + suppressMessages(tmpColSum <- .getColSums(ArrowFiles, seqnames = featureDF$seqnames@values, useMatrix = useMatrix, threads = threads)) + tmpColSum[ArchRProj$cellNames] }, error = function(x){ rep(1, nCells(ArchRProj)) }) - #Subset By Cells in ArchRProj - mColSums <- mColSums[ArchRProj$cellNames] - if(all(mColSums==1) & 
is.null(normBy)){ normBy <- "none" } From 2aabfc349fcd403388cc26a5f3fd0821b33a9764 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 6 Jul 2021 11:16:34 -0700 Subject: [PATCH 038/184] add useMatrix to param documentation --- R/IntegrativeAnalysis.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index dd2fe151..4e85635d 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -948,6 +948,7 @@ getCoAccessibility <- function( #' #' @param ArchRProj An `ArchRProject` object. #' @param reducedDims The name of the `reducedDims` object (i.e. "IterativeLSI") to retrieve from the designated `ArchRProject`. +#' @param useMatrix The name of the matrix containing gene expression information to be used for determining peak-to-gene links. See `getAvailableMatrices(ArchRProj)` #' @param dimsToUse A vector containing the dimensions from the `reducedDims` object to use in clustering. #' @param scaleDims A boolean value that indicates whether to z-score the reduced dimensions for each cell. This is useful for minimizing #' the contribution of strong biases (dominating early PCs) and lowly abundant populations. 
However, this may lead to stronger sample-specific @@ -996,6 +997,7 @@ addPeak2GeneLinks <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = reducedDims, name = "reducedDims", valid = c("character")) + .validInput(input = useMatrix, name = "useMatrix", valid = c("character")) .validInput(input = dimsToUse, name = "dimsToUse", valid = c("numeric", "null")) .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean", "null")) .validInput(input = corCutOff, name = "corCutOff", valid = c("numeric", "null")) From a861399f489fa2a870e42cdf759166d44543caaf Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 16 Jul 2021 13:53:33 -0700 Subject: [PATCH 039/184] fix ylim quantile calculation --- R/ArchRBrowser.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index c496cc9f..b472cb17 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -995,7 +995,7 @@ plotBrowserTrack <- function( # Plot Track ###################################################### if(!is.null(ylim)){ - ylim <- quantile(df$y, ylim) + ylim <- c(0,quantile(df$y, ylim)) df$y[df$y < ylim[1]] <- ylim[1] df$y[df$y > ylim[2]] <- ylim[2] }else{ From 8caa716086832c2560b867b6e4a0d1bab48716e1 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 16 Jul 2021 14:02:51 -0700 Subject: [PATCH 040/184] fix ylim quantile again --- R/ArchRBrowser.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index b472cb17..20abfb19 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -995,7 +995,7 @@ plotBrowserTrack <- function( # Plot Track ###################################################### if(!is.null(ylim)){ - ylim <- c(0,quantile(df$y, ylim)) + ylim <- c(0,quantile(df$y, probs=c(ylim))) df$y[df$y < ylim[1]] <- ylim[1] df$y[df$y > ylim[2]] <- ylim[2] }else{ From d596b235d2a64393ed7d90f23ae00842a21e1cf5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: 
Wed, 21 Jul 2021 08:09:59 -0700 Subject: [PATCH 041/184] Fix CreateGenomeAnnotation This function had a very odd logic where an if statement asked if genome param was NULL even though the .validInput statements prevented that from being the case. I think I fixed the logic which will enable more flexible custom genomes --- R/AnnotationGenome.R | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index 3d75ac3a..dc504544 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -24,23 +24,30 @@ createGenomeAnnotation <- function( .validInput(input = filter, name = "filter", valid = c("boolean")) .validInput(input = filterChr, name = "filterChr", valid = c("character", "null")) - if(is.null(genome) | is.null(blacklist) | is.null(chromSizes)){ - - ################## - message("Getting genome..") - bsg <- validBSgenome(genome) - genome <- bsg@pkgname - - ################## - message("Getting chromSizes..") + ################## + message("Getting genome..") + #validBSgenome works on both character and BSgenome inputs, which are the only allowable inputs to the param + bsg <- validBSgenome(genome) + genome <- bsg@pkgname + + if(is.null(chromSizes)) { + message("Attempting to infer chromSizes..") chromSizes <- GRanges(names(seqlengths(bsg)), IRanges(1, seqlengths(bsg))) if(filter){ - chromSizes <- filterChrGR(chromSizes, remove = filterChr) + if(is.null(filterChr)) { + stop("Cannot have filterChr = NULL when filter = TRUE!") + } + chromSizes <- filterChrGR(chromSizes, remove = filterChr) } seqlengths(chromSizes) <- end(chromSizes) + } else { + message("Using provided chromSizes..") + chromSizes <- .validGRanges(chromSizes) + } + if(is.null(blacklist)){ ################## - message("Getting blacklist..") + message("Attempting to infer blacklist..") genomeName <- tryCatch({ bsg@provider_version @@ -50,15 +57,9 @@ createGenomeAnnotation <- function( blacklist <- 
.getBlacklist(genome = genomeName) - }else{ - - bsg <- validBSgenome(genome) - genome <- bsg@pkgname - - chromSizes <- .validGRanges(chromSizes) - + } else { + message("Using provided blacklist...") blacklist <- .validGRanges(blacklist) - } SimpleList(genome = genome, chromSizes = chromSizes, blacklist = blacklist) From ef6fd035fe75329aca19d14f3c656503ff3a834e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 21 Jul 2021 09:01:51 -0700 Subject: [PATCH 042/184] annotate that this is pearson correlation --- src/Correlation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Correlation.cpp b/src/Correlation.cpp index 419da16d..e86ea2c4 100644 --- a/src/Correlation.cpp +++ b/src/Correlation.cpp @@ -3,7 +3,7 @@ using namespace Rcpp; using namespace std; -// Adapted from https://github.com/AEBilgrau/correlateR/blob/master/src/auxiliary_functions.cpp +// Pearson Correlation, Adapted from https://github.com/AEBilgrau/correlateR/blob/master/src/auxiliary_functions.cpp // [[Rcpp::export]] Rcpp::NumericVector rowCorCpp(IntegerVector idxX, IntegerVector idxY, Rcpp::NumericMatrix X, Rcpp::NumericMatrix Y) { From 699a886f748066d89fea73db6fd2acd119237f31 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 22 Jul 2021 11:51:33 -0700 Subject: [PATCH 043/184] add the ability to designate sample labels independently addGroupCoverages automatically assumes that sample labels come from cellColData$Sample which assumes that each arrow file represents an individual sample. This is not the case for applications where different samples are mixed together into the same GEM reaction as is the case for multiplexing based on sample genotype or lipid barcode. --- R/GroupCoverages.R | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index 63859218..ca1f7d02 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -6,6 +6,10 @@ #' @param ArchRProj An `ArchRProject` object. 
#' @param groupBy The name of the column in `cellColData` to use for grouping multiple cells together prior to generation of the insertion coverage file. #' @param useLabels A boolean value indicating whether to use sample labels to create sample-aware subgroupings during as pseudo-bulk replicate generation. +#' @param sampleLabels The name of a column in `cellColData` to use to identify samples. In most cases, this parameter should be left as `NULL` and you +#' should only use this parameter if you do not want to use the default sample labels stored in `cellColData$Sample`. However, if your individual Arrow +#' files do not map to individual samples, then you should set this parameter to accurately identify your samples. This is the case in (for example) +#' multiplexing applications where cells from different biological samples are mixed into the same reaction and demultiplexed based on a lipid barcode or genotype. #' @param minCells The minimum number of cells required in a given cell group to permit insertion coverage file generation. #' @param maxCells The maximum number of cells to use during insertion coverage file generation. #' @param maxFragments The maximum number of fragments per cell group to use in insertion coverage file generation. 
This prevents the generation @@ -28,6 +32,7 @@ addGroupCoverages <- function( ArchRProj = NULL, groupBy = "Clusters", useLabels = TRUE, + sampleLabels = NULL, minCells = 40, maxCells = 500, maxFragments = 25*10^6, @@ -46,6 +51,7 @@ addGroupCoverages <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = groupBy, name = "groupBy", valid = c("character")) .validInput(input = useLabels, name = "useLabels", valid = c("boolean")) + .validInput(input = sampleLabels, name = "sampleLabels", valid = c("character","null")) .validInput(input = minCells, name = "minCells", valid = c("integer")) .validInput(input = maxCells, name = "maxCells", valid = c("integer")) .validInput(input = maxFragments, name = "maxFragments", valid = c("integer")) @@ -64,6 +70,12 @@ addGroupCoverages <- function( stop("minReplicates must be at least 2!") } + if(!is.null(sampleLabels)){ + if(sampleLabels %ni% colnames(ArchRProj@cellColData)) { + stop("sampleLabels is not a column in cellColData!") + } + } + tstart <- Sys.time() .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "addGroupCoverages Input-Parameters", logFile = logFile) @@ -118,8 +130,11 @@ addGroupCoverages <- function( # outListx <- SimpleList(LowCellGroup = cellNamesx) or NULL #} if(useLabels){ - sampleLabelsx <- paste0(subColDat$Sample) - }else{ + if(is.null(sampleLabels)) { + sampleLabels <- "Sample" + } + sampleLabelsx <- paste0(subColDat[,sampleLabels]) + } else { sampleLabelsx <- NULL } outListx <- .identifyGroupsForPseudoBulk( From eb8d77a3c45a0350b8bfebd09d07100c730dd40d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 23 Jul 2021 08:42:58 -0700 Subject: [PATCH 044/184] delete requirePackage(genome) When using a custom BSgenome object, no package is installed associated with that object. This causes requirePackage(genome) to fail. This statement is unnecessary and is superseded by validBSgenome. 
--- R/Footprinting.R | 1 - R/GroupCoverages.R | 1 - 2 files changed, 2 deletions(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index baf77703..7c346c0f 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -68,7 +68,6 @@ getFootprints <- function( } genome <- getGenome(ArchRProj) - .requirePackage(genome) .requirePackage("Biostrings", source = "bioc") BSgenome <- eval(parse(text = genome)) BSgenome <- validBSgenome(BSgenome) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index 63859218..45683429 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -585,7 +585,6 @@ addGroupCoverages <- function( .logThis(append(args, mget(names(formals()),sys.frame(sys.nframe()))), "kmerBias-Parameters", logFile = logFile) - .requirePackage(genome) .requirePackage("Biostrings", source = "bioc") BSgenome <- eval(parse(text = genome)) BSgenome <- validBSgenome(BSgenome) From 52e7011e26703aad4d9f62d12a7a79492980c09f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 23 Jul 2021 08:49:11 -0700 Subject: [PATCH 045/184] Update issue templates --- ...s---feature-requests--use-discussions--.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug-error-report---no-usage-questions---feature-requests--use-discussions--.md diff --git a/.github/ISSUE_TEMPLATE/bug-error-report---no-usage-questions---feature-requests--use-discussions--.md b/.github/ISSUE_TEMPLATE/bug-error-report---no-usage-questions---feature-requests--use-discussions--.md new file mode 100644 index 00000000..dfa6fd79 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-error-report---no-usage-questions---feature-requests--use-discussions--.md @@ -0,0 +1,42 @@ +--- +name: Bug/Error Report - NO USAGE QUESTIONS / FEATURE REQUESTS (Use Discussions!) +about: Create a bug/error report to help us improve ArchR. NOT to be used for usage + questions or feature requests! 
+title: '' +labels: bug +assignees: '' + +--- + +This is an issue template made by the developers of ArchR. You MUST follow these instructions. + +Questions related to how to use ArchR or requests for new features should be posted in the Discussions forum (https://github.com/GreenleafLab/ArchR/discussions). + +Before you submit this Bug Report please update ArchR to the latest stable version and make sure that this issue has not already been fixed in the latest release. ArchR is still in active development and we will fix problems as they arise. To update ArchR: + +devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) + +If your issue persists, then please submit this bug report. + +PLEASE FILL OUT THE RELEVANT INFORMATION AND DELETE THE UNUSED PORTIONS OF THIS ISSUE TEMPLATE. + +**Attach your log file** +ArchR has a built-in logging functionality for all complex functions. You MUST attach your log file (indicated in the console output) to this issue. Just drag and drop it here. + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +To help us optimally address your issue, please try to reproduce this issue using the tutorial hematopoiesis dataset and provide us the command(s) to reproduce your bug. Our first question to you will be "can you reproduce this with the tutorial dataset" so please do this. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. Do not screenshot code or text but embed this in markdown using triple-backticks. + +**Session Info** +If you do not have a log file because the function that caused the error does not produce one, please paste the output of "sessionInfo()" here. + +**Additional context** +Add any other context about the problem here. 
From 2be1294eb1fbff364fb538d71f4f545ee4384d09 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 23 Jul 2021 08:49:35 -0700 Subject: [PATCH 046/184] Update issue templates --- ...s---feature-requests--use-discussions--.md | 42 ------------------- 1 file changed, 42 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/bug-report---no-questions---feature-requests--use-discussions--.md diff --git a/.github/ISSUE_TEMPLATE/bug-report---no-questions---feature-requests--use-discussions--.md b/.github/ISSUE_TEMPLATE/bug-report---no-questions---feature-requests--use-discussions--.md deleted file mode 100644 index e89438b4..00000000 --- a/.github/ISSUE_TEMPLATE/bug-report---no-questions---feature-requests--use-discussions--.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -name: Bug Report - NO QUESTIONS / FEATURE REQUESTS (Use Discussions!) -about: Create a bug report to help us improve ArchR. NOT to be used for questions - or feature requests! -title: '' -labels: bug -assignees: '' - ---- - -This is an issue template made by the developers of ArchR. You MUST follow these instructions. - -Questions related to how to use ArchR or requests for new features should be posted in the Discussions forum (https://github.com/GreenleafLab/ArchR/discussions). - -Before you submit this Bug Report please update ArchR to the latest stable version and make sure that this issue has not already been fixed in the latest release. ArchR is still in active development and we will fix problems as they arise. To update ArchR: - -devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) - -If your issue persists, then please submit this bug report. - -PLEASE FILL OUT THE RELEVANT INFORMATION AND DELETE THE UNUSED PORTIONS OF THIS ISSUE TEMPLATE. - -**Attach your log file** -ArchR has a built-in logging functionality for all complex functions. You MUST attach your log file (indicated in the console output) to this issue. Just drag and drop it here. 
- -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -To help us optimally address your issue, please try to reproduce this issue using the tutorial hematopoiesis dataset and provide us the command(s) to reproduce your bug. Our first question to you will be "can you reproduce this with the tutorial dataset" so please do this. - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. Do not screenshot code or text but embed this in markdown using triple-backticks. - -**Session Info** -If you do not have a log file because the function that caused the error does not produce one, please paste the output of "sessionInfo()" here. - -**Additional context** -Add any other context about the problem here. From 54efef17f488fabbe81c5df98b019e90e12f1a8c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 28 Jul 2021 07:33:52 -0700 Subject: [PATCH 047/184] Remove unneeded if and update param definitions Removed if statement that prevented `filter = TRUE` in combination with `filterChr = NULL` since this was not necessary. --- R/AnnotationGenome.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index dc504544..876828d7 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -6,9 +6,10 @@ #' @param chromSizes A `GRanges` object containing chromosome start and end coordinates. #' @param blacklist A `GRanges` object containing regions that should be excluded from analyses due to unwanted biases. #' @param filter A boolean value indicating whether non-standard chromosome scaffolds should be excluded. -#' These "non-standard" chromosomes are defined by `filterChrGR()`. +#' These "non-standard" chromosomes are defined by `filterChrGR()` and by manual annotation using the `filterChr` parameter. 
#' @param filterChr A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. -#' If no manual removal is desired, `filterChr` should be set to `NULL`. +#' If no manual removal is desired, `filterChr` should be set to `NULL`. If `filter` is set to `TRUE` but `filterChr` is set to `NULL`, +#' non-standard chromosomes will still be removed as defined in `filterChrGR()`. #' @export createGenomeAnnotation <- function( genome = NULL, @@ -34,9 +35,6 @@ createGenomeAnnotation <- function( message("Attempting to infer chromSizes..") chromSizes <- GRanges(names(seqlengths(bsg)), IRanges(1, seqlengths(bsg))) if(filter){ - if(is.null(filterChr)) { - stop("Cannot have filterChr = NULL when filter = TRUE!") - } chromSizes <- filterChrGR(chromSizes, remove = filterChr) } seqlengths(chromSizes) <- end(chromSizes) From 779a2d5b29a003e0dff975a1954d165361d4a8aa Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 28 Jul 2021 07:48:47 -0700 Subject: [PATCH 048/184] Default sampleLabels to Sample Made default for sampleLabels to Sample and removed checks for if sampleLabels is NULL --- R/GroupCoverages.R | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index ca1f7d02..b2769a86 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -32,7 +32,7 @@ addGroupCoverages <- function( ArchRProj = NULL, groupBy = "Clusters", useLabels = TRUE, - sampleLabels = NULL, + sampleLabels = "Sample", minCells = 40, maxCells = 500, maxFragments = 25*10^6, @@ -51,7 +51,7 @@ addGroupCoverages <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = groupBy, name = "groupBy", valid = c("character")) .validInput(input = useLabels, name = "useLabels", valid = c("boolean")) - .validInput(input = sampleLabels, name = "sampleLabels", valid = c("character","null")) + .validInput(input = sampleLabels, name = "sampleLabels", 
valid = c("character")) .validInput(input = minCells, name = "minCells", valid = c("integer")) .validInput(input = maxCells, name = "maxCells", valid = c("integer")) .validInput(input = maxFragments, name = "maxFragments", valid = c("integer")) @@ -70,10 +70,8 @@ addGroupCoverages <- function( stop("minReplicates must be at least 2!") } - if(!is.null(sampleLabels)){ - if(sampleLabels %ni% colnames(ArchRProj@cellColData)) { - stop("sampleLabels is not a column in cellColData!") - } + if(sampleLabels %ni% colnames(ArchRProj@cellColData)) { + stop("sampleLabels is not a column in cellColData!") } tstart <- Sys.time() @@ -130,9 +128,6 @@ addGroupCoverages <- function( # outListx <- SimpleList(LowCellGroup = cellNamesx) or NULL #} if(useLabels){ - if(is.null(sampleLabels)) { - sampleLabels <- "Sample" - } sampleLabelsx <- paste0(subColDat[,sampleLabels]) } else { sampleLabelsx <- NULL From 79f567e0b038cb49d01626fd504ed41e3c85ce46 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 08:05:16 -0700 Subject: [PATCH 049/184] update function description for getMatrixFromProject in response to https://github.com/GreenleafLab/ArchR/discussions/943 --- R/ArrowRead.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 13111d05..55d12c41 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -239,7 +239,12 @@ getFragmentsFromArrow <- function( #' Get a data matrix stored in an ArchRProject #' -#' This function gets a given data matrix from an `ArchRProject`. +#' This function gets a given data matrix from an `ArchRProject` and returns it as a `SummarizedExperiment`. +#' This function will return the matrix you ask it for, without altering that matrix unless you tell it to. +#' For example, if you added your `PeakMatrix` using `addPeakMatrix()` with `binarize = TRUE`, then +#' `getMatrixFromProject()` will return a binarized `PeakMatrix`. 
Alternatively, you could set `binarize = TRUE` +#' in the parameters passed to `getMatrixFromProject()` and the `PeakMatrix` will be binarized as you pull +#' it out. No other normalization is applied to the matrix by this function. #' #' @param ArchRProj An `ArchRProject` object to get data matrix from. #' @param useMatrix The name of the data matrix to retrieve from the given ArrowFile. Options include "TileMatrix", "GeneScoreMatrix", etc. From ff34d6180aa70dd3dbed9af476c462a030596f99 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 08:12:32 -0700 Subject: [PATCH 050/184] remove .requirePackage(genome) Prevents use of custom (non-installed) BSgenome objects and this check is superseded by `validBSgenome()` --- R/ProjectMethods.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/ProjectMethods.R b/R/ProjectMethods.R index 1cb18b7d..0ac8258c 100644 --- a/R/ProjectMethods.R +++ b/R/ProjectMethods.R @@ -390,7 +390,6 @@ addPeakSet <- function( #Get NucleoTide Content peakSet <- tryCatch({ - .requirePackage(genomeAnnotation$genome) .requirePackage("Biostrings",source="bioc") BSgenome <- eval(parse(text = genomeAnnotation$genome)) BSgenome <- validBSgenome(BSgenome) From ccac7bc4bf0de87c7ba51de9970a95a581b45f90 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 08:14:05 -0700 Subject: [PATCH 051/184] remove requirePackage(genome) This prevents the use of custom (non-installed) BSgenome objects and this check is superseded by `validBSgenome` --- R/ReproduciblePeakSet.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 6c50c0cf..1b153091 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -213,7 +213,6 @@ addReproduciblePeakSet <- function( ##################################################### # BSgenome for Add Nucleotide Frequencies! 
##################################################### - .requirePackage(genomeAnnotation$genome) .requirePackage("Biostrings",source="bioc") BSgenome <- eval(parse(text = genomeAnnotation$genome)) BSgenome <- validBSgenome(BSgenome) From 73a286707415c01f07394861ab606cfd187a2568 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 08:39:22 -0700 Subject: [PATCH 052/184] explain geneAnnotations in plots --- R/ArchRBrowser.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 20abfb19..3b139ea1 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -5,7 +5,8 @@ #' Launch ArchR Genome Browser #' #' This function will open an interactive shiny session in style of a browser track. It allows for normalization of the signal which -#' enables direct comparison across samples. +#' enables direct comparison across samples. Note that the genes displayed in this browser are derived from your `geneAnnotation` +#' (i.e. the `BSgenome` object you used) so they may not match other online genome browsers that use different gene annotations. #' #' @param ArchRProj An `ArchRProject` object. #' @param features A `GRanges` object containing the "features" to be plotted via the "featureTrack". This should be thought of as a @@ -628,7 +629,8 @@ ArchRBrowserTrack <- function(...){ #' Plot an ArchR Region Track #' #' This function will plot the coverage at an input region in the style of a browser track. It allows for normalization of the signal -#' which enables direct comparison across samples. +#' which enables direct comparison across samples. Note that the genes displayed in these plots are derived from your `geneAnnotation` +#' (i.e. the `BSgenome` object you used) so they may not match other online genome browsers that use different gene annotations. #' #' @param ArchRProj An `ArchRProject` object. #' @param region A `GRanges` region that indicates the region to be plotted. 
If more than one region exists in the `GRanges` object, From 17d613db4b847e978264d5a7dd275c081e36541b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 29 Jul 2021 17:40:22 -0700 Subject: [PATCH 053/184] fix clusterCols being set to NULL by .binarySort() addressing https://github.com/GreenleafLab/ArchR/issues/948 --- R/MarkerFeatures.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index b439771d..7816e1bb 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -965,7 +965,9 @@ plotMarkerHeatmap <- function( mat <- bS[[1]][,colnames(mat),drop=FALSE] } clusterRows <- FALSE - clusterCols <- bS[[2]] + if (clusterCols) { + clusterCols <- bS[[2]] + } }else{ clusterRows <- TRUE clusterCols <- TRUE From 8adea54390f1d6ce2e92370f9a09b8d8409683b3 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 2 Aug 2021 09:39:35 -0700 Subject: [PATCH 054/184] delete requirePackage(genome) --- R/AnnotationPeaks.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 13df1f22..40627965 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -429,7 +429,6 @@ addMotifAnnotations <- function( # Get BSgenome Information! ############################################################# genome <- ArchRProj@genomeAnnotation$genome - .requirePackage(genome) BSgenome <- eval(parse(text = genome)) BSgenome <- validBSgenome(BSgenome) From 818b615896ee661130892de05767d230c782a2fc Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 19 Aug 2021 07:59:24 -0700 Subject: [PATCH 055/184] Set addRowVarsLog2 to true when adding TileMatrix addIterativeLSI depends on the `rowMeansLog2` and `rowVarsLog2` HDF5 groups when `firstSelection = "var"` when using a TileMatrix. 
Brought up in https://github.com/GreenleafLab/ArchR/issues/958 --- R/MatrixTiles.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index 451384eb..402bfc1e 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -267,7 +267,8 @@ addTileMatrix <- function( Group = paste0("TileMatrix/", chr), binarize = binarize, addColSums = TRUE, - addRowSums = TRUE + addRowSums = TRUE, + addRowVarsLog2 = TRUE ) gc() From cd8baa379a169642cd8ba9f71272aea65f8d130e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 24 Aug 2021 11:44:54 -0700 Subject: [PATCH 056/184] Enable null ArchRProj in PlotFootprints --- R/Footprinting.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index baf77703..764a1e17 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -374,7 +374,7 @@ plotFootprints <- function( .validInput(input = smoothWindow, name = "smoothWindow", valid = c("integer", "null")) .validInput(input = baseSize, name = "baseSize", valid = c("numeric")) .validInput(input = plot, name = "plot", valid = c("boolean")) - .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) + .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj", "null")) .validInput(input = plotName, name = "plotName", valid = c("character")) .validInput(input = height, name = "height", valid = c("numeric")) .validInput(input = width, name = "width", valid = c("numeric")) From 96a71cc17bceb99eb5802ad33bc6fda74b8d1229 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 Aug 2021 13:53:38 -0700 Subject: [PATCH 057/184] improve messaging for tabix indexing failure in response to #1000 --- R/CreateArrow.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 55e35670..5a478abd 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -1137,6 +1137,7 @@ createArrowFiles <- function( indexTabix(file, format = "bed") TRUE }, 
error = function(y){ + message("Tabix indexing failed for ", file,". Note that ArchR requires bgzipped fragment files which is different from gzip. See samtools bgzip!") FALSE }) }) @@ -1152,7 +1153,8 @@ createArrowFiles <- function( } }, error = function(x){ tryCatch({ - if(getArchRVerbose()) message("Attempting to index ", file," as bam...") + if(getArchRVerbose()) + ("Attempting to index ", file," as bam...") indexBam(file) TRUE }, error = function(y){ From 38614209d4f9c9cbf0cc4e6b2d24a2ccbb51e512 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 Aug 2021 14:01:51 -0700 Subject: [PATCH 058/184] fix inadvertent typo and add bam index fail message --- R/CreateArrow.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 5a478abd..300cb7c8 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -1153,11 +1153,11 @@ createArrowFiles <- function( } }, error = function(x){ tryCatch({ - if(getArchRVerbose()) - ("Attempting to index ", file," as bam...") + if(getArchRVerbose()) message("Attempting to index ", file," as bam...") indexBam(file) TRUE }, error = function(y){ + message("Indexing of BAM file failed for ",file,".") FALSE }) }) From ee6bc0b89aeb9a5afdd971f2d8ede3c3827dd564 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 26 Aug 2021 08:37:23 -0700 Subject: [PATCH 059/184] add rastr option to plotMarkers --- R/MarkerFeatures.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 7816e1bb..09662341 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -1225,7 +1225,8 @@ plotMarkers <- function( name = NULL, cutOff = "FDR <= 0.01 & abs(Log2FC) >= 0.5", plotAs = "Volcano", - scaleTo = 10^4 + scaleTo = 10^4, + rastr = TRUE ){ .validInput(input = seMarker, name = "seMarker", valid = c("SummarizedExperiment")) @@ -1233,6 +1234,7 @@ plotMarkers <- function( .validInput(input = cutOff, name = "cutOff", valid = c("character")) 
.validInput(input = plotAs, name = "plotAs", valid = c("character")) .validInput(input = scaleTo, name = "scaleTo", valid = c("numeric")) + .validInput(input = rastr, name = "rastr", valid = c("boolean")) #Evaluate AssayNames assayNames <- names(SummarizedExperiment::assays(seMarker)) @@ -1287,7 +1289,7 @@ plotMarkers <- function( ylim = c(-qLFC, qLFC), size = 1, extend = 0, - rastr = TRUE, + rastr = rastr, labelMeans = FALSE, labelAsFactors = FALSE, pal = pal, @@ -1304,7 +1306,7 @@ plotMarkers <- function( xlim = c(-qLFC, qLFC), extend = 0, size = 1, - rastr = TRUE, + rastr = rastr, labelMeans = FALSE, labelAsFactors = FALSE, pal = pal, @@ -1321,7 +1323,7 @@ plotMarkers <- function( xlim = c(-qDiff, qDiff), extend = 0, size = 1, - rastr = TRUE, + rastr = rastr, labelMeans = FALSE, labelAsFactors = FALSE, pal = pal, From 2770d1b9844451cc65a403999cd242eb54d46f20 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 26 Aug 2021 08:40:00 -0700 Subject: [PATCH 060/184] add param definition for rastr --- R/MarkerFeatures.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 09662341..092d3b1b 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -1219,6 +1219,8 @@ markerPlot <- function(...){ #' @param cutOff A valid-syntax logical statement that defines which marker features from `seMarker` will be plotted. #' `cutoff` can contain any of the `assayNames` from `seMarker`. #' @param plotAs A string indicating whether to plot a volcano plot ("Volcano") or an MA plot ("MA"). +#' @param rastr A boolean value that indicates whether the plot should be rasterized using `ggrastr`. This does not rasterize +#' lines and labels, just the internal portions of the plot. 
#' @export plotMarkers <- function( seMarker = NULL, From 18bcead479721978f1d12c21931d83fe294b56d7 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 26 Aug 2021 10:49:13 -0700 Subject: [PATCH 061/184] update getMatches() description detail that the peak order is the same as from getPeakSet() --- R/AnnotationPeaks.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 13df1f22..4a6c473c 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -59,7 +59,8 @@ getPositions <- function(ArchRProj = NULL, name = NULL, annoName = NULL){ #' Get peak annotation matches from an ArchRProject #' -#' This function gets peak annotation matches from a given ArchRProject. +#' This function gets peak annotation matches from a given ArchRProject. The peaks in the returned object are in the +#' same order as the peaks returned by `getPeakSet()`. #' #' @param ArchRProj An `ArchRProject` object. #' @param name The name of the `peakAnnotation` object (i.e. Motifs) to retrieve from the designated `ArchRProject`. From 9366af8b6022f4d7f7d6ff3f65b16725c32fdde4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 27 Aug 2021 15:27:44 -0700 Subject: [PATCH 062/184] add documentation and comments for addModuleScore --- R/ModuleScore.R | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 99af34de..60b6fc07 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -1,12 +1,20 @@ #' Add Module Scores to an ArchRProject #' -#' This function computes imputations weights that describe each cell as a linear combination of many cells based on a MAGIC diffusion matrix. -#' -#' RRR +#' This function calculates a module score from a set of features across all cells. This allows for +#' grouping of multiple features together into a single quantitative measurement. 
Currently, this +#' function only works for modules derived from the `GeneScoreMatrix`. Each module is added as a +#' new column in `cellColData` #' #' @param ArchRProj An `ArchRProject` object. -#' @param seed A number to be used as the seed for random number generation. It is recommended to keep track of the seed used so that you can -#' reproduce results downstream. +#' @param useMatrix The name of the matrix to be used for calculation of the module score. See `getAvailableMatrices()` to view available options. +#' @param name The name to be given to the designated module. If `features` is a list, this name will be prepended to the feature set names given in the list as shown below. +#' @param features A list of feature names to be grouped into modules. For example, `list(BScore = c("MS4A1", "CD79A", "CD74"), TScore = c("CD3D", "CD8A", "GZMB", "CCR7", "LEF1"))`. +#' Each named element in this list will be stored as a separate module. The examples given in these parameters would yield two modules called `Module.Bscore` and `Module.Tscore`. +#' If the elements of this list are not named, they will be numbered in order, i.e. `Module1`, `Module2`. +#' @param nBin The number of bins to use to divide all features for identification of signal-matched features for background calculation +#' @param nBgd The number of background features to use for signal normalization. +#' @param seed A number to be used as the seed for random number generation required when sampling cells for the background set. It is recommended +#' to keep track of the seed used so that you can reproduce results downstream. #' @param threads The number of threads to be used for parallel computing. #' @param logFile The path to a file to be used for logging ArchR output. #' @export @@ -22,6 +30,10 @@ addModuleScore <- function( logFile = createLogFile("addModuleScore") ){ + if(useMatrix %ni% getAvailableMatrices(ArchRProj)){ + stop("useMatrix not in available matrices! 
See getAvailableMatrices!") + } + if(!is.null(seed)) set.seed(seed) #Get Feature DF @@ -29,10 +41,6 @@ addModuleScore <- function( rownames(featureDF) <- paste0(featureDF$seqnames, ":", featureDF$idx) featureDF$Match <- seq_len(nrow(featureDF)) - if(useMatrix %ni% getAvailableMatrices(ArchRProj)){ - stop("useMatrix not in available matrices! See getAvailableMatrices!") - } - matrixClass <- h5read(getArrowFiles(ArchRProj)[1], paste0(useMatrix, "/Info/Class")) if(matrixClass == "Sparse.Assays.Matrix"){ @@ -42,6 +50,7 @@ addModuleScore <- function( } } + #Figure out the index numbers of the selected features within the given matrix if(grepl(":",unlist(features)[1])){ sname <- stringr::str_split(unlist(features),pattern=":",simplify=TRUE)[,1] @@ -76,24 +85,28 @@ addModuleScore <- function( featuresUse <- featureDF[idx,] featuresUse$Module <- Rle(stack(features)[,2]) - #Get Averages + #Get average values for all features and then order the features based on their average values + #so that the features can be binned into nBins rS <- ArchR:::.getRowSums(ArrowFiles = getArrowFiles(ArchRProj), useMatrix = useMatrix) rS <- rS[order(rS[,3]), ] rS$Bins <- Rle(ggplot2::cut_number(x = rS[,3] + rnorm(length(rS[,3]))/1e30, n = nBin, labels = FALSE, right = FALSE)) rS$Match <- match(paste0(rS$seqnames, ":", rS$idx), rownames(featureDF)) + #check that the number of selected background features isnt bigger than the size of each bin if(nBgd > min(rS$Bins@lengths)){ stop("nBgd must be lower than ", min(rS$Bins@lengths), "!") } + #Match the indicies across the different vectors idxMatch <- match(paste0(featuresUse$seqnames, ":", featuresUse$idx), paste0(rS$seqnames, ":", rS$idx)) featuresUse$Bins <- as.vector(rS$Bins[idxMatch]) - #MakeLists - featureList <- split(featuresUse$Match, featuresUse$Module) - moduleList <- split(featuresUse$Bins, featuresUse$Module) - binList <- split(rS$Match, rS$Bins) + #Make lists + featureList <- split(featuresUse$Match, featuresUse$Module) #feature 
indicies per module + moduleList <- split(featuresUse$Bins, featuresUse$Module) #bins for each feature per module + binList <- split(rS$Match, rS$Bins) #list of all indicies for each bin + #calculate the module score by normalizing to a background set of features dfM <- lapply(seq_along(featureList), function(x){ message("Computing Module ",x, " of ", length(featureList)) binx <- binList[moduleList[[x]]] @@ -111,6 +124,7 @@ addModuleScore <- function( Matrix::colMeans(m[seq_along(idxFgd), ]) - Matrix::colMeans(m[-seq_along(idxFgd), ]) }) %>% Reduce("cbind", .) + #add the module scores as new columns in cellColData for(x in seq_len(ncol(dfM))){ ArchRProj <- addCellColData(ArchRProj, data = dfM[,x], name=names(featureList)[x], cells=rownames(dfM), force = TRUE) } From 8fa2c857fe745b08f0a461f8ec624e2c0815981b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 27 Aug 2021 16:28:46 -0700 Subject: [PATCH 063/184] add validInput checks to addModuleScore --- R/ModuleScore.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 60b6fc07..06daa745 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -30,6 +30,16 @@ addModuleScore <- function( logFile = createLogFile("addModuleScore") ){ + .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) + .validInput(input = useMatrix, name = "useMatrix", valid = c("character")) + .validInput(input = name, name = "name", valid = c("character")) + .validInput(input = features, name = "features", valid = c("character")) + .validInput(input = nBin, name = "nBin", valid = c("integer")) + .validInput(input = nBgd, name = "nBgd", valid = c("integer")) + .validInput(input = seed, name = "seed", valid = c("integer","null")) + .validInput(input = threads, name = "threads", valid = c("integer")) + .validInput(input = logFile, name = "logFile", valid = c("character", "null")) + if(useMatrix %ni% getAvailableMatrices(ArchRProj)){ stop("useMatrix not in available matrices! 
See getAvailableMatrices!") } From 96aa3a8c39119c8ad521f9b2002009e329d74a3e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 10 Sep 2021 09:57:13 -0700 Subject: [PATCH 064/184] Fix coverage files for chromosomes with no insertions https://github.com/GreenleafLab/ArchR/issues/1025 throw a warning when a seqname is found with no insertions but do not write an empty entry to the BED file --- R/GroupCoverages.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index b2769a86..0c2f59d1 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -762,7 +762,11 @@ addGroupCoverages <- function( if(x == 1) .logThis(iS, "InsertionSites", logFile = logFile) iS <- data.table(seqnames = allChr[x], start = iS - 1L, end = iS) if(x == 1) .logThis(iS, "InsertionSites-DT", logFile = logFile) - data.table::fwrite(iS, out, sep = "\t", col.names = FALSE, append = TRUE) + if(!any(is.na(iS$start))) { + data.table::fwrite(iS, out, sep = "\t", col.names = FALSE, append = TRUE) + } else { + .logMessage(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,"."), logFile = logFile) + } }, error = function(e){ errorList <- list( x = x, From e39e2784a7adc23f8476cd01c74fc4f5d5991f9b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 13 Sep 2021 07:53:41 -0700 Subject: [PATCH 065/184] add RNGkind("L'Ecuyer-CMRG") to fix mclapply seed https://github.com/GreenleafLab/ArchR/issues/756 mclapply will generate new random seeds unless you set RNGkind("L'Ecuyer-CMRG") outside. 
To handle this, I added this call to `addArchRThreads()` when `threads > 1` --- R/GlobalDefaults.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/GlobalDefaults.R b/R/GlobalDefaults.R index aae3753b..6fc570ab 100644 --- a/R/GlobalDefaults.R +++ b/R/GlobalDefaults.R @@ -284,6 +284,9 @@ addArchRThreads <- function(threads = floor(parallel::detectCores()/ 2), force = message("Input threads is equal to or greater than ncores minus 1 (",parallel::detectCores()-1,")\nSetting cores to ncores minus 2. Set force = TRUE to set above this number!") threads <- parallel::detectCores()-2 } + if(threads > 1){ + RNGkind("L'Ecuyer-CMRG") + } } message("Setting default number of Parallel threads to ", threads, ".") From 87b2dfb8d4d478bec551e137cbe503c83dd6a4a6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 17 Sep 2021 08:16:09 -0700 Subject: [PATCH 066/184] fix errorList biasMat typo mentioned in https://github.com/GreenleafLab/ArchR/issues/1034 --- R/Footprinting.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index 764a1e17..c9da4bea 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -501,7 +501,7 @@ plotFootprints <- function( biasMat <- t(t(biasMat) / colMeans(biasMat[idx, ,drop=FALSE])) errorList$footMatNorm <- footMat - errorList$biasMatNorm <- footMat + errorList$biasMatNorm <- biasMat #Norm Foot By Bias if(tolower(normMethod) == "none"){ From 4d005fef65923b487ffe6f6151c367f0a4f61408 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 23 Sep 2021 11:40:13 -0700 Subject: [PATCH 067/184] add warning message to console in addition to log file --- R/GroupCoverages.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index 0c2f59d1..d2dea9ea 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -765,6 +765,7 @@ addGroupCoverages <- function( if(!any(is.na(iS$start))) { data.table::fwrite(iS, out, sep = "\t", col.names = FALSE, append = TRUE) } else { + 
message(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,".") .logMessage(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,"."), logFile = logFile) } }, error = function(e){ From 5d666ffb7b546387a182472977ae3b75d4a84236 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 23 Sep 2021 11:40:58 -0700 Subject: [PATCH 068/184] fix typo --- R/GroupCoverages.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index d2dea9ea..4e4145a3 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -765,7 +765,7 @@ addGroupCoverages <- function( if(!any(is.na(iS$start))) { data.table::fwrite(iS, out, sep = "\t", col.names = FALSE, append = TRUE) } else { - message(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,".") + message(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,".")) .logMessage(paste0("Warning - No insertions found on seqnames ", allChr[x], " for coverageFile ", coverageFile,"."), logFile = logFile) } }, error = function(e){ From f6ac862f62d5fa5dea0932b000ecaeed34420e3a Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 24 Sep 2021 07:40:50 -0700 Subject: [PATCH 069/184] fix RNGkind if statement --- R/GlobalDefaults.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/GlobalDefaults.R b/R/GlobalDefaults.R index 6fc570ab..14bfd94f 100644 --- a/R/GlobalDefaults.R +++ b/R/GlobalDefaults.R @@ -284,9 +284,9 @@ addArchRThreads <- function(threads = floor(parallel::detectCores()/ 2), force = message("Input threads is equal to or greater than ncores minus 1 (",parallel::detectCores()-1,")\nSetting cores to ncores minus 2. 
Set force = TRUE to set above this number!") threads <- parallel::detectCores()-2 } - if(threads > 1){ - RNGkind("L'Ecuyer-CMRG") - } + } + if(threads > 1){ + RNGkind("L'Ecuyer-CMRG") } message("Setting default number of Parallel threads to ", threads, ".") From 6765ad962d4d8dcb292a326071c9b5c30c25918e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 11 Oct 2021 12:40:21 -0700 Subject: [PATCH 070/184] Create config.yml --- .github/ISSUE_TEMPLATE/config.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/ISSUE_TEMPLATE/config.yml diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..3ba13e0c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false From c61f782e0feb12433e365c46440b5f87dffffac6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 29 Oct 2021 08:02:12 -0700 Subject: [PATCH 071/184] fix grey background in gene tracks suggested in https://github.com/GreenleafLab/ArchR/issues/1020#issuecomment-954490261 --- R/ArchRBrowser.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 3b139ea1..41be35fc 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1352,17 +1352,17 @@ plotBrowserTrack <- function( #Add Labels if There are Genes with this orientation! if(length(which(genesO$strand!="-")) > 0){ p <- p + ggrepel::geom_label_repel(data=genesO[which(genesO$strand!="-"),], - aes(x = start, y = cluster, label = symbol, color = strand, fill = NA), + aes(x = start, y = cluster, label = symbol, color = strand), segment.color = "grey", nudge_x = -0.01*(end(region) - start(region)), nudge_y = -0.25, - size = labelSize, direction = "x") + size = labelSize, direction = "x", inherit.aes=FALSE) } #Add Labels if There are Genes with this orientation! 
if(length(which(genesO$strand=="-")) > 0){ p <- p + ggrepel::geom_label_repel(data=genesO[which(genesO$strand=="-"),], - aes(x = end, y = cluster, label = symbol, color = strand, fill = NA), + aes(x = end, y = cluster, label = symbol, color = strand), segment.color = "grey", nudge_x = +0.01*(end(region) - start(region)), nudge_y = 0.25, - size = labelSize, direction = "x") + size = labelSize, direction = "x", inherit.aes=FALSE) } p <- p + theme(legend.justification = c(0, 1), From b2f3d95171ea1d708ae1e39a980669091d02689f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 29 Oct 2021 08:34:20 -0700 Subject: [PATCH 072/184] Fixing typo in blacklisted tile removal suggested in https://github.com/GreenleafLab/ArchR/issues/1110#issuecomment-953000886 --- R/MatrixGeneScores.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index afbd14f3..4957141a 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -396,7 +396,7 @@ addGeneScoreMatrix <- function( if(!is.null(blacklist)){ if(length(blacklist) > 0){ blacklistz <- blacklist[[chrz]] - if(is.null(blacklistz) | length(blacklistz) > 0){ + if(!is.null(blacklistz) | length(blacklistz) > 0){ tilesBlacklist <- 1 * (!overlapsAny(uniqueTiles, ranges(blacklistz))) if(sum(tilesBlacklist == 0) > 0){ x <- x * tilesBlacklist[subjectHits(tmp)] #Multiply Such That All Blacklisted Tiles weight is now 0! 
From 75544172bc1962fd0050bb190450ee826ce1aba0 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 1 Nov 2021 21:24:43 -0700 Subject: [PATCH 073/184] fix logic of if statement after partial matrix sampling in response to https://github.com/GreenleafLab/ArchR/issues/1130 --- R/ArrowRead.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 20bbdb6c..d1124772 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -783,10 +783,10 @@ getMatrixFromArrow <- function( matFiles <- lapply(mat, function(x) x[[2]]) %>% Reduce("c", .) mat <- lapply(mat, function(x) x[[1]]) %>% Reduce("cbind", .) - if(!all(cellNames %in% colnames(mat))){ + if(!all(colnames(mat) %in% cellNames)){ .logThis(sampledCellNames, "cellNames supplied", logFile = logFile) .logThis(colnames(mat), "cellNames from matrix", logFile = logFile) - stop("Error not all cellNames found in partialMatrix") + stop("Error - some colnames in partialMatrix do not match cellNames") } mat <- mat[,sampledCellNames, drop = FALSE] mat <- .checkSparseMatrix(mat, length(sampledCellNames)) From 422f0ffcfebfd061e38665efddaa4844d1fa0a82 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 1 Nov 2021 21:35:28 -0700 Subject: [PATCH 074/184] revert changes mistakenly made to master --- R/ArrowRead.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index d1124772..20bbdb6c 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -783,10 +783,10 @@ getMatrixFromArrow <- function( matFiles <- lapply(mat, function(x) x[[2]]) %>% Reduce("c", .) mat <- lapply(mat, function(x) x[[1]]) %>% Reduce("cbind", .) 
- if(!all(colnames(mat) %in% cellNames)){ + if(!all(cellNames %in% colnames(mat))){ .logThis(sampledCellNames, "cellNames supplied", logFile = logFile) .logThis(colnames(mat), "cellNames from matrix", logFile = logFile) - stop("Error - some colnames in partialMatrix do not match cellNames") + stop("Error not all cellNames found in partialMatrix") } mat <- mat[,sampledCellNames, drop = FALSE] mat <- .checkSparseMatrix(mat, length(sampledCellNames)) From b38b3bda99eff78c4ea56d22a2b4eac422ad4c03 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 3 Nov 2021 21:36:48 -0700 Subject: [PATCH 075/184] Fix distTSS bug raised in https://github.com/GreenleafLab/ArchR/issues/1122 --- R/ReproduciblePeakSet.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 6c50c0cf..21fe8c11 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -625,9 +625,9 @@ addReproduciblePeakSet <- function( distTSS <- distanceToNearest(peakSummits, resize(geneAnnotation$TSS, 1, "start"), ignore.strand = TRUE) mcols(peaks)$distToTSS <- mcols(distTSS)$distance if("symbol" %in% colnames(mcols(geneAnnotation$TSS))){ - mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$symbol[subjectHits(distPeaks)] + mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$symbol[subjectHits(distTSS)] }else if("tx_name" %in% colnames(mcols(geneAnnotation$TSS))){ - mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$tx_name[subjectHits(distPeaks)] + mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$tx_name[subjectHits(distTSS)] } #Get NucleoTide Content From 351875aa3a7a634bac4b8ba08ac7099b99928e01 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 4 Nov 2021 11:57:49 -0700 Subject: [PATCH 076/184] fix typo in validInput for features make it a list instead of character --- R/ModuleScore.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 06daa745..87920983 100644 --- 
a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -33,7 +33,7 @@ addModuleScore <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = useMatrix, name = "useMatrix", valid = c("character")) .validInput(input = name, name = "name", valid = c("character")) - .validInput(input = features, name = "features", valid = c("character")) + .validInput(input = features, name = "features", valid = c("list")) .validInput(input = nBin, name = "nBin", valid = c("integer")) .validInput(input = nBgd, name = "nBgd", valid = c("integer")) .validInput(input = seed, name = "seed", valid = c("integer","null")) From d32148104a5ffe82bcd40b30c7493618df8ea74b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 10 Nov 2021 12:06:44 -0800 Subject: [PATCH 077/184] update error handling for binarized matrix with top addressing https://github.com/GreenleafLab/ArchR/issues/958#issuecomment-954830868 --- R/IterativeLSI.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 8a341c5b..d708df14 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -211,7 +211,10 @@ addIterativeLSI <- function( if(tolower(firstSelection) == "top"){ if(!binarize){ - stop("Please binarize data if using top selection for first iteration! Set binarize = TRUE!") + matClass <- h5read(ArrowFiles[1], paste0(useMatrix,"/Info/Class")) + if(matClass != "Sparse.Binary.Matrix"){ + stop("Input matrix is not binarized and binarize != TRUE. Please use binarized data if using top selection for first iteration! 
Set binarize = TRUE!") + } } #Compute Row Sums Across All Samples From 3927b7061e4f892a6082a17c43840804e92da356 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 11 Nov 2021 10:22:35 -0800 Subject: [PATCH 078/184] fix check for feature not in matrix in response to https://github.com/GreenleafLab/ArchR/issues/1142#issuecomment-964771521 --- R/ModuleScore.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 87920983..aecd7c44 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -78,7 +78,7 @@ addModuleScore <- function( idx <- lapply(seq_along(unlist(features)), function(x){ ix <- which(tolower(unlist(features)[x]) == tolower(featureDF$name))[1] - if(length(ix)==0){ + if(is.na(ix)){ .logStop(sprintf("FeatureName (%s) does not exist! See getFeatures", unlist(features)[x]), logFile = logFile) } ix From 4800d9ff7ecd6bc65364027d8cebfefe6510777d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 11 Nov 2021 10:42:40 -0800 Subject: [PATCH 079/184] improve error message --- R/ModuleScore.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index aecd7c44..2e7d4fd9 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -69,7 +69,7 @@ addModuleScore <- function( idx <- lapply(seq_along(name), function(x){ ix <- intersect(which(tolower(name[x]) == tolower(featureDF$name)), BiocGenerics::which(tolower(sname[x]) == tolower(featureDF$seqnames))) if(length(ix)==0){ - .logStop(sprintf("FeatureName (%s) does not exist! See getFeatures", name[x]), logFile = logFile) + .logStop(sprintf("FeatureName (%s) does not exist! See available features using getFeatures()", name[x]), logFile = logFile) } ix }) %>% unlist @@ -79,7 +79,7 @@ addModuleScore <- function( idx <- lapply(seq_along(unlist(features)), function(x){ ix <- which(tolower(unlist(features)[x]) == tolower(featureDF$name))[1] if(is.na(ix)){ - .logStop(sprintf("FeatureName (%s) does not exist! 
See getFeatures", unlist(features)[x]), logFile = logFile) + .logStop(sprintf("FeatureName (%s) does not exist! See available features using getFeatures()", unlist(features)[x]), logFile = logFile) } ix }) %>% unlist From 9997f87c0b449224738e9f3245507424ce4f1887 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 15 Nov 2021 09:35:50 -0800 Subject: [PATCH 080/184] make acceptable fragments less or equal to chr ends in response to https://github.com/GreenleafLab/ArchR/issues/1145#issuecomment-968334964 --- R/MatrixTiles.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index 402bfc1e..4b420c50 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -5,7 +5,6 @@ #' Add TileMatrix to ArrowFiles or an ArchRProject #' #' This function, for each sample, will independently compute counts for each tile -#' per cell in the ArrowFile #' #' @param input An `ArchRProject` object or character vector of ArrowFiles. #' @param chromSizes A named numeric vector containing the chromsome names and lengths. The default behavior is to retrieve @@ -212,8 +211,8 @@ addTileMatrix <- function( fragments <- fragments[start(fragments) >= 1] #Check 2 - fragmentsBad2 <- fragments[!(end(fragments) < chromLengths[z])] - fragments <- fragments[end(fragments) < chromLengths[z]] + fragmentsBad2 <- fragments[!(end(fragments) <= chromLengths[z])] + fragments <- fragments[end(fragments) <= chromLengths[z]] #Check N nf2 <- length(fragments) From 0630ae09571734338520a16f801cc8a71a06d2f1 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 16 Nov 2021 15:14:56 -0800 Subject: [PATCH 081/184] add require(parallel) In Bioconductor >3.1, at some point the BiocGenerics package stopped depending on the parallel package. Since parallel no longer gets loaded at runtime with ArchR, this require statement ensures that it has been loaded. 
--- R/HiddenUtils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 4ef6980e..19eccbc3 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -393,7 +393,7 @@ } if(threads > 1){ - + require(parallel) o <- mclapply(..., mc.cores = threads, mc.preschedule = preschedule) errorMsg <- list() From 560764a94c1cb117845cefd72f0089097a3e04b2 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 17 Nov 2021 08:16:31 -0800 Subject: [PATCH 082/184] log ranges1/2 before removal in response to https://github.com/GreenleafLab/ArchR/issues/1169#issue-1056312775 --- R/IntegrativeAnalysis.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 4e85635d..f662630b 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -475,7 +475,6 @@ correlateTrajectories <- function( #mcols(ranges1) <- featureDF1 names(ranges1) <- rownames(featureDF1) rowRanges(seTrajectory1) <- ranges1 - rm(ranges1) if("strand" %in% colnames(featureDF2)){ ranges2 <- GRanges( @@ -492,11 +491,12 @@ correlateTrajectories <- function( #mcols(ranges2) <- featureDF2 names(ranges2) <- rownames(featureDF2) rowRanges(seTrajectory2) <- ranges2 - rm(ranges2) .logThis(ranges1, "ranges1", logFile = logFile) .logThis(ranges2, "ranges2", logFile = logFile) - + rm(ranges1) + rm(ranges2) + #Find Associations to test isStranded1 <- any(as.integer(strand(seTrajectory1)) == 2) isStranded2 <- any(as.integer(strand(seTrajectory2)) == 2) From 84c8ff41940d8ce1ab2efa8237f3376a257a331d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 18 Nov 2021 14:39:50 -0800 Subject: [PATCH 083/184] address boundary case where chr length is a multiple of num tiles in response to https://github.com/GreenleafLab/ArchR/issues/1163 --- R/GroupExport.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/GroupExport.R b/R/GroupExport.R index 9102c6f3..fdb15a07 100644 --- a/R/GroupExport.R +++ 
b/R/GroupExport.R @@ -328,7 +328,10 @@ getGroupBW <- function( }else{ #N Tiles - nTiles <- trunc(chromLengths[availableChr[k]] / tileSize) + 1 + nTiles <- chromLengths[availableChr[k]] / tileSize + if (nTiles%%1 != 0) { + nTiles <- trunc(nTiles) + 1 + } #Create Sparse Matrix matchID <- S4Vectors::match(mcols(fragik)$RG, cellGroupi) From 2291f019b8ed3753fbf3d9d4d808a61163536340 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 18 Nov 2021 15:24:30 -0800 Subject: [PATCH 084/184] add check for success for file download --- R/InputData.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/InputData.R b/R/InputData.R index 178572e8..1e12c11e 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -36,6 +36,10 @@ getTutorialData <- function( ) }, threads = min(threads, length(filesUrl))) + #check for success of file download + if(!all(unlist(downloadFiles) == 0)) { + stop("Error! Some tutorial files did not download successfully. Please try again.") + } } pathFragments <- "HemeFragments" From 41e621b8b6960300e0ef93478baf1313edc071f9 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 19 Nov 2021 09:06:34 -0800 Subject: [PATCH 085/184] update loading of parallel to use native ArchR .requirePackage --- R/HiddenUtils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 19eccbc3..57fdad35 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -393,7 +393,7 @@ } if(threads > 1){ - require(parallel) + .requirePackage("parallel", source = "cran") o <- mclapply(..., mc.cores = threads, mc.preschedule = preschedule) errorMsg <- list() From 5e489a3b765b38d14a000ed36fa74119f43b8102 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 19 Nov 2021 09:24:15 -0800 Subject: [PATCH 086/184] Sort SE containing rowRanges prior to return In response to https://github.com/GreenleafLab/ArchR/issues/1148 --- R/ArrowRead.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 55d12c41..0d9c786f 100644 --- 
a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -357,6 +357,7 @@ getMatrixFromProject <- function( .logDiffTime("Constructing SummarizedExperiment", t1 = tstart, verbose = verbose, logFile = logFile) if(!is.null(rR1)){ se <- SummarizedExperiment(assays = asy, colData = cD, rowRanges = rR1) + se <- sort(se) }else{ se <- SummarizedExperiment(assays = asy, colData = cD, rowData = rD1) } From c4296eea67d3a6dc771a4d9e33256c3443a38c2f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 19 Nov 2021 10:29:30 -0800 Subject: [PATCH 087/184] define prefix in response to https://github.com/GreenleafLab/ArchR/issues/586 --- R/MatrixFeatures.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/MatrixFeatures.R b/R/MatrixFeatures.R index a13730ae..15e50137 100644 --- a/R/MatrixFeatures.R +++ b/R/MatrixFeatures.R @@ -265,6 +265,8 @@ addPeakMatrix <- function( for(z in seq_along(uniqueChr)){ + prefix <- sprintf("Chr %s (%s of %s)!", uniqueChr[z], z, length(uniqueChr)) + o <- tryCatch({ o <- h5closeAll() From 48dccf361bab6d9191a4d9b61e1317acfa71167c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 22 Nov 2021 08:42:54 -0800 Subject: [PATCH 088/184] fix typo in check for isDeviations Supposed to be fixed in https://github.com/GreenleafLab/ArchR/issues/78 but "dev" was supposed to be "deviations" raised again in https://github.com/GreenleafLab/ArchR/discussions/1177 --- R/MarkerFeatures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 7816e1bb..56c95e8a 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -129,7 +129,7 @@ getMarkerFeatures <- function( .logThis(range(as.vector(table(paste0(featureDF$seqnames)))), "FeaturesPerSeqnames", logFile = logFile) isDeviations <- FALSE - if(all(unique(paste0(featureDF$seqnames)) %in% c("z", "dev"))){ + if(all(unique(paste0(featureDF$seqnames)) %in% c("z", "deviations"))){ isDeviations <- TRUE } From 939dd9ec39caf349e59cd7a17151795cfa5578f3 Mon Sep 17 00:00:00 2001 From: 
Ryan Corces Date: Wed, 1 Dec 2021 16:20:08 -0800 Subject: [PATCH 089/184] Create label-actions.yml --- .github/label-actions.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/label-actions.yml diff --git a/.github/label-actions.yml b/.github/label-actions.yml new file mode 100644 index 00000000..543023f6 --- /dev/null +++ b/.github/label-actions.yml @@ -0,0 +1,20 @@ +# Configuration for Label Actions - https://github.com/dessant/label-actions + +# When the `bug` label is added to issues +bug: + issues: + # Post a comment + comment: > + Thanks for using ArchR! Please make sure that your post belongs in the Issues section! This is where bugs and error reports belong.
+ Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.

+ __Before we help you, you must respond to the following questions__ unless your original post already contained this information:
+ __1.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example.
+ __2.__ If you've encountered an error, have you already searched the full manual and previous Issues to make sure that this hasn't already been solved?
+ __3.__ Did you post your log file? If not, add it now.
+ +# When the `new_discussion` label is added to discussions +#new_discussion: +# discussions: +# # Post a comment +# comment: > +# Thanks for using ArchR! This is a discussion post From 442a6a2960966b572ee2f4ad93fc68b98032500b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 1 Dec 2021 16:21:59 -0800 Subject: [PATCH 090/184] Delete label-actions.yml --- .github/label-actions.yml | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 .github/label-actions.yml diff --git a/.github/label-actions.yml b/.github/label-actions.yml deleted file mode 100644 index 543023f6..00000000 --- a/.github/label-actions.yml +++ /dev/null @@ -1,20 +0,0 @@ -# Configuration for Label Actions - https://github.com/dessant/label-actions - -# When the `bug` label is added to issues -bug: - issues: - # Post a comment - comment: > - Thanks for using ArchR! Please make sure that your post belongs in the Issues section! This is where bugs and error reports belong.
- Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.

- __Before we help you, you must respond to the following questions__ unless your original post already contained this information:
- __1.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example.
- __2.__ If you've encountered an error, have you already searched the full manual and previous Issues to make sure that this hasn't already been solved?
- __3.__ Did you post your log file? If not, add it now.
- -# When the `new_discussion` label is added to discussions -#new_discussion: -# discussions: -# # Post a comment -# comment: > -# Thanks for using ArchR! This is a discussion post From 99a094381434d2b3d72f45d8309363cb7eb5f395 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 2 Dec 2021 10:56:12 -0800 Subject: [PATCH 091/184] Delete auto-comment.yml --- .github/auto-comment.yml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .github/auto-comment.yml diff --git a/.github/auto-comment.yml b/.github/auto-comment.yml deleted file mode 100644 index 53fd071d..00000000 --- a/.github/auto-comment.yml +++ /dev/null @@ -1,6 +0,0 @@ -# Comment to a new issue. -issueOpened: > - Thank your for raising a issue. We will try and get back to you as soon as possible. - - Please make sure you have given us as much context as possible. - From 5a09694112345f5b4e65126f90e40316fb63ffcb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 2 Dec 2021 11:33:56 -0800 Subject: [PATCH 092/184] Create auto-comment.yml --- .github/workflows/auto-comment.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/auto-comment.yml diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml new file mode 100644 index 00000000..1853bb0d --- /dev/null +++ b/.github/workflows/auto-comment.yml @@ -0,0 +1,24 @@ +name: Auto Comment +on: [issues] +jobs: + run: + runs-on: ubuntu-latest + steps: + - uses: wow-actions/auto-comment@v1 + with: + GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + issuesOpened: | + Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
+ __Before we help you, you must respond to the following questions__ unless your original post already contained this information: + __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? + __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. + __3.__ Did you post your log file? If not, add it now. + +# issuesOpened: | +# Hi @{{ author }}! Thanks for using ArchR! I am currently on paternity leave and will not be responding to any issues or discussion threads. I plan to be back in late January and will do my best to address your issue then.
+# In the meantime, it is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment. Search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for.
+# If you are able to solve your issue, please post the solution and close this issue post.
+# Otherwise __if you would like my help when I return, you must respond to the following questions__ unless your original post already contained this information: +# __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? +# __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. +# __3.__ Did you post your log file? If not, add it now. From 63e7883586a013b9a9c264ddb19dea18d9287087 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 2 Dec 2021 13:52:25 -0800 Subject: [PATCH 093/184] revert change to ylim Previous commit https://github.com/GreenleafLab/ArchR/commit/8caa716086832c2560b867b6e4a0d1bab48716e1 was not correct and broke the ArchRBrowser as detailed in https://github.com/GreenleafLab/ArchR/issues/1206 This commit reverts that change and also updates the param definition for `ylim` to make it clear how this argument should be used. --- R/ArchRBrowser.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 41be35fc..41be2a83 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -665,7 +665,7 @@ ArchRBrowserTrack <- function(...){ #' @param normMethod The name of the column in `cellColData` by which normalization should be performed. The recommended and default value #' is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. #' @param threads The number of threads to use for parallel execution. -#' @param ylim The numeric quantile y-axis limit to be used for for "bulkTrack" plotting. If not provided, the y-axis limit will be c(0, 0.999). +#' @param ylim The numeric quantile y-axis limit to be used for for "bulkTrack" plotting. This should be expressed as `c(lower limit, upper limit)` such as `c(0,0.99)`. If not provided, the y-axis limit will be c(0, 0.999). 
#' @param pal A custom palette (see `paletteDiscrete` or `ArchRPalettes`) used to override coloring for groups. #' @param baseSize The numeric font size to be used in the plot. This applies to all plot labels. #' @param scTileSize The width of the tiles in scTracks. Larger numbers may make cells overlap more. Default is 0.5 for about 100 cells. @@ -997,7 +997,7 @@ plotBrowserTrack <- function( # Plot Track ###################################################### if(!is.null(ylim)){ - ylim <- c(0,quantile(df$y, probs=c(ylim))) + ylim <- quantile(df$y, ylim) df$y[df$y < ylim[1]] <- ylim[1] df$y[df$y > ylim[2]] <- ylim[2] }else{ From 92ab814f86be0cea75c661f9827a9549c2cf47f5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 4 Dec 2021 10:21:51 -0800 Subject: [PATCH 094/184] Update auto-comment.yml --- .github/workflows/auto-comment.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml index 1853bb0d..4337e0ac 100644 --- a/.github/workflows/auto-comment.yml +++ b/.github/workflows/auto-comment.yml @@ -7,18 +7,18 @@ jobs: - uses: wow-actions/auto-comment@v1 with: GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} - issuesOpened: | - Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
- __Before we help you, you must respond to the following questions__ unless your original post already contained this information: - __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? - __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. - __3.__ Did you post your log file? If not, add it now. - # issuesOpened: | -# Hi @{{ author }}! Thanks for using ArchR! I am currently on paternity leave and will not be responding to any issues or discussion threads. I plan to be back in late January and will do my best to address your issue then.
-# In the meantime, it is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment. Search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for.
-# If you are able to solve your issue, please post the solution and close this issue post.
-# Otherwise __if you would like my help when I return, you must respond to the following questions__ unless your original post already contained this information: +# Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
+# __Before we help you, you must respond to the following questions__ unless your original post already contained this information: # __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? # __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. # __3.__ Did you post your log file? If not, add it now. + + issuesOpened: | + Hi @{{ author }}! Thanks for using ArchR! I am currently on paternity leave and will not be responding to any issues or discussion threads. I plan to be back in late January and will do my best to address your issue then.
+ In the meantime, it is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment. Search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for.
+ If you are able to solve your issue, please post the solution and close this issue post.
+ Otherwise __if you would like my help when I return, you must respond to the following questions__ unless your original post already contained this information: + __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? + __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. + __3.__ Did you post your log file? If not, add it now. From 2ec4b9a7eb218c2d89f760e0112b1a559d8fada5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 28 Jan 2022 11:28:00 -0800 Subject: [PATCH 095/184] Update auto-comment.yml --- .github/workflows/auto-comment.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml index 4337e0ac..1853bb0d 100644 --- a/.github/workflows/auto-comment.yml +++ b/.github/workflows/auto-comment.yml @@ -7,18 +7,18 @@ jobs: - uses: wow-actions/auto-comment@v1 with: GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} -# issuesOpened: | -# Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
-# __Before we help you, you must respond to the following questions__ unless your original post already contained this information: -# __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? -# __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. -# __3.__ Did you post your log file? If not, add it now. - issuesOpened: | - Hi @{{ author }}! Thanks for using ArchR! I am currently on paternity leave and will not be responding to any issues or discussion threads. I plan to be back in late January and will do my best to address your issue then.
- In the meantime, it is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment. Search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for.
- If you are able to solve your issue, please post the solution and close this issue post.
- Otherwise __if you would like my help when I return, you must respond to the following questions__ unless your original post already contained this information: + Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
+ __Before we help you, you must respond to the following questions__ unless your original post already contained this information: __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. __3.__ Did you post your log file? If not, add it now. + +# issuesOpened: | +# Hi @{{ author }}! Thanks for using ArchR! I am currently on paternity leave and will not be responding to any issues or discussion threads. I plan to be back in late January and will do my best to address your issue then.
+# In the meantime, it is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment. Search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for.
+# If you are able to solve your issue, please post the solution and close this issue post.
+# Otherwise __if you would like my help when I return, you must respond to the following questions__ unless your original post already contained this information: +# __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? +# __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. +# __3.__ Did you post your log file? If not, add it now. From fdfce2d27949be94a4fb23c9fc2982532a63fe94 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Feb 2022 09:56:29 -0800 Subject: [PATCH 096/184] update no cell overlap error message --- R/MatrixGeneExpression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index 1b432e45..fcf7348f 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -65,7 +65,7 @@ addGeneExpressionMatrix <- function( .logMessage("Overlap w/ scATAC = ", round(overlap,3), logFile = logFile, verbose = TRUE) if(overlap == 0){ - stop("No overlap found with scATAC!") + stop("No overlapping cell names found between ArrowFiles and seRNA object!") } splitCells <- split(cellsInArrows, stringr::str_split(cellsInArrows, pattern = "#", simplify=TRUE)[,1]) From 5350569e904ccc2a48e4ddc6c13acbf220f3ad23 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Feb 2022 09:57:01 -0800 Subject: [PATCH 097/184] update overlap error message --- R/MatrixGeneExpression.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index fcf7348f..7d91a480 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -65,7 +65,7 @@ addGeneExpressionMatrix <- function( .logMessage("Overlap w/ scATAC = ", round(overlap,3), logFile = logFile, verbose = TRUE) if(overlap == 0){ - stop("No overlapping cell names found between ArrowFiles and seRNA object!") + stop("No overlapping cell names found 
between ArrowFiles and seRNA object! Cell names in ArrowFiles must match colnames in seRNA!") } splitCells <- split(cellsInArrows, stringr::str_split(cellsInArrows, pattern = "#", simplify=TRUE)[,1]) From e5752763cec99c686144d1f07853004e686c5906 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Feb 2022 10:07:21 -0800 Subject: [PATCH 098/184] fix processing multiple RNA inputs In response to https://github.com/GreenleafLab/ArchR/issues/507 --- R/MultiModal.R | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index ac20df74..b645d046 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -17,21 +17,43 @@ import10xFeatureMatrix <- function( featureType = "Gene Expression" ){ - if(!all(file.exists(input))){ + .validInput(input = input, name = "input", valid = c("character")) + .validInput(input = names, name = "names", valid = c("character")) + .validInput(input = featureType, name = "featureType", valid = c("character")) + + if (!all(file.exists(input))) { stop("Not all input file paths exist!") } - - featureMats <- lapply(seq_along(input), function(y){ + featureMats <- lapply(seq_along(input), function(y) { message("Importing Feature Matrix ", y, " of ", length(input)) - .importFM(featureMatrix = input[y], featureType = featureType, name = names[y]) + ArchR:::.importFM(featureMatrix = input[y], featureType = featureType, + name = names[y]) }) - featureMats <- tryCatch({ - Reduce("cbind", featureMats) - }, error = function(e){ - message("Error in combining individual feature matrices! 
Returning as a list of individual feature matrices!") - featureMats - }) + #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects + if(length(featureMats) > 1) { + featureMats <- tryCatch({ + rse_final <- featureMats[[1]] + #for each element of the list, test to make sure each SE attribute is identical + for(i in 2:length(featureMats)){ + + if(!all.equal(rownames(featureMats[[1]]),rownames(featureMats[[i]]))) { + stop("Error - rownames (genes) of individual RNA objects are not equivalent.") + } + if(!all.equal(rowData(featureMats[[1]]),rowData(featureMats[[i]]))) { + stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") + } + if(!all.equal(names(assays(featureMats[[1]])),names(assays(featureMats[[i]])))) { + stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") + } + rse_final <- cbind(rse_final,featureMats[[i]]) + } + rse_final + }, error = function(e) { + message("Error in combining individual feature matrices! 
Returning as a list of individual feature matrices!") + featureMats + }) + } featureMats From 705e9f5b59b70cbe406d8f708d533813d1e8a4cb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Feb 2022 10:13:47 -0800 Subject: [PATCH 099/184] remove unnecessary ArchR::: reference --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index b645d046..db257fc9 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -26,7 +26,7 @@ import10xFeatureMatrix <- function( } featureMats <- lapply(seq_along(input), function(y) { message("Importing Feature Matrix ", y, " of ", length(input)) - ArchR:::.importFM(featureMatrix = input[y], featureType = featureType, + .importFM(featureMatrix = input[y], featureType = featureType, name = names[y]) }) From c95add0b8ffbc10efb3d5e072c00c6dfde7747c7 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 18 Feb 2022 09:16:12 -0800 Subject: [PATCH 100/184] remove try catch and improve error handling --- R/MultiModal.R | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index db257fc9..b77ae8ff 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -31,32 +31,26 @@ import10xFeatureMatrix <- function( }) #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects - if(length(featureMats) > 1) { - featureMats <- tryCatch({ - rse_final <- featureMats[[1]] - #for each element of the list, test to make sure each SE attribute is identical - for(i in 2:length(featureMats)){ - - if(!all.equal(rownames(featureMats[[1]]),rownames(featureMats[[i]]))) { - stop("Error - rownames (genes) of individual RNA objects are not equivalent.") - } - if(!all.equal(rowData(featureMats[[1]]),rowData(featureMats[[i]]))) { - stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") - } - if(!all.equal(names(assays(featureMats[[1]])),names(assays(featureMats[[i]])))) { - 
stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") - } - rse_final <- cbind(rse_final,featureMats[[i]]) + if (length(featureMats) > 1) { + rse_final <- featureMats[[1]] + for (i in 2:length(featureMats)) { + if (!all.equal(rownames(featureMats[[1]]), rownames(featureMats[[i]]))) { + stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } - rse_final - }, error = function(e) { - message("Error in combining individual feature matrices! Returning as a list of individual feature matrices!") - featureMats - }) + if (!all.equal(rowData(featureMats[[1]]), rowData(featureMats[[i]]))) { + stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") + } + if (!all.equal(names(assays(featureMats[[1]])), + names(assays(featureMats[[i]])))) { + stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") + } + rse_final <- cbind(rse_final, featureMats[[i]]) + } + return(rse_final) + } else { + return(featureMats) } - featureMats - } .importFM <- function(featureMatrix = NULL, featureType = NULL, name = NULL){ From 972aa83d7b4214fa50a0bccc1074ba9324d2ac4f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 18 Feb 2022 11:15:12 -0800 Subject: [PATCH 101/184] Update MultiModal.R --- R/MultiModal.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index b77ae8ff..34473126 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -34,23 +34,22 @@ import10xFeatureMatrix <- function( if (length(featureMats) > 1) { rse_final <- featureMats[[1]] for (i in 2:length(featureMats)) { - if (!all.equal(rownames(featureMats[[1]]), rownames(featureMats[[i]]))) { + if (!all.equal(rownames(rse_final), rownames(featureMats[[i]]))) { stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } - if 
(!all.equal(rowData(featureMats[[1]]), rowData(featureMats[[i]]))) { + if (!all.equal(rowData(rse_final), rowData(featureMats[[i]]))) { stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") } - if (!all.equal(names(assays(featureMats[[1]])), - names(assays(featureMats[[i]])))) { + if (!all.equal(names(assays(rse_final)), names(assays(featureMats[[i]])))) { stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") } rse_final <- cbind(rse_final, featureMats[[i]]) } return(rse_final) - } else { + } + else { return(featureMats) } - } .importFM <- function(featureMatrix = NULL, featureType = NULL, name = NULL){ From 99248d6df8a6ca11904680f49ffbc0d669550a37 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 18 Feb 2022 11:32:02 -0800 Subject: [PATCH 102/184] change from cbind to combineCols if desired later, combineCols would allow combination of RSEs with different rows but I think it's better to enforce equality before merge. --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 34473126..b2634c97 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -43,7 +43,7 @@ import10xFeatureMatrix <- function( if (!all.equal(names(assays(rse_final)), names(assays(featureMats[[i]])))) { stop("Error - available assays of individual RNA objects are not equivalent.
Each object is expected to only have one assay named 'counts'.") } - rse_final <- cbind(rse_final, featureMats[[i]]) + rse_final <- SummarizedExperiment::combineCols(rse_final, featureMats[[i]]) } return(rse_final) } From fa9c1b7bfb010c762c02546be7de1d92853463fc Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 18 Feb 2022 11:35:57 -0800 Subject: [PATCH 103/184] revert back to cbind but force SummarizedExperiment --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index b2634c97..5994f2d6 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -43,7 +43,7 @@ import10xFeatureMatrix <- function( if (!all.equal(names(assays(rse_final)), names(assays(featureMats[[i]])))) { stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") } - rse_final <- SummarizedExperiment::combineCols(rse_final, featureMats[[i]]) + rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) } return(rse_final) } From 09864ada3c37724894ea077018465bf4bd41d006 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 21 Feb 2022 20:46:40 -0800 Subject: [PATCH 104/184] improve stop message for chr not in AnnoFile --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 4a6c473c..2a9f4999 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -787,7 +787,7 @@ addArchRAnnotations <- function( } if(chr %ni% .availableSeqnames(AnnoFile, Group)){ - stop("Error Chromosome not in AnnoFile!") + stop(paste("Error! 
Chromosome ",chr," not in AnnoFile!")) } o <- h5closeAll() From 4a56079c57f9f672290832457a0e190df0b783fb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 8 Mar 2022 12:33:22 -0800 Subject: [PATCH 105/184] fix multi-sample handling and mismatch reporting in response to https://github.com/GreenleafLab/ArchR/issues/507 --- R/MultiModal.R | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 5994f2d6..665d9fc4 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -31,19 +31,40 @@ import10xFeatureMatrix <- function( }) #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects - if (length(featureMats) > 1) { + if (length(featureMats) > 1) { rse_final <- featureMats[[1]] for (i in 2:length(featureMats)) { - if (!all.equal(rownames(rse_final), rownames(featureMats[[i]]))) { + print(paste0("featureMats[[",i,"]]")) + if (!identical(rownames(rse_final), rownames(featureMats[[i]]))) { stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } - if (!all.equal(rowData(rse_final), rowData(featureMats[[i]]))) { - stop("Error - rowData (gene metadata) of individual RNA objects are not equivalent.") + if (!identical(colnames(rowData(rse_final)), colnames(rowData(featureMats[[i]])))) { + stop("Error - rowData (gene metadata) of individual RNA objects have different columns. This is highly unusual and merging has been aborted.") } - if (!all.equal(names(assays(rse_final)), names(assays(featureMats[[i]])))) { + if (!identical(names(assays(rse_final)), names(assays(featureMats[[i]])))) { stop("Error - available assays of individual RNA objects are not equivalent. 
Each object is expected to only have one assay named 'counts'.") } - rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) + + #check each column in rowData to check for mismatches that should be thrown as warnings + #occasionally, it seems like 10x is annotating different ensembl IDs to the same gene which seems like a bad way to go + #this is a bit heavy-handed but it seems like the safest thing to do is report any mismatch rather than merge blindly + mismatchWarning <- TRUE + for (x in 1:ncol(rowData(rse_final))) { + if (!identical(rowData(rse_final)[,x], rowData(featureMats[[i]])[,x])) { + if(mismatchWarning) { + message(sprintf("Warning! Some values within column \"%s\" the rowData of your objects do not precisely match!", colnames(rowData(rse_final))[x])) + message("This is often caused by slight variations in Ensembl IDs used by cellranger. ArchR will ignore these mismatches and allow merging to proceed but you should check to make sure that these are ok for your data.\n") + mismatchWarning <- FALSE + } + + mismatch <- which(rowData(rse_final)[,x] != rowData(featureMats[[i]])[,x]) + for (y in 1:length(mismatch)) { + message(sprintf("Mismatch in column \"%s\" row %s for %s: %s does not exactly match %s!", colnames(rowData(rse_final))[x], mismatch[y], names[i], rowData(rse_final)[mismatch[y],x], rowData(featureMats[[i]])[mismatch[y],x])) + } + } + } + + rse_final <- cbind(rse_final, featureMats[[i]]) } return(rse_final) } From 8ddd94315b3ba5bc2b7590d825f438ce1e79ed9a Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 8 Mar 2022 21:34:03 -0800 Subject: [PATCH 106/184] fix cbind to explicit reference SummarizedExperiment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit avoid error - unable to find an inherited method for function ‘bindCOLS’ for signature ‘"RangedSummarizedExperiment"’ --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R 
index 665d9fc4..4d26371a 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -64,7 +64,7 @@ import10xFeatureMatrix <- function( } } - rse_final <- cbind(rse_final, featureMats[[i]]) + rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) } return(rse_final) } From 8f05eef02685c5dd37e3ecadab725daadb71ea1c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 08:40:48 -0800 Subject: [PATCH 107/184] Fix handling of multiple input samples see https://github.com/GreenleafLab/ArchR/issues/507 --- R/MultiModal.R | 69 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 4d26371a..24b25f4d 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -8,17 +8,27 @@ #' #' @param input A character of paths to 10x feature hdf5 file(s). These will traditionally have a suffix similar to "filtered_feature_bc_matrix.h5". #' @param names A character of sample names associated with each input file. +#' @param strictMatch Only relevant when multiple input files are used. A boolean that indictes whether rows (genes) that do not match perfectly in the matrices +#' should be removed (`strictMatch = TRUE`) or coerced (`strictMatch = FALSE`). CellRanger seems to occassionally use different ensembl ids for the same gene across +#' different samples. If you are comfortable tolerating such mismatches, you can coerce all matrices to fit together, in which case the gene metadata present in +#' the first listed sample will be applied to all matrices for that particular gene entry. Regardless of what value is used for `strictMatch`, this function +#' cannot tolerate mismatched gene names, only mismatched metadata for the same gene. +#' @param verbose Only relevant when multiple input files are used. 
A boolean that indicates whether messaging about mismatches should be verbose (`TRUE`) or minimal (`FALSE`) #' @param featureType The name of the feature to extract from the 10x feature file. #' See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/h5_matrices for more information. #' @export import10xFeatureMatrix <- function( input = NULL, - names = NULL, + names = NULL, + strictMatch = TRUE, + verbose = TRUE, featureType = "Gene Expression" - ){ - +){ + .validInput(input = input, name = "input", valid = c("character")) .validInput(input = names, name = "names", valid = c("character")) + .validInput(input = strictMatch, name = "strictMatch", valid = c("boolean")) + .validInput(input = verbose, name = "verbose", valid = c("boolean")) .validInput(input = featureType, name = "featureType", valid = c("character")) if (!all(file.exists(input))) { @@ -29,12 +39,26 @@ import10xFeatureMatrix <- function( .importFM(featureMatrix = input[y], featureType = featureType, name = names[y]) }) - + + message("Re-ordering RNA matricies alphabetically for consistency.") + for(j in 1:length(featureMats)) { + featureMats[[j]] <- featureMats[[j]][order(rownames(featureMats[[j]])),] + } + #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects - if (length(featureMats) > 1) { + if (length(featureMats) > 1) { + message("Merging individual RNA objects...") + #make the first matrix the base matrix and merge all others into it rse_final <- featureMats[[1]] + + rowsToRemove <- c() #rows that have previously been removed from rse_final + + #for each additional feature matrix (starting with the second), look for mismatches with rse_final and merge accordingly for (i in 2:length(featureMats)) { - print(paste0("featureMats[[",i,"]]")) + mismatchWarning <- TRUE #a boolean to prevent output of the warning message many times and only output it once + + message(sprintf("\nMerging %s", names[i])) + if 
(!identical(rownames(rse_final), rownames(featureMats[[i]]))) { stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } @@ -48,24 +72,46 @@ import10xFeatureMatrix <- function( #check each column in rowData to check for mismatches that should be thrown as warnings #occasionally, it seems like 10x is annotating different ensembl IDs to the same gene which seems like a bad way to go #this is a bit heavy-handed but it seems like the safest thing to do is report any mismatch rather than merge blindly - mismatchWarning <- TRUE + for (x in 1:ncol(rowData(rse_final))) { if (!identical(rowData(rse_final)[,x], rowData(featureMats[[i]])[,x])) { if(mismatchWarning) { - message(sprintf("Warning! Some values within column \"%s\" the rowData of your objects do not precisely match!", colnames(rowData(rse_final))[x])) - message("This is often caused by slight variations in Ensembl IDs used by cellranger. ArchR will ignore these mismatches and allow merging to proceed but you should check to make sure that these are ok for your data.\n") + message(sprintf("Warning! Some values within column \"%s\" of the rowData (gene metadata) of your objects do not precisely match!", colnames(rowData(rse_final))[x])) + message("This is often caused by slight variations in Ensembl IDs and gene locations used by cellranger across different samples. 
ArchR will ignore these mismatches and allow merging to proceed but you should check to make sure that these are ok for your data.\n") mismatchWarning <- FALSE } + #detect all of the mismatches betwenn rse_final and the current featureMat mismatch <- which(rowData(rse_final)[,x] != rowData(featureMats[[i]])[,x]) + #for each detected mismatch, handle the mismatch according to the value of strictMatch for (y in 1:length(mismatch)) { - message(sprintf("Mismatch in column \"%s\" row %s for %s: %s does not exactly match %s!", colnames(rowData(rse_final))[x], mismatch[y], names[i], rowData(rse_final)[mismatch[y],x], rowData(featureMats[[i]])[mismatch[y],x])) + if (verbose) { + message(sprintf("Mismatch in column \"%s\" row %s for %s: %s does not exactly match %s!", colnames(rowData(rse_final))[x], mismatch[y], names[i], rowData(rse_final)[mismatch[y],x], rowData(featureMats[[i]])[mismatch[y],x])) + } + if (strictMatch) { + if (verbose) { + message("strictMatch = TRUE so the corresponding gene entry with mismatching information will be removed.") + } + rowsToRemove <- unique(c(rowsToRemove, mismatch[y])) + #temporarily force the data to match so that merging can occur easily. 
Mismatched rows will be removed later + rowData(featureMats[[i]])[mismatch[y],] <- rowData(rse_final)[mismatch[y],] + rowRanges(featureMats[[i]])[mismatch[y]] <- rowRanges(rse_final)[mismatch[y]] + } else { + if (verbose) { + message("strictMatch = FALSE so mismatching information will be coerced to match the first sample provided.") + } + rowData(featureMats[[i]])[mismatch[y],] <- rowData(rse_final)[mismatch[y],] + rowRanges(featureMats[[i]])[mismatch[y]] <- rowRanges(rse_final)[mismatch[y]] + } } } } - + rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) } + if (strictMatch) { + rse_final <- rse_final[-rowsToRemove,] + } return(rse_final) } else { @@ -73,6 +119,7 @@ import10xFeatureMatrix <- function( } } + .importFM <- function(featureMatrix = NULL, featureType = NULL, name = NULL){ o <- h5closeAll() From f31660dfb4b76c70792c9074213f2add0a77e3c3 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 08:59:13 -0800 Subject: [PATCH 108/184] explicitly call GenomicRanges::resize To avoid conflicts with the `webshot` package proposed in https://github.com/GreenleafLab/ArchR/issues/1324 --- R/ReproduciblePeakSet.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 21fe8c11..75eaf1d1 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -598,7 +598,7 @@ addReproduciblePeakSet <- function( #Validate peaks <- .validGRanges(peaks) - peakSummits <- resize(peaks,1,"center") + peakSummits <- GenomicRanges::resize(peaks,1,"center") geneAnnotation$genes <- .validGRanges(geneAnnotation$genes) geneAnnotation$exons <- .validGRanges(geneAnnotation$exons) geneAnnotation$TSS <- .validGRanges(geneAnnotation$TSS) @@ -606,11 +606,11 @@ addReproduciblePeakSet <- function( #First Lets Get Distance to Nearest Gene Start .logMessage("Annotating Peaks : Nearest Gene", logFile = logFile) - distPeaks <- distanceToNearest(peakSummits, resize(geneAnnotation$genes, 1,
"start"), ignore.strand = TRUE) + distPeaks <- distanceToNearest(peakSummits, GenomicRanges::resize(geneAnnotation$genes, 1, "start"), ignore.strand = TRUE) mcols(peaks)$distToGeneStart <- mcols(distPeaks)$distance mcols(peaks)$nearestGene <- mcols(geneAnnotation$genes)$symbol[subjectHits(distPeaks)] .logMessage("Annotating Peaks : Gene", logFile = logFile) - promoters <- extendGR(resize(geneAnnotation$genes, 1, "start"), upstream = promoterRegion[1], downstream = promoterRegion[2]) + promoters <- extendGR(GenomicRanges::resize(geneAnnotation$genes, 1, "start"), upstream = promoterRegion[1], downstream = promoterRegion[2]) op <- overlapsAny(peakSummits, promoters, ignore.strand = TRUE) og <- overlapsAny(peakSummits, geneAnnotation$genes, ignore.strand = TRUE) oe <- overlapsAny(peakSummits, geneAnnotation$exons, ignore.strand = TRUE) @@ -622,7 +622,7 @@ addReproduciblePeakSet <- function( #First Lets Get Distance to Nearest TSS's .logMessage("Annotating Peaks : TSS", logFile = logFile) - distTSS <- distanceToNearest(peakSummits, resize(geneAnnotation$TSS, 1, "start"), ignore.strand = TRUE) + distTSS <- distanceToNearest(peakSummits, GenomicRanges::resize(geneAnnotation$TSS, 1, "start"), ignore.strand = TRUE) mcols(peaks)$distToTSS <- mcols(distTSS)$distance if("symbol" %in% colnames(mcols(geneAnnotation$TSS))){ mcols(peaks)$nearestTSS <- mcols(geneAnnotation$TSS)$symbol[subjectHits(distTSS)] @@ -663,7 +663,7 @@ addReproduciblePeakSet <- function( summits <- Reduce("c", as(summits, "GRangesList")) .logMessage(paste0(prefix, " Extending Summits"), logFile = logFile) - extendedSummits <- resize(summits, extendSummits * 2 + 1, "center") + extendedSummits <- GenomicRanges::resize(summits, extendSummits * 2 + 1, "center") extendedSummits <- lapply(split(extendedSummits, extendedSummits$GroupReplicate), function(x){ nonES <- nonOverlappingGR(x, by = "score", decreasing = TRUE) nonES$replicateScoreQuantile <- round(.getQuantiles(nonES$score),3) From 
b441c88a2e0528d38f40555240d0c177339d0a8c Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:01:10 -0800 Subject: [PATCH 109/184] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/MatrixGeneScores.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index 4957141a..1d1d10b3 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -221,9 +221,9 @@ addGeneScoreMatrix <- function( if(useTSS){ .logMessage(paste0(sampleName, " .addGeneScoreMat useTSS = TRUE")) distMethod <- "GenePromoter" - geneRegions$geneStart <- start(resize(geneRegions, 1, "start")) - geneRegions$geneEnd <- start(resize(geneRegions, 1, "end")) - geneRegions <- resize(geneRegions, 1, "start") + geneRegions$geneStart <- start(GenomicRanges::resize(geneRegions, 1, "start")) + geneRegions$geneEnd <- start(GenomicRanges::resize(geneRegions, 1, "end")) + geneRegions <- GenomicRanges::resize(geneRegions, 1, "start") if(extendTSS){ geneRegions <- extendGR(gr = geneRegions, upstream = geneUpstream, downstream = geneDownstream) } @@ -231,8 +231,8 @@ addGeneScoreMatrix <- function( }else{ .logMessage(paste0(sampleName, " .addGeneScoreMat useTSS = FALSE")) distMethod <- "GeneBody" - geneRegions$geneStart <- start(resize(geneRegions, 1, "start")) - geneRegions$geneEnd <- start(resize(geneRegions, 1, "end")) + geneRegions$geneStart <- start(GenomicRanges::resize(geneRegions, 1, "start")) + geneRegions$geneEnd <- start(GenomicRanges::resize(geneRegions, 1, "end")) geneRegions <- extendGR(gr = geneRegions, upstream = geneUpstream, downstream = geneDownstream) m <- 1 / width(geneRegions) geneRegions$geneWeight <- 1 + m * (geneScaleFactor - 1) / (max(m) - min(m)) @@ -317,8 +317,8 @@ addGeneScoreMatrix <- function( #Time to Overlap Gene Windows if(useGeneBoundaries){ - geneStartz <- start(resize(geneRegionz, 1, "start")) - geneEndz <- start(resize(geneRegionz, 1, "end")) + 
geneStartz <- start(GenomicRanges::resize(geneRegionz, 1, "start")) + geneEndz <- start(GenomicRanges::resize(geneRegionz, 1, "end")) pminGene <- pmin(geneStartz, geneEndz) pmaxGene <- pmax(geneStartz, geneEndz) @@ -380,7 +380,7 @@ addGeneScoreMatrix <- function( #Determine Sign for Distance relative to strand (Directionality determined based on dist from gene start) isMinus <- BiocGenerics::which(strand(geneRegionz) == "-") - signDist <- sign(start(uniqueTiles)[subjectHits(tmp)] - start(resize(geneRegionz,1,"start"))[queryHits(tmp)]) + signDist <- sign(start(uniqueTiles)[subjectHits(tmp)] - start(GenomicRanges::resize(geneRegionz,1,"start"))[queryHits(tmp)]) signDist[isMinus] <- signDist[isMinus] * -1 #Correct the orientation for the distance! From 394f7ccd917a8fbc0757679961bcbf1d50857cdd Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:02:05 -0800 Subject: [PATCH 110/184] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/QualityControl.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/QualityControl.R b/R/QualityControl.R index 20c83f30..0c76ee36 100644 --- a/R/QualityControl.R +++ b/R/QualityControl.R @@ -48,7 +48,7 @@ plotTSSEnrichment <- function( chr <- paste0(seqnames(chromSizes)) chr <- gtools::mixedsort(intersect(chr, paste0(seqnames(TSS)))) TSS <- sort(sortSeqlevels(TSS)) - splitTSS <- split(resize(TSS,1,"start"), seqnames(TSS))[chr] + splitTSS <- split(GenomicRanges::resize(TSS,1,"start"), seqnames(TSS))[chr] window <- 2 * flank + 1 groups <- getCellColData(ArchRProj = ArchRProj, select = groupBy, drop = FALSE) uniqGroups <- gtools::mixedsort(unique(groups[,1])) From 4803ee9f6e70bea93b70998c863a1deaade9e713 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:03:10 -0800 Subject: [PATCH 111/184] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/Footprinting.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/R/Footprinting.R b/R/Footprinting.R index c9da4bea..e02afd22 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -221,7 +221,7 @@ getFootprints <- function( footprintDF <- lapply(seq_along(featureList), function(x){ outx <- tryCatch({ - featurex <- split(resize(featureList[[x]],1,"center"), seqnames(featureList[[x]])) + featurex <- split(GenomicRanges::resize(featureList[[x]],1,"center"), seqnames(featureList[[x]])) intSeq <- intersect(names(featurex), names(cov)) if(length(intSeq)==0){ .logMessage(paste0("No intersecting chromsomes for feature ", names(featureList)[x], "!")) @@ -292,7 +292,7 @@ getFootprints <- function( kmerList <- .safelapply(seq_along(featureList), function(i){ .logDiffTime(sprintf("Computing Kmer Tables for %s of %s features", i, length(featureList)), tstart, verbose=verbose, logFile = logFile) - bsv <- BSgenomeViews(genome , resize(featureList[[i]], window + k, "center")) + bsv <- BSgenomeViews(genome, GenomicRanges::resize(featureList[[i]], window + k, "center")) bsv <- bsv[width(bsv) == window + k] #none that are trimmed! #BSgenome is already stranded #kmerPositionFrequencyCpp is Rcpp export for getting kmer position frequencies from strings From f208a345d2e07606f0b89acb44dd5c5d86a07a78 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:04:03 -0800 Subject: [PATCH 112/184] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/AnnotationGenome.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index 876828d7..9df316fc 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -8,7 +8,6 @@ #' @param filter A boolean value indicating whether non-standard chromosome scaffolds should be excluded. #' These "non-standard" chromosomes are defined by `filterChrGR()` and by manual annotation using the `filterChr` parameter. 
#' @param filterChr A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. -#' If no manual removal is desired, `filterChr` should be set to `NULL`. If `filter` is set to `TRUE` but `filterChr` is set to `NULL`, #' non-standard chromosomes will still be removed as defined in `filterChrGR()`. #' @export createGenomeAnnotation <- function( @@ -171,7 +170,7 @@ createGeneAnnotation <- function( ########################### message("Getting TSS..") - TSS <- unique(resize(GenomicFeatures::transcripts(TxDb), width = 1, fix = "start")) + TSS <- unique(GenomicRanges::resize(GenomicFeatures::transcripts(TxDb), width = 1, fix = "start")) if(!is.null(inGenes)){ genes <- .validGRanges(inGenes) From c57440498fdb1a042702eb3fac21bc6d58af37e2 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:04:51 -0800 Subject: [PATCH 113/184] fix deleted line --- R/AnnotationGenome.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index 9df316fc..e76026de 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -8,6 +8,7 @@ #' @param filter A boolean value indicating whether non-standard chromosome scaffolds should be excluded. #' These "non-standard" chromosomes are defined by `filterChrGR()` and by manual annotation using the `filterChr` parameter. #' @param filterChr A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. +#' If no manual removal is desired, `filterChr` should be set to `NULL`. If `filter` is set to `TRUE` but `filterChr` is set to `NULL`, #' non-standard chromosomes will still be removed as defined in `filterChrGR()`. 
#' @export createGenomeAnnotation <- function( From 6dd1e30df8bbfbc3d51b789fa6d4b1eed5c582fe Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:06:12 -0800 Subject: [PATCH 114/184] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/CreateArrow.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 300cb7c8..0fa4335a 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -580,7 +580,7 @@ createArrowFiles <- function( featureList <- list() featureList$Promoter <- extendGR( - gr = resize(geneAnnotation$genes, 1, "start"), + gr = GenomicRanges::resize(geneAnnotation$genes, 1, "start"), upstream = promoterRegion[1], downstream = promoterRegion[2] ) @@ -852,10 +852,10 @@ createArrowFiles <- function( } #Create Window and Flank - TSS <- resize(TSS, 1, fix = "start") + TSS <- GenomicRanges::resize(TSS, 1, fix = "start") strand(TSS) <- "*" TSS <- unique(TSS) - tssWindow <- resize(TSS, window, "center") + tssWindow <- GenomicRanges::resize(TSS, window, "center") tssWindow$type <- "window" tssFlank <- c( #Positive Flank From 0023165538d737735112f6ae4a942bde5bb30483 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:07:18 -0800 Subject: [PATCH 115/184] strictly call GenomicRanges::resize() https://github.com/GreenleafLab/ArchR/issues/1324 --- R/ArchRBrowser.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 41be2a83..c19333a6 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -292,7 +292,7 @@ ArchRBrowser <- function( region <- region[which(tolower(mcols(region)$symbol) %in% tolower(input$name))] region <- region[order(match(tolower(mcols(region)$symbol), tolower(input$name)))] - region1 <- resize(region, 1, "start") + region1 <- GenomicRanges::resize(region, 1, "start") strand(region1) <- "*" #Extend Region @@ -752,7 +752,7 @@ plotBrowserTrack <- function( region <- 
region[which(tolower(mcols(region)$symbol) %in% tolower(geneSymbol))] region <- region[order(match(tolower(mcols(region)$symbol), tolower(geneSymbol)))] print(region) - region <- resize(region, 1, "start") + region <- GenomicRanges::resize(region, 1, "start") strand(region) <- "*" region <- extendGR(region, upstream = upstream, downstream = downstream) } From 7c6690b6064bad593a25a02465fa82630ad908d5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 9 Mar 2022 09:14:13 -0800 Subject: [PATCH 116/184] remove NULL as option for scaleDims https://github.com/GreenleafLab/ArchR/issues/1314 --- R/IterativeLSI.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index d708df14..f8b75a15 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -24,8 +24,7 @@ #' Possible values are: 1 or "tf-logidf", 2 or "log(tf-idf)", and 3 or "logtf-logidf". #' @param scaleDims A boolean that indicates whether to z-score the reduced dimensions for each cell. This is useful forminimizing the contribution #' of strong biases (dominating early PCs) and lowly abundant populations. However, this may lead to stronger sample-specific biases since -#' it is over-weighting latent PCs. If set to `NULL` this will scale the dimensions based on the value of `scaleDims` when the `reducedDims` were -#' originally created during dimensionality reduction. This idea was introduced by Timothy Stuart. +#' it is over-weighting latent PCs. #' @param corCutOff A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a correlation to #' sequencing depth that is greater than the `corCutOff`, it will be excluded from analysis. #' @param binarize A boolean value indicating whether the matrix should be binarized before running LSI. This is often desired when working with insertion counts. 
@@ -117,7 +116,7 @@ addIterativeLSI <- function( .validInput(input = varFeatures, name = "varFeatures", valid = c("integer")) .validInput(input = dimsToUse, name = "dimsToUse", valid = c("integer")) .validInput(input = LSIMethod, name = "LSIMethod", valid = c("integer", "character")) - .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean", "null")) + .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean")) .validInput(input = corCutOff, name = "corCutOff", valid = c("numeric")) .validInput(input = binarize, name = "binarize", valid = c("boolean")) .validInput(input = outlierQuantiles, name = "outlierQuantiles", valid = c("numeric", "null")) From 6bde881b2127e8ce81632ed8b451314c569c47cd Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 12 Mar 2022 09:39:08 -0800 Subject: [PATCH 117/184] fix footprinting crashes bc of ggrepel addressing https://github.com/GreenleafLab/ArchR/issues/493#issuecomment-870012873 --- R/Footprinting.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index e02afd22..9d62449f 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -580,8 +580,9 @@ plotFootprints <- function( xlim = c(min(plotFootDF$x),max(plotFootDF$x)) ) + theme_ArchR(baseSize = baseSize) + ggtitle(name) + guides(fill = FALSE) + - guides(color = FALSE) + ylab(paste0(title,"Normalized Insertions")) + - ggrepel::geom_label_repel(data = plotMax, aes(label = group), size = 3, xlim = c(75, NA)) + guides(color = FALSE) + ylab(paste0(title,"Normalized Insertions")) + #removed ggrepel due to incompatibility with coord_cartesian - see https://github.com/GreenleafLab/ArchR/issues/493#issuecomment-870012873 + #ggrepel::geom_label_repel(data = plotMax, aes(label = group), size = 3, xlim = c(75, NA)) ggBias <- ggplot(plotBiasDF, aes(x = x, y = mean, color = group)) + geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd, linetype = NA, fill = group), alpha = 0.4) + From 
d28fa1d61fd3a3444a01721785b6f32bce8cbf48 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 12 Mar 2022 10:13:00 -0800 Subject: [PATCH 118/184] set drop=FALSE in matrix operations addressing https://github.com/GreenleafLab/ArchR/issues/1325 set drop=FALSE to prevent conversion to a vector this would happen if a chr had only one feature on it --- R/ArrowRead.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 0d9c786f..4129242c 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -922,7 +922,7 @@ getMatrixFromArrow <- function( #Check if samples have NAs due to N = 1 sample or some other weird thing. #Set it to min non NA variance dfVars <- lapply(seq_len(nrow(dfVars)), function(x){ - vx <- dfVars[x, ] + vx <- dfVars[x, , drop = FALSE] if(any(is.na(vx))){ vx[is.na(vx)] <- min(vx[!is.na(vx)]) } From f7a0f74cd7f9dc98a38f81eca6b25233faea522b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sun, 13 Mar 2022 10:34:20 -0700 Subject: [PATCH 119/184] fix deprecated ggplot calls and ggridges xlim addressing https://github.com/GreenleafLab/ArchR/issues/1249 --- R/GgplotUtils.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/GgplotUtils.R b/R/GgplotUtils.R index 609caa08..90fd6447 100644 --- a/R/GgplotUtils.R +++ b/R/GgplotUtils.R @@ -699,8 +699,8 @@ ggGroup <- function( df$x <- factor(df$x, groupOrder) p <- ggplot(df, aes(x = x, y = y, color = x)) + - scale_color_manual(values = pal, guide = FALSE) + - scale_fill_manual(values = pal, guide = FALSE) + + scale_color_manual(values = pal, guide = "none") + + scale_fill_manual(values = pal, guide = "none") + ggtitle(title) if(tolower(plotAs) == "ridges" | tolower(plotAs) == "ggridges"){ @@ -728,7 +728,9 @@ ggGroup <- function( val <- 1/length(unique(x)) p <- p + geom_density_ridges(data = df, aes(x = y, y = x, color = x, fill = x), scale = ridgeScale, - alpha = alpha, color = "black") + scale_y_discrete(expand = expand_scale(mult = c(0.01, val))) + 
alpha = alpha, color = "black") + scale_y_discrete(expand = expansion(mult = c(0.01, val))) + xmax <- layer_scales(p)$x$range$range[2] + p <- p + xlim(0, xmax) } }else{ type <- "violin" From 8f2ec3a0df1954dd4334ef6cf5caedeb794441f5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sun, 13 Mar 2022 11:05:28 -0700 Subject: [PATCH 120/184] revert xlim change I decided against implementing an xlim cutoff because there are situations where users may want negative values shown and it doesnt seem worth the gain to add an extra argument to handle this edge case. https://github.com/GreenleafLab/ArchR/issues/1249 --- R/GgplotUtils.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/GgplotUtils.R b/R/GgplotUtils.R index 90fd6447..4d4c31ea 100644 --- a/R/GgplotUtils.R +++ b/R/GgplotUtils.R @@ -729,8 +729,6 @@ ggGroup <- function( p <- p + geom_density_ridges(data = df, aes(x = y, y = x, color = x, fill = x), scale = ridgeScale, alpha = alpha, color = "black") + scale_y_discrete(expand = expansion(mult = c(0.01, val))) - xmax <- layer_scales(p)$x$range$range[2] - p <- p + xlim(0, xmax) } }else{ type <- "violin" From 6a005c47ddc4682cd4916cc98fb0de2c021953c6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 14 Mar 2022 21:37:34 -0700 Subject: [PATCH 121/184] bugfixes problems when only one sample is provided, return value was a list also when rowsToRemove was NULL, this caused errors --- R/MultiModal.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 24b25f4d..2ff1e963 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -110,12 +110,14 @@ import10xFeatureMatrix <- function( rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) } if (strictMatch) { - rse_final <- rse_final[-rowsToRemove,] + if(length(rowsToRemove) > 0) { + rse_final <- rse_final[-rowsToRemove,] + } } return(rse_final) } else { - return(featureMats) + return(featureMats[[1]]) } } From 849a59c2e728304c2fef15a057515aa93e36a964 Mon Sep 
17 00:00:00 2001 From: Ryan Corces Date: Tue, 15 Mar 2022 21:07:09 -0700 Subject: [PATCH 122/184] change how featureMats are sorted trying to fix https://github.com/GreenleafLab/ArchR/issues/507#issuecomment-1068558649 --- R/MultiModal.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index 2ff1e963..ed444ebb 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -40,9 +40,9 @@ import10xFeatureMatrix <- function( name = names[y]) }) - message("Re-ordering RNA matricies alphabetically for consistency.") + message("Re-ordering RNA matricies for consistency.") for(j in 1:length(featureMats)) { - featureMats[[j]] <- featureMats[[j]][order(rownames(featureMats[[j]])),] + featureMats[[j]] <- sort(sortSeqlevels(featureMats[[j]]), ignore.strand = TRUE) } #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects From 42c8be39d003545bccab9450f53da6815809f7e6 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 15 Mar 2022 21:32:30 -0700 Subject: [PATCH 123/184] specify sort function https://github.com/GreenleafLab/ArchR/issues/507#issuecomment-1068719393 --- R/MultiModal.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MultiModal.R b/R/MultiModal.R index ed444ebb..6733d78c 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -42,7 +42,7 @@ import10xFeatureMatrix <- function( message("Re-ordering RNA matricies for consistency.") for(j in 1:length(featureMats)) { - featureMats[[j]] <- sort(sortSeqlevels(featureMats[[j]]), ignore.strand = TRUE) + featureMats[[j]] <- sort.GenomicRanges(sortSeqlevels(featureMats[[j]]), ignore.strand = TRUE) } #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects From 0aa874a05b2969c4796d30591f98c0e09937aa8a Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 17 Mar 2022 09:39:31 -0700 Subject: [PATCH 124/184] update comments --- R/ArchRBrowser.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index c19333a6..98a1e251 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -808,7 +808,7 @@ plotBrowserTrack <- function( } ########################################################## - # Bulk Tracks + # Single-cell Tracks ########################################################## if("sctrack" %in% tolower(plotSummary)){ .logDiffTime(sprintf("Adding SC Tracks (%s of %s)",x,length(region)), t1=tstart, verbose=verbose, logFile=logFile) @@ -850,7 +850,7 @@ plotBrowserTrack <- function( } ########################################################## - # Feature Tracks + # Loop Tracks ########################################################## if("looptrack" %in% tolower(plotSummary)){ if(!is.null(loops)){ From 420a3c2573fc07ad27ac4cf3fa911ceccc0176fb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 22 Mar 2022 13:11:08 -0700 Subject: [PATCH 125/184] update function param docs --- R/ArrowRead.R | 3 ++- R/Harmony.R | 2 ++ R/IntegrativeAnalysis.R | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 4129242c..8fbb09aa 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -250,7 +250,8 @@ getFragmentsFromArrow <- function( #' @param useMatrix The name of the data matrix to retrieve from the given ArrowFile. Options include "TileMatrix", "GeneScoreMatrix", etc. #' @param useSeqnames A character vector of chromosome names to be used to subset the data matrix being obtained. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. -#' @param binarize A boolean value indicating whether the matrix should be binarized before return. This is often desired when working with insertion counts. +#' @param binarize A boolean value indicating whether the matrix should be binarized before return. +#' This is often desired when working with insertion counts. 
Note that if the matrix has already been binarized previously, this should be set to `TRUE`. #' @param logFile The path to a file to be used for logging ArchR output. #' @export getMatrixFromProject <- function( diff --git a/R/Harmony.R b/R/Harmony.R index 5a2f33ac..ca9fbabe 100644 --- a/R/Harmony.R +++ b/R/Harmony.R @@ -13,6 +13,8 @@ #' to sequencing depth that is greater than the `corCutOff`, it will be excluded from analysis. #' @param name The name to store harmony output as a `reducedDims` in the `ArchRProject` object. #' @param groupBy The name of the column in `cellColData` to use for grouping cells together for vars in harmony batch correction. +#' The value of `groupBy` is passed to the `vars_use` parameter in `harmony::HarmonyMatrix()`. When run through ArchR, this parameter +#' defines which variables to correct for during batch correction. See `harmony::HarmonyMatrix()` for more information. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param force A boolean value that indicates whether or not to overwrite data in a given column when the value passed to `name` already #' exists as a column name in `cellColData`. diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index f662630b..8695c83d 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -846,7 +846,8 @@ addCoAccessibility <- function( #' #' @param ArchRProj An `ArchRProject` object. #' @param corCutOff A numeric describing the minimum numeric peak-to-peak correlation to return. -#' @param resolution A numeric describing the bp resolution to return loops as. This helps with overplotting of correlated regions. +#' @param resolution A numeric describing the bp resolution to use when returning loops. This helps with overplotting of correlated regions. +#' This only takes affect if `returnLoops = TRUE`. 
#' @param returnLoops A boolean indicating to return the co-accessibility signal as a `GRanges` "loops" object designed for use with #' the `ArchRBrowser()` or as an `ArchRBrowserTrack()`. #' @export From 293d20fcd199eb3964d8e6606fd70923622bfdde Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Sun, 27 Mar 2022 21:16:59 -0700 Subject: [PATCH 126/184] Bug Fixes 20220327 Bug Fixes - Timeout for downloading tutorial/any url data in inputData - Fixed bug for GroupCoverages path not being updated after saveArchRProject --- .DS_Store | Bin 14340 -> 16388 bytes R/AllClasses.R | 54 +++++++++++++++---------------------------------- R/InputData.R | 25 +++++++++++++++++++++++ 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/.DS_Store b/.DS_Store index c201598da24f47b1678239b13fdff6ae6e5eb315..acd08dd80702b22ae611211269c826d0cd190fe5 100644 GIT binary patch delta 1000 zcmb7DO=}ZT6g_Wdl1!KZC$**)8W^$0h3SyADaM718nX!0R@%DKO`4R5rk!Fxpj4?~ zcP>;OF5I{&xU_`#3kvE#@F%zs*DhS?nE}U{annAy^X?nYx%Zy?-aXtrd@d#dh+8|o z8qlC?(t_QwfGW+kfs59g$!nR*{A&zXlz!xzMqcp5Wm>9W77bOlzv;r1kBe9D<*|eo z=5QN#xO-SfuRZX)s{?b5YlzLTskrR347*926JC_b} zm`58;W>~;7T1l?nB-e{&Ffo9SKFwpUUG!7JB!NC~Au8en1z6?eZjQ@vhs8`HD$=Gd&t6;8A(26`0J z#a1$!)@QUxxGa^w9E2^0#p>8%Jmlqgv`wCTJW8@dff1XL95!JG+eA*0>$55ASi=VY zc-W&qNbv);aefBP`M)hnLPObcXdtM=z?~E>cqI;KkHLUtkO4=F8M!8RNGUQiG6YTb zm+u$0Fg4OqFsRj0sJ1Y&&`~fnFfyO~QQdTNtjs~C&1Z}jGfn0ZP~Y4w(7?X2;U^;_ z*XAnl>W?_S^OcP6dC0QYM8gK(?S7euOEPT&A cncv0}>;)!>-z+Al*i7AgP*t4~%}^gk06_aZKL7v# diff --git a/R/AllClasses.R b/R/AllClasses.R index f87ff98c..be9c05e9 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -569,51 +569,29 @@ saveArchRProject <- function( newProj@imputeWeights <- SimpleList() } - #Copy Other Folders 2 layers nested + #Copy Recursively message("Copying Other Files...") for(i in seq_along(oldFiles)){ - - fin <- file.path(outDirOld, oldFiles[i]) - fout <- file.path(outputDirectory, oldFiles[i]) - message(sprintf("Copying Other Files (%s of %s): %s", i, length(oldFiles), 
basename(fin))) - - if(dir.exists(fin)){ - - dir.create(file.path(outputDirectory, basename(fin)), showWarnings=FALSE) - fin2 <- list.files(fin, full.names = TRUE) - - for(j in seq_along(fin2)){ - - if(dir.exists(fin2[j])){ - - dir.create(file.path(outputDirectory, basename(fin), basename(fin2)[j]), showWarnings=FALSE) - fin3 <- list.files(fin2[j], full.names = TRUE) - - for(k in seq_along(fin3)){ - - cf <- file.copy(fin3[k], file.path(fout, basename(fin3[k])), overwrite = overwrite) - - } - - }else{ - - cf <- file.copy(fin2[j], file.path(fout, basename(fin2[j])), overwrite = overwrite) - - } - - } - - }else{ - - cf <- file.copy(fin, fout, overwrite = overwrite) - - } - + message(sprintf("Copying Other Files (%s of %s): %s", i, length(oldFiles), oldFiles[i])) + oldPath <- file.path(outDirOld, oldFiles[i]) + file.copy(oldPath, outputDirectory, recursive=TRUE, overwrite=overwrite) } + #Set New Info newProj@sampleColData <- newProj@sampleColData[names(ArrowFilesNew), , drop = FALSE] newProj@sampleColData$ArrowFiles <- ArrowFilesNew[rownames(newProj@sampleColData)] + #Check for Group Coverages Copied + groupC <- length(newProj@projectMetadata$GroupCoverages) + if(length(groupC) > 0){ + for(z in seq_len(groupC)){ + zdata <- newProj@projectMetadata$GroupCoverages[[z]]$coverageMetadata + zfiles <- gsub(outDirOld, outputDirectory, zdata$File) + newProj@projectMetadata$GroupCoverages[[z]]$coverageMetadata$File <- zfiles + stopifnot(all(file.exists(zfiles))) + } + } + } message("Saving ArchRProject...") diff --git a/R/InputData.R b/R/InputData.R index 1e12c11e..e25e743e 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -17,6 +17,10 @@ getTutorialData <- function( .validInput(input = threads, name = "threads", valid = c("integer")) ######### + #Make Sure URL doesnt timeout + oldTimeout <- getOption('timeout') + options(timeout=100000) + if(tolower(tutorial) %in% c("heme","hematopoiesis")){ if(!dir.exists("HemeFragments")){ @@ -49,6 +53,10 @@ getTutorialData <- function( } + 
#Set back URL Options + options(timeout=oldTimeout) + + #Return Fragment Files inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] @@ -62,14 +70,24 @@ getTutorialData <- function( #' #' @export getTestFragments <- function(x){ + + #Make Sure URL doesnt timeout + oldTimeout <- getOption('timeout') + options(timeout=100000) + if(!file.exists("PBMCSmall.tsv.gz")){ download.file( url = "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/PBMCSmall.tsv.gz", destfile = "PBMCSmall.tsv.gz" ) } + #Set back URL Options + options(timeout=oldTimeout) + + #Add Genome Return Name Vector addArchRGenome("hg19test") c("PBMC" = "PBMCSmall.tsv.gz") + } #' Get PBMC Small Test Project @@ -78,6 +96,10 @@ getTestFragments <- function(x){ #' #' @export getTestProject <- function(){ + #Make Sure URL doesnt timeout + oldTimeout <- getOption('timeout') + options(timeout=100000) + #Download if(!dir.exists("PBMCSmall")){ download.file( url = "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/PBMCSmall.zip", @@ -86,6 +108,9 @@ getTestProject <- function(){ unzip("PBMCSmall.zip", exdir = getwd()) file.remove("PBMCSmall.zip") } + #Set back URL Options + options(timeout=oldTimeout) + #Load addArchRGenome("hg19test") loadArchRProject("PBMCSmall") } From 506161d36d1457d4b3aba20918c0e4f622ae937e Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Tue, 29 Mar 2022 22:54:58 -0700 Subject: [PATCH 127/184] Bugfix https://github.com/GreenleafLab/ArchR/issues/252#issuecomment-796829243 Issue was due to C++ overwriting ranges after each iteration when using 1 core vs being copied for each core and not being a noticeable bug. This fix should be more stable for other functions using this as well. 
--- .DS_Store | Bin 16388 -> 16388 bytes DESCRIPTION | 2 +- R/QualityControl.R | 62 ++++++++++++++++++++++------------ man/ArchRBrowser.Rd | 3 +- man/addGroupCoverages.Rd | 6 ++++ man/addHarmony.Rd | 4 ++- man/addIterativeLSI.Rd | 3 +- man/addModuleScore.Rd | 24 +++++++++---- man/addPeak2GeneLinks.Rd | 2 ++ man/addReproduciblePeakSet.Rd | 2 +- man/addTileMatrix.Rd | 1 - man/createGenomeAnnotation.Rd | 5 +-- man/getCoAccessibility.Rd | 3 +- man/getGroupBW.Rd | 3 +- man/getMatches.Rd | 3 +- man/getMatrixFromProject.Rd | 10 ++++-- man/import10xFeatureMatrix.Rd | 10 ++++++ man/plotBrowserTrack.Rd | 5 +-- man/projectBulkATAC.Rd | 3 ++ src/Footprinting_utils.cpp | 3 ++ src/RcppExports.cpp | 5 +++ 21 files changed, 116 insertions(+), 43 deletions(-) diff --git a/.DS_Store b/.DS_Store index acd08dd80702b22ae611211269c826d0cd190fe5..6010e1e42cb6a5dfbfe254daf8f62afd56f32fb7 100644 GIT binary patch delta 2591 zcmeH{Yitx%6vyv>wkNRyN~X6+it6U(Uv}F3l#d`k@ zJWAP;G#FGAGf@OZjEY3m22%;*6F*=CkwmldRPYfKFp3frKz!br-K8Y>MWWx>4|DH5 zXL9$>`Q6_c8pO~b_U)uNqHIkW%rW)%SCkgzrFo0L)pd0BbP&Q_=;-Zo`}{>kr@-l=qGQJJf~c38 zTf_y*wuAxWaIC>%cVv$%E}vX8eb(&Y*tmF_kSHlDGjcNM9$wG_$wo9ta!IG(32VSbmhL+;X2 zQmHtBrwu}DpX~Lydpx0Fr8tRCK)%q{+0)%Ed;Gpoc#5RVbr|kb4_0fHq=oAL)VN%- ziycD^&X_4li+uMxy>ioPkL;zNh2sq@-cIASOrz81pYC$|d%WRfla{1zPoLZG^s^&0 zvtgBw=aqUXFR+^olY``aa)f+JPLNaNG&w`QB|nf$8LP8P@NJlR6 zFb)MM!DLinDylIZv(bdPn1?nv(E%4^bfXWeu@3zh!WKM=t#};I;bn~A4eZBzID(IH z44>eHj0?Dk@9_&4!;Rs1PT&$bBWGfG*^fhLB(32wJa85^8O7fl#=rER@Y`n06iZb6 z&eO4B;7Nv5^%hK+RDGlTAz%?kH}h@rx!1`{R1o*{xZL-;Wmn4sZjY-L~N%^G5B~ zR z8hGZ+O$J5cO+0_AKapg!n!ME?NA<=v+DG3&;9%eWfjLQOPqhaQljF>#^W-AAOs3UP=Bg&7uDVPiI_49ZZ0Sz(8ov4qvVQ?tkezvj^zY-BZmnALm>p2RLZj~DPF zUc)f<;dLBhHXUU?eU5WDkFW6!e#DiDxQc6>$VprhXW+8A9Ilv~#7*Psxp||YwAtd& zI`ZUNl%pv#M$X#*O2jVZY_U0NRzqW1q`E6khk9m`A=#Lw7P+FT_P>=VXfj(;tTw$} zX-_mMU*^RHv_OVFQ*Ep?I+VX#pQAQa8WT)0TTFTRR%_TR^=18dYl*GmT%Q- zUTL|B`pE**ibiQ0>8I*z1bSUyUa1?^O=BBHyb9Bey#?=q()7S7@)c{6m&u#TF*hGEJsVCX)xfSP8Et(*^_>SXCvpMQ+1WnoK+K685lGxfidp 
zR(Svi@fP03J2;HvIEgPpoD|s2_)b}4`|!@y{?}K#JT=<+zjIQ4OSRu2bX(0wIcv}P E8wIm`GXMYp delta 1669 zcmd^;?N3`(9LLYM6u1wRU1n%|OQ8=0s?e1_QrH*+!NFW7W@6?B&b73fx>6X$8K+p( z#)!@=+fk!t#HYj;)~AR=7q`fkIG0Rk98;H#z2L=6HVr0{X(Spw_xAc`e}ReT#Xb3* z-@W(zexLI>xk=qN5>OeM&il)lZm)= zgyYW}%2!FXHd!7}GF@W{_Kb6!y6n1H7Wy;EeF-V5z35&e2>rX0>9~|(e^x&A`33pz zR5X@INpZgLS#wyBx9v@gWs)OlX|uMtt}P_U_Y92;4=2*&nF_eI^^Q(i-l=RIiDt(8 zMiXgP)@ioCQnhT8duY1WNDtiP$i7MPWR@Hwr^s0{PrfHVkPBpyTqVDdKgbICll+Ae ztb!Fau)&U75JEG;=)i60K`%C8E4E=fc3=Q;JctyAk;cQw;t4#7r?4L{;$^&oSMe4; z!pAs*S$vK;oWL1;h3{}thf8!v$R=JUg#1f>uEC+-*duf+?CDYS1N3BL+mNNs;^G)t zNvHH63k?=G&-h72YG`Q^d>kVs6&JEWp_ykaC4C{=Dy-ueGO47HZ5KND3f&YhkwRQ& z>E<~J)M#9jJwd)E=NN;_|Dw!GgdM`p(q%STh z<#<694VKMI&ICB<;zW0M_ofxu(iEw##=2x;A#s+uo8B;P4sw;Eqjuj zVL~jBAITC~CfCVtDh?hZ%3y#ICdjbB1+R*uoe2@a9V&+H48bmjAjS~vK?dV^2#@1w zwf$#7aTu@Rb-aOhkjHy?AIEVDU*a^*VIB*(f@@f2bqt28ah+DVmS%YSx2y32z2ukm ztc`lL4MX5iu_w^7{#Kf9?4!#8Pqs?7m>o{5RpM0Dm9XAr6(y(_QVAPfEURPJz${y; zM6cDaDWrtfkQmk!QnPEtHccTlyFu*K6jHO@;znLtVFum6Z7%3AEHYjz+=YJJtBMiB5W6U8jNnE!p2L3X4arTj_!LL+8IIu#oW*?MWE4Hr5z%IC!UEk> X{mp;*0Dqn-ZTVI2VRHP>L0tU@v~rHX diff --git a/DESCRIPTION b/DESCRIPTION index 0f411743..5659170d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Roxygen: list(markdown = TRUE) License: GPL (>= 2) LinkingTo: Rcpp LazyData: TRUE -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Encoding: UTF-8 Imports: Rcpp (>= 0.12.16), diff --git a/R/QualityControl.R b/R/QualityControl.R index 0c76ee36..39672ab5 100644 --- a/R/QualityControl.R +++ b/R/QualityControl.R @@ -47,8 +47,10 @@ plotTSSEnrichment <- function( chr <- paste0(seqnames(chromSizes)) chr <- gtools::mixedsort(intersect(chr, paste0(seqnames(TSS)))) + .logThis(chr, paste0("chr"), logFile = logFile) TSS <- sort(sortSeqlevels(TSS)) splitTSS <- split(GenomicRanges::resize(TSS,1,"start"), seqnames(TSS))[chr] + .logThis(splitTSS, paste0("splitTSS"), logFile = logFile) window <- 2 * flank + 1 groups <- getCellColData(ArchRProj = ArchRProj, select = groupBy, drop = FALSE) 
uniqGroups <- gtools::mixedsort(unique(groups[,1])) @@ -57,50 +59,68 @@ plotTSSEnrichment <- function( h5disableFileLocking() } - dfTSS <- .safelapply(seq_along(uniqGroups), function(x){ + dfTSS <- .safelapply(seq_along(uniqGroups), function(z){ - .logDiffTime(paste0(uniqGroups[x], " Computing TSS (",x," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) + .logDiffTime(paste0(uniqGroups[z], " Computing TSS (",z," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) - cellx <- rownames(groups)[which(paste0(groups[,1]) == uniqGroups[x])] + cellx <- rownames(groups)[which(paste0(groups[,1]) == uniqGroups[z])] - for(i in seq_along(chr)){ + for(k in seq_along(chr)){ - TSSi <- splitTSS[[chr[i]]] + #TSS for Chr + TSSi <- splitTSS[[chr[k]]] - covi <- unlist(suppressMessages(getFragmentsFromProject( + #Set TSS To be a dummy chr1 + TSSi <- GRanges(seqnames=rep("chr1",length(TSSi)), ranges = ranges(TSSi), strand = strand(TSSi)) + .logThis(TSSi, paste0(uniqGroups[z], " : TSSi : ", chr[k]), logFile = logFile) + + #Extract Fragments + covi <- suppressMessages(getFragmentsFromProject( ArchRProj = ArchRProj, - subsetBy = chromSizes[paste0(seqnames(chromSizes)) %in% chr[i]], + subsetBy = chromSizes[paste0(seqnames(chromSizes)) %in% chr[k]], cellNames = cellx, logFile = logFile - )), use.names=FALSE) %>% - sort %>% - {coverage(IRanges(c(start(.), end(.)), width = 1))} - - .logThis(covi, paste0(uniqGroups[x], " : Cov : ", chr[i]), logFile = logFile) - - if(i == 1){ - sumTSS <- rleSumsStranded(list(chr1=covi), list(chr1=TSSi), window, as.integer) + ) %>% unlist(use.names = FALSE)) + .logThis(covi, paste0(uniqGroups[z], " : Fragments : ", chr[k]), logFile = logFile) + + #Get Insertions + covi <- sort(c(start(covi), end(covi))) + .logThis(covi, paste0(uniqGroups[z], " : Insertions : ", chr[k]), logFile = logFile) + + #IRanges + covi <- IRanges(start = covi, width = 1) + .logThis(covi, paste0(uniqGroups[z], " : Insertions2 : ", chr[k]), logFile = logFile) + + 
#Coverage + covi <- IRanges::coverage(covi) + .logThis(covi, paste0(uniqGroups[z], " : Cov : ", chr[k]), logFile = logFile) + + #Compute Sum + sumTSSi <- rleSumsStranded(list(chr1=covi), list(chr1=TSSi), window, as.integer) + .logThis(sumTSSi, paste0(uniqGroups[z], " : SumTSS 1 : ", chr[k]), logFile = logFile) + + if(k == 1){ + sumTSS <- sumTSSi }else{ - sumTSS <- sumTSS + rleSumsStranded(list(chr1=covi), list(chr1=TSSi), window, as.integer) + sumTSS <- sumTSS + sumTSSi } - - .logThis(sumTSS, paste0(uniqGroups[x], " : SumTSS : ", chr[i]), logFile = logFile) + .logThis(sumTSS, paste0(uniqGroups[z], " : SumTSS : ", chr[k]), logFile = logFile) } normBy <- mean(sumTSS[c(1:norm,(flank*2-norm+1):(flank*2+1))]) df <- DataFrame( - group = uniqGroups[x], + group = uniqGroups[z], x = seq_along(sumTSS) - flank - 1, value = sumTSS, normValue = sumTSS / normBy, smoothValue = .centerRollMean(sumTSS/normBy, 11) ) - .logThis(df, paste0(uniqGroups[x], " : TSSDf"), logFile = logFile) + .logThis(df, paste0(uniqGroups[z], " : TSSDf"), logFile = logFile) - .logDiffTime(paste0(uniqGroups[x], " Finished Computing TSS (",x," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) + .logDiffTime(paste0(uniqGroups[z], " Finished Computing TSS (",z," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) df diff --git a/man/ArchRBrowser.Rd b/man/ArchRBrowser.Rd index 62f8fb45..bbe9fa5e 100644 --- a/man/ArchRBrowser.Rd +++ b/man/ArchRBrowser.Rd @@ -55,5 +55,6 @@ To install try devtools::install_github("rstudio/shinythemes").} } \description{ This function will open an interactive shiny session in style of a browser track. It allows for normalization of the signal which -enables direct comparison across samples. +enables direct comparison across samples. Note that the genes displayed in this browser are derived from your \code{geneAnnotation} +(i.e. the \code{BSgenome} object you used) so they may not match other online genome browsers that use different gene annotations. 
} diff --git a/man/addGroupCoverages.Rd b/man/addGroupCoverages.Rd index be1d3792..3622b119 100644 --- a/man/addGroupCoverages.Rd +++ b/man/addGroupCoverages.Rd @@ -8,6 +8,7 @@ addGroupCoverages( ArchRProj = NULL, groupBy = "Clusters", useLabels = TRUE, + sampleLabels = "Sample", minCells = 40, maxCells = 500, maxFragments = 25 * 10^6, @@ -30,6 +31,11 @@ addGroupCoverages( \item{useLabels}{A boolean value indicating whether to use sample labels to create sample-aware subgroupings during as pseudo-bulk replicate generation.} +\item{sampleLabels}{The name of a column in \code{cellColData} to use to identify samples. In most cases, this parameter should be left as \code{NULL} and you +should only use this parameter if you do not want to use the default sample labels stored in \code{cellColData$Sample}. However, if your individual Arrow +files do not map to individual samples, then you should set this parameter to accurately identify your samples. This is the case in (for example) +multiplexing applications where cells from different biological samples are mixed into the same reaction and demultiplexed based on a lipid barcode or genotype.} + \item{minCells}{The minimum number of cells required in a given cell group to permit insertion coverage file generation.} \item{maxCells}{The maximum number of cells to use during insertion coverage file generation.} diff --git a/man/addHarmony.Rd b/man/addHarmony.Rd index 4f82147e..260514d4 100644 --- a/man/addHarmony.Rd +++ b/man/addHarmony.Rd @@ -34,7 +34,9 @@ to sequencing depth that is greater than the \code{corCutOff}, it will be exclud \item{name}{The name to store harmony output as a \code{reducedDims} in the \code{ArchRProject} object.} -\item{groupBy}{The name of the column in \code{cellColData} to use for grouping cells together for vars in harmony batch correction.} +\item{groupBy}{The name of the column in \code{cellColData} to use for grouping cells together for vars in harmony batch correction. 
+The value of \code{groupBy} is passed to the \code{vars_use} parameter in \code{harmony::HarmonyMatrix()}. When run through ArchR, this parameter +defines which variables to correct for during batch correction. See \code{harmony::HarmonyMatrix()} for more information.} \item{verbose}{A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output.} diff --git a/man/addIterativeLSI.Rd b/man/addIterativeLSI.Rd index 40afe0b2..fedd8496 100644 --- a/man/addIterativeLSI.Rd +++ b/man/addIterativeLSI.Rd @@ -70,8 +70,7 @@ Possible values are: 1 or "tf-logidf", 2 or "log(tf-idf)", and 3 or "logtf-logid \item{scaleDims}{A boolean that indicates whether to z-score the reduced dimensions for each cell. This is useful forminimizing the contribution of strong biases (dominating early PCs) and lowly abundant populations. However, this may lead to stronger sample-specific biases since -it is over-weighting latent PCs. If set to \code{NULL} this will scale the dimensions based on the value of \code{scaleDims} when the \code{reducedDims} were -originally created during dimensionality reduction. This idea was introduced by Timothy Stuart.} +it is over-weighting latent PCs.} \item{corCutOff}{A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a correlation to sequencing depth that is greater than the \code{corCutOff}, it will be excluded from analysis.} diff --git a/man/addModuleScore.Rd b/man/addModuleScore.Rd index d1a2964e..2a27fe91 100644 --- a/man/addModuleScore.Rd +++ b/man/addModuleScore.Rd @@ -19,16 +19,28 @@ addModuleScore( \arguments{ \item{ArchRProj}{An \code{ArchRProject} object.} -\item{seed}{A number to be used as the seed for random number generation. It is recommended to keep track of the seed used so that you can -reproduce results downstream.} +\item{useMatrix}{The name of the matrix to be used for calculation of the module score. 
See \code{getAvailableMatrices()} to view available options.} + +\item{name}{The name to be given to the designated module. If \code{features} is a list, this name will be prepended to the feature set names given in the list as shown below.} + +\item{features}{A list of feature names to be grouped into modules. For example, \code{list(BScore = c("MS4A1", "CD79A", "CD74"), TScore = c("CD3D", "CD8A", "GZMB", "CCR7", "LEF1"))}. +Each named element in this list will be stored as a separate module. The examples given in these parameters would yield two modules called \code{Module.Bscore} and \code{Module.Tscore}. +If the elements of this list are not named, they will be numbered in order, i.e. \code{Module1}, \code{Module2}.} + +\item{nBin}{The number of bins to use to divide all features for identification of signal-matched features for background calculation} + +\item{nBgd}{The number of background features to use for signal normalization.} + +\item{seed}{A number to be used as the seed for random number generation required when sampling cells for the background set. It is recommended +to keep track of the seed used so that you can reproduce results downstream.} \item{threads}{The number of threads to be used for parallel computing.} \item{logFile}{The path to a file to be used for logging ArchR output.} } \description{ -This function computes imputations weights that describe each cell as a linear combination of many cells based on a MAGIC diffusion matrix. -} -\details{ -RRR +This function calculates a module score from a set of features across all cells. This allows for +grouping of multiple features together into a single quantitative measurement. Currently, this +function only works for modules derived from the \code{GeneScoreMatrix}. 
Each module is added as a +new column in \code{cellColData} } diff --git a/man/addPeak2GeneLinks.Rd b/man/addPeak2GeneLinks.Rd index 11edf142..891f9ff8 100644 --- a/man/addPeak2GeneLinks.Rd +++ b/man/addPeak2GeneLinks.Rd @@ -31,6 +31,8 @@ addPeak2GeneLinks( \item{reducedDims}{The name of the \code{reducedDims} object (i.e. "IterativeLSI") to retrieve from the designated \code{ArchRProject}.} +\item{useMatrix}{The name of the matrix containing gene expression information to be used for determining peak-to-gene links. See \code{getAvailableMatrices(ArchRProj)}} + \item{dimsToUse}{A vector containing the dimensions from the \code{reducedDims} object to use in clustering.} \item{scaleDims}{A boolean value that indicates whether to z-score the reduced dimensions for each cell. This is useful for minimizing diff --git a/man/addReproduciblePeakSet.Rd b/man/addReproduciblePeakSet.Rd index 29bea491..7bd22e82 100644 --- a/man/addReproduciblePeakSet.Rd +++ b/man/addReproduciblePeakSet.Rd @@ -56,7 +56,7 @@ This is important to allow for exclusion of pseudo-bulk replicates derived from \item{pathToMacs2}{The full path to the MACS2 executable.} -\item{genomeSize}{The genome size to be used for MACS2 peak calling (see MACS2 documentation).} +\item{genomeSize}{The genome size to be used for MACS2 peak calling (see MACS2 documentation). This is required if genome is not hg19, hg38, mm9, or mm10.} \item{shift}{The number of basepairs to shift each Tn5 insertion. 
When combined with \code{extsize} this allows you to create proper fragments, centered at the Tn5 insertion site, for use with MACS2 (see MACS2 documentation).} diff --git a/man/addTileMatrix.Rd b/man/addTileMatrix.Rd index 9c8849ea..b1e71406 100644 --- a/man/addTileMatrix.Rd +++ b/man/addTileMatrix.Rd @@ -42,5 +42,4 @@ is to retrieve this from the \code{ArchRProject} using \code{getBlacklist()}.} } \description{ This function, for each sample, will independently compute counts for each tile -per cell in the ArrowFile } diff --git a/man/createGenomeAnnotation.Rd b/man/createGenomeAnnotation.Rd index f90fe5a9..e52160c0 100644 --- a/man/createGenomeAnnotation.Rd +++ b/man/createGenomeAnnotation.Rd @@ -20,10 +20,11 @@ createGenomeAnnotation( \item{blacklist}{A \code{GRanges} object containing regions that should be excluded from analyses due to unwanted biases.} \item{filter}{A boolean value indicating whether non-standard chromosome scaffolds should be excluded. -These "non-standard" chromosomes are defined by \code{filterChrGR()}.} +These "non-standard" chromosomes are defined by \code{filterChrGR()} and by manual annotation using the \code{filterChr} parameter.} \item{filterChr}{A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. -If no manual removal is desired, \code{filterChr} should be set to \code{NULL}.} +If no manual removal is desired, \code{filterChr} should be set to \code{NULL}. If \code{filter} is set to \code{TRUE} but \code{filterChr} is set to \code{NULL}, +non-standard chromosomes will still be removed as defined in \code{filterChrGR()}.} } \description{ This function will create a genome annotation object that can be used for creating ArrowFiles or an ArchRProject, etc. 
diff --git a/man/getCoAccessibility.Rd b/man/getCoAccessibility.Rd index ce8b1c10..b97163e2 100644 --- a/man/getCoAccessibility.Rd +++ b/man/getCoAccessibility.Rd @@ -16,7 +16,8 @@ getCoAccessibility( \item{corCutOff}{A numeric describing the minimum numeric peak-to-peak correlation to return.} -\item{resolution}{A numeric describing the bp resolution to return loops as. This helps with overplotting of correlated regions.} +\item{resolution}{A numeric describing the bp resolution to use when returning loops. This helps with overplotting of correlated regions. +This only takes effect if \code{returnLoops = TRUE}.} \item{returnLoops}{A boolean indicating to return the co-accessibility signal as a \code{GRanges} "loops" object designed for use with the \code{ArchRBrowser()} or as an \code{ArchRBrowserTrack()}.} diff --git a/man/getGroupBW.Rd b/man/getGroupBW.Rd index 10e80ef0..f90166fb 100644 --- a/man/getGroupBW.Rd +++ b/man/getGroupBW.Rd @@ -24,7 +24,8 @@ user-supplied \code{cellColData} metadata columns (for example, "Clusters"). Cel column will be grouped together and the average signal will be plotted.} \item{normMethod}{The name of the column in \code{cellColData} by which normalization should be performed. The recommended and default value -is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality.} +is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. Accepted values are +"None", "ReadsInTSS", "nCells", "ReadsInPromoter", or "nFrags".} \item{tileSize}{The numeric width of the tile/bin in basepairs for plotting ATAC-seq signal tracks. 
All insertions in a single bin will be summed.} diff --git a/man/getMatches.Rd b/man/getMatches.Rd index 3c6f7211..36e369ab 100644 --- a/man/getMatches.Rd +++ b/man/getMatches.Rd @@ -14,5 +14,6 @@ getMatches(ArchRProj = NULL, name = NULL, annoName = NULL) \item{annoName}{The name of a specific annotation to subset within the \code{peakAnnotation}.} } \description{ -This function gets peak annotation matches from a given ArchRProject. +This function gets peak annotation matches from a given ArchRProject. The peaks in the returned object are in the +same order as the peaks returned by \code{getPeakSet()}. } diff --git a/man/getMatrixFromProject.Rd b/man/getMatrixFromProject.Rd index 96b01a92..2e96d395 100644 --- a/man/getMatrixFromProject.Rd +++ b/man/getMatrixFromProject.Rd @@ -23,10 +23,16 @@ getMatrixFromProject( \item{verbose}{A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output.} -\item{binarize}{A boolean value indicating whether the matrix should be binarized before return. This is often desired when working with insertion counts.} +\item{binarize}{A boolean value indicating whether the matrix should be binarized before return. +This is often desired when working with insertion counts. Note that if the matrix has already been binarized previously, this should be set to \code{TRUE}.} \item{logFile}{The path to a file to be used for logging ArchR output.} } \description{ -This function gets a given data matrix from an \code{ArchRProject}. +This function gets a given data matrix from an \code{ArchRProject} and returns it as a \code{SummarizedExperiment}. +This function will return the matrix you ask it for, without altering that matrix unless you tell it to. +For example, if you added your \code{PeakMatrix} using \code{addPeakMatrix()} with \code{binarize = TRUE}, then +\code{getMatrixFromProject()} will return a binarized \code{PeakMatrix}. 
Alternatively, you could set \code{binarize = TRUE} +in the parameters passed to \code{getMatrixFromProject()} and the \code{PeakMatrix} will be binarized as you pull +it out. No other normalization is applied to the matrix by this function. } diff --git a/man/import10xFeatureMatrix.Rd b/man/import10xFeatureMatrix.Rd index 79f000fc..61261bd5 100644 --- a/man/import10xFeatureMatrix.Rd +++ b/man/import10xFeatureMatrix.Rd @@ -7,6 +7,8 @@ import10xFeatureMatrix( input = NULL, names = NULL, + strictMatch = TRUE, + verbose = TRUE, featureType = "Gene Expression" ) } @@ -15,6 +17,14 @@ import10xFeatureMatrix( \item{names}{A character of sample names associated with each input file.} +\item{strictMatch}{Only relevant when multiple input files are used. A boolean that indicates whether rows (genes) that do not match perfectly in the matrices +should be removed (\code{strictMatch = TRUE}) or coerced (\code{strictMatch = FALSE}). CellRanger seems to occasionally use different ensembl ids for the same gene across +different samples. If you are comfortable tolerating such mismatches, you can coerce all matrices to fit together, in which case the gene metadata present in +the first listed sample will be applied to all matrices for that particular gene entry. Regardless of what value is used for \code{strictMatch}, this function +cannot tolerate mismatched gene names, only mismatched metadata for the same gene.} + +\item{verbose}{Only relevant when multiple input files are used. A boolean that indicates whether messaging about mismatches should be verbose (\code{TRUE}) or minimal (\code{FALSE})} + \item{featureType}{The name of the feature to extract from the 10x feature file. 
See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/h5_matrices for more information.} } diff --git a/man/plotBrowserTrack.Rd b/man/plotBrowserTrack.Rd index f3374f1b..ada70e90 100644 --- a/man/plotBrowserTrack.Rd +++ b/man/plotBrowserTrack.Rd @@ -87,7 +87,7 @@ is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth \item{threads}{The number of threads to use for parallel execution.} -\item{ylim}{The numeric quantile y-axis limit to be used for for "bulkTrack" plotting. If not provided, the y-axis limit will be c(0, 0.999).} +\item{ylim}{The numeric quantile y-axis limit to be used for "bulkTrack" plotting. This should be expressed as \verb{c(lower limit, upper limit)} such as \code{c(0,0.99)}. If not provided, the y-axis limit will be c(0, 0.999).} \item{pal}{A custom palette (see \code{paletteDiscrete} or \code{ArchRPalettes}) used to override coloring for groups.} @@ -113,5 +113,6 @@ is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth } \description{ This function will plot the coverage at an input region in the style of a browser track. It allows for normalization of the signal -which enables direct comparison across samples. +which enables direct comparison across samples. Note that the genes displayed in these plots are derived from your \code{geneAnnotation} +(i.e. the \code{BSgenome} object you used) so they may not match other online genome browsers that use different gene annotations. 
} diff --git a/man/projectBulkATAC.Rd b/man/projectBulkATAC.Rd index 62c15c6f..4f5e9b77 100644 --- a/man/projectBulkATAC.Rd +++ b/man/projectBulkATAC.Rd @@ -12,6 +12,7 @@ projectBulkATAC( n = 250, verbose = TRUE, threads = getArchRThreads(), + force = FALSE, logFile = createLogFile("projectBulkATAC") ) } @@ -30,6 +31,8 @@ projectBulkATAC( \item{threads}{The number of threads used for parallel execution} +\item{force}{A boolean value indicating whether to force the projection of bulk ATAC data even if fewer than 25\% of the features are present in the bulk ATAC data set.} + \item{logFile}{The path to a file to be used for logging ArchR output.} } \description{ diff --git a/src/Footprinting_utils.cpp b/src/Footprinting_utils.cpp index 8698e16f..566fefb6 100644 --- a/src/Footprinting_utils.cpp +++ b/src/Footprinting_utils.cpp @@ -104,6 +104,9 @@ IntegerVector rleSumsStranded(List rleList, List grList, int width, Function as_ IntegerVector strand, debug, start; IntegerVector out = IntegerVector(width); + // Clone grList + grList = Rcpp::clone(grList); + int n = grList.size(); int shift = floor(width/2); diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index adae0404..308aeb3c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -5,6 +5,11 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // rowCorCpp Rcpp::NumericVector rowCorCpp(IntegerVector idxX, IntegerVector idxY, Rcpp::NumericMatrix X, Rcpp::NumericMatrix Y); RcppExport SEXP _ArchR_rowCorCpp(SEXP idxXSEXP, SEXP idxYSEXP, SEXP XSEXP, SEXP YSEXP) { From 9275d4edcdde223d2c59671ff4cdafaab1439aaf Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 31 Mar 2022 05:57:08 -0700 Subject: [PATCH 128/184] normBy doesnt actually take a value Responding to https://github.com/GreenleafLab/ArchR/issues/1363 The logic of the if else statements in `.MarkersSC` was constructed in such a way 
that it would never apply normalization based on a column in cellColData --- R/MarkerFeatures.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 56c95e8a..e2321213 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -219,7 +219,12 @@ getMarkerFeatures <- function( }else{ if(tolower(normBy) == "none"){ normFactors <- NULL + }else if(normBy %in% colnames(ArchRProj@cellColData)) { + normFactors <- getCellColData(ArchRProj, normBy, drop=FALSE) + normFactors[,1] <- median(normFactors[,1]) / normFactors[,1] }else{ + .logMessage("Warning! Parameter 'normBy' was set to ", normBy," but no matching column was found in cellColData.\n", + "Continuing with normalization based on column sums of matrix!", verbose = verbose, logFile = logFile) normFactors <- scaleTo / mColSums normFactors <- DataFrame(normFactors) } From 231f5eacd5fb5a25d7b20077fd28b9600b460257 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 1 Apr 2022 08:15:59 -0700 Subject: [PATCH 129/184] change name param to annoName in response to https://github.com/GreenleafLab/ArchR/issues/1367 this allows passage of a 'name' param through to TFBSTools::getMatrixSet() via the `...` passthrough argument --- R/AnnotationPeaks.R | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 2a9f4999..c3c417ce 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -242,7 +242,7 @@ addPeakAnnotations <- function( #' @param motifSet The motif set to be used for annotation. Options include: (i) "JASPAR2016", "JASPAR2018", "JASPAR2020" #' which gives the 2016, 2018 or 2020 version of JASPAR motifs or (ii) one of "cisbp", "encode", or "homer" which gives the #' corresponding motif sets from the `chromVAR` package. 
-#' @param name The name of the `peakAnnotation` object to be stored in the provided `ArchRProject` +#' @param annoName The name of the `peakAnnotation` object to be stored in the provided `ArchRProject` #' @param species The name of the species relevant to the supplied `ArchRProject`. This is used for identifying which motif to be #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR @@ -252,7 +252,7 @@ addPeakAnnotations <- function( #' (see `MOODS` for more details on this determination). #' @param width The width in basepairs to consider for motif matches. See the `motimatchr` package for more information. #' @param version An integer specifying version 1 or version 2 of chromVARmotifs see github for more info GreenleafLab/chromVARmotifs. -#' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `name` to be overwritten if +#' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `annoName` to be overwritten if #' it already exists in the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. #' @param ... Additional parameters to be passed to `TFBSTools::getMatrixSet` for getting a PWM object. 
@@ -260,7 +260,7 @@ addPeakAnnotations <- function( addMotifAnnotations <- function( ArchRProj = NULL, motifSet = "cisbp", - name = "Motif", + annoName = "Motif", species = NULL, collection = "CORE", motifPWMs = NULL, @@ -274,7 +274,7 @@ addMotifAnnotations <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = motifSet, name = "motifSet", valid = c("character", "null")) - .validInput(input = name, name = "name", valid = c("character")) + .validInput(input = annoName, name = "annoName", valid = c("character")) .validInput(input = species, name = "species", valid = c("character", "null")) .validInput(input = collection, name = "collection", valid = c("character", "null")) .validInput(input = cutOff, name = "cutOff", valid = c("numeric")) @@ -299,7 +299,7 @@ addMotifAnnotations <- function( .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "addMotifAnnotations Input-Parameters", logFile = logFile) - if(name %in% names(ArchRProj@peakAnnotation)){ + if(annoName %in% names(ArchRProj@peakAnnotation)){ if(force){ message("peakAnnotation name already exists! 
Overriding.") }else{ @@ -476,16 +476,16 @@ addMotifAnnotations <- function( ) dir.create(file.path(getOutputDirectory(ArchRProj), "Annotations"), showWarnings=FALSE) - savePositions <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Positions-In-Peaks.rds")) - saveMatches <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Matches-In-Peaks.rds")) + savePositions <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(annoName,"-Positions-In-Peaks.rds")) + saveMatches <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(annoName,"-Matches-In-Peaks.rds")) - ArchRProj@peakAnnotation[[name]]$Name <- name - ArchRProj@peakAnnotation[[name]]$motifs <- motifs - ArchRProj@peakAnnotation[[name]]$motifSummary <- motifSummary - ArchRProj@peakAnnotation[[name]]$Positions <- savePositions - ArchRProj@peakAnnotation[[name]]$Matches <- saveMatches + ArchRProj@peakAnnotation[[annoName]]$Name <- annoName + ArchRProj@peakAnnotation[[annoName]]$motifs <- motifs + ArchRProj@peakAnnotation[[annoName]]$motifSummary <- motifSummary + ArchRProj@peakAnnotation[[annoName]]$Positions <- savePositions + ArchRProj@peakAnnotation[[annoName]]$Matches <- saveMatches - .safeSaveRDS(out, file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-In-Peaks-Summary.rds")), compress = FALSE) + .safeSaveRDS(out, file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(annoName,"-In-Peaks-Summary.rds")), compress = FALSE) .safeSaveRDS(out$motifPositions, savePositions, compress = FALSE) .safeSaveRDS(out$motifMatches, saveMatches, compress = FALSE) From 6e7abec25de9217a8830ac1d6a853702c061bf8a Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Sun, 3 Apr 2022 20:39:32 -0700 Subject: [PATCH 130/184] Bugfix and improvements Bugfix in markerTesting for NA and 0. New features for loading. 
Bugfix for uwot and peak annotations where motifs/regions dont overlap any peaks --- .DS_Store | Bin 16388 -> 16388 bytes DESCRIPTION | 22 ++-- NAMESPACE | 3 +- R/AllClasses.R | 1 + R/AnnotationPeaks.R | 83 ++++++++++++- R/ColorPalettes.R | 1 + R/Embedding.R | 177 +++++++++++++++------------- R/GlobalDefaults.R | 49 +++++++- R/MarkerFeatures.R | 11 +- man/dot-DollarNames.ArchRProject.Rd | 2 +- 10 files changed, 250 insertions(+), 99 deletions(-) diff --git a/.DS_Store b/.DS_Store index 6010e1e42cb6a5dfbfe254daf8f62afd56f32fb7..ab55183e87dd9182d504050b04a6853e65b51f63 100644 GIT binary patch delta 1638 zcmeH{OH5Ni6o$_~JiL`L+%}@6y`|-$mQa(H(v|>55)$QQVn_i&H*F}Pp%k@11Rs>x zXiQ8DTwp0iV@%Ze1fvUlFLc2IB`iRrY{bXPL>IUKFKriU)VPoxvpF;8%q-6LolGQ* zNEkQH=;cqQNSe)7U@_&}T}LgZ!#11CWidGmopL18VFA?yLS1FvK)aVH!RXx|@P|6A z))}yb#H8dkf@*CFU#F|BtF7k^^03+^KbH#SIkjERGNdui@gU@n_7 zK9DigOYxOmYL4yPK~I}cC4vMp&}J&6QgTx>wNoFRrV$#Yak@_vG);3fPoHUl7C`_Q zQm`Hyun}5hA`3a#gnSgE1lv%78q}i!jo6DO_|b|0+R%j~I2J@N!Z?L~$QZ;B&f*d- z;|i`~40muB_wWppc!8IAjdz&G2Ykd=EMkcf(=a`gn1NZDjTNynR?aG7EilBJYK+Ht zU{&%%{qEbd#wAV0;O{+17^|^S5R<@ z$y54toi@)RNg@-+1R+}GHp#A1G>Hk(s>3325sI|sm%NCVs1#vhV(hq`%i#j;pH%!+ z#3aqoTY67lXaUKJ0ue@-VTBW=*nyp>L=|?SE{=a6d}u~XR6&O#;1IgegX4(c1WvXp z8U~^ghB1P3IFAdsh-p#%x n@8}k@6*wAEow_>8|7(<88A-kWrgz2cYF72G&`%}L8gst`V25On delta 1585 zcmeIyOGuPa6bJD0|3>Tkyrtjhn9e)XYJ5=YI6YJ}9eb!l4;wSBET@^p)S%{w*@M(@ zllFp3CPfGtMNwjuMUO=-gfwsw6Fr2?phZxNHfAoWab{Lgt)!KA^XDAS=HB1owzs0a z75!(l7~*HOt=yorrGd&k-X^QVQ|BO3q&Vt5Zf|3H`dbK!u<$u^MJa-@`I_==727MT z__#8bE6h%wo67j{q!^7P^+m?%5*DW{&sv>lU2og$i;|VmF)GIRn90QV>5X8#LvOY* zyVrf#CD({jU?Me*G5daZtyA_&!A^Px)0BH^94?RSl*GW0Wd+kzG`SkQ?z&pJG}O$_ zVO%ntQ>|%hrseU>gf=5QWy2mT^gW!G)N=#hQ{a< zeWeMSr0+BZ1?C|VQIMgA20G}G0yDB;K>- zOA`IhEQk97mo+vRSU=$CAT>Wj-sAZl13bhdJjNhK@QU|l iCWs$JU$m@@n%&kJ-PV>x*{~A7OO#TJ7&u7N~ diff --git a/DESCRIPTION b/DESCRIPTION index 5659170d..6953679c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ArchR Type: Package -Date: 2021-02-23 +Date: 2022-04-03 
Title: Analyzing single-cell regulatory chromatin in R. Version: 1.0.2 Authors@R: c( @@ -14,6 +14,14 @@ LazyData: TRUE RoxygenNote: 7.1.2 Encoding: UTF-8 Imports: + ggplot2, + SummarizedExperiment, + data.table, + Matrix, + rhdf5, + magrittr, + S4Vectors (>= 0.9.25), + BiocGenerics, Rcpp (>= 0.12.16), matrixStats, plyr, @@ -29,17 +37,9 @@ Imports: grid, gridExtra, Biostrings, - ComplexHeatmap -Depends: - ggplot2, - SummarizedExperiment, - data.table, - Matrix, - rhdf5, - magrittr, - S4Vectors (>= 0.9.25), - BiocGenerics, + ComplexHeatmap, GenomicRanges +Depends: Collate: 'AllClasses.R' 'AnnotationGenome.R' diff --git a/NAMESPACE b/NAMESPACE index ce5679a8..7fb50b4b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,10 +3,10 @@ S3method("$",ArchRProject) S3method("$<-",ArchRProject) S3method("[",ArchRProject) -S3method(.DollarNames,ArchRProject) export("%bcin%") export("%bcni%") export("%ni%") +export(.DollarNames.ArchRProject) export(ArchRBrowser) export(ArchRBrowserTrack) export(ArchRPalettes) @@ -158,5 +158,6 @@ export(subsetCells) export(theme_ArchR) export(trajectoryHeatmap) export(validBSgenome) +import(GenomicRanges) importFrom(Rcpp,sourceCpp) useDynLib(ArchR) diff --git a/R/AllClasses.R b/R/AllClasses.R index be9c05e9..f7966028 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -1,5 +1,6 @@ #' @useDynLib ArchR #' @importFrom Rcpp sourceCpp +#' @import GenomicRanges NULL setClassUnion("characterOrNull", c("character", "NULL")) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 2a9f4999..79f57e74 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -141,6 +141,10 @@ addPeakAnnotations <- function( names(regions) <- paste0("Region_", seq_along(regions)) } + if(any(duplicated(names(regions)))){ + stop("Found duplicated region names! 
Please make unique!") + } + regionPositions <- lapply(seq_along(regions), function(x){ .logThis(regions[[x]], paste0("regions[[x]]-", x), logFile = logFile) @@ -192,11 +196,14 @@ addPeakAnnotations <- function( if(is.null(peakSet)){ .logStop("peakSet is NULL. You need a peakset to run addMotifAnnotations! See addReproduciblePeakSet!", logFile = logFile) } - allPositions <- unlist(regionPositions) + allPositions <- unlist(regionPositions, use.names=TRUE) .logDiffTime("Creating Peak Overlap Matrix", t1 = tstart, verbose = TRUE, logFile = logFile) overlapRegions <- findOverlaps(peakSet, allPositions, ignore.strand=TRUE) + if(length(overlapRegions) == 0){ + stop("No Overlaps Found between regions and peak Matrix!") + } .logThis(overlapRegions, "overlapRegions", logFile = logFile) regionMat <- Matrix::sparseMatrix( @@ -211,6 +218,31 @@ addPeakAnnotations <- function( regionMat <- SummarizedExperiment::SummarizedExperiment(assays=SimpleList(matches = regionMat), rowRanges = peakSet) .logThis(regionMat, "regionSE", logFile = logFile) + ############################################################# + # Filter Regions With No Matches + ############################################################# + + #Number of Overlaps + nO <- Matrix::colSums(assay(regionMat)) + rF <- names(which(nO == 0)) + + if(all(nO == 0)){ + stop("No Overlaps Found! 
Please check your peakSet and genome!") + } + + if(length(rF) > 0){ + .logDiffTime(paste0("Filtering Region Annotations with 0 overlaps :\n\n ", paste(rF, collapse=", "), "\n\n"), t1 = tstart, verbose = TRUE, logFile = logFile) + #Filter + regionPositions <- regionPositions[!(names(regionPositions) %in% rF)] + regionMat <- regionMat[,names(regionPositions),drop=FALSE] + }else{ + .logDiffTime(paste0("All Regions Overlap at least 1 peak!"), t1 = tstart, verbose = TRUE, logFile = logFile) + } + + ############################################################# + # Summarize and Save + ############################################################# + dir.create(file.path(getOutputDirectory(ArchRProj), "Annotations"), showWarnings=FALSE) savePositions <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Positions-In-Peaks.rds")) saveMatches <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Matches-In-Peaks.rds")) @@ -451,6 +483,28 @@ addMotifAnnotations <- function( w = width ) + ############################################################# + # Filter Motifs With No Matches + ############################################################# + + #Number of Overlaps + nO <- lapply(motifPositions, length) %>% unlist + mF <- names(which(nO == 0)) + + if(all(nO == 0)){ + stop("No Overlaps Found! 
Please check your peakSet and genome!") + } + + if(length(mF) > 0){ + .logDiffTime(paste0("Filtering Motif Annotations with 0 overlaps :\n\n ", paste(mF, collapse=", "), "\n\n"), t1 = tstart, verbose = TRUE, logFile = logFile) + #Filter + motifPositions <- motifPositions[nO > 0] + motifSummary <- motifSummary[names(motifPositions),,drop=FALSE] + motifs <- motifs[names(motifPositions)] + }else{ + .logDiffTime(paste0("All Motifs Overlap at least 1 peak!"), t1 = tstart, verbose = TRUE, logFile = logFile) + } + ############################################################# # Motif Overlap Matrix ############################################################# @@ -665,12 +719,15 @@ addArchRAnnotations <- function( #Download if(!file.exists(file.path(annoPath, basename(url)))){ + oldTimeout <- getOption('timeout') + options(timeout=10000) message("Annotation ", basename(url)," does not exist! Downloading..") download.file( url = url, destfile = file.path(annoPath, basename(url)), quiet = FALSE ) + options(timeout=oldTimeout) } AnnoFile <- file.path(annoPath, basename(url)) @@ -747,6 +804,30 @@ addArchRAnnotations <- function( ) .logThis(regionMat, "regionSE", logFile=logFile) + ############################################################# + # Filter Regions With No Matches + ############################################################# + + #Number of Overlaps + nO <- Matrix::colSums(assay(regionMat)) + rF <- names(which(nO == 0)) + + if(all(nO == 0)){ + stop("No Overlaps Found! 
Please check your peakSet and genome!") + } + + if(length(rF) > 0){ + .logDiffTime(paste0("Filtering Region Annotations with 0 overlaps :\n\n ", paste(rF, collapse=", "), "\n\n"), t1 = tstart, verbose = TRUE, logFile = logFile) + #Filter + regionMat <- regionMat[,nO > 0,drop=FALSE] + }else{ + .logDiffTime(paste0("All Regions Overlap at least 1 peak!"), t1 = tstart, verbose = TRUE, logFile = logFile) + } + + ############################################################# + # Save + ############################################################# + dir.create(file.path(getOutputDirectory(ArchRProj), "Annotations"), showWarnings=FALSE) saveMatches <- file.path(getOutputDirectory(ArchRProj), "Annotations", paste0(name,"-Matches-In-Peaks.rds")) diff --git a/R/ColorPalettes.R b/R/ColorPalettes.R index d64b7b1c..81694ee3 100644 --- a/R/ColorPalettes.R +++ b/R/ColorPalettes.R @@ -112,6 +112,7 @@ paletteDiscrete <- function( .validInput(input = values, name = "values", valid = c("character", "factor")) .validInput(input = reverse, name = "reverse", valid = c("boolean")) + values <- unique(values) values <- gtools::mixedsort(values) n <- length(unique(values)) pal <- ArchRPalettes[[set]] diff --git a/R/Embedding.R b/R/Embedding.R index b4e9747a..c71026f4 100644 --- a/R/Embedding.R +++ b/R/Embedding.R @@ -209,6 +209,48 @@ addUMAP <- function( #New Save UWOT .saveUWOT <- function(model, file){ + + #save_uwot does not work because tarring doesnt work for some reason on Stanford's compute server + #Adapted from save_uwot + #this function is evaluated because it doesnt work on newer versions of uwot + #this is kept for legacy R versions + strUWOT <- " + .saveUWOT_Deprecated <- function(model, file){ + file <- file.path(normalizePath(dirname(file)), basename(file)) + wd <- getwd() + mod_dir <- tempfile(pattern = 'dir') + dir.create(mod_dir) + uwot_dir <- file.path(mod_dir, 'uwot') + dir.create(uwot_dir) + model_tmpfname <- file.path(uwot_dir, 'model') + .safeSaveRDS(model, file = 
model_tmpfname) + metrics <- names(model$metric) + n_metrics <- length(metrics) + for (i in seq_len(n_metrics)) { + nn_tmpfname <- file.path(uwot_dir, paste0('nn', i)) + if (n_metrics == 1) { + model$nn_index$save(nn_tmpfname) + model$nn_index$unload() + model$nn_index$load(nn_tmpfname) + } + else { + model$nn_index[[i]]$save(nn_tmpfname) + model$nn_index[[i]]$unload() + model$nn_index[[i]]$load(nn_tmpfname) + } + } + setwd(mod_dir) + system2('tar', '-cvf uwot.tar uwot', stdout = NULL, stderr = NULL) + o <- .fileRename('uwot.tar', file) + setwd(wd) + if (file.exists(mod_dir)) { + unlink(mod_dir, recursive = TRUE) + } + return(o) + } + " + eval(parse(text=strUWOT)) + tryCatch({ uwot::save_uwot(model = model, file = file, verbose = TRUE) }, error = function(e){ @@ -216,44 +258,61 @@ addUMAP <- function( }) } -#save_uwot does not work because tarring doesnt work for some reason on Stanford's compute server -#Adapted from save_uwot -.saveUWOT_Deprecated <- function(model, file){ - file <- file.path(normalizePath(dirname(file)), basename(file)) - wd <- getwd() - mod_dir <- tempfile(pattern = "dir") - dir.create(mod_dir) - uwot_dir <- file.path(mod_dir, "uwot") - dir.create(uwot_dir) - model_tmpfname <- file.path(uwot_dir, "model") - .safeSaveRDS(model, file = model_tmpfname) - metrics <- names(model$metric) - n_metrics <- length(metrics) - for (i in seq_len(n_metrics)) { - nn_tmpfname <- file.path(uwot_dir, paste0("nn", i)) - if (n_metrics == 1) { - model$nn_index$save(nn_tmpfname) - model$nn_index$unload() - model$nn_index$load(nn_tmpfname) - } - else { - model$nn_index[[i]]$save(nn_tmpfname) - model$nn_index[[i]]$unload() - model$nn_index[[i]]$load(nn_tmpfname) - } - } - setwd(mod_dir) - system2("tar", "-cvf uwot.tar uwot", stdout = NULL, stderr = NULL) - o <- .fileRename("uwot.tar", file) - setwd(wd) - if (file.exists(mod_dir)) { - unlink(mod_dir, recursive = TRUE) - } - return(o) -} - #New Save UWOT .loadUWOT <- function(file, nDim = NULL){ + + #load_uwot does not 
work because tarring doesnt work for some reason on Stanford's compute server + #Adapted from load_uwot + #this function is evaluated because it doesnt work on newer versions of uwot + #this is kept for legacy R versions + strUWOT <- " + .loadUWOT_Deprecated <- function(file, nDim = NULL){ + model <- NULL + tryCatch({ + mod_dir <- tempfile(pattern = 'dir') + dir.create(mod_dir) + utils::untar(file, exdir = mod_dir) + model_fname <- file.path(mod_dir, 'uwot/model') + if (!file.exists(model_fname)) { + stop('Cant find model in ', file) + } + model <- readRDS(file = model_fname) + metrics <- names(model$metric) + n_metrics <- length(metrics) + for (i in seq_len(n_metrics)){ + nn_fname <- file.path(mod_dir, paste0('uwot/nn', i)) + if (!file.exists(nn_fname)) { + stop('Cant find nearest neighbor index ', nn_fname, ' in ', file) + } + metric <- metrics[[i]] + if(length(model$metric[[i]]) == 0){ + if(!is.null(nDim)){ + nDim2 <- nDim + }else{ + nDim2 <- length(model$metric[[i]]) + } + } + if(!is.null(nDim)){ + nDim2 <- nDim + } + ann <- uwot:::create_ann(metric, ndim = nDim2) + ann$load(nn_fname) + if (n_metrics == 1) { + model$nn_index <- ann + }else{ + model$nn_index[[i]] <- ann + } + } + }, finally = { + if (file.exists(mod_dir)) { + unlink(mod_dir, recursive = TRUE) + } + }) + model + } + " + eval(parse(text=strUWOT)) + tryCatch({ uwot::load_uwot(file = file, verbose = TRUE) }, error = function(e){ @@ -261,52 +320,6 @@ addUMAP <- function( }) } -#Adapted from load_uwot -.loadUWOT_Deprecated <- function(file, nDim = NULL){ - model <- NULL - tryCatch({ - mod_dir <- tempfile(pattern = "dir") - dir.create(mod_dir) - utils::untar(file, exdir = mod_dir) - model_fname <- file.path(mod_dir, "uwot/model") - if (!file.exists(model_fname)) { - stop("Can't find model in ", file) - } - model <- readRDS(file = model_fname) - metrics <- names(model$metric) - n_metrics <- length(metrics) - for (i in seq_len(n_metrics)){ - nn_fname <- file.path(mod_dir, paste0("uwot/nn", i)) - if 
(!file.exists(nn_fname)) { - stop("Can't find nearest neighbor index ", nn_fname, " in ", file) - } - metric <- metrics[[i]] - if(length(model$metric[[i]]) == 0){ - if(!is.null(nDim)){ - nDim2 <- nDim - }else{ - nDim2 <- length(model$metric[[i]]) - } - } - if(!is.null(nDim)){ - nDim2 <- nDim - } - ann <- uwot:::create_ann(metric, ndim = nDim2) - ann$load(nn_fname) - if (n_metrics == 1) { - model$nn_index <- ann - }else{ - model$nn_index[[i]] <- ann - } - } - }, finally = { - if (file.exists(mod_dir)) { - unlink(mod_dir, recursive = TRUE) - } - }) - model -} - #' Add a TSNE embedding of a reduced dimensions object to an ArchRProject #' #' This function will compute a TSNE embedding and add it to an ArchRProject. diff --git a/R/GlobalDefaults.R b/R/GlobalDefaults.R index 14bfd94f..aab0b8a5 100644 --- a/R/GlobalDefaults.R +++ b/R/GlobalDefaults.R @@ -12,27 +12,72 @@ ArchRDefaults <- list( ArchR.verbose = TRUE ) +ArchRDependency <- c( + "grid", + "gridExtra", + "gtools", + "gtable", + "ggplot2", + "magrittr", + "plyr", + "stringr", + "data.table", + "matrixStats", + "S4Vectors", + "GenomicRanges", + "BiocGenerics", + "Matrix", + "Rcpp", + "SummarizedExperiment", + "rhdf5" +) + .onAttach <- function(libname, pkgname){ - if(!interactive()) return() - v <- packageVersion("ArchR") + + #Logo .ArchRLogo() + + #Package Startup + v <- packageVersion("ArchR") packageStartupMessage("ArchR : Version ", v, "\nFor more information see our website : www.ArchRProject.com\nIf you encounter a bug please report : https://github.com/GreenleafLab/ArchR/issues") + + #Load Packages + packageStartupMessage("Loading Required Packages...") + pkgs <- ArchRDependency + for(i in seq_along(pkgs)){ + packageStartupMessage("\tLoading Package : ", pkgs[i], " v", packageVersion(pkgs[i])) + tryCatch({ + suppressPackageStartupMessages(require(pkgs[i], character.only=TRUE)) + }, error = function(e){ + packageStartupMessage("\tFailed To Load Package : ", pkgs[i], " v", packageVersion(pkgs[i])) + }) + } + 
+ if(!interactive()) return() + + #Set Default Options op <- options() toset <- !(names(ArchRDefaults) %in% names(op)) + if (any(toset)) options(ArchRDefaults[toset]) + if(!.isWholenumber(options()[["ArchR.threads"]])){ addArchRThreads() }else if(options()[["ArchR.threads"]] == 1){ addArchRThreads() } + if(!.checkCairo()){ packageStartupMessage("WARNING : Cairo check shows Cairo is not functional.\n ggplot2 rasterization will not be available without Cario.\n This may cause issues editing plots with many thousands of points from single cells.") } + if(.checkJupyter()){ packageStartupMessage("Detected Jupyer Notebook session. Disabling Log Messages!\n\tIf this is undesired use `addArchRVerbose(TRUE)`") addArchRVerbose(verbose = FALSE) } + invisible() + } #Check Jupyer Status diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 56c95e8a..a03eecea 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -419,7 +419,16 @@ getMarkerFeatures <- function( }) %>% Reduce("rbind", .) - idxFilter <- rowSums(pairwiseDF[,c("mean1","mean2")]) != 0 + #Check for Mean being 0 for both Mean1 and Mean2 + idxFilter1 <- rowSums(pairwiseDF[,c("mean1","mean2")]) != 0 + + #Check For NA in Either Mean1 Mean2 + idxFilter2 <- rowSums(is.na(pairwiseDF[,c("mean1","mean2")])) == 0 + + #Combo Check + idxFilter <- idxFilter1 & idxFilter2 + + #FDR pairwiseDF$fdr <- NA pairwiseDF$fdr[idxFilter] <- p.adjust(pairwiseDF$pval[idxFilter], method = "fdr") pairwiseDF <- pairwiseDF[rownames(featureDF), , drop = FALSE] diff --git a/man/dot-DollarNames.ArchRProject.Rd b/man/dot-DollarNames.ArchRProject.Rd index 2a965405..d0905606 100644 --- a/man/dot-DollarNames.ArchRProject.Rd +++ b/man/dot-DollarNames.ArchRProject.Rd @@ -4,7 +4,7 @@ \alias{.DollarNames.ArchRProject} \title{Accessing cellColData directly from dollar.sign accessor} \usage{ -\method{.DollarNames}{ArchRProject}(x, pattern = "") +.DollarNames.ArchRProject(x, pattern = "") } \description{ This function will allow direct access to 
cellColData with a \code{$} accessor. From 9d2ebbab2944f8722b4b9bd490ca3a87ea2e5dcf Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Sun, 3 Apr 2022 21:59:03 -0700 Subject: [PATCH 131/184] update readme --- .DS_Store | Bin 16388 -> 16388 bytes README.md | 10 +++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.DS_Store b/.DS_Store index ab55183e87dd9182d504050b04a6853e65b51f63..9be749afeb44d77fc6aec1464d916eb8ab59cc9b 100644 GIT binary patch delta 23 ecmZo^U~Fk%+#oE&VPa&eqhMrayjfc2r7{3cX$EQl delta 23 ecmZo^U~Fk%+#oE&VQ6TgqhMrgzFAu4r7{3cE(T@* diff --git a/README.md b/README.md index 01464e8f..0fee95a7 100755 --- a/README.md +++ b/README.md @@ -34,22 +34,22 @@ ArchR is a full-featured R package for processing and analyzing single-cell ATAC For a full walk through of installation and frequently related issues please visit www.ArchRProject.com. **First, install devtools (for installing GitHub packages) if it isn't already installed:** -```{r} +``` r if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") ``` **Then, install BiocManager (for installing bioconductor packages) if it isn't already installed:** -```{r} +``` r if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") ``` **Then, install ArchR:** -```{r} +``` r devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) ``` **Lastly, install all of the ArchR dependencies that aren't installed by default:** -```{r} +``` r library(ArchR) ArchR::installExtraPackages() ``` @@ -58,7 +58,7 @@ If any of these steps fails, you should identify the offending package and troub # Issues using ArchR? ArchR is currently in __beta__. We expect there to be bumps in the road. 
If you think you have found a bug, please first install the latest version of ArchR via -```{r} +``` r devtools::install_github("GreenleafLab/ArchR", ref="master", repos = BiocManager::repositories()) ``` If this does not fix your problem, please [report an issue on Github](https://github.com/GreenleafLab/ArchR/issues) with the __Bug Report__ form. From 041882c7d123ab6288aa37fa33fa6add54ca833e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 4 Apr 2022 08:37:47 -0700 Subject: [PATCH 132/184] ask users to not paste screenshots --- .github/workflows/auto-comment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml index 1853bb0d..3ae4ec02 100644 --- a/.github/workflows/auto-comment.yml +++ b/.github/workflows/auto-comment.yml @@ -13,6 +13,7 @@ jobs: __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. __3.__ Did you post your log file? If not, add it now. + __4.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post. # issuesOpened: | # Hi @{{ author }}! Thanks for using ArchR! I am currently on paternity leave and will not be responding to any issues or discussion threads. I plan to be back in late January and will do my best to address your issue then.
From 3075f1f034d8c5a9d7dcdfd6b6990814733fbf87 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 6 Apr 2022 08:26:14 -0700 Subject: [PATCH 133/184] fix impute weights completion message --- R/Imputation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Imputation.R b/R/Imputation.R index fad0239c..bda3e0da 100644 --- a/R/Imputation.R +++ b/R/Imputation.R @@ -198,7 +198,7 @@ addImputeWeights <- function( }, threads = threads) %>% SimpleList names(weightList) <- paste0("w",seq_along(weightList)) - .logDiffTime(sprintf("Completed Getting Magic Weights!", round(object.size(weightList) / 10^9, 3)), + .logDiffTime(sprintf("Completed Getting Magic Weights! Object size - %s.", round(object.size(weightList) / 10^9, 3)), t1 = tstart, verbose = FALSE, logFile = logFile) ArchRProj@imputeWeights <- SimpleList( From 3af7c3075a7e95ec88a290b9fecc1061b7bfe885 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 13 Apr 2022 06:04:39 -0700 Subject: [PATCH 134/184] update warning message and function params --- R/BulkProjection.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/BulkProjection.R b/R/BulkProjection.R index 04b2f1d4..8cef79f1 100644 --- a/R/BulkProjection.R +++ b/R/BulkProjection.R @@ -3,9 +3,9 @@ #' This function will Project Bulk ATAC-seq data into single cell subspace. #' #' @param ArchRProj An `ArchRProject` object containing the dimensionality reduction matrix passed by `reducedDims`. -#' @param seATAC Bulk ATAC Summarized Experiment. -#' @param reducedDims A string specifying the reducedDims. -#' @param embedding A string specifying embedding. +#' @param seATAC A `SummarizedExperiment` object containing bulk ATAC-seq data. +#' @param reducedDims A string specifying the name of the `reducedDims` object to be used. +#' @param embedding A string specifying the name of the `embedding` object to be used. #' @param n An integer specifying the number of subsampled "pseudo single cells" per bulk sample. 
#' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param threads The number of threads used for parallel execution @@ -153,7 +153,7 @@ projectBulkATAC <- function( } if(embedding$params$nc != ncol(simRD)){ - .logMessage("Error incosistency found with matching LSI dimensions to those used in addEmbedding", + .logMessage("Warning! Inconsistency found with matching LSI dimensions to those used in addUMAP or addTSNE", "\nReturning with simulated reduced dimension coordinates...", verbose = TRUE, logFile = logFile) out <- SimpleList( simulatedReducedDims = simRD From a8c48d82dd2904e7bea363c8a9ef80e05464a74d Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 13 Apr 2022 08:17:32 -0700 Subject: [PATCH 135/184] revert warning to error --- R/BulkProjection.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/BulkProjection.R b/R/BulkProjection.R index 8cef79f1..882dd9a4 100644 --- a/R/BulkProjection.R +++ b/R/BulkProjection.R @@ -153,7 +153,7 @@ projectBulkATAC <- function( } if(embedding$params$nc != ncol(simRD)){ - .logMessage("Warning! Inconsistency found with matching LSI dimensions to those used in addUMAP or addTSNE", + .logMessage("Error! 
Inconsistency found with matching LSI dimensions to those used in addUMAP or addTSNE", "\nReturning with simulated reduced dimension coordinates...", verbose = TRUE, logFile = logFile) out <- SimpleList( simulatedReducedDims = simRD From cf8d41deb134e725fccbf8c71d4ee64771a4535f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 13 Apr 2022 08:27:46 -0700 Subject: [PATCH 136/184] fix excludeChr param definition --- R/IterativeLSI.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index f8b75a15..31ba7001 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -46,9 +46,9 @@ #' @param totalFeatures The number of features to consider for use in LSI after ranking the features by the total number of insertions. #' These features are the only ones used throught the variance identification and LSI. These are an equivalent when using a `TileMatrix` to a defined peakSet. #' @param filterQuantile A number [0,1] that indicates the quantile above which features should be removed based on insertion counts prior -#' @param excludeChr A string of chromosomes to exclude for iterativeLSI procedure. #' to the first iteration of the iterative LSI paradigm. For example, if `filterQuantile = 0.99`, any features above the 99th percentile in #' insertion counts will be ignored for the first LSI iteration. +#' @param excludeChr A string of chromosomes to exclude for iterativeLSI procedure. #' @param saveIterations A boolean value indicating whether the results of each LSI iterations should be saved as compressed `.rds` files in #' the designated `outDir`. #' @param UMAPParams The list of parameters to pass to the UMAP function if "UMAP" if `saveIterations=TRUE`. See the function `uwot::umap()`. 
From db42a20ee68fbbabf1e05742f7f162fdb1a4db54 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:18:24 -0700 Subject: [PATCH 137/184] add native support for Vierstra motifs create a motifSet option for "vierstra" and create collection options for "individual" and "archetype" corresponding to the motifs produced by Jeff Vierstra (https://github.com/jvierstra/motif-clustering). The new rds files linked on amazon have fixed the issue of ":" being a part of the motif name and now use "|" for separation of name info (https://github.com/GreenleafLab/ArchR/issues/675) --- R/AnnotationPeaks.R | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index c98f5b6e..f4628695 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -272,13 +272,16 @@ addPeakAnnotations <- function( #' #' @param ArchRProj An `ArchRProject` object. #' @param motifSet The motif set to be used for annotation. Options include: (i) "JASPAR2016", "JASPAR2018", "JASPAR2020" -#' which gives the 2016, 2018 or 2020 version of JASPAR motifs or (ii) one of "cisbp", "encode", or "homer" which gives the -#' corresponding motif sets from the `chromVAR` package. +#' which gives the 2016, 2018 or 2020 version of JASPAR motifs, (ii) one of "cisbp", "encode", or "homer" which gives the +#' corresponding motif sets from the `chromVAR` package, or (iii) "vierstra" which gives the clustered archetype motifs +#' created by Jeff Vierstra (https://github.com/jvierstra/motif-clustering). #' @param annoName The name of the `peakAnnotation` object to be stored in the provided `ArchRProject` #' @param species The name of the species relevant to the supplied `ArchRProject`. This is used for identifying which motif to be #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. 
#' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR -#' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. +#' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is +#' "vierstra", then this must either be "individual" (for individual motif models), or "archetype" (for clustered models). +#' NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra. #' @param motifPWMs A custom set of motif PWMs as a PWMList for adding motif annotations. #' @param cutOff The p-value cutoff to be used for motif search. The p-value is determined vs a background set of sequences #' (see `MOODS` for more details on this determination). @@ -442,6 +445,26 @@ addMotifAnnotations <- function( motifs <- obj$motifs motifSummary <- obj$motifSummary + }else if(tolower(motifSet)=="vierstra"){ + if(tolower(collection)=="individual"){ + fileName <- "Vierstra_Individual_Motifs.rds" + download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds", + destfile = fileName) + motifs <- readRDS(fileName) + file.remove(fileName) + } else if(tolower(collection == "archetype")){ + fileName <- "Vierstra_Archetype_Motifs.rds" + download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds", + destfile = fileName) + motifs <- readRDS(fileName) + file.remove(fileName) + } else { + stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet + ". 
Accepted values are 'individual' and 'archetype'")) + } + obj <- NULL + motifSummary <- NULL + }else if(tolower(motifSet)=="custom"){ obj <- NULL From b1fb5f0dbae7d07554854e7347ee70c11b52ed35 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:20:31 -0700 Subject: [PATCH 138/184] typo --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index f4628695..23bb14f9 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -459,7 +459,7 @@ addMotifAnnotations <- function( motifs <- readRDS(fileName) file.remove(fileName) } else { - stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet + stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". Accepted values are 'individual' and 'archetype'")) } obj <- NULL From d7a3105d910f00c11a8c90841d5a59b2436897a9 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 15:55:29 -0700 Subject: [PATCH 139/184] unify file download workflow make file download workflow match that of annotations like lola etc --- R/AnnotationPeaks.R | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 23bb14f9..af705ae5 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -447,21 +447,29 @@ addMotifAnnotations <- function( }else if(tolower(motifSet)=="vierstra"){ if(tolower(collection)=="individual"){ - fileName <- "Vierstra_Individual_Motifs.rds" - download.file(url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds", - destfile = fileName) - motifs <- readRDS(fileName) - file.remove(fileName) + url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds" } else if(tolower(collection == "archetype")){ - fileName <- "Vierstra_Archetype_Motifs.rds" - download.file(url = 
"https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds", - destfile = fileName) - motifs <- readRDS(fileName) - file.remove(fileName) + url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds" } else { stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". Accepted values are 'individual' and 'archetype'")) } + + annoPath <- file.path(find.package("ArchR", NULL, quiet = TRUE), "data", "Annotations") + dir.create(annoPath, showWarnings = FALSE) + + #Download + if(!file.exists(file.path(annoPath, basename(url)))){ + message("Motif file ", basename(url)," does not exist! Downloading..") + download.file( + url = url, + destfile = file.path(annoPath, basename(url)), + quiet = FALSE + ) + } + motifFile <- file.path(annoPath, basename(url)) + + motifs <- readRDS(motifFile) obj <- NULL motifSummary <- NULL From 3f263a9e32662c6db9bb25eb7335bf7d231a8caa Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 15 Apr 2022 17:02:19 -0700 Subject: [PATCH 140/184] update collection param --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index af705ae5..ec8c06fe 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -280,7 +280,7 @@ addPeakAnnotations <- function( #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR #' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is -#' "vierstra", then this must either be "individual" (for individual motif models), or "archetype" (for clustered models). +#' "vierstra", then this must either be "archetype" (for the v2 clustered models) or "individual" (for the original v1 individual motif models). 
#' NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra. #' @param motifPWMs A custom set of motif PWMs as a PWMList for adding motif annotations. #' @param cutOff The p-value cutoff to be used for motif search. The p-value is determined vs a background set of sequences From 6feac133d5a5bc8a14e589bfc999329bb39e8f6b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Sat, 16 Apr 2022 05:58:16 -0700 Subject: [PATCH 141/184] update param definition for pal To make it more clear how to change the color of highlighted cells addressing https://github.com/GreenleafLab/ArchR/issues/1240 --- R/VisualizeData.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/VisualizeData.R b/R/VisualizeData.R index d675abd4..d433c8e7 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -174,6 +174,10 @@ plotPDF <- function( #' @param imputeWeights The weights to be used for imputing numerical values for each cell as a linear combination of other cells values. #' See `addImputationWeights()` and `getImutationWeights()` for more information. #' @param pal A custom palette (see `paletteDiscrete` or `ArchRPalettes`) used to override discreteSet/continuousSet for coloring vector. +#' If you are using `pal` in conjuction with `highlightCells`, your palette must be a named vector with two entries, one named for the value +#' of the cells in the `name` column of `cellColData` and the other named "Non.Highlighted". For example, `pal=c("Mono" = "green", "Non.Highlighted" = "lightgrey")` +#' would be used to change the color of cells with the value "Mono" in the `cellColData` column indicated by `name`. Because of this, +#' the cells indicated by `highlightCells` must also match this value in the `name` column. #' @param size A number indicating the size of the points to plot if `plotAs` is set to "points". #' @param sampleCells A numeric describing number of cells to use for plot. If using impute weights, this will occur after imputation. 
#' @param highlightCells A character vector of cellNames describing which cells to hightlight if using `plotAs = "points"` (default if discrete). From c61dfca4d346515ce6a40656f8fa1b01cbd73a52 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 18 Apr 2022 05:47:15 -0700 Subject: [PATCH 142/184] require hexbin to be installed If a user doesnt have `hexbin` installed, they will get a cryptic error. Since ggplot2 only "suggests" `hexbin`, its possible to try to run this function without it installed properly. Related to https://github.com/GreenleafLab/ArchR/issues/1387 https://github.com/GreenleafLab/ArchR/issues/1292 --- R/GgplotUtils.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/GgplotUtils.R b/R/GgplotUtils.R index 4d4c31ea..ec808b4b 100644 --- a/R/GgplotUtils.R +++ b/R/GgplotUtils.R @@ -555,6 +555,9 @@ ggHex <- function( .validInput(input = hexCut, name = "quantCut", valid = c("numeric", "null")) .validInput(input = addPoints, name = "addPoints", valid = c("boolean")) + #require hexbin to be installed. otherwise, this section wont work properly + .requirePackage(x = "hexbin", source = "CRAN") + df <- data.frame(x = x, y = y) include <- which(is.finite(x) & is.finite(y)) From 0dada5f424bc5945584e4424300875bc55f43a86 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 22 Apr 2022 10:20:12 -0700 Subject: [PATCH 143/184] update vierstra archetype motifs to v2.1 --- R/AnnotationPeaks.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index ec8c06fe..e2882b38 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -280,7 +280,7 @@ addPeakAnnotations <- function( #' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR #' collection to be used. 
See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is -#' "vierstra", then this must either be "archetype" (for the v2 clustered models) or "individual" (for the original v1 individual motif models). +#' "vierstra", then this must either be "archetype" (for the v2.1 clustered models) or "individual" (for the original v1 individual motif models). #' NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra. #' @param motifPWMs A custom set of motif PWMs as a PWMList for adding motif annotations. #' @param cutOff The p-value cutoff to be used for motif search. The p-value is determined vs a background set of sequences @@ -449,7 +449,7 @@ addMotifAnnotations <- function( if(tolower(collection)=="individual"){ url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds" } else if(tolower(collection == "archetype")){ - url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs.rds" + url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs_v2.1.rds" } else { stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". Accepted values are 'individual' and 'archetype'")) From b40e6b39e04cd3391642631631823f7f02f09764 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 22 Apr 2022 11:09:52 -0700 Subject: [PATCH 144/184] add strictMatch to handle mismatch in cells If a GeneExpressionMatrix is added to the project but not all cells in the project have gene expression information, this causes problems with downstream functions that require info from all cells such as addIterativeLSI. This patch provides a warning to users when this is the case. Currently strictMatch defaults to FALSE but could consider changing that to TRUE or handling this downstream for ex in addIterativeLSI. 
--- R/MatrixGeneExpression.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index 7d91a480..5a1b02c8 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -17,6 +17,9 @@ #' @param verbose A boolean describing whether to print to console messages of progress. #' @param threads The number of threads to be used for parallel computing. #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. +#' @param strictMatch A boolean value indicating whether every cell in `input` must be represented in `seRNA`. If set to `FALSE`, +#' this and this `GeneExpressionMatrix` is used for certain downstream analyses such as `addIterativeLSI()`, then errors may occur +#' because not all cells will have relevant information. #' @param force A boolean value indicating whether to force the matrix indicated by `matrixName` to be overwritten if it already exist in the given `input`. #' @param logFile The path to a file to be used for logging ArchR output. 
#' @export @@ -29,10 +32,24 @@ addGeneExpressionMatrix <- function( verbose = TRUE, threads = getArchRThreads(), parallelParam = NULL, + strictMatch = FALSE, force = TRUE, logFile = createLogFile("addGeneExpressionMatrix") ){ + .validInput(input = input, name = "input", valid = c("ArchRProj", "character")) + .validInput(input = seRNA, name = "seRNA", valid = c("SummarizedExperiment")) + .validInput(input = chromSizes, name = "chromSizes", valid = c("granges")) + .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null")) + .validInput(input = scaleTo, name = "scaleTo", valid = c("numeric")) + .validInput(input = verbose, name = "verbose", valid = c("boolean")) + .validInput(input = threads, name = "threads", valid = c("integer")) + .validInput(input = parallelParam, name = "parallelParam", valid = c("parallelparam", "null")) + .validInput(input = strictMatch, name = "strictMatch", valid = c("boolean")) + .validInput(input = force, name = "force", valid = c("boolean")) + .validInput(input = logFile, name = "logFile", valid = c("character")) + + if(inherits(input, "ArchRProject")){ ArrowFiles <- getArrowFiles(input) allCells <- rownames(getCellColData(input)) @@ -61,11 +78,18 @@ addGeneExpressionMatrix <- function( if(!is.null(allCells)){ cellsInArrows <- allCells } + overlap <- sum(cellsInArrows %in% colnames(seRNA)) / length(cellsInArrows) .logMessage("Overlap w/ scATAC = ", round(overlap,3), logFile = logFile, verbose = TRUE) if(overlap == 0){ stop("No overlapping cell names found between ArrowFiles and seRNA object! Cell names in ArrowFiles must match colnames in seRNA!") + } else if(overlap != 1) { + if(strictMatch){ + stop("Error! 'strictMatch = TRUE' and not all cells in input are represented in the provided gene expression seRNA. To proceed, please subset your ArchRProject using the subsetArchRProject() function to contain only cells present in seRNA or set 'strictMatch = FALSE'.") + } else { + .logMessage("Warning! 
Not all cells in input exist in seRNA! This may cause downstream issues with functions that require information from all cells. For example, addIterativeLSI() will not work on this GeneExpressionMatrix!", logFile = logFile, verbose = TRUE) + } } splitCells <- split(cellsInArrows, stringr::str_split(cellsInArrows, pattern = "#", simplify=TRUE)[,1]) From ee0c8ec29a8b8108f48cda8b9e62e0c6cfc1a41b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 26 Apr 2022 13:59:21 -0700 Subject: [PATCH 145/184] update clusterParams description --- R/IterativeLSI.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 31ba7001..156bbe80 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -11,9 +11,10 @@ #' "TileMatrix" or "PeakMatrix". #' @param name The name to use for storage of the IterativeLSI dimensionality reduction in the `ArchRProject` as a `reducedDims` object. #' @param iterations The number of LSI iterations to perform. -#' @param clusterParams A list of Additional parameters to be passed to `addClusters()` for clustering within each iteration. +#' @param clusterParams A list of additional parameters to be passed to `addClusters()` for clustering within each iteration. #' These params can be constant across each iteration, or specified for each iteration individually. Thus each param must be of -#' length == 1 or the total number of `iterations` - 1. PLEASE NOTE - We have updated these params to `resolution=2` and `maxClusters=6`! To use previous settings use `resolution=0.2` and `maxClusters=NULL`. +#' length == 1 or the total number of `iterations` - 1. If you want to use `scran` for clustering, you would pass this as `method="scran"`. +#` PLEASE NOTE - We have updated these params to `resolution=2` and `maxClusters=6`! To use previous settings use `resolution=0.2` and `maxClusters=NULL`. #' @param firstSelection First iteration selection method for features to use for LSI. 
Either "Top" for the top accessible/average or "Var" for the top variable features. #' "Top" should be used for all scATAC-seq data (binary) while "Var" should be used for all scRNA/other-seq data types (non-binary). #' @param depthCol A column in the `ArchRProject` that represents the coverage (scATAC = unique fragments, scRNA = unique molecular identifiers) per cell. From f162072dfb38e62d71431506db124a7a0e09b6aa Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 26 Apr 2022 15:04:45 -0700 Subject: [PATCH 146/184] fix module scores when only one set of features is supplied addressing https://github.com/GreenleafLab/ArchR/issues/308#issuecomment-813905861 --- R/ModuleScore.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 2e7d4fd9..725276ab 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -132,8 +132,14 @@ addModuleScore <- function( doSampleCells = FALSE ) Matrix::colMeans(m[seq_along(idxFgd), ]) - Matrix::colMeans(m[-seq_along(idxFgd), ]) - }) %>% Reduce("cbind", .) - + }) + + if (length(features) > 1) { + dfM <- Reduce("cbind", dfM) + } else { + dfM <- as.data.frame(dfM[[1]], row.names = names(dfM), drop = FALSE) + } + #add the module scores as new columns in cellColData for(x in seq_len(ncol(dfM))){ ArchRProj <- addCellColData(ArchRProj, data = dfM[,x], name=names(featureList)[x], cells=rownames(dfM), force = TRUE) From 9b3e72e1fa82b9f6c116023e3ac09165d44b04a4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 29 Apr 2022 15:25:42 -0700 Subject: [PATCH 147/184] update pal param def --- R/VisualizeData.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/VisualizeData.R b/R/VisualizeData.R index d433c8e7..e9e327ba 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -173,11 +173,13 @@ plotPDF <- function( #' @param log2Norm A boolean value indicating whether a log2 transformation should be performed on the values (if continuous) in plotting. 
#' @param imputeWeights The weights to be used for imputing numerical values for each cell as a linear combination of other cells values. #' See `addImputationWeights()` and `getImutationWeights()` for more information. -#' @param pal A custom palette (see `paletteDiscrete` or `ArchRPalettes`) used to override discreteSet/continuousSet for coloring vector. -#' If you are using `pal` in conjuction with `highlightCells`, your palette must be a named vector with two entries, one named for the value -#' of the cells in the `name` column of `cellColData` and the other named "Non.Highlighted". For example, `pal=c("Mono" = "green", "Non.Highlighted" = "lightgrey")` -#' would be used to change the color of cells with the value "Mono" in the `cellColData` column indicated by `name`. Because of this, -#' the cells indicated by `highlightCells` must also match this value in the `name` column. +#' @param pal A custom palette used to override discreteSet/continuousSet for coloring cells. Typically created using `paletteDiscrete()` or `paletteContinuous()`. +#' To make a custom palette, you must construct this following strict specifications. If the coloring is for discrete data (i.e. "Clusters"), +#' then this palette must be a named vector of colors where each color is named for the corresponding group (e.g. `"C1" = "#F97070"`). If the coloring +#' for continuous data, then it just needs to be a vector of colors. If you are using `pal` in conjuction with `highlightCells`, your palette +#' must be a named vector with two entries, one named for the value of the cells in the `name` column of `cellColData` and the other named +#' "Non.Highlighted". For example, `pal=c("Mono" = "green", "Non.Highlighted" = "lightgrey")` would be used to change the color of cells with the value +#' "Mono" in the `cellColData` column indicated by `name`. Because of this, the cells indicated by `highlightCells` must also match this value in the `name` column. 
#' @param size A number indicating the size of the points to plot if `plotAs` is set to "points". #' @param sampleCells A numeric describing number of cells to use for plot. If using impute weights, this will occur after imputation. #' @param highlightCells A character vector of cellNames describing which cells to hightlight if using `plotAs = "points"` (default if discrete). From ba21b05969c7d34125dcc56cdbabf0d64007e019 Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Tue, 3 May 2022 22:27:31 -0700 Subject: [PATCH 148/184] bugfix import data.table causing issues --- .DS_Store | Bin 16388 -> 16388 bytes NAMESPACE | 3 ++- R/AllClasses.R | 3 ++- man/addGeneExpressionMatrix.Rd | 5 +++++ man/addIterativeLSI.Rd | 10 +++++----- man/addMotifAnnotations.Rd | 15 +++++++++------ man/plotEmbedding.Rd | 8 +++++++- man/plotMarkers.Rd | 6 +++++- man/projectBulkATAC.Rd | 6 +++--- 9 files changed, 38 insertions(+), 18 deletions(-) diff --git a/.DS_Store b/.DS_Store index 9be749afeb44d77fc6aec1464d916eb8ab59cc9b..a885b2f8922c50a3e6fb25a2386aaf50830e8950 100644 GIT binary patch delta 1426 zcmeH_-%C?r7{{OQ>&$c7F^{IUxlLy)Xf{x{rHE3?*)Jy)ZlM&qTp3BzmD8-&*a)vQ z>*7Ga2nwtyjD9JmMN|+GR&=gfe`{NyhhhhaC#5D*iC*~vs`UqtIDP(rk0_}? z^6Ry#x_V)k6kX+%p6Xrd`j)+cHh0VMW)XvCLnk5*k1^K3&{AnKwC# z5(SP7WTh&qp*_@2A?l;^G(bZ%Ob_S9?vX}yb%WD+3mLhW z6=hS3GJdIANY!uIx~*EMQ4r?+g^-_ImL{fWEYHl!USV8aP-ri$*eKnyJ&vd?dV%A| zHQF4VP$rb~VvrYA@#lKM!He^}NaH$fj;7M7*NdF^lH)kjzfmzp?iLm0*gM)44j@EmVsA=7vt$0`k2wfHh98PtmlmRPXT++^>6rZ4qd^B1ZA*+7OE Jb!j@!{2RfHHBA5j delta 1365 zcmeH_TS${(7{{Og>!I(t^&I-!dg2-M0DAe@6GePcmL=2`$r>)MsVYl zO8TUW7FL<92A$bvIb_fsFq!OjgU;r#NzqcP0n``>^}GCmpr0s9>)#dV3H2F`Q(#%y zIk}5CesP|#MA__XZV_6gL782uv3R9fnJy;Rgb=3mD|1$)wx*pOy`8)W;z&b!a*&gn zsEdMhgig{BjZmEK(-=+CTbiNIG)MCw01SC3KoN?eKq*#WCCXt&4Qf#b4;s;eRXGx&gy_=0(S zXT;=8#nep0jLgKUnTxrZCv^ft`lz<_Ob_PW-M+ayxTh%?4EQ^eyYzH*eQa9$O}>2K z@8HyKkGnyz{svB)|6=JfQ6|qXC@d=0m9MI-a@5sJ!-EJ5ydeaPvAe3gg=NOoW})R#4xVmI&R_? 
zMi9pXJj5eB#uJQT5-(GEe?i`u$p=e*mq!DVqQQ diff --git a/NAMESPACE b/NAMESPACE index 7fb50b4b..5fec629b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -158,6 +158,7 @@ export(subsetCells) export(theme_ArchR) export(trajectoryHeatmap) export(validBSgenome) -import(GenomicRanges) +import(data.table) +importFrom(GenomicRanges,GRanges) importFrom(Rcpp,sourceCpp) useDynLib(ArchR) diff --git a/R/AllClasses.R b/R/AllClasses.R index f7966028..314c4182 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -1,6 +1,7 @@ #' @useDynLib ArchR #' @importFrom Rcpp sourceCpp -#' @import GenomicRanges +#' @importFrom GenomicRanges GRanges +#' @import data.table NULL setClassUnion("characterOrNull", c("character", "NULL")) diff --git a/man/addGeneExpressionMatrix.Rd b/man/addGeneExpressionMatrix.Rd index e728ac04..06365823 100644 --- a/man/addGeneExpressionMatrix.Rd +++ b/man/addGeneExpressionMatrix.Rd @@ -13,6 +13,7 @@ addGeneExpressionMatrix( verbose = TRUE, threads = getArchRThreads(), parallelParam = NULL, + strictMatch = FALSE, force = TRUE, logFile = createLogFile("addGeneExpressionMatrix") ) @@ -36,6 +37,10 @@ for Seurat Objects (see \code{Seurat::as.SingleCellExperiment}). The provided va \item{parallelParam}{A list of parameters to be passed for biocparallel/batchtools parallel computing.} +\item{strictMatch}{A boolean value indicating whether every cell in \code{input} must be represented in \code{seRNA}. 
If set to \code{FALSE}, +this and this \code{GeneExpressionMatrix} is used for certain downstream analyses such as \code{addIterativeLSI()}, then errors may occur +because not all cells will have relevant information.} + \item{force}{A boolean value indicating whether to force the matrix indicated by \code{matrixName} to be overwritten if it already exist in the given \code{input}.} \item{logFile}{The path to a file to be used for logging ArchR output.} diff --git a/man/addIterativeLSI.Rd b/man/addIterativeLSI.Rd index fedd8496..ac601e66 100644 --- a/man/addIterativeLSI.Rd +++ b/man/addIterativeLSI.Rd @@ -51,9 +51,9 @@ addIterativeLSI( \item{iterations}{The number of LSI iterations to perform.} -\item{clusterParams}{A list of Additional parameters to be passed to \code{addClusters()} for clustering within each iteration. +\item{clusterParams}{A list of additional parameters to be passed to \code{addClusters()} for clustering within each iteration. These params can be constant across each iteration, or specified for each iteration individually. Thus each param must be of -length == 1 or the total number of \code{iterations} - 1. PLEASE NOTE - We have updated these params to \code{resolution=2} and \code{maxClusters=6}! To use previous settings use \code{resolution=0.2} and \code{maxClusters=NULL}.} +length == 1 or the total number of \code{iterations} - 1. If you want to use \code{scran} for clustering, you would pass this as \code{method="scran"}.} \item{firstSelection}{First iteration selection method for features to use for LSI. Either "Top" for the top accessible/average or "Var" for the top variable features. "Top" should be used for all scATAC-seq data (binary) while "Var" should be used for all scRNA/other-seq data types (non-binary).} @@ -102,12 +102,12 @@ variance calculation and TF-IDF normalization.} \item{totalFeatures}{The number of features to consider for use in LSI after ranking the features by the total number of insertions. 
These features are the only ones used throught the variance identification and LSI. These are an equivalent when using a \code{TileMatrix} to a defined peakSet.} -\item{filterQuantile}{A number \link{0,1} that indicates the quantile above which features should be removed based on insertion counts prior} - -\item{excludeChr}{A string of chromosomes to exclude for iterativeLSI procedure. +\item{filterQuantile}{A number \link{0,1} that indicates the quantile above which features should be removed based on insertion counts prior to the first iteration of the iterative LSI paradigm. For example, if \code{filterQuantile = 0.99}, any features above the 99th percentile in insertion counts will be ignored for the first LSI iteration.} +\item{excludeChr}{A string of chromosomes to exclude for iterativeLSI procedure.} + \item{saveIterations}{A boolean value indicating whether the results of each LSI iterations should be saved as compressed \code{.rds} files in the designated \code{outDir}.} diff --git a/man/addMotifAnnotations.Rd b/man/addMotifAnnotations.Rd index e78aa964..296da8cc 100644 --- a/man/addMotifAnnotations.Rd +++ b/man/addMotifAnnotations.Rd @@ -7,7 +7,7 @@ addMotifAnnotations( ArchRProj = NULL, motifSet = "cisbp", - name = "Motif", + annoName = "Motif", species = NULL, collection = "CORE", motifPWMs = NULL, @@ -23,16 +23,19 @@ addMotifAnnotations( \item{ArchRProj}{An \code{ArchRProject} object.} \item{motifSet}{The motif set to be used for annotation. 
Options include: (i) "JASPAR2016", "JASPAR2018", "JASPAR2020" -which gives the 2016, 2018 or 2020 version of JASPAR motifs or (ii) one of "cisbp", "encode", or "homer" which gives the -corresponding motif sets from the \code{chromVAR} package.} +which gives the 2016, 2018 or 2020 version of JASPAR motifs, (ii) one of "cisbp", "encode", or "homer" which gives the +corresponding motif sets from the \code{chromVAR} package, or (iii) "vierstra" which gives the clustered archetype motifs +created by Jeff Vierstra (https://github.com/jvierstra/motif-clustering).} -\item{name}{The name of the \code{peakAnnotation} object to be stored in the provided \code{ArchRProject}} +\item{annoName}{The name of the \code{peakAnnotation} object to be stored in the provided \code{ArchRProject}} \item{species}{The name of the species relevant to the supplied \code{ArchRProject}. This is used for identifying which motif to be used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from \code{getGenome()}.} \item{collection}{If one of the JASPAR motif sets is used via \code{motifSet}, this parameter allows you to indicate the JASPAR -collection to be used. See \code{getMatrixSet()} from \code{TFBSTools} for all options to supply for collection.} +collection to be used. See \code{getMatrixSet()} from \code{TFBSTools} for all options to supply for collection. If \code{motifSet} is +"vierstra", then this must either be "archetype" (for the v2.1 clustered models) or "individual" (for the original v1 individual motif models). +NOTE: vierstra archetype motifs are currently in beta and have not been finalized by Jeff Vierstra.} \item{motifPWMs}{A custom set of motif PWMs as a PWMList for adding motif annotations.} @@ -43,7 +46,7 @@ collection to be used. 
See \code{getMatrixSet()} from \code{TFBSTools} for all o \item{version}{An integer specifying version 1 or version 2 of chromVARmotifs see github for more info GreenleafLab/chromVARmotifs.} -\item{force}{A boolean value indicating whether to force the \code{peakAnnotation} object indicated by \code{name} to be overwritten if +\item{force}{A boolean value indicating whether to force the \code{peakAnnotation} object indicated by \code{annoName} to be overwritten if it already exists in the given \code{ArchRProject}.} \item{logFile}{The path to a file to be used for logging ArchR output.} diff --git a/man/plotEmbedding.Rd b/man/plotEmbedding.Rd index 147d5cd5..4ed4836a 100644 --- a/man/plotEmbedding.Rd +++ b/man/plotEmbedding.Rd @@ -46,7 +46,13 @@ is "GeneScoreMatrix" then \code{name} refers to a gene name which can be listed \item{imputeWeights}{The weights to be used for imputing numerical values for each cell as a linear combination of other cells values. See \code{addImputationWeights()} and \code{getImutationWeights()} for more information.} -\item{pal}{A custom palette (see \code{paletteDiscrete} or \code{ArchRPalettes}) used to override discreteSet/continuousSet for coloring vector.} +\item{pal}{A custom palette used to override discreteSet/continuousSet for coloring cells. Typically created using \code{paletteDiscrete()} or \code{paletteContinuous()}. +To make a custom palette, you must construct this following strict specifications. If the coloring is for discrete data (i.e. "Clusters"), +then this palette must be a named vector of colors where each color is named for the corresponding group (e.g. \code{"C1" = "#F97070"}). If the coloring +for continuous data, then it just needs to be a vector of colors. If you are using \code{pal} in conjuction with \code{highlightCells}, your palette +must be a named vector with two entries, one named for the value of the cells in the \code{name} column of \code{cellColData} and the other named +"Non.Highlighted". 
For example, \code{pal=c("Mono" = "green", "Non.Highlighted" = "lightgrey")} would be used to change the color of cells with the value +"Mono" in the \code{cellColData} column indicated by \code{name}. Because of this, the cells indicated by \code{highlightCells} must also match this value in the \code{name} column.} \item{size}{A number indicating the size of the points to plot if \code{plotAs} is set to "points".} diff --git a/man/plotMarkers.Rd b/man/plotMarkers.Rd index f0baad6b..6b1a0078 100644 --- a/man/plotMarkers.Rd +++ b/man/plotMarkers.Rd @@ -9,7 +9,8 @@ plotMarkers( name = NULL, cutOff = "FDR <= 0.01 & abs(Log2FC) >= 0.5", plotAs = "Volcano", - scaleTo = 10^4 + scaleTo = 10^4, + rastr = TRUE ) } \arguments{ @@ -22,6 +23,9 @@ To see available options try \code{colnames(seMarker)}.} \code{cutoff} can contain any of the \code{assayNames} from \code{seMarker}.} \item{plotAs}{A string indicating whether to plot a volcano plot ("Volcano") or an MA plot ("MA").} + +\item{rastr}{A boolean value that indicates whether the plot should be rasterized using \code{ggrastr}. This does not rasterize +lines and labels, just the internal portions of the plot.} } \description{ This function will plot one group/column of a differential markers as an MA or Volcano plot. 
diff --git a/man/projectBulkATAC.Rd b/man/projectBulkATAC.Rd index 4f5e9b77..31859ab2 100644 --- a/man/projectBulkATAC.Rd +++ b/man/projectBulkATAC.Rd @@ -19,11 +19,11 @@ projectBulkATAC( \arguments{ \item{ArchRProj}{An \code{ArchRProject} object containing the dimensionality reduction matrix passed by \code{reducedDims}.} -\item{seATAC}{Bulk ATAC Summarized Experiment.} +\item{seATAC}{A \code{SummarizedExperiment} object containing bulk ATAC-seq data.} -\item{reducedDims}{A string specifying the reducedDims.} +\item{reducedDims}{A string specifying the name of the \code{reducedDims} object to be used.} -\item{embedding}{A string specifying embedding.} +\item{embedding}{A string specifying the name of the \code{embedding} object to be used.} \item{n}{An integer specifying the number of subsampled "pseudo single cells" per bulk sample.} From 7ead5412657ca1d21b5f874e2908c8d920e977cb Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 4 May 2022 14:28:26 -0700 Subject: [PATCH 149/184] update ... additional params def --- R/AnnotationPeaks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index e2882b38..b969532d 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -290,7 +290,7 @@ addPeakAnnotations <- function( #' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `annoName` to be overwritten if #' it already exists in the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. -#' @param ... Additional parameters to be passed to `TFBSTools::getMatrixSet` for getting a PWM object. +#' @param ... Additional parameters to be passed to `TFBSTools::getMatrixSet` for getting a JASPAR PWM object. 
#' @export addMotifAnnotations <- function( ArchRProj = NULL, From b9ee2663d9ba7d58c6737a7a8bf2b3614bf26866 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 4 May 2022 20:33:35 -0700 Subject: [PATCH 150/184] typo in geneTiles --- R/IntegrativeAnalysis.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 8695c83d..94116f75 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -1327,7 +1327,7 @@ getPeak2GeneLinks <- function( geneTiles <- floor(start(geneStarts) / resolution) * resolution + floor(resolution / 2) }else{ summitTiles <- start(peakSummits) - geneTiles <- start(geneTiles) + geneTiles <- start(geneStarts) } loops <- .constructGR( From f2d5d0583e00167878e8d15ae02a016b423b9f3b Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 6 May 2022 11:49:21 -0700 Subject: [PATCH 151/184] Add message to point to Vierstra website https://github.com/GreenleafLab/ArchR/discussions/1364#discussioncomment-2700224 --- R/AnnotationPeaks.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index b969532d..2e13ff91 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -448,8 +448,10 @@ addMotifAnnotations <- function( }else if(tolower(motifSet)=="vierstra"){ if(tolower(collection)=="individual"){ url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Individual_Motifs.rds" + message("Using Vierstra v1.0 motifs. See https://www.vierstra.org/resources/motif_clustering for more details.") } else if(tolower(collection == "archetype")){ url = "https://jeffgranja.s3.amazonaws.com/ArchR/Annotations/Vierstra_Archetype_Motifs_v2.1.rds" + message("Using Vierstra v2.1beta motifs. See https://resources.altius.org/~jvierstra/projects/motif-clustering-v2.1beta/ for more details.") } else { stop(paste0("Error! collection ", collection, " not recognized for motifSet ",motifSet, ". 
Accepted values are 'individual' and 'archetype'")) From ee5f2176fbed5685e92a29f0bbbf8911fea434d7 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 17 May 2022 20:51:02 -0700 Subject: [PATCH 152/184] remove strictMatch from batchlapply https://github.com/GreenleafLab/ArchR/issues/1427 --- R/MatrixGeneExpression.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index 5a1b02c8..c2cb7c9d 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -147,6 +147,7 @@ addGeneExpressionMatrix <- function( #Remove Input from args args$input <- NULL args$chromSizes <- NULL + args$strictMatch <- NULL #Run With Parallel or lapply outList <- .batchlapply(args) From e008328cb634a266b30b25841b3d36d987263af1 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:22:40 -0700 Subject: [PATCH 153/184] add marker subsetting to plotMarkerHeatMap --- R/MarkerFeatures.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index c0e655f6..e077744b 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -823,6 +823,8 @@ markerHeatmap <- function(...){ #' @param pal A custom continuous palette from `ArchRPalettes` (see `paletteContinuous()`) used to override the default continuous palette for the heatmap. #' @param binaryClusterRows A boolean value that indicates whether a binary sorting algorithm should be used for fast clustering of heatmap rows. #' @param clusterCols A boolean value that indicates whether the columns of the marker heatmap should be clustered. +#' @param subsetMarkers A vector of rownames from seMarker to use for subsetting of seMarker to only plot specific features on the heatmap. +#' Note that these rownames are expected to be integers that come from `rownames(rowData(seMarker))`. #' @param labelMarkers A character vector listing the `rownames` of `seMarker` that should be labeled on the side of the heatmap. 
#' @param nLabel An integer value that indicates whether the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. #' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top n genes for each group based on how uniquely up-regulated the gene is. @@ -847,6 +849,7 @@ plotMarkerHeatmap <- function( pal = NULL, binaryClusterRows = TRUE, clusterCols = TRUE, + subsetMarkers = NULL, labelMarkers = NULL, nLabel = 15, nPrint = 15, @@ -919,6 +922,11 @@ plotMarkerHeatmap <- function( }else{ idx <- which(rowSums(passMat, na.rm = TRUE) > 0 & matrixStats::rowVars(mat) != 0 & !is.na(matrixStats::rowVars(mat))) } + + if(!is.null(subsetMarkers)) { + idx <- subsetMarkers + } + mat <- mat[idx,,drop=FALSE] passMat <- passMat[idx,,drop=FALSE] From eaa15993b4fdae69cacd9fd03d74c5aa8c230cc8 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:34:13 -0700 Subject: [PATCH 154/184] remove printing of marker genes when subsetMarkers is used --- R/MarkerFeatures.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index e077744b..5cecbb36 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -959,15 +959,19 @@ plotMarkerHeatmap <- function( } spmat <- passMat / rowSums(passMat) - if(metadata(seMarker)$Params$useMatrix == "GeneScoreMatrix"){ - message("Printing Top Marker Genes:") - for(x in seq_len(ncol(spmat))){ - genes <- head(order(spmat[,x], decreasing = TRUE), nPrint) - message(colnames(spmat)[x], ":") - message("\t", paste(as.vector(rownames(mat)[genes]), collapse = ", ")) + #only print out identified marker genes if subsetMarkers is NULL + if(is.null(subsetMarkers)) { + if(metadata(seMarker)$Params$useMatrix == "GeneScoreMatrix"){ + message("Printing Top Marker Genes:") + for(x in seq_len(ncol(spmat))){ + genes <- head(order(spmat[,x], decreasing = TRUE), nPrint) + message(colnames(spmat)[x], ":") + message("\t", 
paste(as.vector(rownames(mat)[genes]), collapse = ", ")) + } } } + if(is.null(labelMarkers)){ labelMarkers <- lapply(seq_len(ncol(spmat)), function(x){ as.vector(rownames(mat)[head(order(spmat[,x], decreasing = TRUE), nLabel)]) From d74445f35cbce0e1bb4eb150111e507110b98951 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:43:38 -0700 Subject: [PATCH 155/184] update param def for subsetMarkers --- R/MarkerFeatures.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 5cecbb36..94adfac6 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -824,7 +824,8 @@ markerHeatmap <- function(...){ #' @param binaryClusterRows A boolean value that indicates whether a binary sorting algorithm should be used for fast clustering of heatmap rows. #' @param clusterCols A boolean value that indicates whether the columns of the marker heatmap should be clustered. #' @param subsetMarkers A vector of rownames from seMarker to use for subsetting of seMarker to only plot specific features on the heatmap. -#' Note that these rownames are expected to be integers that come from `rownames(rowData(seMarker))`. +#' Note that these rownames are expected to be integers that come from `rownames(rowData(seMarker))`. If this parameter is used for +#' subsetting, then the values provided to `cutOff` are effectively ignored. #' @param labelMarkers A character vector listing the `rownames` of `seMarker` that should be labeled on the side of the heatmap. #' @param nLabel An integer value that indicates whether the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. #' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top n genes for each group based on how uniquely up-regulated the gene is. 
From 03a8d6b8b9cbb9ced2b13d5491287f9f29a43ed1 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:44:26 -0700 Subject: [PATCH 156/184] add validInput for subsetMarkers --- R/MarkerFeatures.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 94adfac6..7d3a2657 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -872,6 +872,7 @@ plotMarkerHeatmap <- function( .validInput(input = pal, name = "pal", valid = c("character", "null")) .validInput(input = binaryClusterRows, name = "binaryClusterRows", valid = c("boolean")) .validInput(input = clusterCols, name = "clusterCols", valid = c("boolean")) + .validInput(input = subsetMarkers, name = "subsetMarkers", valid = c("integer", "null")) .validInput(input = labelMarkers, name = "labelMarkers", valid = c("character", "null")) .validInput(input = nLabel, name = "nLabel", valid = c("integer", "null")) .validInput(input = nPrint, name = "nPrint", valid = c("integer", "null")) From b3dd16436fcde4aeb3a5b3e20343e248ec34679f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:48:57 -0700 Subject: [PATCH 157/184] catch problematic inputs to subsetMarkers --- R/MarkerFeatures.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 7d3a2657..2e59051c 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -926,7 +926,12 @@ plotMarkerHeatmap <- function( } if(!is.null(subsetMarkers)) { - idx <- subsetMarkers + if(length(which(subsetMarkers %ni% 1:nrow(mat)))){ + idx <- subsetMarkers + } else { + stop("Rownames / indices provided to the subsetMarker parameter are outside of the boundaries of seMarker.") + } + } mat <- mat[idx,,drop=FALSE] From 80d57bb2e9309263beead385aa5c967aea783f39 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:50:03 -0700 Subject: [PATCH 158/184] fix if statement typo --- R/MarkerFeatures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 2e59051c..fe914ec5 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -926,7 +926,7 @@ plotMarkerHeatmap <- function( } if(!is.null(subsetMarkers)) { - if(length(which(subsetMarkers %ni% 1:nrow(mat)))){ + if(length(which(subsetMarkers %ni% 1:nrow(mat))) > 0){ idx <- subsetMarkers } else { stop("Rownames / indices provided to the subsetMarker parameter are outside of the boundaries of seMarker.") From 673943f1cae2e32060939f2ca773386f726bc98f Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 25 May 2022 09:50:33 -0700 Subject: [PATCH 159/184] fix if statement typo --- R/MarkerFeatures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index fe914ec5..69879538 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -926,7 +926,7 @@ plotMarkerHeatmap <- function( } if(!is.null(subsetMarkers)) { - if(length(which(subsetMarkers %ni% 1:nrow(mat))) > 0){ + if(length(which(subsetMarkers %ni% 1:nrow(mat))) == 0){ idx <- subsetMarkers } else { stop("Rownames / indices provided to the subsetMarker parameter are outside of the boundaries of seMarker.") From 17dbf94f247d8b084493a49fac97664ccd3832b4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 27 May 2022 08:02:49 -0700 Subject: [PATCH 160/184] improve error message when no cells found passing filter https://github.com/GreenleafLab/ArchR/issues/1435#issuecomment-1139663014 --- R/CreateArrow.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 0fa4335a..ac0be626 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -1884,7 +1884,7 @@ createArrowFiles <- function( bcPass <- BStringSet(dt$values.V1[dt$V1 >= minFrags & dt$V1 <= maxFrags]) if(length(bcPass) < 3){ - .logStop(sprintf("Detected 2 or less cells (%s barcodes have greater than 50 fragments) in file!\n Check inputs such as 'minFrags' or 'maxFrags' to keep cells! 
Exiting!", sum(dt$V1 > 50)), logFile = logFile) + .logStop(sprintf("Detected 2 or less cells (%s barcodes have greater than 50 fragments) in file!\n Check inputs such as 'minFrags' or 'maxFrags' to keep cells!\n Also check that you are using the correct reference genome.\n Exiting!", sum(dt$V1 > 50)), logFile = logFile) } .logThis(data.frame(bc = as.character(bcPass)), name = paste0(prefix, " BarcodesMinMaxFrags"), logFile = logFile) From 978cd3c8b82b332dcc31fbf9976ce8625f2b9c55 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 3 Jun 2022 08:41:46 -0700 Subject: [PATCH 161/184] check to make sure tmpdir exists --- R/HiddenUtils.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 57fdad35..9502b4ff 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -326,6 +326,10 @@ .tempfile <- function(pattern = "tmp", tmpdir = "tmp", fileext = "", addDOC = TRUE){ dir.create(tmpdir, showWarnings = FALSE) + + if(!dir.exists(tmpdir)){ + stop(paste0("Unable to create temporary directory ", tmpdir,". Check file permissions!")) + } if(addDOC){ doc <- paste0("-Date-", Sys.Date(), "_Time-", gsub(":","-", stringr::str_split(Sys.time(), pattern=" ",simplify=TRUE)[1,2])) From 6a23fa18c55e9677531095ae939cf21b6ab60cd4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 3 Jun 2022 12:16:33 -0700 Subject: [PATCH 162/184] check for file named "tmp" in .tempFile directory creation https://github.com/GreenleafLab/ArchR/issues/1447#issuecomment-1146215298 --- R/HiddenUtils.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 9502b4ff..44f84f53 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -324,6 +324,10 @@ } .tempfile <- function(pattern = "tmp", tmpdir = "tmp", fileext = "", addDOC = TRUE){ + + if(file.exists(tmpdir)){ + stop(paste0("Attempted to create temporary directory ", tmpdir," but a file already exists with this name. 
Please remove this file and try again!")) + } dir.create(tmpdir, showWarnings = FALSE) From 7358dd109053049ae19ef8c64eb129e544805579 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 3 Jun 2022 16:16:33 -0700 Subject: [PATCH 163/184] patch error with file.exists https://github.com/GreenleafLab/ArchR/issues/1447#issuecomment-1146441748 --- R/HiddenUtils.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 44f84f53..1e7844dc 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -325,8 +325,11 @@ .tempfile <- function(pattern = "tmp", tmpdir = "tmp", fileext = "", addDOC = TRUE){ - if(file.exists(tmpdir)){ - stop(paste0("Attempted to create temporary directory ", tmpdir," but a file already exists with this name. Please remove this file and try again!")) + #if the directory doesnt already exist and file.exists evaluates to true, then a file exists with that name + if(!dir.exists(tmpdir)){ + if(file.exists(tmpdir)){ + stop(paste0("Attempted to create temporary directory ", tmpdir," but a file already exists with this name. Please remove this file and try again!")) + } } dir.create(tmpdir, showWarnings = FALSE) From e2d911bbe3df32afd8160194150b60d5514bc559 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 3 Jun 2022 16:47:09 -0700 Subject: [PATCH 164/184] update strictMatch warning message and typo https://github.com/GreenleafLab/ArchR/discussions/1450#discussion-4119022 --- R/MatrixGeneExpression.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index c2cb7c9d..b54f409f 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -18,7 +18,7 @@ #' @param threads The number of threads to be used for parallel computing. #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. 
#' @param strictMatch A boolean value indicating whether every cell in `input` must be represented in `seRNA`. If set to `FALSE`, -#' this and this `GeneExpressionMatrix` is used for certain downstream analyses such as `addIterativeLSI()`, then errors may occur +#' and this `GeneExpressionMatrix` is used for certain downstream analyses such as `addIterativeLSI()`, then errors may occur #' because not all cells will have relevant information. #' @param force A boolean value indicating whether to force the matrix indicated by `matrixName` to be overwritten if it already exist in the given `input`. #' @param logFile The path to a file to be used for logging ArchR output. @@ -88,7 +88,7 @@ addGeneExpressionMatrix <- function( if(strictMatch){ stop("Error! 'strictMatch = TRUE' and not all cells in input are represented in the provided gene expression seRNA. To proceed, please subset your ArchRProject using the subsetArchRProject() function to contain only cells present in seRNA or set 'strictMatch = FALSE'.") } else { - .logMessage("Warning! Not all cells in input exist in seRNA! This may cause downstream issues with functions that require information from all cells. For example, addIterativeLSI() will not work on this GeneExpressionMatrix!", logFile = logFile, verbose = TRUE) + .logMessage("Warning! Not all cells in input exist in seRNA! This may cause downstream issues with functions that require information from all cells. For example, addIterativeLSI() will not work on this GeneExpressionMatrix! 
To remove these mis-matched cells, subset your ArchRProject using the subsetArchRProject() function to contain only cells present in seRNA and set 'strictMatch = TRUE'", logFile = logFile, verbose = TRUE) } } From cadcd31b6c228537d43701c63bd3ce9e7e1b2795 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Mon, 6 Jun 2022 11:07:20 -0700 Subject: [PATCH 165/184] Properly catch NULL value for quantCut mentioned in https://github.com/GreenleafLab/ArchR/issues/1452#issuecomment-1147705139 --- R/VisualizeData.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/VisualizeData.R b/R/VisualizeData.R index e9e327ba..ae9eff3e 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -421,8 +421,10 @@ plotEmbedding <- function( if(!plotParamsx$discrete){ - plotParamsx$color <- .quantileCut(plotParamsx$color, min(quantCut), max(quantCut)) - + if(!is.null(quantCut)){ + plotParamsx$color <- .quantileCut(plotParamsx$color, min(quantCut), max(quantCut)) + } + plotParamsx$pal <- paletteContinuous(set = plotParamsx$continuousSet) if(!is.null(pal)){ From 92bb44c849d756b0e577fd09f19903a7dbbabfd5 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 7 Jun 2022 14:33:36 -0700 Subject: [PATCH 166/184] make tutorial downloads cleaner check for existence of each file individually rather than the download directory. Add new .downloadFiles() function to handle file download and checking to see if files downloaded properly. 
--- R/InputData.R | 113 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 36 deletions(-) diff --git a/R/InputData.R b/R/InputData.R index e25e743e..a13860f1 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -11,57 +11,98 @@ getTutorialData <- function( tutorial = "hematopoiesis", threads = getArchRThreads() ){ - + #Validate .validInput(input = tutorial, name = "tutorial", valid = "character") .validInput(input = threads, name = "threads", valid = c("integer")) ######### - + #Make Sure URL doesnt timeout oldTimeout <- getOption('timeout') options(timeout=100000) - + if(tolower(tutorial) %in% c("heme","hematopoiesis")){ - if(!dir.exists("HemeFragments")){ - - filesUrl <- c( - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" - ) - - dir.create("HemeFragments", showWarnings = FALSE) - - downloadFiles <- .safelapply(seq_along(filesUrl), function(x){ - download.file( - url = filesUrl[x], - destfile = file.path("HemeFragments", basename(filesUrl[x])) - ) - }, threads = min(threads, length(filesUrl))) - - #check for success of file download - if(!all(unlist(downloadFiles) == 0)) { - stop("Error! Some tutorial files did not download successfully. 
Please try again.") - } - } - pathFragments <- "HemeFragments" - - }else{ - + pathDownload <- "HemeFragments" + + filesUrl <- c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" + ) + + dir.create(pathDownload, showWarnings = FALSE) + + downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + + inputFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) + names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] + + }else if(tolower(tutorial) %in% c("multiome")){ + + filesUrl <- c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.filtered_feature_bc_matrix.h5", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.filtered_feature_bc_matrix.h5" + ) + + pathDownload <- "Multiome" + + dir.create(pathDownload, showWarnings = FALSE) + + downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + + fragFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) + names(fragFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + fragFiles <- fragFiles[!grepl(".tbi", fragFiles)] + geneFiles <- list.files(pathDownload, pattern = "\\.h5$", full.names = TRUE) + names(geneFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + + inputFiles <- c(fragFiles, geneFiles) + + } else{ + stop("There is no 
tutorial data for : ", tutorial) - + } - + #Set back URL Options options(timeout=oldTimeout) - - #Return Fragment Files - inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) - names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) - inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] + inputFiles + +} +#helper for file downloads +.downloadFiles <- function(filesUrl = NULL, pathDownload = NULL, threads = 1){ + if(is.null(filesUrl)) { + stop("No value supplied to filesUrl in .downloadFiles()!") + } + if(is.null(pathDownload)) { + stop("No value supplied to pathDownload in .downloadFiles()!") + } + message(paste0("Downloading files to ",pathDownload,"...")) + downloadFiles <- .safelapply(seq_along(filesUrl), function(x){ + if(!file.exists(file.path(pathDownload, basename(filesUrl[x])))){ + message(paste0("Downloading file ", basename(filesUrl[x]),"...")) + download.file( + url = filesUrl[x], + destfile = file.path(pathDownload, basename(filesUrl[x])) + ) + } else { + message(paste0("File exists! Skipping file ", basename(filesUrl[x]),"...")) + } + }, threads = min(threads, length(filesUrl))) + + #check for success of file download + if(!all(unlist(downloadFiles) == 0)) { + stop("Some tutorial files did not download successfully. 
Please try again.") + } + + downloadFiles + } #' Get PBMC Small Test Fragments From cea4782584a0ea621b1d653782d988200a74ffae Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 7 Jun 2022 14:44:22 -0700 Subject: [PATCH 167/184] fix typo --- R/InputData.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/InputData.R b/R/InputData.R index a13860f1..ba492bdf 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -33,7 +33,7 @@ getTutorialData <- function( dir.create(pathDownload, showWarnings = FALSE) - downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + downloadFiles <- .downloadFiles(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) inputFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) @@ -52,7 +52,7 @@ getTutorialData <- function( dir.create(pathDownload, showWarnings = FALSE) - downloadFiles <- downloadFiles2(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + downloadFiles <- .downloadFiles(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) fragFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) names(fragFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) From 21099d6ed966f6088b7b49ebbb9cca07486e8875 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 7 Jun 2022 20:58:48 -0700 Subject: [PATCH 168/184] fix error message typo --- R/IterativeLSI.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index 156bbe80..c32f7795 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -268,7 +268,7 @@ addIterativeLSI <- function( .logDiffTime("Computing Variable Features", tstart, addHeader = FALSE, verbose = verbose, logFile = logFile) nFeature <- varFeatures[1] if(nFeature > 0.5 * nrow(totalAcc)){ - stop("nFeature for 
variable selection must be at leat 1/2 the total features!") + stop("nFeature for variable selection must be less than 1/2 the total features!") } topIdx <- head(order(totalAcc$combinedVars, decreasing=TRUE), nFeature) topFeatures <- totalAcc[sort(topIdx),] From e395adfb4916f42f73ee646954d1e0f0fa7bdc76 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 8 Jun 2022 16:02:43 -0700 Subject: [PATCH 169/184] bugfix - `error` does not exist the variable `error` doesnt exist / hasnt been declared anywhere. pretty sure this should be `throwError = FALSE` --- R/ReproduciblePeakSet.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index 3bb1819b..a5e47a53 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -835,7 +835,7 @@ findMacs2 <- function(){ if(search2[1] != "ERROR"){ path2Install <- gsub("Location: ","",search2[grep("Location", search2, ignore.case=TRUE)]) path2Bin <- gsub("lib/python/site-packages", "bin/macs2",path2Install) - if(.suppressAll(.checkPath(path2Bin, throwError = error))){ + if(.suppressAll(.checkPath(path2Bin, throwError = FALSE))){ message("Found with pip!") return(path2Bin) } @@ -848,7 +848,7 @@ findMacs2 <- function(){ if(search3[1] != "ERROR"){ path2Install <- gsub("Location: ","",search3[grep("Location", search3, ignore.case=TRUE)]) path2Bin <- gsub("lib/python/site-packages", "bin/macs2",path2Install) - if(.suppressAll(.checkPath(path2Bin, throwError = error))){ + if(.suppressAll(.checkPath(path2Bin, throwError = FALSE))){ message("Found with pip3!") return(path2Bin) } From 792487b7a1119054b4d4aacbffb2f5c893b50017 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 11:50:14 -0700 Subject: [PATCH 170/184] update documentation on "features" --- R/ArchRBrowser.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 98a1e251..5b378c23 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ 
-646,8 +646,10 @@ ArchRBrowserTrack <- function(...){ #' Blue-colored genes are on the minus strand and red-colored genes are on the plus strand), and "loopTrack" (links between a peak and a gene). #' @param sizes A numeric vector containing up to 3 values that indicate the sizes of the individual components passed to `plotSummary`. #' The order must be the same as `plotSummary`. -#' @param features A `GRanges` object containing the "features" to be plotted via the "featureTrack". This should be thought of as a -#' bed track. i.e. the set of peaks obtained using `getPeakSet(ArchRProj))`. +#' @param features A `GRanges` (for a single feature track) or `GRangesList` (for multiple feature tracks) object containing the "features" to +#' be plotted via the "featureTrack". This should be thought of as a bed track. i.e. the set of peaks obtained using `getPeakSet(ArchRProj))`. +#' If you provide a `GRangesList`, then each element of that object must be named and this name will be used on the plot. +#' For example - `GRangesList("peaks" = peak_gr, "other" = other_gr)`. #' @param loops A `GRanges` object containing the "loops" to be plotted via the "loopTrack". #' This `GRanges` object start represents the center position of one loop anchor and the end represents the center position of another loop anchor. #' A "loopTrack" draws an arc between two genomic regions that show some type of interaction. This type of track can be used From 9769de9d11d3e8dc84a170c4ab4c036996a52ce3 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 12:50:16 -0700 Subject: [PATCH 171/184] catch when GRangesList has no names if the GRangesList given to features in plotBrowserTrack does not have names, then the call to data.frame errors out. This fix ensures that namex isnt null or blank. 
--- R/ArchRBrowser.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 5b378c23..5c049e55 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1431,6 +1431,10 @@ plotBrowserTrack <- function( featureO <- lapply(seq_along(featureList), function(x){ featurex <- featureList[[x]] namex <- names(featureList)[x] + if(is.null(namex) || namex == "") { + message("Warning! Object ",x," in your GRangesList (features) is not named. Generic numbering will be used.") + namex <- as.character(x) + } mcols(featurex) <- NULL sub <- subsetByOverlaps(featurex, region, ignore.strand = TRUE) if(length(sub) > 0){ From d214b0b1c23af8192d1818b6878da1b28ba346cf Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 13:28:29 -0700 Subject: [PATCH 172/184] update featureList with generic name --- R/ArchRBrowser.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 5c049e55..37b7947d 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1434,6 +1434,7 @@ plotBrowserTrack <- function( if(is.null(namex) || namex == "") { message("Warning! Object ",x," in your GRangesList (features) is not named. 
Generic numbering will be used.") namex <- as.character(x) + names(featureList)[x] <- as.character(x) } mcols(featurex) <- NULL sub <- subsetByOverlaps(featurex, region, ignore.strand = TRUE) From 851427b051937cb7ff8596d971696f23a6809a5e Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 14:15:20 -0700 Subject: [PATCH 173/184] fix featureList naming --- R/ArchRBrowser.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 37b7947d..1e76ddc7 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1426,6 +1426,15 @@ plotBrowserTrack <- function( featureList <- features hideY <- FALSE } + + #make sure all elements in featureList have a name for plot display + for(i in seq_along(featureList)){ + if(is.null(names(featureList)[i]) || is.na(names(featureList)[i]) || nchar(names(featureList)[i]) == 0) { + message("Warning! Object ",i," in your GRangesList (features) is not named. Generic numbering will be used.") + names(featureList)[i] <- as.character(i) + } + } + featureList <- featureList[rev(seq_along(featureList))] featureO <- lapply(seq_along(featureList), function(x){ @@ -1434,7 +1443,6 @@ plotBrowserTrack <- function( if(is.null(namex) || namex == "") { message("Warning! Object ",x," in your GRangesList (features) is not named. 
Generic numbering will be used.") namex <- as.character(x) - names(featureList)[x] <- as.character(x) } mcols(featurex) <- NULL sub <- subsetByOverlaps(featurex, region, ignore.strand = TRUE) From 8345fc6a01995ded5debe09c9b6aef00a8db0345 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 14:29:00 -0700 Subject: [PATCH 174/184] fix guides FALSE warning from ggplot --- R/ArchRBrowser.R | 10 +++++----- R/DoubletsScores.R | 6 +++--- R/Footprinting.R | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 1e76ddc7..41df3315 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -1044,7 +1044,7 @@ plotBrowserTrack <- function( margin = margin(0,0.35,0,0.35, "cm")), strip.text.y = element_text(angle = 0), strip.background = element_rect(color="black")) + - guides(fill = FALSE, colour = FALSE) + ggtitle(title) + guides(fill = "none", colour = "none") + ggtitle(title) p @@ -1346,7 +1346,7 @@ plotBrowserTrack <- function( theme(axis.title.x=element_blank(), axis.text.x=element_blank(),axis.ticks.x=element_blank()) + theme(axis.title.y=element_blank(), axis.text.y=element_blank(),axis.ticks.y=element_blank()) + theme(legend.text = element_text(size = baseSize), strip.text.y = element_text(size = facetbaseSize, angle = 0)) + - guides(fill = guide_legend(override.aes = list(colour = NA, shape = "c", size=3)), color = FALSE) + + guides(fill = guide_legend(override.aes = list(colour = NA, shape = "c", size=3)), color = "none") + theme(legend.position="bottom") + theme(legend.title=element_text(size=5), legend.text=element_text(size=7), legend.key.size = unit(0.75,"line"), legend.background = element_rect(color =NA), strip.background = element_blank()) @@ -1475,7 +1475,7 @@ plotBrowserTrack <- function( scale_color_manual(values = pal) + theme(legend.text = element_text(size = baseSize)) + theme_ArchR(baseSize = baseSize, baseLineSize = borderWidth, baseRectSize = borderWidth) + - guides(color = FALSE, fill 
= FALSE) + theme(strip.text.y = element_text(size = facetbaseSize, angle = 0), strip.background = element_blank()) + guides(color = "none", fill = "none") + theme(strip.text.y = element_text(size = facetbaseSize, angle = 0), strip.background = element_blank()) }else{ @@ -1795,7 +1795,7 @@ plotBrowserTrack <- function( margin = margin(0,0.35,0,0.35, "cm")), strip.text.y = element_text(angle = 0), strip.background = element_rect(color="black")) + - guides(fill = FALSE, colour = FALSE) + ggtitle(title) + guides(fill = "none", colour = "none") + ggtitle(title) p @@ -1884,7 +1884,7 @@ plotBrowserTrack <- function( pal = pal ) + facet_wrap(x~., ncol=1,scales="free_y",strip.position="right") + - guides(fill = FALSE, colour = FALSE) + + guides(fill = "none", colour = "none") + theme_ArchR(baseSize = baseSize, baseRectSize = borderWidth, baseLineSize = tickWidth, diff --git a/R/DoubletsScores.R b/R/DoubletsScores.R index c5338195..9275bd5f 100644 --- a/R/DoubletsScores.R +++ b/R/DoubletsScores.R @@ -378,7 +378,7 @@ addDoubletScores <- function( scale_colour_gradientn(colors = pal) + xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + labs(color = "Simulated Doublet Density") + - guides(fill = FALSE) + theme_ArchR(baseSize = 10) + + guides(fill = "none") + theme_ArchR(baseSize = 10) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank()) + coord_equal(ratio = diff(xlim)/diff(ylim), xlim = xlim, ylim = ylim, expand = FALSE) + @@ -395,7 +395,7 @@ addDoubletScores <- function( # geom_point(data = dfDoub, aes(x=x,y=y,colour=color), size = 0.5) + # scale_colour_gradientn(colors = pal) + # xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + - # guides(fill = FALSE) + theme_ArchR(baseSize = 10) + + # guides(fill = "none") + theme_ArchR(baseSize = 10) + # labs(color = "Simulated Doublet Density") + # theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), # axis.text.y = element_blank(), 
axis.ticks.y = element_blank()) + @@ -413,7 +413,7 @@ addDoubletScores <- function( # scale_colour_gradientn(colors = pal) + # xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + # labs(color = "Simulated Doublet Density") + - # guides(fill = FALSE) + theme_ArchR(baseSize = 10) + + # guides(fill = "none") + theme_ArchR(baseSize = 10) + # theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), # axis.text.y = element_blank(), axis.ticks.y = element_blank()) + # coord_equal(ratio = diff(xlim)/diff(ylim), xlim = xlim, ylim = ylim, expand = FALSE) + diff --git a/R/Footprinting.R b/R/Footprinting.R index 5fd214da..9447cdd3 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -578,8 +578,8 @@ plotFootprints <- function( ylim = c(quantile(plotFootDF$mean, 0.0001), 1.15*quantile(smoothFoot, 0.999)), xlim = c(min(plotFootDF$x),max(plotFootDF$x)) ) + theme_ArchR(baseSize = baseSize) + ggtitle(name) + - guides(fill = FALSE) + - guides(color = FALSE) + ylab(paste0(title,"Normalized Insertions")) + guides(fill = "none") + + guides(color = "none") + ylab(paste0(title,"Normalized Insertions")) #removed ggrepel due to incompatibility with coord_cartesian - see https://github.com/GreenleafLab/ArchR/issues/493#issuecomment-870012873 #ggrepel::geom_label_repel(data = plotMax, aes(label = group), size = 3, xlim = c(75, NA)) From 05a83440f3080ed1e39750b619b01e475b5e94a7 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Thu, 9 Jun 2022 20:39:24 -0700 Subject: [PATCH 175/184] update param for ArchRProj in plotFootprints --- R/Footprinting.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/Footprinting.R b/R/Footprinting.R index 9447cdd3..566c41bf 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -335,7 +335,8 @@ getFootprints <- function( #' @param smoothWindow The size in basepairs of the sliding window to be used for smoothing of the footprint signal. #' @param baseSize A numeric specifying the baseSize of font in the plots. 
#' @param plot A boolean value indicating whether or not the footprints should be plotted (`TRUE`) or returned as grob objects (`FALSE`). -#' @param ArchRProj An `ArchRProject` object to be used for plotting directory in `getOutputDirectory`. +#' @param ArchRProj An `ArchRProject` object to be used for plotting directory in `getOutputDirectory`. If no `ArchRProj` is supplied, +#' then plots will be stored in a directory called "Plots" in the current working directory. #' @param plotName A string indicating the name/prefix of the file to be used for output plots. #' @param height The height in inches to be used for the output PDF file. #' @param width The width in inches to be used for the output PDF file. From 8877bd12226b30ab71559a8299dbf0e18683e685 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 14 Jun 2022 05:55:02 -0700 Subject: [PATCH 176/184] remove NULL as option for nPrint and nLabel --- R/MarkerFeatures.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 69879538..5c346019 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -874,8 +874,8 @@ plotMarkerHeatmap <- function( .validInput(input = clusterCols, name = "clusterCols", valid = c("boolean")) .validInput(input = subsetMarkers, name = "subsetMarkers", valid = c("integer", "null")) .validInput(input = labelMarkers, name = "labelMarkers", valid = c("character", "null")) - .validInput(input = nLabel, name = "nLabel", valid = c("integer", "null")) - .validInput(input = nPrint, name = "nPrint", valid = c("integer", "null")) + .validInput(input = nLabel, name = "nLabel", valid = c("integer")) + .validInput(input = nPrint, name = "nPrint", valid = c("integer")) .validInput(input = labelRows, name = "labelRows", valid = c("boolean")) .validInput(input = returnMatrix, name = "returnMatrix", valid = c("boolean")) .validInput(input = transpose, name = "transpose", valid = c("boolean")) From 44cb0950261c10292e512f96fa62efa2da16d88c Mon 
Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 14 Jun 2022 05:56:33 -0700 Subject: [PATCH 177/184] update param def for plotMarkerHeatmap --- R/MarkerFeatures.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index 5c346019..ed1f96a6 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -827,8 +827,8 @@ markerHeatmap <- function(...){ #' Note that these rownames are expected to be integers that come from `rownames(rowData(seMarker))`. If this parameter is used for #' subsetting, then the values provided to `cutOff` are effectively ignored. #' @param labelMarkers A character vector listing the `rownames` of `seMarker` that should be labeled on the side of the heatmap. -#' @param nLabel An integer value that indicates whether the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. -#' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top n genes for each group based on how uniquely up-regulated the gene is. +#' @param nLabel An integer value that indicates how many of the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. +#' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top `n` genes for each group based on how uniquely up-regulated the gene is. #' @param labelRows A boolean value that indicates whether all rows should be labeled on the side of the heatmap. #' @param returnMatrix A boolean value that indicates whether the final heatmap matrix should be returned in lieu of plotting the actual heatmap. #' @param transpose A boolean value that indicates whether the heatmap should be transposed prior to plotting or returning. 
From 706b88a80570f814b01959a5d9d536adf7ac64b4 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 14 Jun 2022 08:11:03 -0700 Subject: [PATCH 178/184] update param def for nLabel --- R/MarkerFeatures.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index ed1f96a6..b6fdbab9 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -828,6 +828,7 @@ markerHeatmap <- function(...){ #' subsetting, then the values provided to `cutOff` are effectively ignored. #' @param labelMarkers A character vector listing the `rownames` of `seMarker` that should be labeled on the side of the heatmap. #' @param nLabel An integer value that indicates how many of the top `n` features for each column in `seMarker` should be labeled on the side of the heatmap. +#' To remove all feature labels, set `nLabel = 0`. #' @param nPrint If provided `seMarker` is from "GeneScoreMatrix" print the top `n` genes for each group based on how uniquely up-regulated the gene is. #' @param labelRows A boolean value that indicates whether all rows should be labeled on the side of the heatmap. #' @param returnMatrix A boolean value that indicates whether the final heatmap matrix should be returned in lieu of plotting the actual heatmap. 
From 6a0ec0cc4c5f8ad1a923fc3b2912cbfde6c0de23 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 17 Jun 2022 08:59:53 -0700 Subject: [PATCH 179/184] add md5sum check for tutorial data more robust checking of if files exist and have been properly downloaded https://github.com/GreenleafLab/ArchR/discussions/1478 --- R/InputData.R | 57 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/R/InputData.R b/R/InputData.R index ba492bdf..ce42a3a2 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -25,10 +25,18 @@ getTutorialData <- function( pathDownload <- "HemeFragments" - filesUrl <- c( - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" + filesUrl <- data.frame( + fileUrl = c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_CD34_BMMC_R1.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/HemeFragments/scATAC_PBMC_R1.fragments.tsv.gz" + ), + md5sum = c( + "77502e1f195e21d2f7a4e8ac9c96e65e", + "618613b486e4f8c0101f4c05c69723b0", + "a8d5ae747841055ef230ba496bcfe937" + ), + stringsAsFactors = FALSE ) dir.create(pathDownload, showWarnings = FALSE) @@ -41,11 +49,20 @@ getTutorialData <- function( }else if(tolower(tutorial) %in% c("multiome")){ - filesUrl <- c( - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.fragments.tsv.gz", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.filtered_feature_bc_matrix.h5", - "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.fragments.tsv.gz", - 
"https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.filtered_feature_bc_matrix.h5" + filesUrl <- data.frame( + fileUrl = c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_sorted_3k.filtered_feature_bc_matrix.h5", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.fragments.tsv.gz", + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/Multiome/pbmc_unsorted_3k.filtered_feature_bc_matrix.h5" + ), + md5sum = c( + "d49f4012ff65d9edfee86281d6afb286", + "e326066b51ec8975197c29a7f911a4fd", + "5737fbfcb85d5ebf4dab234a1592e740", + "bd4cc4ff040987e1438f1737be606a27" + ), + stringsAsFactors = FALSE ) pathDownload <- "Multiome" @@ -83,16 +100,26 @@ getTutorialData <- function( if(is.null(pathDownload)) { stop("No value supplied to pathDownload in .downloadFiles()!") } + if(length(which(c("fileUrl","md5sum") %ni% colnames(filesUrl))) != 0) { + cat(colnames(filesUrl)) + stop("File download dataframe does not include columns named 'fileUrl' and 'md5sum' which are required!") + } message(paste0("Downloading files to ",pathDownload,"...")) - downloadFiles <- .safelapply(seq_along(filesUrl), function(x){ - if(!file.exists(file.path(pathDownload, basename(filesUrl[x])))){ - message(paste0("Downloading file ", basename(filesUrl[x]),"...")) + downloadFiles <- .safelapply(seq_along(filesUrl$fileUrl), function(x){ + if(file.exists(file.path(pathDownload, basename(filesUrl$fileUrl[x])))){ + if(tools::md5sum(file.path(pathDownload, basename(filesUrl$fileUrl[x]))) != filesUrl$md5sum[x]) { + message(paste0("File ",basename(filesUrl$fileUrl[x])," exists but has an incorrect md5sum. 
Removing...")) + file.remove(file.path(pathDownload, basename(filesUrl$fileUrl[x]))) + } + } + if(!file.exists(file.path(pathDownload, basename(filesUrl$fileUrl[x])))){ + message(paste0("Downloading file ", basename(filesUrl$fileUrl[x]),"...")) download.file( - url = filesUrl[x], - destfile = file.path(pathDownload, basename(filesUrl[x])) + url = filesUrl$fileUrl[x], + destfile = file.path(pathDownload, basename(filesUrl$fileUrl[x])) ) } else { - message(paste0("File exists! Skipping file ", basename(filesUrl[x]),"...")) + message(paste0("File exists! Skipping file ", basename(filesUrl$fileUrl[x]),"...")) } }, threads = min(threads, length(filesUrl))) From a52f2bc244b98bf046601a831b3a9de3271abf0d Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Mon, 4 Jul 2022 11:51:05 -0700 Subject: [PATCH 180/184] bugfix threads not capped to threads input --- .DS_Store | Bin 16388 -> 16388 bytes R/CreateArrow.R | 2 +- R/MatrixGeneScores.R | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.DS_Store b/.DS_Store index a885b2f8922c50a3e6fb25a2386aaf50830e8950..cb3f848ea525c0d70b61ad22f3e5337354f0a820 100644 GIT binary patch delta 79 zcmZo^U~Fk%+|VyQd9FZ10zu)6L#8mdum)NIPu4 YD!){8bEd2`6NG2JloigKd{tf_0Ogb#Bme*a delta 166 zcmZo^U~Fk%+|Vzrv?Hk?C$qT3z~CAq6Eh1d8#@O(2RBD-a7KQ4a7kiGX|dDf{nFk( zoE)6-0ut5LX68ByM#d(!IttZ>h9*Wj3MR&8wY8iaqRRT#LGjr+xq10r3}C>>2%#Bx np)`!@-W(}o!#sJXbol1m^7BPEcgu1yL3rl#S>e3Nx8*efy`w6D diff --git a/R/CreateArrow.R b/R/CreateArrow.R index ac0be626..e4a87bf4 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -205,7 +205,7 @@ createArrowFiles <- function( if(subThreading){ h5disableFileLocking() }else{ - args$threads <- length(inputFiles) + args$threads <- max(length(inputFiles), threads) } args$minTSS <- NULL diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index 1d1d10b3..91a669a6 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -125,7 +125,7 @@ addGeneScoreMatrix <- function( if(subThreading){ h5disableFileLocking() }else{ 
- args$threads <- length(ArrowFiles) + args$threads <- max(length(ArrowFiles), threads) } #Remove Input from args From 79953a93ae376a500e681aa3b5ba669af6bb58e1 Mon Sep 17 00:00:00 2001 From: jeffmgranja Date: Mon, 4 Jul 2022 11:52:44 -0700 Subject: [PATCH 181/184] fix --- R/CreateArrow.R | 2 +- R/MatrixGeneScores.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/CreateArrow.R b/R/CreateArrow.R index e4a87bf4..08243139 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -205,7 +205,7 @@ createArrowFiles <- function( if(subThreading){ h5disableFileLocking() }else{ - args$threads <- max(length(inputFiles), threads) + args$threads <- min(length(inputFiles), threads) } args$minTSS <- NULL diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index 91a669a6..fd342a1a 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -125,7 +125,7 @@ addGeneScoreMatrix <- function( if(subThreading){ h5disableFileLocking() }else{ - args$threads <- max(length(ArrowFiles), threads) + args$threads <- min(length(ArrowFiles), threads) } #Remove Input from args From 71419d34083d033e764368484b16810f613bc935 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Fri, 14 Apr 2023 06:05:32 -0700 Subject: [PATCH 182/184] Update auto-comment.yml --- .github/workflows/auto-comment.yml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml index 3ae4ec02..e7a11435 100644 --- a/.github/workflows/auto-comment.yml +++ b/.github/workflows/auto-comment.yml @@ -9,17 +9,22 @@ jobs: GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} issuesOpened: | Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
- __Before we help you, you must respond to the following questions__ unless your original post already contained this information: + It is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment, all of which are extremely challenging to troubleshoot. As such, we require [reproducible examples](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) (preferably using the tutorial dataset) from users who want assistance. If you cannot reproduce your error, how will we be able to help? + Before going through the work of making a reproducible example, search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for. + If your post does not contain a reproducible example, it is unlikely to receive a response.
+ __In addition to a reproducible example, you must respond to the following questions__ before we help you, unless your original post already contained this information: __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. __3.__ Did you post your log file? If not, add it now. __4.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post. # issuesOpened: | -# Hi @{{ author }}! Thanks for using ArchR! I am currently on paternity leave and will not be responding to any issues or discussion threads. I plan to be back in late January and will do my best to address your issue then.
-# In the meantime, it is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment. Search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for.
-# If you are able to solve your issue, please post the solution and close this issue post.
-# Otherwise __if you would like my help when I return, you must respond to the following questions__ unless your original post already contained this information: +# Hi @{{ author }}! Thanks for using ArchR! I am currently on vacation and will not be responding to any issues or discussion threads until 5/5.
+# Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
+# It is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment, all of which are extremely challenging to troubleshoot. As such, we require [reproducible examples](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) (preferably using the tutorial dataset) from users who want assistance. If you cannot reproduce your error, how will we be able to help? +# Before going through the work of making a reproducible example, search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for. +# If your post does not contain a reproducible example, it is unlikely to receive a response.
# __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? # __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. # __3.__ Did you post your log file? If not, add it now. +# __4.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post. From bc620af5bb59de2a9fa88f439a79f043d1c08690 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Tue, 18 Apr 2023 19:42:10 -0700 Subject: [PATCH 183/184] Update auto-comment.yml --- .github/workflows/auto-comment.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml index e7a11435..885291bf 100644 --- a/.github/workflows/auto-comment.yml +++ b/.github/workflows/auto-comment.yml @@ -7,24 +7,24 @@ jobs: - uses: wow-actions/auto-comment@v1 with: GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} - issuesOpened: | - Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
- It is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment, all of which are extremely challenging to troubleshoot. As such, we require [reproducible examples](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) (preferably using the tutorial dataset) from users who want assistance. If you cannot reproduce your error, how will we be able to help? - Before going through the work of making a reproducible example, search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for. - If your post does not contain a reproducible example, it is unlikely to receive a response.
- __In addition to a reproducible example, you must respond to the following questions__ before we help you, unless your original post already contained this information: - __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? - __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. - __3.__ Did you post your log file? If not, add it now. - __4.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post. - # issuesOpened: | -# Hi @{{ author }}! Thanks for using ArchR! I am currently on vacation and will not be responding to any issues or discussion threads until 5/5.
-# Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
+# Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
# It is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment, all of which are extremely challenging to troubleshoot. As such, we require [reproducible examples](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) (preferably using the tutorial dataset) from users who want assistance. If you cannot reproduce your error, how will we be able to help? # Before going through the work of making a reproducible example, search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for. # If your post does not contain a reproducible example, it is unlikely to receive a response.
+# __In addition to a reproducible example, you must respond to the following questions__ before we help you, unless your original post already contained this information: # __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? # __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. # __3.__ Did you post your log file? If not, add it now. # __4.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post. + + issuesOpened: | + Hi @{{ author }}! Thanks for using ArchR! I am currently on vacation and will not be responding to any issues or discussion threads until 5/5.
+ Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
+ It is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment, all of which are extremely challenging to troubleshoot. As such, we require [reproducible examples](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) (preferably using the tutorial dataset) from users who want assistance. If you cannot reproduce your error, how will we be able to help? + Before going through the work of making a reproducible example, search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for. + If your post does not contain a reproducible example, it is unlikely to receive a response.
+ __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? + __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. + __3.__ Did you post your log file? If not, add it now. + __4.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post. From e18f7c329767dbaa92d0e6edd9a30bb09b507266 Mon Sep 17 00:00:00 2001 From: Ryan Corces Date: Wed, 17 May 2023 05:47:09 -0700 Subject: [PATCH 184/184] Update auto-comment.yml --- .github/workflows/auto-comment.yml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml index 885291bf..5916cb64 100644 --- a/.github/workflows/auto-comment.yml +++ b/.github/workflows/auto-comment.yml @@ -7,24 +7,23 @@ jobs: - uses: wow-actions/auto-comment@v1 with: GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + issuesOpened: | + Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
+ It is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment, all of which are extremely challenging to troubleshoot. As such, we require [reproducible examples](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) (preferably using the tutorial dataset) from users who want assistance. If you cannot reproduce your error, we will not be able to help. + Before going through the work of making a reproducible example, search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for. + If your post does not contain a reproducible example, it is unlikely to receive a response.
+ __In addition to a reproducible example, you must do the following things__ before we help you, unless your original post already contained this information: + __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? + __2.__ Did you post your log file? If not, add it now. + __3.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post. + # issuesOpened: | -# Hi @{{ author }}! Thanks for using ArchR! Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
+# Hi @{{ author }}! Thanks for using ArchR! I am currently on vacation and will not be responding to any issues or discussion threads until 5/5.
+# Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
# It is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment, all of which are extremely challenging to troubleshoot. As such, we require [reproducible examples](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) (preferably using the tutorial dataset) from users who want assistance. If you cannot reproduce your error, how will we be able to help? # Before going through the work of making a reproducible example, search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for. # If your post does not contain a reproducible example, it is unlikely to receive a response.
-# __In addition to a reproducible example, you must respond to the following questions__ before we help you, unless your original post already contained this information: # __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? # __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. # __3.__ Did you post your log file? If not, add it now. # __4.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post. - - issuesOpened: | - Hi @{{ author }}! Thanks for using ArchR! I am currently on vacation and will not be responding to any issues or discussion threads until 5/5.
- Please make sure that your post belongs in the Issues section. Only bugs and error reports belong in the Issues section. Usage questions and feature requests should be posted in the [Discussions](https://github.com/GreenleafLab/ArchR/discussions) section, not in Issues.
- It is worth noting that there are very few actual bugs in ArchR. If you are getting an error, it is probably something specific to your dataset, usage, or computational environment, all of which are extremely challenging to troubleshoot. As such, we require [reproducible examples](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) (preferably using the tutorial dataset) from users who want assistance. If you cannot reproduce your error, how will we be able to help? - Before going through the work of making a reproducible example, search the previous [Issues](https://github.com/GreenleafLab/ArchR/issues), [Discussions](https://github.com/GreenleafLab/ArchR/discussions), [function definitions](https://www.archrproject.com/reference/index.html), or the [ArchR manual](https://www.archrproject.com/bookdown/index.html) and you will likely find the answers you are looking for. - If your post does not contain a reproducible example, it is unlikely to receive a response.
- __1.__ If you've encountered an error, have you already searched previous Issues to make sure that this hasn't already been solved? - __2.__ Can you recapitulate your error using the tutorial code and dataset? If so, provide a reproducible example. - __3.__ Did you post your log file? If not, add it now. - __4.__ Remove any screenshots that contain text and instead copy and paste the text using markdown's codeblock syntax (three consecutive backticks). You can do this by editing your original post.