From 53a65c794a959fcbe2294842cd1e9867650e308c Mon Sep 17 00:00:00 2001 From: Jesse Connell Date: Thu, 15 Sep 2022 17:07:50 -0400 Subject: [PATCH 1/3] For #15: expand plot area slightly Expands the x axis' lower bound to -1 and y axis' upper bound to 101 so that points lying directly at 0% divergence or 100% mAb identity are still plotted (see warning about removal of data outside limits in ?ggplot2::scale_continuous) and expands available selection area in interactive plot to match --- lineage/2.2-get_island_interactive.R | 2 +- plottingFunctions.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lineage/2.2-get_island_interactive.R b/lineage/2.2-get_island_interactive.R index 4165e62..609b820 100755 --- a/lineage/2.2-get_island_interactive.R +++ b/lineage/2.2-get_island_interactive.R @@ -29,7 +29,7 @@ Copyright (c) 2016 Columbia University and Vaccine Research Center, National #### WRAPPER FOR gglocator() TO LIMIT TO POINTS WITHIN PLOT #### -getLocation <- function(xmin=0,xmax=50,ymin=50,ymax=100) { +getLocation <- function(xmin=-1,xmax=50,ymin=50,ymax=101) { myPoint <- data.frame( x=c(xmin-1), y=c(ymax+1) ) while( myPoint$xxmax || myPoint$yymax ) { diff --git a/plottingFunctions.R b/plottingFunctions.R index caa4b63..d249cd1 100755 --- a/plottingFunctions.R +++ b/plottingFunctions.R @@ -36,8 +36,8 @@ plot_all <- function (data, native, pretty, heavy, plot_title=NULL, color=TRUE, na.value="white", breaks=r, labels=signif(r/b,1), guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0), barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+ - theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(0,50)) + - scale_y_continuous(expand=c(0,1),limits=c(50,100)) + + theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(-1,50)) + + scale_y_continuous(expand=c(0,1),limits=c(50,101)) + theme(plot.background = element_blank(),panel.grid.major = element_blank(), axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5), panel.grid.minor = element_blank(), axis.text = element_text(size = 6), From c94c07deb2d3ca6b0c34fd30edc9799694b630c3 Mon Sep 17 00:00:00 2001 From: Jesse Connell Date: Thu, 15 Sep 2022 17:46:12 -0400 Subject: [PATCH 2/3] For #15: add secondary ID/DIV plotting method Adds a --plotmethod to the get_island script, and code to plot_all, to support a method for binning points to plot instead of using KDE. --- lineage/2.2-get_island_interactive.R | 8 ++- plottingFunctions.R | 103 ++++++++++++++++----------- 2 files changed, 68 insertions(+), 43 deletions(-) diff --git a/lineage/2.2-get_island_interactive.R b/lineage/2.2-get_island_interactive.R index 609b820..3bde6b9 100755 --- a/lineage/2.2-get_island_interactive.R +++ b/lineage/2.2-get_island_interactive.R @@ -1,7 +1,7 @@ #!/usr/bin/env Rscript -"Usage: 2.2-get_island_interactive.R [ --plot ] [ --mab ... ] [ --outdir --output --reference ] +"Usage: 2.2-get_island_interactive.R [ --plot ] [ --mab ... ] [ --outdir --output --reference --plotmethod ] Options: -h --help Show this documentation. @@ -18,6 +18,10 @@ Options: [default: islandSeqs] --reference Other data points to display on plots, eg other members of the antibody lineage of interest. + --plotmethod Plotting method to use. 'original' uses kernel density + estimation to show smoothed distribution of sequences. + 'binned' directly plots counts of sequences + for tiled ID/DIV regions [default: original] Created by Chaim A Schramm 2016-08-30. @@ -200,7 +204,7 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin #generate initial plot; supress color bar and increase size of plot title from default # keep title separate, because we'll want to use different titles at different stages - pp <- plot_all(smalldata, mab.R, mab, "germline V") + guides(fill=F) + theme( plot.title=element_text(size = 18) ) + pp <- plot_all(smalldata, mab.R, mab, "germline V", plotmethod=opts$plotmethod) + guides(fill=F) + theme( plot.title=element_text(size = 18) ) #want referents to look different on interactive and final figure (mostly about size) # so save this first, then add diff --git a/plottingFunctions.R b/plottingFunctions.R index d249cd1..0fac0ef 100755 --- a/plottingFunctions.R +++ b/plottingFunctions.R @@ -1,4 +1,3 @@ - #split from 4.3 by Chaim A Schramm 2016-08-30. library(ggplot2) @@ -10,46 +9,68 @@ library(MASS) # PLOTTING FUNCTION #################################### -plot_all <- function (data, native, pretty, heavy, plot_title=NULL, color=TRUE, guide=TRUE, xlabel=TRUE, ylabel=TRUE, contour=TRUE, conCol = 'black') { - - my_x<-paste("% divergence from", heavy) - my_y<-paste("% ID to", pretty) - - if (color) { - my_colors=rev(rainbow(15,end=4/6)) - } else { - my_colors=rev(gray.colors(5)) - } - - g <- kde2d(data$germ_div,data[[native]],n=100,h=1,lim=c(0,60,40,100)) - densf <- data.frame(expand.grid(x=g$x, y=g$y), z=as.vector(g$z)) - b <- (sum(g$z) / length(data$germ_div))/2 - t <- b * 10^4 - if ( max(g$z) > t ) { - t <- b * 10^ceiling( log10( max(g$z)/b ) ) - } - r <- 10^seq(log10(b), log10(t), 1) - - p<-ggplot(densf,aes(x,y,z=z)) + - geom_tile(aes(fill = z)) + - scale_fill_gradientn(colours=rev(rainbow(15,end=4/6)), trans="log10", limits=c(b,t), - na.value="white", breaks=r, labels=signif(r/b,1), - guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0), - barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+ - theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(-1,50)) + - scale_y_continuous(expand=c(0,1),limits=c(50,101)) + - theme(plot.background = element_blank(),panel.grid.major = element_blank(), - axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5), - panel.grid.minor = element_blank(), axis.text = element_text(size = 6), - axis.title = element_text(size = 8), plot.margin = unit(c(.1,.1,.1,.1),"in"), - plot.title = element_text(size = 8) ) - - if ( contour ) { p <- p + stat_contour(colour=conCol, size=.25, breaks=10*r) } - if ( ! is.null(plot_title) ) { p <- p + labs( title=plot_title ) } - if ( xlabel ) { p <- p + labs( x=my_x ) } else { p <- p + labs( x="" ) } - if ( ylabel ) { p <- p + labs( y=my_y ) } else { p <- p + labs( y="" ) } - - p +plot_all <- function (data, native, pretty, heavy, plot_title=NULL, plotmethod="original", color=TRUE, guide=TRUE, xlabel=TRUE, ylabel=TRUE, contour=TRUE, conCol = 'black') { + + my_x<-paste("% divergence from", heavy) + my_y<-paste("% ID to", pretty) + + if (color) { + my_colors=rev(rainbow(15,end=4/6)) + } else { + my_colors=rev(gray.colors(5)) + } + + if (plotmethod == "binned") { + # Count sequences by ID/DIV location by binning them into discrete tiles + # (using 100 increments from 0 to 60 for identity, and 100 increments from + # 40 to 100 for divergence). + xbreaks <- seq(0, 60, length.out = 100) + ybreaks <- seq(40, 100, length.out = 100) + densf <- as.data.frame(table( + cut(data$germ_div, breaks = xbreaks, include.lowest = TRUE), + cut(data[[native]], breaks = ybreaks, include.lowest = TRUE))) + # Set up the same variables as the original method uses + densf$x <- xbreaks[densf$Var1] + densf$y <- ybreaks[densf$Var2] + densf$z <- densf$Freq + b <- 1 + t <- b * 10^4 + if ( max(densf$z) > t ) { + t <- b * 10^ceiling( log10( max(densf$z) ) ) + } + r <- 10^seq(log10(b), log10(t), 1) + } else { + # The original counting method + g <- kde2d(data$germ_div,data[[native]],n=100,h=1,lim=c(0,60,40,100)) + densf <- data.frame(expand.grid(x=g$x, y=g$y), z=as.vector(g$z)) + b <- (sum(g$z) / length(data$germ_div))/2 + t <- b * 10^4 + if ( max(g$z) > t ) { + t <- b * 10^ceiling( log10( max(g$z)/b ) ) + } + r <- 10^seq(log10(b), log10(t), 1) + } + + p<-ggplot(densf,aes(x,y,z=z)) + + geom_tile(aes(fill = z)) + + scale_fill_gradientn(colours=rev(rainbow(15,end=4/6)), trans="log10", limits=c(b,t), + na.value="white", breaks=r, labels=signif(r/b,1), + guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0), + barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+ + theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(-1,50)) + + scale_y_continuous(expand=c(0,1),limits=c(50,101)) + + theme(plot.background = element_blank(),panel.grid.major = element_blank(), + axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5), + panel.grid.minor = element_blank(), axis.text = element_text(size = 6), + axis.title = element_text(size = 8), plot.margin = unit(c(.1,.1,.1,.1),"in"), + plot.title = element_text(size = 8) ) + + if ( contour ) { p <- p + stat_contour(colour=conCol, size=.25, breaks=10*r) } + if ( ! is.null(plot_title) ) { p <- p + labs( title=plot_title ) } + if ( xlabel ) { p <- p + labs( x=my_x ) } else { p <- p + labs( x="" ) } + if ( ylabel ) { p <- p + labs( y=my_y ) } else { p <- p + labs( y="" ) } + + p } From 996d4d8f0ed1e05a464f54e0f2c00b7ba3fe7fc4 Mon Sep 17 00:00:00 2001 From: Chaim Schramm Date: Fri, 28 Oct 2022 16:43:23 -0400 Subject: [PATCH 3/3] Fix #9 while we're at it --- lineage/2.2-get_island_interactive.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/lineage/2.2-get_island_interactive.R b/lineage/2.2-get_island_interactive.R index 3bde6b9..761b9a8 100755 --- a/lineage/2.2-get_island_interactive.R +++ b/lineage/2.2-get_island_interactive.R @@ -298,7 +298,6 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin } #generate a final set of plots for inspection - if( length(natAbList) > 1 ) { dev.off() pl <- list() titlePlot <- ggplot( data.frame(x=1,y=1,text=sprintf( "Final Selections (total %d transcripts)",length(idsOnly) )) ) + @@ -338,7 +337,6 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin #ggsave(sprintf("%s/%s.png",outDir,outFile),multiplot( plotlist=pl, layout=myLayout ),h=3,w=2*length(natAbList),dpi=300) #Sys.sleep(10) dev.off() - } #output write.table( idsOnly, file=sprintf("%s/%s.txt",outDir,outFile), quote=F, row.names=F, col.names=F )