diff --git a/lineage/2.2-get_island_interactive.R b/lineage/2.2-get_island_interactive.R index 4165e62..761b9a8 100755 --- a/lineage/2.2-get_island_interactive.R +++ b/lineage/2.2-get_island_interactive.R @@ -1,7 +1,7 @@ #!/usr/bin/env Rscript -"Usage: 2.2-get_island_interactive.R [ --plot ] [ --mab ... ] [ --outdir --output --reference ] +"Usage: 2.2-get_island_interactive.R [ --plot ] [ --mab ... ] [ --outdir --output --reference --plotmethod ] Options: -h --help Show this documentation. @@ -18,6 +18,10 @@ Options: [default: islandSeqs] --reference Other data points to display on plots, eg other members of the antibody lineage of interest. + --plotmethod Plotting method to use. 'original' uses kernel density + estimation to show smoothed distribution of sequences. + 'binned' directly plots counts of sequences + for tiled ID/DIV regions [default: original] Created by Chaim A Schramm 2016-08-30. @@ -29,7 +33,7 @@ Copyright (c) 2016 Columbia University and Vaccine Research Center, National #### WRAPPER FOR gglocator() TO LIMIT TO POINTS WITHIN PLOT #### -getLocation <- function(xmin=0,xmax=50,ymin=50,ymax=100) { +getLocation <- function(xmin=-1,xmax=50,ymin=50,ymax=101) { myPoint <- data.frame( x=c(xmin-1), y=c(ymax+1) ) while( myPoint$xxmax || myPoint$yymax ) { @@ -200,7 +204,7 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin #generate initial plot; supress color bar and increase size of plot title from default # keep title separate, because we'll want to use different titles at different stages - pp <- plot_all(smalldata, mab.R, mab, "germline V") + guides(fill=F) + theme( plot.title=element_text(size = 18) ) + pp <- plot_all(smalldata, mab.R, mab, "germline V", plotmethod=opts$plotmethod) + guides(fill=F) + theme( plot.title=element_text(size = 18) ) #want referents to look different on interactive and final figure (mostly about size) # so save this first, then add @@ -294,7 +298,6 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin } #generate a final set of plots for inspection - if( length(natAbList) > 1 ) { dev.off() pl <- list() titlePlot <- ggplot( data.frame(x=1,y=1,text=sprintf( "Final Selections (total %d transcripts)",length(idsOnly) )) ) + @@ -334,7 +337,6 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin #ggsave(sprintf("%s/%s.png",outDir,outFile),multiplot( plotlist=pl, layout=myLayout ),h=3,w=2*length(natAbList),dpi=300) #Sys.sleep(10) dev.off() - } #output write.table( idsOnly, file=sprintf("%s/%s.txt",outDir,outFile), quote=F, row.names=F, col.names=F ) diff --git a/plottingFunctions.R b/plottingFunctions.R index caa4b63..0fac0ef 100755 --- a/plottingFunctions.R +++ b/plottingFunctions.R @@ -1,4 +1,3 @@ - #split from 4.3 by Chaim A Schramm 2016-08-30. library(ggplot2) @@ -10,46 +9,68 @@ library(MASS) # PLOTTING FUNCTION #################################### -plot_all <- function (data, native, pretty, heavy, plot_title=NULL, color=TRUE, guide=TRUE, xlabel=TRUE, ylabel=TRUE, contour=TRUE, conCol = 'black') { - - my_x<-paste("% divergence from", heavy) - my_y<-paste("% ID to", pretty) - - if (color) { - my_colors=rev(rainbow(15,end=4/6)) - } else { - my_colors=rev(gray.colors(5)) - } - - g <- kde2d(data$germ_div,data[[native]],n=100,h=1,lim=c(0,60,40,100)) - densf <- data.frame(expand.grid(x=g$x, y=g$y), z=as.vector(g$z)) - b <- (sum(g$z) / length(data$germ_div))/2 - t <- b * 10^4 - if ( max(g$z) > t ) { - t <- b * 10^ceiling( log10( max(g$z)/b ) ) - } - r <- 10^seq(log10(b), log10(t), 1) - - p<-ggplot(densf,aes(x,y,z=z)) + - geom_tile(aes(fill = z)) + - scale_fill_gradientn(colours=rev(rainbow(15,end=4/6)), trans="log10", limits=c(b,t), - na.value="white", breaks=r, labels=signif(r/b,1), - guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0), - barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+ - theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(0,50)) + - scale_y_continuous(expand=c(0,1),limits=c(50,100)) + - theme(plot.background = element_blank(),panel.grid.major = element_blank(), - axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5), - panel.grid.minor = element_blank(), axis.text = element_text(size = 6), - axis.title = element_text(size = 8), plot.margin = unit(c(.1,.1,.1,.1),"in"), - plot.title = element_text(size = 8) ) - - if ( contour ) { p <- p + stat_contour(colour=conCol, size=.25, breaks=10*r) } - if ( ! is.null(plot_title) ) { p <- p + labs( title=plot_title ) } - if ( xlabel ) { p <- p + labs( x=my_x ) } else { p <- p + labs( x="" ) } - if ( ylabel ) { p <- p + labs( y=my_y ) } else { p <- p + labs( y="" ) } - - p +plot_all <- function (data, native, pretty, heavy, plot_title=NULL, plotmethod="original", color=TRUE, guide=TRUE, xlabel=TRUE, ylabel=TRUE, contour=TRUE, conCol = 'black') { + + my_x<-paste("% divergence from", heavy) + my_y<-paste("% ID to", pretty) + + if (color) { + my_colors=rev(rainbow(15,end=4/6)) + } else { + my_colors=rev(gray.colors(5)) + } + + if (plotmethod == "binned") { + # Count sequences by ID/DIV location by binning them into discrete tiles + # (using 100 increments from 0 to 60 for identity, and 100 increments from + # 40 to 100 for divergence). + xbreaks <- seq(0, 60, length.out = 100) + ybreaks <- seq(40, 100, length.out = 100) + densf <- as.data.frame(table( + cut(data$germ_div, breaks = xbreaks, include.lowest = TRUE), + cut(data[[native]], breaks = ybreaks, include.lowest = TRUE))) + # Set up the same variables as the original method uses + densf$x <- xbreaks[densf$Var1] + densf$y <- ybreaks[densf$Var2] + densf$z <- densf$Freq + b <- 1 + t <- b * 10^4 + if ( max(densf$z) > t ) { + t <- b * 10^ceiling( log10( max(densf$z) ) ) + } + r <- 10^seq(log10(b), log10(t), 1) + } else { + # The original counting method + g <- kde2d(data$germ_div,data[[native]],n=100,h=1,lim=c(0,60,40,100)) + densf <- data.frame(expand.grid(x=g$x, y=g$y), z=as.vector(g$z)) + b <- (sum(g$z) / length(data$germ_div))/2 + t <- b * 10^4 + if ( max(g$z) > t ) { + t <- b * 10^ceiling( log10( max(g$z)/b ) ) + } + r <- 10^seq(log10(b), log10(t), 1) + } + + p<-ggplot(densf,aes(x,y,z=z)) + + geom_tile(aes(fill = z)) + + scale_fill_gradientn(colours=rev(rainbow(15,end=4/6)), trans="log10", limits=c(b,t), + na.value="white", breaks=r, labels=signif(r/b,1), + guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0), + barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+ + theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(-1,50)) + + scale_y_continuous(expand=c(0,1),limits=c(50,101)) + + theme(plot.background = element_blank(),panel.grid.major = element_blank(), + axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5), + panel.grid.minor = element_blank(), axis.text = element_text(size = 6), + axis.title = element_text(size = 8), plot.margin = unit(c(.1,.1,.1,.1),"in"), + plot.title = element_text(size = 8) ) + + if ( contour ) { p <- p + stat_contour(colour=conCol, size=.25, breaks=10*r) } + if ( ! is.null(plot_title) ) { p <- p + labs( title=plot_title ) } + if ( xlabel ) { p <- p + labs( x=my_x ) } else { p <- p + labs( x="" ) } + if ( ylabel ) { p <- p + labs( y=my_y ) } else { p <- p + labs( y="" ) } + + p }