Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions lineage/2.2-get_island_interactive.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env Rscript


"Usage: 2.2-get_island_interactive.R <idDivFile> [ --plot <ids.list> ] [ --mab <abID>... ] [ --outdir <output/tables> --output <islandSeqs.txt> --reference <idDivFile> ]
"Usage: 2.2-get_island_interactive.R <idDivFile> [ --plot <ids.list> ] [ --mab <abID>... ] [ --outdir <output/tables> --output <islandSeqs.txt> --reference <idDivFile> --plotmethod <plotmethod>]

Options:
-h --help Show this documentation.
Expand All @@ -18,6 +18,10 @@ Options:
[default: islandSeqs]
--reference <idDivFile> Other data points to display on plots, eg other members of
the antibody lineage of interest.
--plotmethod <plotmethod> Plotting method to use. 'original' uses kernel density
estimation to show smoothed distribution of sequences.
'binned' directly plots counts of sequences
for tiled ID/DIV regions [default: original]


Created by Chaim A Schramm 2016-08-30.
Expand All @@ -29,7 +33,7 @@ Copyright (c) 2016 Columbia University and Vaccine Research Center, National


#### WRAPPER FOR gglocator() TO LIMIT TO POINTS WITHIN PLOT ####
getLocation <- function(xmin=0,xmax=50,ymin=50,ymax=100) {
getLocation <- function(xmin=-1,xmax=50,ymin=50,ymax=101) {

myPoint <- data.frame( x=c(xmin-1), y=c(ymax+1) )
while( myPoint$x<xmin || myPoint$x>xmax || myPoint$y<ymin || myPoint$y>ymax ) {
Expand Down Expand Up @@ -200,7 +204,7 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin

#generate initial plot; supress color bar and increase size of plot title from default
# keep title separate, because we'll want to use different titles at different stages
pp <- plot_all(smalldata, mab.R, mab, "germline V") + guides(fill=F) + theme( plot.title=element_text(size = 18) )
pp <- plot_all(smalldata, mab.R, mab, "germline V", plotmethod=opts$plotmethod) + guides(fill=F) + theme( plot.title=element_text(size = 18) )

#want referents to look different on interactive and final figure (mostly about size)
# so save this first, then add
Expand Down Expand Up @@ -294,7 +298,6 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin
}

#generate a final set of plots for inspection
if( length(natAbList) > 1 ) {
dev.off()
pl <- list()
titlePlot <- ggplot( data.frame(x=1,y=1,text=sprintf( "Final Selections (total %d transcripts)",length(idsOnly) )) ) +
Expand Down Expand Up @@ -334,7 +337,6 @@ getIsland <- function (dataFile, subsetFile, natAbList, outDir, outFile, refPoin
#ggsave(sprintf("%s/%s.png",outDir,outFile),multiplot( plotlist=pl, layout=myLayout ),h=3,w=2*length(natAbList),dpi=300)
#Sys.sleep(10)
dev.off()
}

#output
write.table( idsOnly, file=sprintf("%s/%s.txt",outDir,outFile), quote=F, row.names=F, col.names=F )
Expand Down
103 changes: 62 additions & 41 deletions plottingFunctions.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#split from 4.3 by Chaim A Schramm 2016-08-30.

library(ggplot2)
Expand All @@ -10,46 +9,68 @@ library(MASS)
# PLOTTING FUNCTION
####################################

plot_all <- function (data, native, pretty, heavy, plot_title=NULL, color=TRUE, guide=TRUE, xlabel=TRUE, ylabel=TRUE, contour=TRUE, conCol = 'black') {

my_x<-paste("% divergence from", heavy)
my_y<-paste("% ID to", pretty)

if (color) {
my_colors=rev(rainbow(15,end=4/6))
} else {
my_colors=rev(gray.colors(5))
}

g <- kde2d(data$germ_div,data[[native]],n=100,h=1,lim=c(0,60,40,100))
densf <- data.frame(expand.grid(x=g$x, y=g$y), z=as.vector(g$z))
b <- (sum(g$z) / length(data$germ_div))/2
t <- b * 10^4
if ( max(g$z) > t ) {
t <- b * 10^ceiling( log10( max(g$z)/b ) )
}
r <- 10^seq(log10(b), log10(t), 1)

p<-ggplot(densf,aes(x,y,z=z)) +
geom_tile(aes(fill = z)) +
scale_fill_gradientn(colours=rev(rainbow(15,end=4/6)), trans="log10", limits=c(b,t),
na.value="white", breaks=r, labels=signif(r/b,1),
guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0),
barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+
theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(0,50)) +
scale_y_continuous(expand=c(0,1),limits=c(50,100)) +
theme(plot.background = element_blank(),panel.grid.major = element_blank(),
axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5),
panel.grid.minor = element_blank(), axis.text = element_text(size = 6),
axis.title = element_text(size = 8), plot.margin = unit(c(.1,.1,.1,.1),"in"),
plot.title = element_text(size = 8) )

if ( contour ) { p <- p + stat_contour(colour=conCol, size=.25, breaks=10*r) }
if ( ! is.null(plot_title) ) { p <- p + labs( title=plot_title ) }
if ( xlabel ) { p <- p + labs( x=my_x ) } else { p <- p + labs( x="" ) }
if ( ylabel ) { p <- p + labs( y=my_y ) } else { p <- p + labs( y="" ) }

p
plot_all <- function (data, native, pretty, heavy, plot_title=NULL, plotmethod="original", color=TRUE, guide=TRUE, xlabel=TRUE, ylabel=TRUE, contour=TRUE, conCol = 'black') {

my_x<-paste("% divergence from", heavy)
my_y<-paste("% ID to", pretty)

if (color) {
my_colors=rev(rainbow(15,end=4/6))
} else {
my_colors=rev(gray.colors(5))
}

if (plotmethod == "binned") {
# Count sequences by ID/DIV location by binning them into discrete tiles
# (using 100 increments from 0 to 60 for identity, and 100 increments from
# 40 to 100 for divergence).
xbreaks <- seq(0, 60, length.out = 100)
ybreaks <- seq(40, 100, length.out = 100)
densf <- as.data.frame(table(
cut(data$germ_div, breaks = xbreaks, include.lowest = TRUE),
cut(data[[native]], breaks = ybreaks, include.lowest = TRUE)))
# Set up the same variables as the original method uses
densf$x <- xbreaks[densf$Var1]
densf$y <- ybreaks[densf$Var2]
densf$z <- densf$Freq
b <- 1
t <- b * 10^4
if ( max(densf$z) > t ) {
t <- b * 10^ceiling( log10( max(densf$z) ) )
}
r <- 10^seq(log10(b), log10(t), 1)
} else {
# The original counting method
g <- kde2d(data$germ_div,data[[native]],n=100,h=1,lim=c(0,60,40,100))
densf <- data.frame(expand.grid(x=g$x, y=g$y), z=as.vector(g$z))
b <- (sum(g$z) / length(data$germ_div))/2
t <- b * 10^4
if ( max(g$z) > t ) {
t <- b * 10^ceiling( log10( max(g$z)/b ) )
}
r <- 10^seq(log10(b), log10(t), 1)
}

p<-ggplot(densf,aes(x,y,z=z)) +
geom_tile(aes(fill = z)) +
scale_fill_gradientn(colours=rev(rainbow(15,end=4/6)), trans="log10", limits=c(b,t),
na.value="white", breaks=r, labels=signif(r/b,1),
guide = guide_colorbar( title="number of\nsequences", title.theme=element_text(size=4,angle=0),
barheight = unit(.5,"in"), barwidth = unit(.1,"in"), label.theme=element_text(size=3,angle=0,) ) )+
theme_bw() + scale_x_continuous(expand=c(0,0),limits=c(-1,50)) +
scale_y_continuous(expand=c(0,1),limits=c(50,101)) +
theme(plot.background = element_blank(),panel.grid.major = element_blank(),
axis.ticks.length = unit(.02,"in"), axis.ticks = element_line(size = .5),
panel.grid.minor = element_blank(), axis.text = element_text(size = 6),
axis.title = element_text(size = 8), plot.margin = unit(c(.1,.1,.1,.1),"in"),
plot.title = element_text(size = 8) )

if ( contour ) { p <- p + stat_contour(colour=conCol, size=.25, breaks=10*r) }
if ( ! is.null(plot_title) ) { p <- p + labs( title=plot_title ) }
if ( xlabel ) { p <- p + labs( x=my_x ) } else { p <- p + labs( x="" ) }
if ( ylabel ) { p <- p + labs( y=my_y ) } else { p <- p + labs( y="" ) }

p
}


Expand Down