These exercises cover the scales, statistics and themes of ggplot2 for Plotting in R.

Exercise 1 - Themes

# Read the tab-delimited patient table used throughout Exercise 1.
patients_clean <- read.delim("data/patients_clean_ggplot2.txt", sep = "\t")
library(ggplot2)

# Make theme_bw() the session default so every plot below starts from it.
theme_set(theme_bw())

# Scatter plot of Weight against BMI, coloured by Pet, with a linear-model
# smoother drawn for each colour group and the "plasma" viridis palette.
# `se = FALSE` is spelled out in full: `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
plot <- ggplot(
  data = patients_clean,
  mapping = aes(x = BMI, y = Weight, colour = factor(Pet))
) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  scale_color_viridis_d(option = "plasma")
plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Legend styling: drop the legend title, give the legend box and its keys a
# gray fill, and move the legend underneath the panel.
plot <- plot + theme(
  legend.title = element_blank(),
  legend.background = element_rect(fill = "gray"),
  legend.key = element_rect(fill = "gray"),
  legend.position = "bottom"
)

plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Draw one panel per value of Pet.
plot <- plot + facet_wrap(~Pet)

plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Add a title and remove the minor grid lines.
plot <- plot +
  ggtitle("BMI vs Weight - By Pet") +
  theme(panel.grid.minor = element_blank())

plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Export as a 7 x 7 inch PNG. `filename` and `plot` are named in full; the
# previous `file =` spelling only worked through partial argument matching.
ggsave(
  filename = "BMIvsWeight.png",
  plot = plot,
  units = "in",
  height = 7,
  width = 7
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Start the custom theme from the theme settings stored on `plot`.
myTheme <- plot$theme

# Preview two ready-made themes whose elements are borrowed below.
plot + theme_linedraw()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

library(ggthemes)
plot + theme_fivethirtyeight()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Extend the captured theme with elements copied from the two previewed
# themes; the trailing comment on each line names the theme it came from.
myTheme <- myTheme + theme(
  strip.background = element_rect(fill = "black"), # from theme_linedraw
  strip.text = element_text(colour = "white", size = rel(0.8)), # from theme_linedraw
  panel.grid = element_line(colour = NULL), # from theme_fivethirtyeight
  panel.grid.major = element_line(colour = "#D2D2D2"), # from theme_fivethirtyeight
  panel.grid.minor = element_blank(), # from theme_fivethirtyeight
  rect = element_rect(
    fill = "#F0F0F0", # from theme_fivethirtyeight
    linetype = 0, colour = NA
  ) # from theme_fivethirtyeight
)
plot + myTheme
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Export as PDF at the default size. Arguments are named in full instead of
# relying on `file` partially matching `filename`.
# NOTE(review): this saves `plot`, not `plot + myTheme` - confirm that the
# custom theme is meant to be left out of the exported file.
ggsave(filename = "BMIvsWeight.pdf", plot = plot)
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Switch the session default theme to theme_gray() and redraw.
theme_set(theme_gray())

plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

Exercise 2 - External Packages

# Load the saved PCA plot object (`deseq_pca_example`) and display it.
load("data/DESeq2_PCA.RData")
## Warning: namespace 'DESeq2' is not available and has been replaced
## by .GlobalEnv when processing object 'deseq_pca_example'
deseq_pca_example

# Inspect the data frame and the aesthetic mapping stored inside the plot.
deseq_pca_example$data
##                PC1         PC2 group condition     name
## sample1  -13.24423  22.0512418     A      Ctrl  sample1
## sample2  -14.40362 -13.8514994     A      Ctrl  sample2
## sample3  -14.79663  -2.9299797     A      Ctrl  sample3
## sample4  -15.12221  -9.4008058     A       Mut  sample4
## sample5  -14.78719  -0.2914010     A       Mut  sample5
## sample6  -13.77777   5.6896212     A       Mut  sample6
## sample7   13.68499  -2.4219808     B      Ctrl  sample7
## sample8   14.85294   3.9328670     B      Ctrl  sample8
## sample9   12.21286  -3.0234083     B      Ctrl  sample9
## sample10  16.25675  -5.6924350     B       Mut sample10
## sample11  14.85222   0.9620356     B       Mut sample11
## sample12  14.27188   4.9757444     B       Mut sample12
deseq_pca_example$mapping
## Aesthetic mapping: 
## * `colour` -> `group`
## * `x`      -> `PC1`
## * `y`      -> `PC2`
# The plot maps `colour` (not `fill`) to `group`, so the viridis palette has to
# be applied through the colour scale; a fill scale would have no visible
# effect here.
deseq_pca_example +
  scale_color_viridis_d() +
  ggtitle("PCA plot of DGE in A and B groups")

# Rebuild the plot from scratch: keep the data and plot-level mapping but drop
# the existing layers. Assigning an empty list (rather than NULL) keeps the
# `layers` element in place instead of deleting it from the object.
deseq_replot <- deseq_pca_example
deseq_replot$layers <- list()

# New point layer: colour still encodes `group`, shape now encodes `condition`.
deseq_replot <- deseq_replot +
  geom_point(aes(x = PC1, y = PC2, color = group, shape = condition)) +
  scale_color_viridis_d() +
  ggtitle("PCA plot of DGE in A and B groups")

Exercise 3 - Interactive Plots

library(plotly)

# Convert the static PCA plot into an interactive plotly widget.
ggplotly(deseq_replot)

# Add the sample name to the hover tooltip. `label` goes into the plot-level
# mapping, which the existing point layer inherits. Adding a second
# geom_point() layer just to carry the label would redraw every sample as a
# default circle on top of the shape-coded points and trigger an
# "Ignoring unknown aesthetics" warning.
ggplotly(deseq_replot + aes(label = name))

Exercise 4 - Working Example

In this final exercise we will run through a common example: making a volcano plot.

A volcano plot consists of: 1) log2FC on the x axis 2) -log10(pval) on the y axis

It is also good to add some additional customization:

- Highlight significant genes, i.e. pval < 0.05
- Highlight genes beyond a certain log2FC threshold, i.e. |log2FC| > 1
- Add lines to denote these thresholds
- Label some genes of interest directly: “Gm8714”, “Pas1b”, “Rab39”, “Tmc2”, “Ttpal”, “Ctdsp1”
- Use a simple theme to give the plot a clean look
- Export the plot as a pdf

# Load the differential-expression results.
# NOTE(review): `my_res` is presumably provided by this RData file - confirm.
load("data/DESeq2_Result.RData")
library(plotly)

# Genes of interest to label directly on the plot.
goi <- c("Gm8714","Pas1b","Rab39","Tmc2","Ttpal","Ctdsp1")

# Volcano plot: log2 fold change on x, -log10(p-value) on y. Each gene is
# classed as "NS" (pvalue > 0.05), "SigUp" (log2FoldChange > 1),
# "SigDown" (log2FoldChange < -1) or "Sig" (everything else).
myplot <- ggplot(
  my_res,
  aes(
    x = log2FoldChange,
    y = -log10(pvalue),
    color = ifelse(pvalue > 0.05, "NS",
                   ifelse(log2FoldChange > 1, "SigUp",
                          ifelse(log2FoldChange < (-1), "SigDown", "Sig")))
  )
) +
  geom_point(size = 0.5, alpha = 0.5) +
  # Colours are named by class so the pairing no longer depends on the
  # positional order of `breaks` and `values`.
  scale_color_manual(
    name = "Significance",
    breaks = c("NS", "Sig", "SigUp", "SigDown"),
    values = c(NS = "black", Sig = "green", SigUp = "blue", SigDown = "red")
  ) +
  theme_bw() +
  ggtitle("Volcano Plot showing significance of \ngene expression changes following DESeq analysis") +
  # Dotted guide lines at the p-value and fold-change thresholds.
  geom_hline(yintercept = (-log10(0.05)), lty = 3, color = "gray") +
  geom_vline(xintercept = c((-1), 1), lty = 3, color = "gray") +
  # Genes outside `goi` get an empty label, so only genes of interest show text.
  geom_text(aes(label = ifelse(SYMBOL %in% goi, as.character(SYMBOL), ''))) +
  xlim(-4.5, 4.5)

myplot
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).

# Export as PDF at the default size. Arguments are named in full instead of
# relying on `file` partially matching `filename`.
ggsave(filename = "DESeq2_volcanoplot.pdf", plot = myplot)
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).
library(ggrepel)

# Genes whose symbols are written onto the plot.
goi <- c("Gm8714", "Pas1b", "Rab39", "Tmc2", "Ttpal", "Ctdsp1")

# Same volcano plot, but the labels are placed with ggrepel. Genes outside
# `goi` receive an empty label.
ggplot(
  my_res,
  aes(
    x = log2FoldChange,
    y = -log10(pvalue),
    colour = ifelse(
      pvalue > 0.05,
      "NS",
      ifelse(
        log2FoldChange > 1,
        "SigUp",
        ifelse(log2FoldChange < -1, "SigDown", "Sig")
      )
    )
  )
) +
  geom_point(size = 0.5, alpha = 0.5) +
  scale_colour_manual(
    name = "Significance",
    breaks = c("NS", "Sig", "SigUp", "SigDown"),
    values = c("black", "green", "blue", "red")
  ) +
  theme_bw() +
  labs(title = "Volcano Plot showing significance of \ngene expression changes following DESeq analysis") +
  geom_hline(yintercept = -log10(0.05), linetype = 3, colour = "gray") +
  geom_vline(xintercept = c(-1, 1), linetype = 3, colour = "gray") +
  geom_text_repel(
    aes(label = ifelse(SYMBOL %in% goi, as.character(SYMBOL), "")),
    min.segment.length = 0,
    seed = 42,
    box.padding = 0.2,
    max.time = 3,
    max.overlaps = Inf
  ) +
  xlim(-4.5, 4.5)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).

# Volcano plot for interactive use: same classes and guide lines as the static
# version, no text labels or x limits, and a smaller base font size.
myplot <- ggplot(
  my_res,
  aes(
    x = log2FoldChange,
    y = -log10(pvalue),
    color = ifelse(pvalue > 0.05, "NS",
                   ifelse(log2FoldChange > 1, "SigUp",
                          ifelse(log2FoldChange < (-1), "SigDown", "Sig")))
  )
) +
  geom_point(size = 0.5, alpha = 0.5) +
  scale_color_manual(
    name = "Significance",
    breaks = c("NS", "Sig", "SigUp", "SigDown"),
    values = c("black", "green", "blue", "red")
  ) +
  theme_bw() +
  ggtitle("Volcano Plot showing significance of \ngene expression changes following DESeq analysis") +
  geom_hline(yintercept = (-log10(0.05)), lty = 3, color = "gray") +
  geom_vline(xintercept = c((-1), 1), lty = 3, color = "gray") +
  theme(text = element_text(size = 8))

# Show the gene symbol on hover. `text` is attached to the plot-level mapping,
# so the existing point layer carries it. Adding a second geom_point() layer
# for the tooltip would draw default-sized opaque points over the small
# translucent ones and trigger an "Ignoring unknown aesthetics" warning.
# `tooltip = "text"` selects that aesthetic by its documented name.
ggplotly(myplot + aes(text = SYMBOL), source = "select", tooltip = "text")