These exercises cover the scales, statistics and themes of ggplot2 for Plotting in R.
Exercise 1 - Themes
library(ggplot2)
# Make the black-and-white theme the default for every plot that follows.
theme_set(theme_bw())

# Scatter plot of Weight against BMI, coloured by Pet (treated as a factor),
# with one linear-model trend line per colour group and no confidence ribbon.
plot <- ggplot(
  data = patients_clean,
  mapping = aes(x = BMI, y = Weight, colour = factor(Pet))
) +
  geom_point() +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned,
  # whereas `FALSE` is a reserved word.
  stat_smooth(method = "lm", se = FALSE) +
  scale_color_viridis_d(option = "plasma")
plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Restyle the legend of the existing plot: remove its title, paint the legend
# box and its keys gray, and move it underneath the panel.
plot <- plot +
  theme(
    legend.title = element_blank(),
    legend.background = element_rect(fill = "gray"),
    legend.key = element_rect(fill = "gray"),
    legend.position = "bottom"
  )
plot
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Extend the existing `myTheme` with settings borrowed from two other themes.
myTheme <- myTheme +
  theme(
    # Taken from theme_linedraw: black facet strips with smaller white text.
    strip.background = element_rect(fill = "black"),
    strip.text = element_text(colour = "white", size = rel(0.8)),
    # Taken from theme_fivethirtyeight: gray major grid lines, no minor grid
    # lines, and borderless light-gray rectangles.
    panel.grid = element_line(colour = NULL),
    panel.grid.major = element_line(colour = "#D2D2D2"),
    panel.grid.minor = element_blank(),
    rect = element_rect(fill = "#F0F0F0", linetype = 0, colour = NA)
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
Exercise 2 - External Packages
## Warning: namespace 'DESeq2' is not available and has been replaced
## by .GlobalEnv when processing object 'deseq_pca_example'
## PC1 PC2 group condition name
## sample1 -13.24423 22.0512418 A Ctrl sample1
## sample2 -14.40362 -13.8514994 A Ctrl sample2
## sample3 -14.79663 -2.9299797 A Ctrl sample3
## sample4 -15.12221 -9.4008058 A Mut sample4
## sample5 -14.78719 -0.2914010 A Mut sample5
## sample6 -13.77777 5.6896212 A Mut sample6
## sample7 13.68499 -2.4219808 B Ctrl sample7
## sample8 14.85294 3.9328670 B Ctrl sample8
## sample9 12.21286 -3.0234083 B Ctrl sample9
## sample10 16.25675 -5.6924350 B Mut sample10
## sample11 14.85222 0.9620356 B Mut sample11
## sample12 14.27188 4.9757444 B Mut sample12
## Aesthetic mapping:
## * `colour` -> `group`
## * `x` -> `PC1`
## * `y` -> `PC2`
# Start from the existing PCA plot object and rebuild its point layer.
deseq_replot <- deseq_pca_example

# Discard the layers that came with the plot. Assigning an empty list (rather
# than NULL) keeps `layers` a list for the `+` below to append to.
deseq_replot$layers <- list()

deseq_replot <- deseq_replot +
  geom_point(aes(x = PC1, y = PC2, colour = group, shape = condition)) +
  # The points map `group` to colour, so the viridis palette must be attached
  # to the colour scale; a fill scale has no mapped aesthetic to act on here.
  scale_colour_viridis_d() +
  ggtitle("PCA plot of DGE in A and B groups")
Exercise 3 - Interactive Plots
## Warning in geom_point(aes(label = name)): Ignoring unknown aesthetics: label
Exercise 4 - Working Example
In this final exercise we will run through a common example: making a volcano plot.
A volcano plot consists of: (1) log2FC on the x-axis, and (2) -log10(pval) on the y-axis.
It is also good to add some additional customization:
- Highlight significant genes, i.e. pval < 0.05
- Highlight genes above a certain log2FC threshold, i.e. > 1
- Add lines to denote these thresholds
- Label some genes of interest directly: “Gm8714”, “Pas1b”, “Rab39”, “Tmc2”, “Ttpal”, “Ctdsp1”
- Use a simple theme to give the plot a clean look
- Export the plot as a PDF
library(plotly)
# Genes of interest that receive a text label on the plot.
goi <- c("Gm8714", "Pas1b", "Rab39", "Tmc2", "Ttpal", "Ctdsp1")

# Volcano plot: log2 fold change on x, -log10(p-value) on y. Each point is
# classed as "NS" (pvalue > 0.05), "SigUp" (log2FoldChange > 1), "SigDown"
# (log2FoldChange < -1) or "Sig" (everything else) and coloured accordingly.
myplot <- ggplot(
  my_res,
  aes(
    x = log2FoldChange,
    y = -log10(pvalue),
    color = ifelse(
      pvalue > 0.05, "NS",
      ifelse(
        log2FoldChange > 1, "SigUp",
        ifelse(log2FoldChange < -1, "SigDown", "Sig")
      )
    )
  )
) +
  geom_point(size = 0.5, alpha = 0.5) +
  scale_color_manual(
    name = "Significance",
    breaks = c("NS", "Sig", "SigUp", "SigDown"),
    values = c("black", "green", "blue", "red")
  ) +
  theme_bw() +
  ggtitle("Volcano Plot showing significance of \ngene expression changes following DESeq analysis") +
  # Dotted guide lines at the p-value and fold-change thresholds.
  geom_hline(yintercept = -log10(0.05), lty = 3, color = "gray") +
  geom_vline(xintercept = c(-1, 1), lty = 3, color = "gray") +
  # Only genes of interest get a visible label; every other row gets "".
  geom_text(aes(label = ifelse(SYMBOL %in% goi, as.character(SYMBOL), ""))) +
  xlim(-4.5, 4.5)
myplot
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).
library(ggrepel)
# Genes of interest that receive a repelled text label on the plot.
goi <- c("Gm8714", "Pas1b", "Rab39", "Tmc2", "Ttpal", "Ctdsp1")

# Same volcano plot as before, but the labels are drawn with
# geom_text_repel() instead of geom_text(). The plot is printed directly
# rather than stored.
ggplot(
  my_res,
  aes(
    x = log2FoldChange,
    y = -log10(pvalue),
    color = ifelse(
      pvalue > 0.05, "NS",
      ifelse(
        log2FoldChange > 1, "SigUp",
        ifelse(log2FoldChange < -1, "SigDown", "Sig")
      )
    )
  )
) +
  geom_point(size = 0.5, alpha = 0.5) +
  scale_color_manual(
    name = "Significance",
    breaks = c("NS", "Sig", "SigUp", "SigDown"),
    values = c("black", "green", "blue", "red")
  ) +
  theme_bw() +
  ggtitle("Volcano Plot showing significance of \ngene expression changes following DESeq analysis") +
  # Dotted guide lines at the p-value and fold-change thresholds.
  geom_hline(yintercept = -log10(0.05), lty = 3, color = "gray") +
  geom_vline(xintercept = c(-1, 1), lty = 3, color = "gray") +
  # Only genes of interest get a visible label; the fixed seed makes the
  # label placement reproducible between runs.
  geom_text_repel(
    aes(label = ifelse(SYMBOL %in% goi, as.character(SYMBOL), "")),
    min.segment.length = 0,
    seed = 42,
    box.padding = 0.2,
    max.time = 3,
    max.overlaps = Inf
  ) +
  xlim(-4.5, 4.5)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
# Volcano plot without text labels or x limits, using a smaller base font,
# built as the static input for the interactive version below.
myplot <- ggplot(
  my_res,
  aes(
    x = log2FoldChange,
    y = -log10(pvalue),
    color = ifelse(
      pvalue > 0.05, "NS",
      ifelse(
        log2FoldChange > 1, "SigUp",
        ifelse(log2FoldChange < -1, "SigDown", "Sig")
      )
    )
  )
) +
  geom_point(size = 0.5, alpha = 0.5) +
  scale_color_manual(
    name = "Significance",
    breaks = c("NS", "Sig", "SigUp", "SigDown"),
    values = c("black", "green", "blue", "red")
  ) +
  theme_bw() +
  ggtitle("Volcano Plot showing significance of \ngene expression changes following DESeq analysis") +
  # Dotted guide lines at the p-value and fold-change thresholds.
  geom_hline(yintercept = -log10(0.05), lty = 3, color = "gray") +
  geom_vline(xintercept = c(-1, 1), lty = 3, color = "gray") +
  theme(text = element_text(size = 8))

# Convert to an interactive plotly widget whose tooltip is restricted to
# SYMBOL. `text` is not a ggplot2 aesthetic, so ggplot2 warns that it is
# ignored when this layer is created.
# NOTE(review): this adds a second point layer with default size and alpha,
# drawn on top of the size = 0.5 / alpha = 0.5 layer above; confirm intended.
ggplotly(
  myplot + geom_point(aes(text = SYMBOL)),
  source = "select",
  tooltip = "SYMBOL"
)
## Warning in geom_point(aes(text = SYMBOL)): Ignoring unknown aesthetics: text