RNAseq in Bioconductor exercises (part 4)

In todays session we will work with some of the RNAseq data of adult mouse tissues from Bing Ren’s lab, Liver and Heart.

Exercises

1. Over Representaion Analysis - GO terms

Identify GO term cellular component groups enriched in genes significantly upregulated in Liver with clusterProfiler using Fishers exact test. What are the top 5 terms? Make a dotplot of these terms. Also try an enrichment map plot for the top 20 groups.

HINT: “CC” is the ontology category for cellular component GO terms

library(clusterProfiler)
library(org.Mm.eg.db)
library(enrichplot)

newResDF <- read.delim("data/Heart_minus_liver.csv",sep=",")
newResDF <- newResDF[!is.na(newResDF$padj),]

UpInLiver <- newResDF$padj < 0.05 & 
             newResDF$log2FoldChange < 0
UpInLiver_ID <- newResDF$ID[UpInLiver]

go_res <- enrichGO(UpInLiver_ID, OrgDb = org.Mm.eg.db, ont = "CC")

DT::datatable(as.data.frame(go_res)[1:5,])

dotplot(go_res, showCategory = 5)

go_res <- pairwise_termsim(go_res)
emapplot(go_res, showCategory = 20, cex_label_category=0.5)

2. Over Representaion Analysis - MSigDB

Take the same genes (significantly upregulated in Liver) and check enrichment in gene sets from MSigDB Hallmarks. Check the results with a dotplot. Rerun this but subset the universe appropriately. Compare the dotplots.

mm_h <- msigdbr(species = "Mus musculus", category = "H")[,c("gs_name","entrez_gene")]

sig_gene_enr1 <- enricher(UpInLiver_ID, TERM2GENE = mm_h)
dotplot(sig_gene_enr1)

sig_gene_enr2 <- enricher(UpInLiver_ID, TERM2GENE = mm_h, universe = as.character(newResDF$ID))
dotplot(sig_gene_enr2)

# the main groups are largely unchanged, but you can see a swap at the last group. This is because 15 genes from the Androgen gene set are not in the universe, so the background has shifted.

3. GSEA for KEGG enrichment

Run GSEA on our Heart vs Liver dataset, but specifically use the KEGG genesets. Show a dotplot of the clusterProfiler results. Show the top 20 terms. Show an emap of these terms. Lastly draw the classic GSEA plots for the top 3 terms.

HINT: The “gseKEGG” is the clusterPorfiler function for looking at KEGG terms with GSEA

geneList <- newResDF$stat
names(geneList) <-  newResDF$ID
geneList <- geneList[order(geneList, decreasing = T)]


KEGG_HeartLiver <- gseKEGG(
  geneList,
  organism = "mmu",
  keyType = "kegg", eps = 10e-20)

## Reading KEGG annotation online: "https://rest.kegg.jp/link/mmu/pathway"...

## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/mmu"...

## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).

## preparing geneSet collections...

## GSEA analysis...

## leading edge analysis...

## done...

head(KEGG_HeartLiver)

##                ID                                               Description
## mmu04820 mmu04820 Cytoskeleton in muscle cells - Mus musculus (house mouse)
## mmu05415 mmu05415      Diabetic cardiomyopathy - Mus musculus (house mouse)
## mmu00830 mmu00830           Retinol metabolism - Mus musculus (house mouse)
## mmu00140 mmu00140 Steroid hormone biosynthesis - Mus musculus (house mouse)
## mmu00190 mmu00190    Oxidative phosphorylation - Mus musculus (house mouse)
## mmu04260 mmu04260   Cardiac muscle contraction - Mus musculus (house mouse)
##          setSize enrichmentScore       NES       pvalue     p.adjust
## mmu04820     221       0.7247705  2.932080 1.000000e-19 6.940000e-18
## mmu05415     180       0.7332201  2.900254 1.000000e-19 6.940000e-18
## mmu00830      86      -0.7856821 -2.754527 1.000000e-19 6.940000e-18
## mmu00140      77      -0.7984908 -2.748114 1.000000e-19 6.940000e-18
## mmu00190     111       0.7303096  2.710804 1.000000e-19 6.940000e-18
## mmu04260      78       0.7853715  2.743127 1.973916e-19 1.141582e-17
##                qvalue rank                   leading_edge
## mmu04820 4.042105e-18 2512 tags=63%, list=16%, signal=54%
## mmu05415 4.042105e-18 2319 tags=63%, list=15%, signal=54%
## mmu00830 4.042105e-18 2721 tags=81%, list=17%, signal=68%
## mmu00140 4.042105e-18 1798 tags=71%, list=11%, signal=64%
## mmu00190 4.042105e-18 2500 tags=69%, list=16%, signal=59%
## mmu04260 6.648981e-18 1875 tags=58%, list=12%, signal=51%
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   core_enrichment
## mmu04820 22138/22003/17888/11464/17929/380698/13346/140781/14200/21924/11472/24131/18175/98660/17930/93677/13009/68794/13808/17868/17906/21956/21954/17897/241431/16651/59006/433766/109676/16773/109620/14115/668940/14114/24051/11459/107765/12826/21916/320502/16403/22352/12715/20742/21393/22437/58522/78321/14199/20391/24053/14118/15530/18073/12827/13527/68802/11932/74103/13717/17901/22379/11717/70549/12834/326618/13405/12842/12843/381485/98932/13003/12833/22004/12830/11931/118449/12832/57778/53318/16404/21955/21826/16420/13138/24052/12111/50874/71960/17898/12829/56376/13179/20648/30794/12831/233199/17896/14201/12825/16480/13007/17885/50875/17880/11735/67399/58916/20740/12835/17882/21825/12345/213019/77579/12845/18810/11928/245026/544791/21953/50876/17884/16905/21828/21827/17879/67451/22330/74122/17883/11465/68753/21925/13726/20392/11468/12828/104099/18074
## mmu05415                                                                                                                                                                       11739/18821/12865/20191/11938/12491/12895/20528/27273/21954/12862/11421/12858/14936/210789/18798/17992/12869/28080/228033/17390/11947/21809/11946/22273/68342/14584/18127/12859/67264/66576/12842/12843/22334/12857/11951/108058/66445/69875/226646/26414/11949/67680/12866/17995/23797/68198/68202/12322/78330/407785/66108/68349/18597/75406/17993/29857/66916/66046/84682/17991/67273/66377/66218/12825/13057/110323/19046/22333/68197/104130/230075/11950/18710/66152/105675/21808/54405/225887/227197/66091/407790/55951/66142/53313/72900/66043/13033/66416/21813/19045/67126/22335/71679/66414/68263/68375/67003/66945/66694/18604/11957/595136/11651/66925/21803/70316/66052/67184/66495/70456/12325/18752
## mmu00830                                                                                                                                                                                                                                                                                                                                                                                                            622127/394430/72082/241452/394432/546726/216453/337924/100041375/20148/28200/13098/100043108/11529/404195/98711/26358/13082/72094/666168/233005/100041449/71773/63857/13095/394434/56388/100727/22236/22238/433247/100040843/54150/13112/394433/112417/79235/100559/231396/107141/103142/67442/11532/17252/11761/13086/216454/13099/13097/394436/94284/27400/19683/13118/26876/11522/13119/11668/13113/243085/13096/13117/226143/13087/545288/13085/277753/13077/226105/71724
## mmu00140                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     13098/100043108/404195/15496/15494/72094/13074/380997/100041449/71773/545123/13095/394434/56388/114664/100727/22236/22238/433247/56448/13112/394433/112417/13106/100559/231396/13101/107141/223706/15490/15493/71754/56348/13099/13097/394436/94284/27400/13113/12846/243085/78925/13096/13123/208665/226143/545288/15483/15486/13077/226105/13122/76279/68444/13105
## mmu00190                                                                                                                                                                                                                                                                                                                                                                                                  12865/12862/12858/17992/12869/28080/228033/11947/11946/22273/68342/12859/67264/66576/12857/11951/66445/69875/226646/11949/67680/12856/12866/17995/68198/11958/57423/68202/78330/407785/66108/68349/70383/75406/17993/66916/66046/84682/17991/67273/66377/66218/110323/76252/68197/104130/230075/11950/66152/54405/225887/227197/66091/407790/66142/72900/66043/66416/67126/71679/66414/68375/67003/66945/66694/11957/595136/66925/13063/70316/66052/67184/66495/73834/74776/66594/11984
## mmu04260                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         22003/17888/11464/12865/20191/140781/21924/11938/98660/12288/20541/17906/21956/76757/21954/17897/12293/12862/12373/12296/15464/12858/12869/11932/22273/12859/326618/66576/12857/66445/22004/11931/12866/84682/17896/110323/65973/66152/66142/53313/12292/12295/67003/66694/11928

dotplot(KEGG_HeartLiver, showCategory = 20)

KEGG_HeartLiver_pt <- pairwise_termsim(KEGG_HeartLiver)

emapplot(KEGG_HeartLiver_pt, cex_label_category=0.5)

gseaplot(KEGG_HeartLiver_pt, geneSetID = as.data.frame(KEGG_HeartLiver_pt)$ID[1], by = "runningScore", title = as.data.frame(KEGG_HeartLiver_pt)$Description[1])

gseaplot(KEGG_HeartLiver_pt, geneSetID = as.data.frame(KEGG_HeartLiver_pt)$ID[2], by = "runningScore", title = as.data.frame(KEGG_HeartLiver_pt)$Description[2])

gseaplot(KEGG_HeartLiver_pt, geneSetID = as.data.frame(KEGG_HeartLiver_pt)$ID[3], by = "runningScore", title = as.data.frame(KEGG_HeartLiver_pt)$Description[3])

3. Additional GSEA plots for KEGG enrichment

Draw a heatmap of the genes driving enrichment in the top 3 KEGG terms. Use rlog counts, scale across row using a Z-score and include the kidney data as a reference point.

HINT - The gene IDs are in the core_enrichment column of the clusterProfiler result

library(pheatmap)
library(DESeq2)

my_gene_IDs <- unlist(strsplit(KEGG_HeartLiver$core_enrichment[1],"/"))

load("data/gC_TissueFull.RData")

dds <- DESeqDataSet(geneCounts, design = ~Tissue)

## renaming the first element in assays to 'counts'

## Warning in DESeqDataSet(geneCounts, design = ~Tissue): some variables in design
## formula are characters, converting to factors

dds <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

rlogTissue <- rlog(dds)


mat_for_heatmap <- assay(rlogTissue)[rownames(rlogTissue) %in% my_gene_IDs,]

annoDF <- as.data.frame(colData(dds))
annoDF <- annoDF[,1,drop=F]


pheatmap(mat_for_heatmap,
         scale="row", show_rownames = F,
         annotation_col = annoDF)