In todays session we will work with some of the RNAseq data of adult mouse tissues from Bing Ren’s lab, Liver and Heart.
More information on liver data can be found here
More information on heart data can be found here
Differential expression results for Heart minus Liver can be found in the data directory:
DGE - data/Heart_minus_liver.csv
Counts - data/gC_TissueFull.RData
Identify GO term cellular component groups enriched in genes significantly upregulated in Liver with clusterProfiler using Fishers exact test. What are the top 5 terms? Make a dotplot of these terms. Also try an enrichment map plot for the top 20 groups.
HINT: “CC” is the ontology category for cellular component GO terms
library(clusterProfiler)
library(org.Mm.eg.db)
library(enrichplot)
newResDF <- read.delim("data/Heart_minus_liver.csv",sep=",")
newResDF <- newResDF[!is.na(newResDF$padj),]
UpInLiver <- newResDF$padj < 0.05 &
newResDF$log2FoldChange < 0
UpInLiver_ID <- newResDF$ID[UpInLiver]
go_res <- enrichGO(UpInLiver_ID, OrgDb = org.Mm.eg.db, ont = "CC")
DT::datatable(as.data.frame(go_res)[1:5,])
Take the same genes (significantly upregulated in Liver) and check enrichment in gene sets from MSigDB Hallmarks. Check the results with a dotplot. Rerun this but subset the universe appropriately. Compare the dotplots.
## The 'msigdbdf' package must be installed to access the full dataset.
## Please run the following command to install the 'msigdbdf' package:
## install.packages('msigdbdf', repos = 'https://igordot.r-universe.dev')
sig_gene_enr2 <- enricher(UpInLiver_ID, TERM2GENE = mm_h, universe = as.character(newResDF$ID))
dotplot(sig_gene_enr2)
Run GSEA on our Heart vs Liver dataset, but specifically use the KEGG genesets. Show a dotplot of the clusterProfiler results. Show the top 20 terms. Show an emap of these terms. Lastly draw the classic GSEA plots for the top 3 terms.
HINT: The “gseKEGG” is the clusterPorfiler function for looking at KEGG terms with GSEA
geneList <- newResDF$stat
names(geneList) <- newResDF$ID
geneList <- geneList[order(geneList, decreasing = T)]
KEGG_HeartLiver <- gseKEGG(
geneList,
organism = "mmu",
keyType = "kegg", eps = 10e-20)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/mmu/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/mmu"...
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
## ID Description
## mmu04820 mmu04820 Cytoskeleton in muscle cells - Mus musculus (house mouse)
## mmu05415 mmu05415 Diabetic cardiomyopathy - Mus musculus (house mouse)
## mmu00830 mmu00830 Retinol metabolism - Mus musculus (house mouse)
## mmu00140 mmu00140 Steroid hormone biosynthesis - Mus musculus (house mouse)
## mmu04260 mmu04260 Cardiac muscle contraction - Mus musculus (house mouse)
## mmu00190 mmu00190 Oxidative phosphorylation - Mus musculus (house mouse)
## setSize enrichmentScore NES pvalue p.adjust
## mmu04820 221 0.7247705 2.932080 1.000000e-19 6.900000e-18
## mmu05415 180 0.7332201 2.900254 1.000000e-19 6.900000e-18
## mmu00830 86 -0.7856821 -2.754527 1.000000e-19 6.900000e-18
## mmu00140 77 -0.7984908 -2.748114 1.000000e-19 6.900000e-18
## mmu04260 78 0.7853715 2.743127 1.000000e-19 6.900000e-18
## mmu00190 111 0.7303096 2.710804 1.464658e-19 8.421785e-18
## qvalue rank leading_edge
## mmu04820 4.000000e-18 2512 tags=63%, list=16%, signal=54%
## mmu05415 4.000000e-18 2319 tags=63%, list=15%, signal=54%
## mmu00830 4.000000e-18 2721 tags=81%, list=17%, signal=68%
## mmu00140 4.000000e-18 1798 tags=71%, list=11%, signal=64%
## mmu04260 4.000000e-18 1875 tags=58%, list=12%, signal=51%
## mmu00190 4.882194e-18 2500 tags=69%, list=16%, signal=59%
## core_enrichment
## mmu04820 22138/22003/17888/11464/17929/380698/13346/140781/14200/21924/11472/24131/18175/98660/17930/93677/13009/68794/13808/17868/17906/21956/21954/17897/241431/16651/59006/433766/109676/16773/109620/14115/668940/14114/24051/11459/107765/12826/21916/320502/16403/22352/12715/20742/21393/22437/58522/78321/14199/20391/24053/14118/15530/18073/12827/13527/68802/11932/74103/13717/17901/22379/11717/70549/12834/326618/13405/12842/12843/381485/98932/13003/12833/22004/12830/11931/118449/12832/57778/53318/16404/21955/21826/16420/13138/24052/12111/50874/71960/17898/12829/56376/13179/20648/30794/12831/233199/17896/14201/12825/16480/13007/17885/50875/17880/11735/67399/58916/20740/12835/17882/21825/12345/213019/77579/12845/18810/11928/245026/544791/21953/50876/17884/16905/21828/21827/17879/67451/22330/74122/17883/11465/68753/21925/13726/20392/11468/12828/104099/18074
## mmu05415 11739/18821/12865/20191/11938/12491/12895/20528/27273/21954/12862/11421/12858/14936/210789/18798/17992/12869/28080/228033/17390/11947/21809/11946/22273/68342/14584/18127/12859/67264/66576/12842/12843/22334/12857/11951/108058/66445/69875/226646/26414/11949/67680/12866/17995/23797/68198/68202/12322/78330/407785/66108/68349/18597/75406/17993/29857/66916/66046/84682/17991/67273/66377/66218/12825/13057/110323/19046/22333/68197/104130/230075/11950/18710/66152/105675/21808/54405/225887/227197/66091/407790/55951/66142/53313/72900/66043/13033/66416/21813/19045/67126/22335/71679/66414/68263/68375/67003/66945/66694/18604/11957/595136/11651/66925/21803/70316/66052/67184/66495/70456/12325/18752
## mmu00830 622127/394430/72082/241452/394432/546726/216453/337924/100041375/20148/28200/13098/100043108/11529/404195/98711/26358/13082/72094/666168/233005/100041449/71773/63857/13095/394434/56388/100727/22236/22238/433247/100040843/54150/13112/394433/112417/79235/100559/231396/107141/103142/67442/11532/17252/11761/13086/216454/13099/13097/394436/94284/27400/19683/13118/26876/11522/13119/11668/13113/243085/13096/13117/226143/13087/545288/13085/277753/13077/226105/71724
## mmu00140 13098/100043108/404195/15496/15494/72094/13074/380997/100041449/71773/545123/13095/394434/56388/114664/100727/22236/22238/433247/56448/13112/394433/112417/13106/100559/231396/13101/107141/223706/15490/15493/71754/56348/13099/13097/394436/94284/27400/13113/12846/243085/78925/13096/13123/208665/226143/545288/15483/15486/13077/226105/13122/76279/68444/13105
## mmu04260 22003/17888/11464/12865/20191/140781/21924/11938/98660/12288/20541/17906/21956/76757/21954/17897/12293/12862/12373/12296/15464/12858/12869/11932/22273/12859/326618/66576/12857/66445/22004/11931/12866/84682/17896/110323/65973/66152/66142/53313/12292/12295/67003/66694/11928
## mmu00190 12865/12862/12858/17992/12869/28080/228033/11947/11946/22273/68342/12859/67264/66576/12857/11951/66445/69875/226646/11949/67680/12856/12866/17995/68198/11958/57423/68202/78330/407785/66108/68349/70383/75406/17993/66916/66046/84682/17991/67273/66377/66218/110323/76252/68197/104130/230075/11950/66152/54405/225887/227197/66091/407790/66142/72900/66043/66416/67126/71679/66414/68375/67003/66945/66694/11957/595136/66925/13063/70316/66052/67184/66495/73834/74776/66594/11984
KEGG_HeartLiver_pt <- pairwise_termsim(KEGG_HeartLiver)
emapplot(KEGG_HeartLiver_pt, cex_label_category=0.5)
gseaplot(KEGG_HeartLiver_pt, geneSetID = as.data.frame(KEGG_HeartLiver_pt)$ID[1], by = "runningScore", title = as.data.frame(KEGG_HeartLiver_pt)$Description[1])
gseaplot(KEGG_HeartLiver_pt, geneSetID = as.data.frame(KEGG_HeartLiver_pt)$ID[2], by = "runningScore", title = as.data.frame(KEGG_HeartLiver_pt)$Description[2])
gseaplot(KEGG_HeartLiver_pt, geneSetID = as.data.frame(KEGG_HeartLiver_pt)$ID[3], by = "runningScore", title = as.data.frame(KEGG_HeartLiver_pt)$Description[3])
Draw a heatmap of the genes driving enrichment in the top 3 KEGG terms. Use rlog counts, scale across row using a Z-score and include the kidney data as a reference point.
HINT - The gene IDs are in the core_enrichment column of the clusterProfiler result
library(pheatmap)
library(DESeq2)
my_gene_IDs <- unlist(strsplit(KEGG_HeartLiver$core_enrichment[1],"/"))
load("data/gC_TissueFull.RData")
dds <- DESeqDataSet(geneCounts, design = ~Tissue)
## renaming the first element in assays to 'counts'
## Warning in DESeqDataSet(geneCounts, design = ~Tissue): some variables in design
## formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
rlogTissue <- rlog(dds)
mat_for_heatmap <- assay(rlogTissue)[rownames(rlogTissue) %in% my_gene_IDs,]
annoDF <- as.data.frame(colData(dds))
annoDF <- annoDF[,1,drop=F]
pheatmap(mat_for_heatmap,
scale="row", show_rownames = F,
annotation_col = annoDF)