In these exercises we will review some of the functionality for genomic and functional annotation of peaks.
We will be using output from the differential peak analysis of ATACseq Week 6 vs Week 0. The starting table can be found in the ‘data’ folder of the course.
Â
Read in the differential output table ‘ATAC_W6MinusD0.xlsx’, make a GRanges and do the following:
library(ChIPseeker)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
W6MinusD0 <- rio::import("data/ATAC_W6MinusD0.xlsx")
W6MinusD0_gr <- GRanges(seqnames = W6MinusD0$seqnames,
IRanges(start = W6MinusD0$start, end = W6MinusD0$end),
log2FoldChange = W6MinusD0$log2FoldChange,
padj = W6MinusD0$padj)
peakAnno_all <- annotatePeak(W6MinusD0_gr, tssRegion=c(-1000, 1000),
TxDb=TxDb.Mmusculus.UCSC.mm10.knownGene,
annoDb="org.Mm.eg.db")
W6MinusD0_gr_main <- W6MinusD0_gr[as.vector(seqnames(W6MinusD0_gr)) %in% paste0("chr", c(1:19, "X", "Y", "M"))]
W6MinusD0_gr_up <- W6MinusD0_gr_main[W6MinusD0_gr_main$log2FoldChange > 1 & W6MinusD0_gr_main$padj < 0.05, ]
peakAnno_up <- annotatePeak(W6MinusD0_gr_up, tssRegion=c(-1000, 1000),
TxDb=TxDb.Mmusculus.UCSC.mm10.knownGene,
annoDb="org.Mm.eg.db")
plotAnnoBar(list(all_peaks = peakAnno_all, SigUp_peaks = peakAnno_up),
title = "ATACseq peak distribution")
Perform functional enrichment using the genes that have significant increases in ATACseq signal within peaks located in their promoters.
library(clusterProfiler)
library(org.Mm.eg.db)
library(enrichplot)
library(ggplot2)
# get universe of genes
allGeneGR <- genes(TxDb.Mmusculus.UCSC.mm10.knownGene)
allGeneGR[1:2,]
allGeneIDs <- allGeneGR$gene_id
up_atac_GR <- as.GRanges(peakAnno_up)
up_atac_GR_prom <- up_atac_GR[grepl("Promoter", up_atac_GR$annotation)]
up_promGenes_uniq <- unique(up_atac_GR_prom$geneId)
GO_result_prom <- enrichGO(gene = up_promGenes_uniq,
universe = allGeneIDs,
OrgDb = org.Mm.eg.db,
ont = "BP")
library(msigdbr)
msig_t2g_reac <- msigdbr(species = "Mus musculus",
category = "C2",
subcategory = "REACTOME")
msig_t2g_reac <- msig_t2g_reac[ , colnames(msig_t2g_reac) %in% c("gs_name", "entrez_gene")]
reactome_result_prom <- enricher(gene = up_promGenes_uniq,
universe = allGeneIDs,
TERM2GENE = msig_t2g_reac)
GO BP output
## ID Description GeneRatio BgRatio
## 1 GO:0008154 actin polymerization or depolymerization 19/664 199/22195
## 2 GO:0007015 actin filament organization 32/664 474/22195
## 3 GO:0032535 regulation of cellular component size 29/664 413/22195
## RichFactor FoldEnrichment zScore pvalue p.adjust qvalue
## 1 0.09547739 3.191447 5.453249 9.087002e-06 0.03324488 0.03085578
## 2 0.06751055 2.256621 4.856501 1.802453e-05 0.03324488 0.03085578
## 3 0.07021792 2.347118 4.852904 2.156424e-05 0.03324488 0.03085578
GO_result_plot <- pairwise_termsim(GO_result_prom)
emapplot(GO_result_plot, showCategory = 20, cex.params = list(category_label = 0.6))
Reactome output:
## ID Description GeneRatio BgRatio RichFactor FoldEnrichment zScore pvalue
## NA <NA> <NA> <NA> <NA> NA NA NA NA
## NA.1 <NA> <NA> <NA> <NA> NA NA NA NA
## NA.2 <NA> <NA> <NA> <NA> NA NA NA NA
## p.adjust qvalue
## NA NA NA
## NA.1 NA NA
## NA.2 NA NA
library(rGREAT)
great_gobp <- great(W6MinusD0_gr_up,
gene_sets = "GO:BP",
tss_source = "TxDb.Mmusculus.UCSC.mm10.knownGene")
great_gobp_tab <- getEnrichmentTables(great_gobp)
great_gobp_tab10 <- great_gobp_tab[1:10, ]
great_gobp_tab10$description <- ordered(great_gobp_tab10$description, levels = rev(great_gobp_tab10$description))
reac_gene_sets <- split(msig_t2g_reac$entrez_gene, msig_t2g_reac$gs_name)
reac_gene_sets <- lapply(reac_gene_sets, as.character)
great_reac <- great(W6MinusD0_gr_up, gene_sets = reac_gene_sets, tss_source = "TxDb.Mmusculus.UCSC.mm10.knownGene", mode = "oneClosest")
great_reac_tab <- getEnrichmentTable(great_reac)
great_reac_tab10 <- great_reac_tab[1:10, ]
great_reac_tab10 <- great_reac_tab10[order(great_reac_tab10$fold_enrichment, decreasing = TRUE), ]
great_reac_tab10$name_for_plot <- gsub("REACTOME_" ,"", great_reac_tab10$id)
great_reac_tab10$name_for_plot <- substr(great_reac_tab10$name_for_plot, 1, 50)
great_reac_tab10$name_for_plot <- ordered(great_reac_tab10$name_for_plot, levels = rev(great_reac_tab10$name_for_plot))
GO BP output:
## id
## 1 GO:0043378
## 2 GO:0014883
## 3 GO:0030854
## description
## 1 positive regulation of CD8-positive, alpha-beta T cell differentiation
## 2 transition between fast and slow fiber
## 3 positive regulation of granulocyte differentiation
## genome_fraction observed_region_hits fold_enrichment p_value p_adjust
## 1 0.0008624976 48 4.621135 0 0
## 2 0.0009041473 50 4.591939 0 0
## 3 0.0011238427 59 4.359250 0 0
## mean_tss_dist observed_gene_hits gene_set_size fold_enrichment_hyper
## 1 245757 4 6 1.8848901
## 2 149844 5 10 1.4136676
## 3 241655 4 12 0.9424451
## p_value_hyper p_adjust_hyper
## 1 0.1214419 0.2303484
## 2 0.2566108 0.3881452
## 3 0.6633171 0.7644486
ggplot(great_gobp_tab10, aes(x = description, y = fold_enrichment, fill = observed_region_hits)) + geom_col() + coord_flip()
Reactome output:
## id genome_fraction
## 1 REACTOME_RUNX3_REGULATES_P14_ARF 0.0006337473
## 2 REACTOME_TRANSCRIPTIONAL_REGULATION_BY_RUNX3 0.0047532724
## 3 REACTOME_RND3_GTPASE_CYCLE 0.0025275757
## observed_region_hits fold_enrichment p_value p_adjust mean_tss_dist
## 1 42 5.502987 0 0 109650
## 2 178 3.109515 0 0 143895
## 3 87 2.858120 0 0 129096
## observed_gene_hits gene_set_size fold_enrichment_hyper p_value_hyper
## 1 9 10 3.669711 2.488930e-05
## 2 45 96 1.911308 1.552911e-06
## 3 20 42 1.941646 9.581214e-04
## p_adjust_hyper name_for_plot
## 1 0.0010169056 RUNX3_REGULATES_P14_ARF
## 2 0.0001057458 TRANSCRIPTIONAL_REGULATION_BY_RUNX3
## 3 0.0126862373 RND3_GTPASE_CYCLE
ggplot(great_reac_tab10, aes(x = name_for_plot, y = fold_enrichment, fill = observed_region_hits)) + geom_col() + coord_flip()