params <-
list(isSlides = "no")
## ----include=FALSE------------------------------------------------------------
suppressPackageStartupMessages(require(knitr))
knitr::opts_chunk$set(echo = TRUE, tidy = T) # delete cache before any merging
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides != "yes"){
cat("# CUT&RUN/ATAC (part 5) - Peak annotation and functional enrichment
---
"
)
}
## ----setwd_introtoR,eval=F----------------------------------------------------
# setwd("~/Downloads/ATAC.Cut-Run.ChIP-master/r_course")
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Peak Annotation
---
"
)
}else{
cat("# Peak Annotation
---
"
)
}
## ----echo=F, eval=T, warning=FALSE,tidy=T,message=FALSE-----------------------
library(GenomeInfoDb)
## ----echo=T, eval=T, echo=T, warning=FALSE,tidy=T,message=FALSE---------------
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(org.Mm.eg.db)
library(ChIPseeker)
library(rtracklayer)
cnrPeaks_GR <- rtracklayer::import("data/peaks/SOX9CNR_W6_rep1_macs_peaks.narrowPeak")
## ----eval=T,echo=T, message=FALSE,messages=FALSE, eval=T, echo=T, warning=FALSE----
peakAnno <- annotatePeak(cnrPeaks_GR, tssRegion=c(-1000, 1000),
TxDb=TxDb.Mmusculus.UCSC.mm10.knownGene,
annoDb="org.Mm.eg.db")
class(peakAnno)
## ----eval=T,echo=T, message=F,messages=F, eval=T, echo=T, warning=FALSE,tidy=T----
peakAnno
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T----------------------
annotatedPeaksGR <- as.GRanges(peakAnno)
annotatedPeaksDF <- as.data.frame(peakAnno)
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T----------------------
annotatedPeaksGR[1:2,]
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T----------------------
annotatedPeaksGR$annotation[1:5]
annotatedPeaksGR$SYMBOL[1:5]
## ----eval=T, echo=T, fig.height=5, fig.width=15, warning=FALSE, tidy=T--------
plotAnnoBar(peakAnno)
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,fig.height=5, fig.width=15,tidy=T----
plotDistToTSS(peakAnno)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Gene Set Enrichment
---
"
)
}else{
cat("# Gene Set Enrichment
---
"
)
}
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE-----------------------------
length(unique(annotatedPeaksGR$geneId))
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE-----------------------------
W6MinusD0 <- rio::import("data/W6MinusD0.xlsx")
W6MinusD0[1:5, ]
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE-----------------------------
W6MinusD0_gr <- GRanges(seqnames = W6MinusD0$seqnames,
IRanges(start = W6MinusD0$start, end = W6MinusD0$end),
log2FoldChange = W6MinusD0$log2FoldChange,
padj = W6MinusD0$padj)
W6MinusD0_gr[1:3, ]
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE-----------------------------
W6MinusD0_gr_main <- W6MinusD0_gr[as.vector(seqnames(W6MinusD0_gr)) %in% paste0("chr", c(1:19, "X", "Y", "M"))]
W6MinusD0_gr_up <- W6MinusD0_gr_main[W6MinusD0_gr_main$log2FoldChange > 1 & W6MinusD0_gr_main$padj < 0.05]
W6MinusD0_gr_up
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Functional Enrichment with nearest gene
---
"
)
}else{
cat("# Functional Enrichment with nearest gene
---
"
)
}
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE, message = F, results='hide'----
peakAnno_up <- annotatePeak(W6MinusD0_gr_up,
tssRegion=c(-1000, 1000),
TxDb=TxDb.Mmusculus.UCSC.mm10.knownGene,
annoDb="org.Mm.eg.db")
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE, message = F----------------
peakAnno_up
## -----------------------------------------------------------------------------
annotatedPeaksGR_up <- as.GRanges(peakAnno_up)
annotatedPeaksGR_up[1:3]
## -----------------------------------------------------------------------------
annotatedPeaksGR_up_prom <- annotatedPeaksGR_up[grepl("Promoter", annotatedPeaksGR_up$annotation)]
up_promGenes_uniq <- unique(annotatedPeaksGR_up_prom$geneId)
length(up_promGenes_uniq)
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T, message = F---------
allGeneGR <- genes(TxDb.Mmusculus.UCSC.mm10.knownGene)
allGeneGR[1:2,]
allGeneIDs <- allGeneGR$gene_id
## ----message = F, warning=F, results = 'hide'---------------------------------
library(clusterProfiler)
library(org.Mm.eg.db)
GO_result_prom <- enrichGO(gene = up_promGenes_uniq,
universe = allGeneIDs,
OrgDb = org.Mm.eg.db,
ont = "BP")
## ----message = F, warning=F---------------------------------------------------
GO_result_prom
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
up_genes_uniq <- unique(annotatedPeaksGR_up$geneId)
length(up_genes_uniq)
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
GO_result <- enrichGO(gene = up_genes_uniq,
universe = allGeneIDs,
OrgDb = org.Mm.eg.db,
ont = "BP")
GO_result
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T----------------------
GO_result_df <- data.frame(GO_result, row.names = NULL)
GO_result_df[1:3, ]
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T, fig.height=4, fig.width=9----
library(enrichplot)
GO_result_plot <- pairwise_termsim(GO_result)
emapplot(GO_result_plot, showCategory = 20, cex.params = list(category_label = 0.6))
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T----------------------
library(msigdbr)
msigdbr_collections()
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T----------------------
library(msigdbr)
msig_t2g_reac <- msigdbr(species = "Mus musculus",
category = "C2",
subcategory = "REACTOME")
msig_t2g_reac <- msig_t2g_reac[ , colnames(msig_t2g_reac) %in% c("gs_name", "entrez_gene")]
msig_t2g_reac[1:3, ]
## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T----------------------
reactome <- enricher(gene = up_genes_uniq,
universe = allGeneIDs,
TERM2GENE = msig_t2g_reac)
reactome_df <- data.frame(reactome, row.names = NULL)
# clean up table to print on slide
reactome_df <- reactome_df %>% dplyr::select(-Description, -geneID) %>% dplyr::mutate(ID = substr(ID, 1, 50))
reactome_df[1:5, ]
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Functional Enrichment of distal regions with GREAT
---
"
)
}else{
cat("# Functional Enrichment of distal regions with GREAT
---
"
)
}
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
library(rGREAT)
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
great_gobp <- great(W6MinusD0_gr_up, gene_sets = "GO:BP", tss_source = "TxDb.Mmusculus.UCSC.mm10.knownGene")
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
great_gobp
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
great_gobp_tab <- getEnrichmentTables(great_gobp)
great_gobp_tab[1:5, ]
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
# convert to a list of gene sets
reac_gene_sets <- split(msig_t2g_reac$entrez_gene, msig_t2g_reac$gs_name)
reac_gene_sets <- lapply(reac_gene_sets, as.character)
reac_gene_sets[1:2]
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
great_reac <- great(W6MinusD0_gr_up, gene_sets = reac_gene_sets, tss_source = "TxDb.Mmusculus.UCSC.mm10.knownGene")
great_reac_tab <- getEnrichmentTable(great_reac)
great_reac_tab[1:2, ]
## ----eval=T,echo=T, message=F, warning=FALSE,tidy=T---------------------------
great_genes <- getRegionGeneAssociations(great_reac)
great_genes[1:5]