params <-
list(isSlides = "no")
## ----include=FALSE------------------------------------------------------------
suppressPackageStartupMessages(require(knitr))
knitr::opts_chunk$set(echo = TRUE, tidy = T)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides != "yes"){
cat("# ATACseq (part 2)
---
"
)
}
## ----setup, include=FALSE-----------------------------------------------------
library(soGGi)
library(ChIPQC)
library(GenomicAlignments)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(DESeq2)
library(BSgenome.Mmusculus.UCSC.mm10)
library(tracktables)
library(clusterProfiler)
library(org.Mm.eg.db)
library(ChIPseeker)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Evaluating TSS signal
---
"
)
}else{
cat("# Evaluating TSS signal
---
"
)
}
## ----processData_aligna, echo=TRUE,eval=FALSE,cache=FALSE---------------------
## BiocManager::install("soGGi")
## library(soGGi)
## ----processData_txdb, echo=TRUE,eval=TRUE,cache=FALSE------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
TxDb.Hsapiens.UCSC.hg19.knownGene
## ----processData_genes, echo=TRUE,eval=TRUE,cache=FALSE-----------------------
genesLocations <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
genesLocations
## ----processData_resize, echo=TRUE,eval=TRUE,cache=FALSE----------------------
tssLocations <- resize(genesLocations,fix="start",width = 1)
tssLocations
## ----processData_subset, echo=TRUE,eval=F-------------------------------------
## mainChromosomes <- paste0("chr",c(1:21,"X","Y","M"))
##
## myindex<-(match(seqnames(tssLocations), mainChromosomes))
##
##
## tssLocations <- tssLocations[as.numeric(myindex)]
##
## seqlevels(tssLocations) <- mainChromosomes
##
## ----processData_subset_fix, echo=F,eval=F------------------------------------
## mainChromosomes <- paste0("chr",c(1:21,"X","Y","M"))
##
## myindex<-(match(seqnames(tssLocations), mainChromosomes))
##
##
## tssLocations <- tssLocations[!is.na(myindex)]
##
## seqlevels(tssLocations) <- mainChromosomes
## save(tssLocations, file="data/tss_pres2.Rmd")
##
## ---- echo=F, eval=T----------------------------------------------------------
load("data/tss_pres2.Rmd")
## ----processData_soggi, echo=TRUE,eval=FALSE,cache=FALSE----------------------
## library(soGGi)
## sortedBAM <- "~/Downloads/ATAC_Workshop/Sorted_ATAC_50K_2.bam"
##
## library(Rsamtools)
## # Nucleosome free
## allSignal <- regionPlot(bamFile = sortedBAM,
## testRanges = tssLocations)
## ----processData_soggia1, echo=F,eval=T---------------------------------------
load("data/nucFree_TSS.Rdata")
## ----processData_soggia, echo=TRUE,eval=FALSE,cache=FALSE---------------------
## nucFree <- regionPlot(bamFile = sortedBAM,
## testRanges = tssLocations,
## style = "point",
## format="bam",
## paired=TRUE,
## minFragmentLength = 0,
## maxFragmentLength = 100,
## forceFragment = 50)
## class(nucFree)
## ----processData_plot,fig.height=3.5,fig.width=7, echo=TRUE,eval=T,cache=TRUE,message=FALSE,warning=FALSE----
plotRegion(nucFree)
## ----processData_soggi4, echo=F,eval=FALSE,cache=FALSE,message=FALSE,warning=FALSE----
## monoNuc <- regionPlot(bamFile = sortedBAM,
## testRanges = tssLocations,
## style = "point",
## format="bam",
## paired=TRUE,
## minFragmentLength = 180,maxFragmentLength = 240,forceFragment = 80)
## save(monoNuc,file = "data/monoNuc_TSS.RData")
## ----processData_soggi2.5, echo=F,eval=T,cache=FALSE,message=FALSE,warning=FALSE----
load(file = "data/monoNuc_TSS.RData")
## ----processData_soggi3, echo=TRUE,eval=FALSE,cache=FALSE,message=FALSE,warning=FALSE----
## monoNuc <- regionPlot(bamFile = sortedBAM,
## testRanges = tssLocations,
## style = "point",
## format="bam",
## paired=TRUE,
## minFragmentLength = 180,maxFragmentLength = 240,forceFragment = 80)
##
## ----processData_plot3, fig.height=3.5,fig.width=7, echo=TRUE,eval=T,cache=FALSE, message=FALSE, warning=FALSE----
plotRegion(monoNuc)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Peak calling
---
"
)
}else{
cat("# Peak calling
---
"
)
}
## ---- eval=F------------------------------------------------------------------
## BiocManager::install("Herper")
## library(Herper)
##
## ---- echo=T, eval=F----------------------------------------------------------
## salmon_paths <- install_CondaTools(tools="macs2", env="ATACseq_analysis")
## salmon_paths
## ---- eval=F, echo=F----------------------------------------------------------
## tempdir2 <- function() {
## tempDir <- tempdir()
## if(dir.exists(tempDir)){
## tempDir <- file.path(tempDir,"rr")
## }
## tempDir <- gsub("\\", "/", tempDir, fixed = TRUE)
## tempDir
## }
##
## myMiniconda <- file.path(tempdir2(), "Test")
## install_CondaTools(tools="macs2", env="ATACseq_analysis", pathToMiniConda = myMiniconda)
##
## ----echo=TRUE,eval=F, warning=F----------------------------------------------
##
## with_CondaEnv("ATACseq_analysis",
## system2(command="macs2",args =c("callpeak",
## "-t", "singleEnd.bam",
## "--nomodel",
## "--shift","-100",
## "--extsize", "200",
## "--format", "BAM",
## "-g", "hs")),
## stdout = TRUE))
##
## ----echo=TRUE,eval=F, warning=F----------------------------------------------
##
## with_CondaEnv("ATACseq_analysis",
## system2(command="macs2",args =c("callpeak",
## "-t", "singleEnd.bam",
## "--nomodel",
## "--shift","37",
## "--extsize", "73",
## "--format", "BAM",
## "-g", "hs")),
## stdout = TRUE)
##
## ----echo=TRUE,eval=F, warning=F----------------------------------------------
##
## with_CondaEnv("ATACseq_analysis",
## system2(command="macs2",args =c("callpeak",
## "-t", "pairedEnd.bam",
## "--format", "BAMPE",
## "--outdir", "/Documents/ATAC_MACS2_calls/",
## "--name","pairedEndPeakName",
## "-g", "hs")),
## stdout = TRUE)
##
## ----echo=TRUE,eval=F, warning=F----------------------------------------------
##
## with_CondaEnv("ATACseq_analysis",
## system2(command="macs2",args =c("callpeak",
## "-t", "~/Downloads/Sorted_ATAC_50K_2_openRegions.bam",
## "--outdir", "ATAC_Data/ATAC_Peaks/ATAC_50K_2",
## "--name","Sorted_ATAC_50K_2_Small_Paired_peaks.narrowPeak",
## "-f", "BAMPE",
## "-g", "hs")),
## stdout = TRUE)
##
## ----processData_callQC,messages=FALSE,warning=FALSE, echo=TRUE,eval=FALSE,cache=TRUE,dependson="readinPeakCalling"----
## library(ChIPQC)
## library(rtracklayer)
## library(DT)
## library(dplyr)
## library(tidyr)
##
## blkList <- import.bed("~/Downloads/ENCFF001TDO.bed.gz")
## openRegionPeaks <- "~/Downloads/ATAC_Workshop/ATAC_Data/ATAC_Peaks/Sorted_ATAC_50K_2_Small_Paired_peaks.narrowPeak"
##
## qcRes <- ChIPQCsample("~/Downloads/ATAC_Workshop/ATAC_Data/ATAC_BAM/Sorted_ATAC_50K_2_openRegions.bam",
## peaks = openRegionPeaks,
## annotation ="hg19",
## chromosomes = "chr20",
## blacklist = blkList,
## verboseT = FALSE)
## ----ded, include=FALSE,cache=TRUE--------------------------------------------
library(ChIPQC)
library(rtracklayer)
load("data/qcRes.RData")
blkList <- import.bed("data/ENCFF001TDO.bed.gz")
openRegionPeaks <- "data/Sorted_ATAC_50K_2_Small_Paired_peaks.narrowPeak"
## ----processData_callQC2,messages=FALSE,warning=FALSE, echo=TRUE,eval=TRUE,cache=TRUE,dependson="ded"----
myMetrics <- QCmetrics(qcRes)
myMetrics[c("RiBL%","RiP%")]
flgCounts <- flagtagcounts(qcRes)
DupRate <- flgCounts["DuplicateByChIPQC"]/flgCounts["Mapped"]
DupRate*100
## ----processData_filterBLKlist, echo=F,eval=F,cache=TRUE,dependson="processData_callQC"----
##
## MacsCalls <- granges(qcRes[seqnames(qcRes) %in% "chr20"])
##
## data.frame(Blacklisted=sum(MacsCalls %over% blkList),
## Not_Blacklisted=sum(!MacsCalls %over% blkList))
## save(MacsCalls,file="data/MacsCall_pres2.RData")
##
## ---- echo=T,eval=F-----------------------------------------------------------
##
## MacsCalls <- granges(qcRes[seqnames(qcRes) %in% "chr20"])
##
## data.frame(Blacklisted=sum(MacsCalls %over% blkList),
## Not_Blacklisted=sum(!MacsCalls %over% blkList))
##
## MacsCalls <- MacsCalls[!MacsCalls %over% blkList]
##
## ---- echo=F, eval=T----------------------------------------------------------
load("data/MacsCall_pres2.RData")
MacsCalls <- MacsCalls[!MacsCalls %over% blkList]
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Annotating Peaks
---
"
)
}else{
cat("# Annotating Peaks
---
"
)
}
## ----processData_annotatePeak, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_filterBLKlist"----
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
MacsCalls_Anno <- annotatePeak(MacsCalls,
TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene)
MacsCalls_Anno
## ----processData_Pie, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_annotatePeak"----
plotAnnoPie(MacsCalls_Anno)
## ----processData_annotated, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_annotatePeak"----
MacsGR_Anno <- as.GRanges(MacsCalls_Anno)
MacsGR_TSS <- MacsGR_Anno[abs(MacsGR_Anno$distanceToTSS) < 500]
MacsGR_TSS[1,]
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Functional analysis of peaks
---
"
)
}else{
cat("# Functional analysis of peaks
---
"
)
}
## ----processData_funAnalysise,echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_filterBLKlist"----
library(rGREAT)
great_Job <- submitGreatJob(MacsCalls, species = "hg19")
availableCategories(great_Job)
## ----processData_funAnalysis2,echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_funAnalysis2",message=FALSE, warning=FALSE----
great_ResultTable = getEnrichmentTables(great_Job, category = "GO")
names(great_ResultTable)
great_ResultTable[["GO Biological Process"]][1:4, ]
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Differential ATACseq
---
"
)
}else{
cat("# Differential ATACseq
---
"
)
}
## ----processData_consensus_read, echo=F,eval=T--------------------------------
peaks <- dir("data/ATAC_Peaks_forCounting/",
pattern="*.narrowPeak",full.names=TRUE)
## ----processData_consensusa, echo=TRUE,eval=F---------------------------------
## peaks <- dir("~/Downloads/ATAC_Workshop/ATAC_Data/ATAC_Peaks_forCounting/",
## pattern="*.narrowPeak",full.names=TRUE)
## ----processData_consensus_read2, echo=TRUE,eval=T----------------------------
myPeaks <- lapply(peaks,ChIPQC:::GetGRanges,simple=TRUE)
allPeaksSet_nR <- reduce(unlist(GRangesList(myPeaks)))
overlap <- list()
for(i in 1:length(myPeaks)){
overlap[[i]] <- allPeaksSet_nR %over% myPeaks[[i]]
}
overlapMatrix <- do.call(cbind,overlap)
colnames(overlapMatrix) <- basename(peaks)
mcols(allPeaksSet_nR) <- overlapMatrix
## ----processData_consensusaa, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_consensusa"----
allPeaksSet_nR[1:2,]
## ----processData_consensusCounting, echo=F,eval=F,cache=TRUE,dependson="processData_consensusa"----
## blklist <- import.bed("data/ENCFF547MET.bed.gz")
## nrToCount <- allPeaksSet_nR[!allPeaksSet_nR %over% blklist
## & !seqnames(allPeaksSet_nR) %in% "chrM"]
## save(nrToCount,file="data/nrToCount_pres2.RData")
## ---- eval=F, echo=T----------------------------------------------------------
## blklist <- import.bed("data/ENCFF547MET.bed.gz")
## nrToCount <- allPeaksSet_nR[!allPeaksSet_nR %over% blklist
## & !seqnames(allPeaksSet_nR) %in% "chrM"]
## nrToCount
## ---- eval=T, echo=F----------------------------------------------------------
load("data/nrToCount_pres2.RData")
nrToCount
## ----processData_consensusCountingf, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_consensus"----
library(Rsubread)
occurrences <- rowSums(
as.data.frame(elementMetadata(nrToCount)
)
)
nrToCount <- nrToCount[occurrences >= 2,]
nrToCount
## ----processData_consensusCounting2, echo=TRUE,eval=FALSE,cache=TRUE,dependson="processData_consensusCountingf"----
## library(GenomicAlignments)
## bamsToCount <- dir("~/Downloads/ATAC_Workshop/ATAC_Data/ATAC_BAM_forCounting/",
## full.names = TRUE,pattern = "*.\\.bam$")
##
## myCounts <- summarizeOverlaps(consensusToCount,
## bamsToCount,singleEnd=FALSE)
##
## colnames(myCounts) <- c("HindBrain_1","HindBrain_2","Kidney_1","Kidney_2",
## "Liver_1","Liver_2")
## ----processData_DEseq2_PCA, echo=TRUE, eval=TRUE,cache=F---------------------
library(DESeq2)
load("data/myCounts.RData")
Group <- factor(c("HindBrain","HindBrain","Kidney","Kidney",
"Liver","Liver"))
metaData <- data.frame(Group,row.names=colnames(myCounts))
atacDDS <- DESeqDataSetFromMatrix(assay(myCounts),
metaData,
~Group,
rowRanges=rowRanges(myCounts))
atacDDS <- DESeq(atacDDS)
## ----processData_DEseq2_Results_ResultsTable, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_DEseq2_PCA"----
KidneyMinusHindbrain <- results(atacDDS,
c("Group","Kidney","HindBrain"),
format="GRanges")
KidneyMinusHindbrain <- KidneyMinusHindbrain[
order(KidneyMinusHindbrain$pvalue)
]
KidneyMinusHindbrain
## ----processData_DEseq2_ResultsToTSSregions,message=FALSE,warning=FALSE, echo=F,eval=TRUE,cache=TRUE,dependson="processData_DEseq2_Results_ResultsTable"----
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
toOverLap <- promoters(TxDb.Mmusculus.UCSC.mm10.knownGene,
500,500)
KidneyMinusHindbrain <- KidneyMinusHindbrain[
(!is.na(KidneyMinusHindbrain$padj)
& KidneyMinusHindbrain$padj < 0.05)
& KidneyMinusHindbrain %over% toOverLap,]
makebedtable(KidneyMinusHindbrain,"KidneyMinusHindbrain.html",'data/')
## ----processData_DEseq2_ResultsToTSSregions2,message=FALSE,warning=FALSE, echo=TRUE,eval=F, cache=TRUE,dependson="processData_DEseq2_Results_ResultsTable"----
## library(TxDb.Mmusculus.UCSC.mm10.knownGene)
## toOverLap <- promoters(TxDb.Mmusculus.UCSC.mm10.knownGene,
## 500,500)
## KidneyMinusHindbrain <- KidneyMinusHindbrain[
## (!is.na(KidneyMinusHindbrain$padj)
## & KidneyMinusHindbrain$padj < 0.05)
## & KidneyMinusHindbrain %over% toOverLap,]
## myReport<-makebedtable(KidneyMinusHindbrain,"KidneyMinusHindbrain.html",getwd())
## browseURL(myReport)
## ----processData_DEseq2_functionalEnrichmentAnalysiss, echo=TRUE,eval=TRUE,cache=TRUE, dependson="processData_DEseq2_ResultsToTSSregions",message=FALSE,warning=FALSE----
anno_KidneyMinusHindbrain <- annotatePeak(KidneyMinusHindbrain,
TxDb = TxDb.Mmusculus.UCSC.mm10.knownGene,verbose = FALSE)
DB_ATAC <- as.data.frame(anno_KidneyMinusHindbrain)
DB_ATAC[1,]
## ----processData_DEseq2_functionalEnrichmentAnalysisd, echo=TRUE,eval=TRUE,cache=TRUE, dependson="processData_DEseq2_functionalEnrichmentAnalysiss",message=FALSE,warning=FALSE----
library(clusterProfiler)
go <- enrichGO(DB_ATAC$geneId,
OrgDb = "org.Mm.eg.db",ont = "BP",maxGSSize = 5000)
go[1:2,1:6]
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Finding Motifs in ATACseq data
---
"
)
}else{
cat("# Finding Motifs in ATACseq data
---
"
)
}
## ----processData_motifCutsa, echo=TRUE,eval=TRUE,cache=TRUE,message=FALSE,warning=FALSE----
library(MotifDb)
library(Biostrings)
library(BSgenome.Hsapiens.UCSC.hg19)
CTCF <- query(MotifDb, c("CTCF"))
CTCF
## ----processData_motifCutsb, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_motifCutsa"----
names(CTCF)
ctcfMotif <- CTCF[[1]]
ctcfMotif[,1:4]
## ----processData_motifCutsc, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_motifCutsb",fig.height=5,fig.width=7----
library(seqLogo)
seqLogo(ctcfMotif)
## ----processData_motifCutsd, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_motifCutsc",warning=FALSE,message=FALSE----
myRes <- matchPWM(ctcfMotif,BSgenome.Hsapiens.UCSC.hg19[["chr20"]])
myRes
## ----processData_motifCutse, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_motifCutsd"----
toCompare <- GRanges("chr20",ranges(myRes))
toCompare
## ----processData_motifCutsdwdw, echo=TRUE,eval=FALSE--------------------------
## BAM <- "~/Downloads/ATAC_Workshop/ATAC_Data/ATAC_BAM/Sorted_ATAC_50K_2_openRegions.bam"
## atacReads_Open <- readGAlignmentPairs(BAM)
## read1 <- first(atacReads_Open)
## read2 <- second(atacReads_Open)
## read2[1,]
## ---- echo=F, eval=F----------------------------------------------------------
## save(read1, read2, file="data/pres2_reads1and2.RData")
##
##
## ----processData_motifCutsfa, echo=FALSE,eval=TRUE----------------------------
load(file="data/pres2_reads1and2.RData")
read2[1,]
## ----processData_motifCutsf, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_motifCutsfa"----
Firsts <- resize(granges(read1),fix="start",1)
First_Pos_toCut <- shift(granges(Firsts[strand(read1) == "+"]),
4)
First_Neg_toCut <- shift(granges(Firsts[strand(read1) == "-"]),
-5)
Seconds <- resize(granges(read2),fix="start",1)
Second_Pos_toCut <- shift(granges(Seconds[strand(read2) == "+"]),
4)
Second_Neg_toCut <- shift(granges(Seconds[strand(read2) == "-"]),
-5)
test_toCut <- c(First_Pos_toCut,First_Neg_toCut,
Second_Pos_toCut,Second_Neg_toCut)
test_toCut[1:2,]
## ----processData_motifCutsg, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_motifCutsf"----
cutsCoverage <- coverage(test_toCut)
cutsCoverage20 <- cutsCoverage["chr20"]
cutsCoverage20[[1]]
## ----processData_motifCutsh, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_motifCutsg",message=FALSE,warning=FALSE,fig.height=5,fig.width=7----
CTCF_Cuts_open <- regionPlot(cutsCoverage20,
testRanges = toCompare,
style = "point",
format="rlelist",distanceAround = 500)
## ----processData_motifCutsi, echo=TRUE,eval=TRUE,cache=TRUE,dependson="processData_motifCutsh",message=FALSE,warning=FALSE,fig.height=5,fig.width=7----
plotRegion(CTCF_Cuts_open,outliers = 0.001)+
ggtitle("NucFree Cuts Centred on CTCF")+theme_bw()