params <- list(isSlides = "no") ## ----include=FALSE------------------------------------------------------------ suppressPackageStartupMessages(require(knitr)) knitr::opts_chunk$set(echo = TRUE, tidy = T) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides != "yes"){ cat("# ChIPseq (part 2) --- " ) } ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Quality Control

--- " ) }else{ cat("# Quality Control --- " ) } ## ----mycQCdwdwshowL,include=FALSE--------------------------------------------- library(ChIPQC) ## ----eval=F------------------------------------------------------------------- ## QCresult <- ChIPQCsample(reads="/pathTo/myChIPreads.bam", ## genome="mm10", ## blacklist = "/pathTo/mm10_Blacklist.bed") ## ----mycQC,cache=TRUE,eval=FALSE---------------------------------------------- ## library(ChIPQC) ## toBlkList<-"~/Downloads/ENCFF547MET.bed.gz" ## chipqc_MycMel_rep1 <- ChIPQCsample("SR_Myc_Mel_rep1.bam", ## annotation = "mm10", ## blacklist = toBlkList, ## chromosomes = paste0("chr",1:10)) ## class(chipqc_MycMel_rep1) ## ## ----mycQCsecret,eval=FALSE,echo=F-------------------------------------------- ## library(ChIPQC) ## toBlkList<-"~/Documents/Box Sync/RU/Teaching/Compilation/Genomes_And_Datasets/mm10/ENCFF547MET.bed.gz" ## chipqc_MycMel_rep1 <- ChIPQCsample("SR_Myc_Mel_rep1.bam", ## annotation = "mm10", ## blacklist = toBlkList, ## chromosomes = paste0("chr",1:10)) ## save(chipqc_MycMel_rep1,file='~/Documents/Box Sync/RU/Teaching/RU_side/RU_ChIPseq/chipseq/inst/extdata/data/rep1.RData') ## ----mycQCshowLa,echo=FALSE,eval=TRUE----------------------------------------- toBlkList<-"data/ENCFF547MET.bed.gz" library(ChIPQC) load(file='data/rep1.RData') class(chipqc_MycMel_rep1) ## ----mycQCshow,eval=TRUE------------------------------------------------------ chipqc_MycMel_rep1 ## ---- echo=F, eval=F---------------------------------------------------------- ## FQ_FILES<-paste0("~/Documents/Box Sync/RU/Teaching/Compilation/Genomes_And_Datasets/ChIPseq_course/",c("ENCFF001NQP.fastq.gz","ENCFF001NQP.fastq.gz","ENCFF001NGC.fastq.gz","ENCFF001NGO.fastq.gz","ENCFF001NCH.fastq.gz","ENCFF001NCF.fastq.gz","ENCFF001NIM.fastq.gz")) ## ## FQ_NAMES<-c("Myc_Mel_1.bam","Myc_Mel_2.bam","Myc_Ch12_1.bam","Myc_Ch12_2.bam","input_Mel_1.bam","input_Mel_2.bam","input_Ch12_1.bam") ## ## myMapped <- align("~/Documents/Box Sync/RU/Teaching/Compilation/Genomes_And_Datasets/mm10/mm10_mainchrs", ## FQ_FILES, ## output_format = "BAM", ## output_file = FQ_NAMES, ## nthreads = 4) ## ## library(Rsamtools) ## library(stringr) ## ## SR_FQ_NAMES<-paste0("SR_",FQ_NAMES) ## SR_FQ_NAMES_1<-paste0("SR_",str_split(FQ_NAMES,".bam", simplify = T)[,1]) ## ## bplapply(1:length(SR_FQ_NAMES), function(x){ ## sortBam(FQ_NAMES[x], SR_FQ_NAMES_1[x]) ## indexBam(SR_FQ_NAMES[x]) ## }) ## ## ----mycQCshowd2,cache=TRUE,eval=FALSE,include=FALSE, echo=F------------------ ## FQ_NAMES<-c("Myc_Mel_1.bam","Myc_Mel_2.bam","Myc_Ch12_1.bam","Myc_Ch12_2.bam","input_Mel_1.bam","input_Mel_2.bam","input_Ch12_1.bam") ## SR_FQ_NAMES<-paste0("SR_",FQ_NAMES) ## bamsToQC <- SR_FQ_NAMES ## myQC <- bplapply(bamsToQC,ChIPQCsample, ## annotation = "mm10", ## blacklist = toBlkList, ## chromosomes = paste0("chr",1:10)) ## names(myQC)<-bamsToQC ## save(myQC, file="data/myQCnoPeaks.RData") ## # tried to update, but ChIPQC is upset. so leave it for now and owrk with old chipqc object ## ----mycQCshow2,cache=TRUE,eval=FALSE----------------------------------------- ## bamsToQC <- c("Sorted_Myc_Ch12_1.bam","Sorted_Myc_Ch12_2.bam", ## "Sorted_Myc_MEL_1.bam","Sorted_Myc_MEL_2.bam", ## "Sorted_Input_MEL.bam","Sorted_Input_Ch12.bam") ## myQC <- bplapply(bamsToQC,ChIPQCsample, ## annotation = "mm10", ## blacklist = toBlkList, ## chromosomes = paste0("chr",1:10)) ## names(myQC) <- bamsToQC ## ----qcmetricsA,include=FALSE------------------------------------------------- load(file="data/myQCnoPeaks.RData") ## ----qcmetrics,cache=FALSE,eval=TRUE------------------------------------------ QCmetrics(myQC) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Assessing fragment length

--- " ) }else{ cat("# Assessing fragment length --- " ) } ## ----qcmetridedecs,cache=FALSE,eval=TRUE,fig.width=6,fig.height=4------------- plotCC(myQC,facetBy = "Sample") ## ----qcmetridecs,cache=FALSE,eval=TRUE---------------------------------------- myMeta <- data.frame(Sample= names(myQC), Tissue=c("Ch12","Ch12","MEL","MEL","MEL","Ch12"), Antibody=c(rep("Myc",4),rep("Input",2))) myMeta ## ----qcmetricsede,cache=FALSE,eval=TRUE,fig.width=6,fig.height=3-------------- plotCC(myQC,facetBy = "Tissue",addMetaData = myMeta, colourBy="Antibody") ## ----qcmetricsrf,cache=FALSE,eval=TRUE,fig.width=6,fig.height=3--------------- plotCC(myQC,facetBy = "Tissue",addMetaData = myMeta, colourBy="Antibody")+theme_bw()+ ggtitle("ChIPQC results") ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Blacklists and SSD

--- " ) }else{ cat("# Blacklists and SSD --- " ) } ## ----fig.width=6,fig.height=2,warning=FALSE,message=FALSE--------------------- plotSSD(myQC)+xlim(0,5) ## ----fig.width=6,fig.height=3,warning=FALSE,message=FALSE--------------------- plotSSD(myQC)+xlim(0.2,0.8) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Library complexity and enrichment

--- " ) }else{ cat("# Library complexity and enrichment --- " ) } ## ----fig.width=6,fig.height=3,warning=FALSE,message=FALSE--------------------- myFlags <- flagtagcounts(myQC) myFlags["DuplicateByChIPQC",]/myFlags["Mapped",] ## ----warning=FALSE,message=FALSE,fig.width=8,fig.height=4--------------------- p <- plotRegi(myQC) ## ----warning=FALSE,fig.width=12,fig.height=6---------------------------------- p ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Peak Calling

--- " ) }else{ cat("# Peak Calling --- " ) } ## ---- eval=F------------------------------------------------------------------ ## BiocManager::install("Herper") ## library(Herper) ## ## ---- echo=T, eval=F---------------------------------------------------------- ## salmon_paths <- install_CondaTools(tools="macs2", env="ChIPseq_analysis") ## salmon_paths ## ---- eval=F, echo=F---------------------------------------------------------- ## tempdir2 <- function() { ## tempDir <- tempdir() ## if(dir.exists(tempDir)){ ## tempDir <- file.path(tempDir,"rr") ## } ## tempDir <- gsub("\\", "/", tempDir, fixed = TRUE) ## tempDir ## } ## ## myMiniconda <- file.path(tempdir2(), "Test") ## install_CondaTools(tools="macs2", env="ChIPseq_analysis", pathToMiniConda = myMiniconda) ## ## macs2 callpeak -t Sorted_Myc_MEL_1.bam ## –name Mel_Rep1 ## –-outdir PeakDirectory ## -c Sorted_Input_MEL.bam ## ## ----salI_1,echo=TRUE,eval=F, warning=F--------------------------------------- ## ## myChIP <- "Sorted_Myc_MEL_1.bam" ## myControl <- "Sorted_Input_MEL.bam" ## ## with_CondaEnv("ChIPseq_analysis", ## system2(command="macs2",args =c("callpeak", ## "-t", myChIP, ## "-n", "Mel_Rep1", ## "–-outdir","PeakDirectory", ## "-c", myControl)), ## ## stdout = TRUE)) ## ## ----eval=T,echo=T, warning=FALSE,collapse=T--------------------------------- macsPeaks <- "data/Mel1_peaks.xls" macsPeaks_DF <- read.delim(macsPeaks) macsPeaks_DF[1:8,] ## ----eval=T,echo=T, warning=FALSE,collapse=T--------------------------------- macsPeaks <- "data/Mel1_peaks.xls" macsPeaks_DF <- read.delim(macsPeaks, comment.char = "#") macsPeaks_DF[1:2, ] ## ----eval=T,echo=T, warning=FALSE,collapse=T--------------------------------- library(GenomicRanges) macsPeaks_GR <- GRanges(seqnames = macsPeaks_DF[, "chr"], IRanges(macsPeaks_DF[, "start"], macsPeaks_DF[, "end"])) macsPeaks_GR ## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE----------------------------- seqnames(macsPeaks_GR) ranges(macsPeaks_GR) ## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE----------------------------- mcols(macsPeaks_GR) <- macsPeaks_DF[, c("abs_summit", "fold_enrichment")] macsPeaks_GR ## ----eval=T,echo=T, warning=FALSE,collapse=T--------------------------------- library(rtracklayer) macsPeaks_GR_np <- import("data/Mel1_peaks.narrowPeak", format = "narrowPeak") macsPeaks_GR_np ## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE----------------------------- library(rtracklayer) blkList <- import.bed(toBlkList) macsPeaks_GR <- macsPeaks_GR[!macsPeaks_GR %over% blkList] ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Peak Annotation

--- " ) }else{ cat("# Peak Annotation --- " ) } ## ----include=FALSE------------------------------------------------------------ library(TxDb.Mmusculus.UCSC.mm10.knownGene) library(org.Mm.eg.db) library(GenomeInfoDb) library(ChIPseeker) ## ----eval=F,echo=T, eval=T, echo=T, warning=FALSE,tidy=T,message=FALSE-------- library(TxDb.Mmusculus.UCSC.mm10.knownGene) library(org.Mm.eg.db) library(GenomeInfoDb) library(ChIPseeker) ## ----eval=T,echo=T, message=FALSE,messages=FALSE, eval=T, echo=T, warning=FALSE---- peakAnno <- annotatePeak(macsPeaks_GR, tssRegion=c(-500, 500), TxDb=TxDb.Mmusculus.UCSC.mm10.knownGene, annoDb="org.Mm.eg.db") class(peakAnno) ## ----eval=T,echo=T, message=F,messages=F, eval=T, echo=T, warning=FALSE,tidy=T---- peakAnno ## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T---------------------- peakAnno_GR <- as.GRanges(peakAnno) peakAnno_DF <- as.data.frame(peakAnno) ## ----eval=T,echo=T, eval=T, echo=T, warning=FALSE,tidy=T---------------------- peakAnno_GR[1:2,] ## ---- eval=T, echo=T, fig.height=5, fig.width=15, warning=FALSE, tidy=T------- plotAnnoBar(peakAnno) ## ----eval=T,echo=T, eval=F, echo=T, warning=FALSE,fig.height=5, fig.width=15,tidy=T---- ## plotDistToTSS(peakAnno) ## ---- eval=T, echo=T, fig.height=5, fig.width=15, warning=FALSE, tidy=T------- upsetplot(peakAnno, vennpie=F)