In these exercises we will review some of the functionality of ChIPQC, reading in peaks and annotating peaks to genes.
We will be using data directly downloaded from the ENCODE consortium.
Precomputed ChIPQC results as a list for 2 Encode CTCF samples and their input can be found in the data directory.
data/CTCFQC.RData
We will also perform some of our own QC on some human data of Pancreas CTCF data. We should download thew BAM file here
We can also retrieve the relevant peak calls from here
library(ChIPQC)
load("data/CTCFQC.RData")
<- data.frame(Sample=names(myQClist),Antibody=c("CTCF","CTCF","Input"))
myMeta
plotCC(myQClist,addMetaData = myMeta,facetBy="Sample",colourBy="Antibody")+
ggtitle("CTCF in Ch12 CC-plot")+
theme_minimal()
library(ggplot2)
<- flagtagcounts(myQClist)
myFlags <- myFlags["DuplicateByChIPQC",]/myFlags["Mapped",]
dupRates <- data.frame(Sample=names(dupRates),DupPercent=dupRates*100)
toPlot <- merge(myMeta,toPlot,by=1)
toPlot ggplot(toPlot,aes(x=Sample,y=DupPercent,fill=Antibody))+
geom_bar(stat="identity")+coord_flip()+theme_bw()
plotRegi(myQClist)+scale_fill_gradient2(low="white",high="red",
mid="yellow")
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
plotSSD(myQClist)+xlim(0,10)+theme_minimal()
## Using Sample as id variables
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
require(rtracklayer)
download.file("https://www.encodeproject.org/files/ENCFF627LTJ/@@download/ENCFF627LTJ.bed.gz","ENCFF627LTJ.bed.gz")
<- read.delim("ENCFF627LTJ.bed.gz",header = FALSE)
myPeaks <- GRanges(myPeaks[,1],IRanges(myPeaks[,2],myPeaks[,3]))
myPeakGR <- myPeakGR[as.vector(seqnames(myPeakGR)) %in% paste0("chr",1:21)]
myPeakGR <- table(as.vector(seqnames(myPeakGR)))
freqs <- data.frame(Chromosome=names(freqs),Total=as.vector(freqs))
toPlot ggplot(toPlot,aes(x=Chromosome,y=Total))+
geom_bar(stat="identity")+coord_flip()+theme_minimal()
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(org.Hs.eg.db)
<- annotatePeak(myPeakGR, tssRegion=c(-500, 500),
peakAnno TxDb=TxDb.Hsapiens.UCSC.hg38.knownGene,
annoDb="org.Hs.eg.db")
## >> preparing features information... 2021-08-02 03:25:45 PM
## >> identifying nearest features... 2021-08-02 03:25:46 PM
## >> calculating distance from peak to TSS... 2021-08-02 03:25:50 PM
## >> assigning genomic annotation... 2021-08-02 03:25:50 PM
## >> adding gene annotation... 2021-08-02 03:26:26 PM
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths 2021-08-02 03:26:27 PM
## >> done... 2021-08-02 03:26:27 PM
plotAnnoPie(peakAnno)
<- as.data.frame(peakAnno)
toWrite write.table(toWrite,file="annotatedPeaks",row.names = FALSE,sep="\t")
library(rtracklayer)
library(Rsamtools)
# Always check or index
indexBam("ENCFF789ZHS.bam")
<- ChIPQCsample("ENCFF789ZHS.bam",blacklist = blkList, annotation = "hg38",
myQCCTCF chromosomes = c("chr10","chr11","chr12"), verboseT = FALSE)
QCmetrics(myQCCTCF)
## Reads Map% Filt% Dup% ReadL FragL RelCC
## 4984428.00 100.00 5.56 0.00 76.00 264.00 2.34
## SSD RiP% RiBL%
## 0.93 NA 2.61
plotCC(myQCCTCF)