In these exercises we will review some of the functionality of ChIPQC, reading in peaks and annotating peaks to genes.
We will be using data directly downloaded from the ENCODE consortium.
Precomputed ChIPQC results as a list for 2 Encode CTCF samples and their input can be found in the data directory.
data/CTCFQC.RData
We will also perform some of our own QC on some human data of Pancreas CTCF data. We should download thew BAM file here
We can also retrieve the relevant peak calls from here
myMeta <- data.frame(Sample=names(myQClist),Antibody=c("CTCF","CTCF","Input"))
plotCC(myQClist,addMetaData = myMeta,facetBy="Sample",colourBy="Antibody")+
ggtitle("CTCF in Ch12 CC-plot")+
theme_minimal()
library(ggplot2)
myFlags <- flagtagcounts(myQClist)
dupRates <- myFlags["DuplicateByChIPQC",]/myFlags["Mapped",]
toPlot <- data.frame(Sample=names(dupRates),DupPercent=dupRates*100)
toPlot <- merge(myMeta,toPlot,by=1)
ggplot(toPlot,aes(x=Sample,y=DupPercent,fill=Antibody))+
geom_bar(stat="identity")+coord_flip()+theme_bw()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Using Sample as id variables
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
require(rtracklayer)
download.file("https://www.encodeproject.org/files/ENCFF627LTJ/@@download/ENCFF627LTJ.bed.gz","ENCFF627LTJ.bed.gz")
myPeaks <- read.delim("ENCFF627LTJ.bed.gz",header = FALSE)
myPeakGR <- GRanges(myPeaks[,1],IRanges(myPeaks[,2],myPeaks[,3]))
myPeakGR <- myPeakGR[as.vector(seqnames(myPeakGR)) %in% paste0("chr",1:21)]
freqs <- table(as.vector(seqnames(myPeakGR)))
toPlot <- data.frame(Chromosome=names(freqs),Total=as.vector(freqs))
ggplot(toPlot,aes(x=Chromosome,y=Total))+
geom_bar(stat="identity")+coord_flip()+theme_minimal()
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(org.Hs.eg.db)
peakAnno <- annotatePeak(myPeakGR, tssRegion=c(-500, 500),
TxDb=TxDb.Hsapiens.UCSC.hg38.knownGene,
annoDb="org.Hs.eg.db")
## >> preparing features information... 2025-06-05 20:11:33
## >> identifying nearest features... 2025-06-05 20:11:37
## >> calculating distance from peak to TSS... 2025-06-05 20:11:41
## >> assigning genomic annotation... 2025-06-05 20:11:41
## >> adding gene annotation... 2025-06-05 20:12:11
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths 2025-06-05 20:12:12
## >> done... 2025-06-05 20:12:12
toWrite <- as.data.frame(peakAnno)
write.table(toWrite,file="annotatedPeaks",row.names = FALSE,sep="\t")
library(rtracklayer)
library(Rsamtools)
# Always check or index
indexBam("ENCFF789ZHS.bam")
myQCCTCF <- ChIPQCsample("ENCFF789ZHS.bam",blacklist = blkList, annotation = "hg38",
chromosomes = c("chr10","chr11","chr12"), verboseT = FALSE)
## Reads Map% Filt% Dup% ReadL FragL RelCC SSD
## 4984428.00 100.00 5.56 0.00 76.00 264.00 2.34 0.93
## RiP% RiBL%
## NA 2.61