In todays session we will work with some of the ATAC-seq data of T-regulatory cells from Christina Leslie’s lab.
FQ files can be found can be found here for read1 and here for read2.
We will also work with the aligned data as a BAM file which can be found here.
library(GenomicRanges)
library(Rsamtools)
library(rtracklayer)
library(GenomicAlignments)
library(ShortRead)
library(ggplot2)
<- FastqSampler("~/Downloads/ENCFF175VOD.fastq.gz", n=10000)
f1 <- FastqSampler("~/Downloads/ENCFF447BGX.fastq.gz", n=10000)
f2
set.seed(123456)
<- yield(f1)
p1 set.seed(123456)
<- yield(f2)
p2
<- quality(p1)
allQuals <- as(allQuals,"matrix")
forBox colnames(forBox) <- paste0("Cycle",1:ncol(forBox))
boxplot(forBox,main="Read1")
<- quality(p2)
allQuals <- as(allQuals,"matrix")
forBox colnames(forBox) <- paste0("Cycle",1:ncol(forBox))
boxplot(forBox,main="Read2")
<- alphabetByCycle(sread(p1))
alpByCyle <- alpByCyle[c("A","G","C","T","N"),]
alpByCyleFilt <- data.frame(Base="A",Freq=alpByCyleFilt["A",],Cycle=1:max(width(sread(p1))))
AbyCycFrame <- data.frame(Base="C",Freq=alpByCyleFilt["C",],Cycle=1:max(width(sread(p1))))
CbyCycFrame <- data.frame(Base="T",Freq=alpByCyleFilt["T",],Cycle=1:max(width(sread(p1))))
TbyCycFrame <- data.frame(Base="G",Freq=alpByCyleFilt["G",],Cycle=1:max(width(sread(p1))))
GbyCycFrame <- data.frame(Base="N",Freq=alpByCyleFilt["N",],Cycle=1:max(width(sread(p1))))
NbyCycFrame <- rbind(AbyCycFrame,CbyCycFrame,TbyCycFrame,GbyCycFrame,NbyCycFrame)
myFrameRead1 $Read <- "Read1"
myFrameRead1<- alphabetByCycle(sread(p2))
alpByCyle <- alpByCyle[c("A","G","C","T","N"),]
alpByCyleFilt <- data.frame(Base="A",Freq=alpByCyleFilt["A",],Cycle=1:max(width(sread(p2))))
AbyCycFrame <- data.frame(Base="C",Freq=alpByCyleFilt["C",],Cycle=1:max(width(sread(p2))))
CbyCycFrame <- data.frame(Base="T",Freq=alpByCyleFilt["T",],Cycle=1:max(width(sread(p2))))
TbyCycFrame <- data.frame(Base="G",Freq=alpByCyleFilt["G",],Cycle=1:max(width(sread(p2))))
GbyCycFrame <- data.frame(Base="N",Freq=alpByCyleFilt["N",],Cycle=1:max(width(sread(p2))))
NbyCycFrame <- rbind(AbyCycFrame,CbyCycFrame,TbyCycFrame,GbyCycFrame,NbyCycFrame)
myFrameRead2 $Read <- "Read2"
myFrameRead2<- rbind(myFrameRead1,myFrameRead2)
myFrame
ggplot(myFrame,aes(x=Cycle,y=Freq,colour=Base))+geom_line()+theme_bw()+facet_grid(~Read)
library(BSgenome.Hsapiens.UCSC.hg38)
<- paste0("chr",c(1:21,"X","Y","M"))
mainChromosomes <- lapply(mainChromosomes,
mainChrSeq function(x)BSgenome.Hsapiens.UCSC.hg38[[x]])
names(mainChrSeq) <- mainChromosomes
<- DNAStringSet(mainChrSeq)
mainChrSeqSet writeXStringSet(mainChrSeqSet,
"BSgenome.Hsapiens.UCSC.hg38.mainChrs.fa")
library(Rsubread)
buildindex("BSgenome.Hsapiens.UCSC.hg38.mainChrs",
"BSgenome.Hsapiens.UCSC.hg38.mainChrs.fa",
indexSplit = TRUE,
memory = 5000)
<- "~/Downloads/ENCFF175VOD.fastq.gz"
read1 <- "~/Downloads/ENCFF447BGX.fastq.gz"
read2 <- "TcellReg_ATAC.bam"
outBAM
align("BSgenome.Hsapiens.UCSC.hg38.mainChrs",
readfile1=read1,readfile2=read2,
output_file = outBAM,
nthreads=2,type=1,
unique=TRUE,maxFragLength = 2000)
library(Rsamtools)
sortBam(outBAM,"Sorted_TcellReg_ATAC")
indexBam("Sorted_TcellReg_ATAC.bam")
library(Rsamtools)
library(ggplot2)
indexBam("~/Downloads/ENCFF053CGD.bam")
library(GenomicAlignments)
<- idxstatsBam("~/Downloads/ENCFF053CGD.bam")
toReviewLenOfChrom =ScanBamParam(flag=scanBamFlag(isProperPair =TRUE),
myParamwhat=c("qname","mapq","isize"),
which=GRanges("chr10", IRanges(1,130694993)))
<- readGAlignmentPairs("~/Downloads/ENCFF053CGD.bam",param = myParam)
myPairs
<- abs(mcols(second(myPairs))$isize)
IS <- table(IS)
tableOfIS <- data.frame(InsertSize=as.numeric(names(tableOfIS)),
toPlot Count=as.numeric(tableOfIS))
<- ggplot(toPlot,aes(x=InsertSize,y=Count))+geom_line()
fragLenPlot + scale_y_continuous(trans='log2')+theme_minimal() fragLenPlot
<- myPairs[IS < 100,]
atacReads_Open <- myPairs[IS > 180 &
atacReads_MonoNuc < 247,]
IS <- myPairs[IS > 315 &
atacReads_diNuc < 437,]
IS <- data.frame(Fraction=c("NucleosomeFree","MonoNucleosome","DiNucleosome"),Total=c(length(atacReads_Open),
toPlot length(atacReads_MonoNuc),
length(atacReads_diNuc)))
ggplot(toPlot,aes(x=Fraction,y=Total,fill=Fraction))+geom_bar(stat="identity")+theme_bw()
6 Create a bigwig for the nucleosome free (< 100bp), mono-nucleosome (180bp-247bp) and di-nucleosome (315-437) fractions and visualise in IGV.
<- granges(atacReads_Open)
atacFragments_Open <- coverage(atacFragments_Open)
myCoverage_Open export.bw(myCoverage_Open,"NucleosomeFree.bw")
<- granges(atacReads_MonoNuc)
atacFragments_MonoNuc <- coverage(atacFragments_MonoNuc)
myCoverage_MonoNuc export.bw(myCoverage_MonoNuc,"MonoNucleosome.bw")
<- granges(atacReads_diNuc)
atacFragments_diNuc <- coverage(atacFragments_diNuc)
myCoverage_diNuc export.bw(myCoverage_diNuc,"DiNucleosome.bw")