In todays session we will work with some of the ATAC-seq data of T-regulatory cells from Christina Leslie’s lab.
FQ files can be found can be found here for read1 and here for read2.
We will also work with the aligned data as a BAM file which can be found here.
library(GenomicRanges)
library(Rsamtools)
library(rtracklayer)
library(GenomicAlignments)
library(ShortRead)
library(ggplot2)
f1 <- FastqSampler("~/Downloads/ENCFF175VOD.fastq.gz", n=10000)
f2 <- FastqSampler("~/Downloads/ENCFF447BGX.fastq.gz", n=10000)
set.seed(123456)
p1 <- yield(f1)
set.seed(123456)
p2 <- yield(f2)
allQuals <- quality(p1)
forBox <- as(allQuals,"matrix")
colnames(forBox) <- paste0("Cycle",1:ncol(forBox))
boxplot(forBox,main="Read1")
allQuals <- quality(p2)
forBox <- as(allQuals,"matrix")
colnames(forBox) <- paste0("Cycle",1:ncol(forBox))
boxplot(forBox,main="Read2")alpByCyle <- alphabetByCycle(sread(p1))
alpByCyleFilt <- alpByCyle[c("A","G","C","T","N"),]
AbyCycFrame <- data.frame(Base="A",Freq=alpByCyleFilt["A",],Cycle=1:max(width(sread(p1))))
CbyCycFrame <- data.frame(Base="C",Freq=alpByCyleFilt["C",],Cycle=1:max(width(sread(p1))))
TbyCycFrame <- data.frame(Base="T",Freq=alpByCyleFilt["T",],Cycle=1:max(width(sread(p1))))
GbyCycFrame <- data.frame(Base="G",Freq=alpByCyleFilt["G",],Cycle=1:max(width(sread(p1))))
NbyCycFrame <- data.frame(Base="N",Freq=alpByCyleFilt["N",],Cycle=1:max(width(sread(p1))))
myFrameRead1 <- rbind(AbyCycFrame,CbyCycFrame,TbyCycFrame,GbyCycFrame,NbyCycFrame)
myFrameRead1$Read <- "Read1"
alpByCyle <- alphabetByCycle(sread(p2))
alpByCyleFilt <- alpByCyle[c("A","G","C","T","N"),]
AbyCycFrame <- data.frame(Base="A",Freq=alpByCyleFilt["A",],Cycle=1:max(width(sread(p2))))
CbyCycFrame <- data.frame(Base="C",Freq=alpByCyleFilt["C",],Cycle=1:max(width(sread(p2))))
TbyCycFrame <- data.frame(Base="T",Freq=alpByCyleFilt["T",],Cycle=1:max(width(sread(p2))))
GbyCycFrame <- data.frame(Base="G",Freq=alpByCyleFilt["G",],Cycle=1:max(width(sread(p2))))
NbyCycFrame <- data.frame(Base="N",Freq=alpByCyleFilt["N",],Cycle=1:max(width(sread(p2))))
myFrameRead2 <- rbind(AbyCycFrame,CbyCycFrame,TbyCycFrame,GbyCycFrame,NbyCycFrame)
myFrameRead2$Read <- "Read2"
myFrame <- rbind(myFrameRead1,myFrameRead2)
ggplot(myFrame,aes(x=Cycle,y=Freq,colour=Base))+geom_line()+theme_bw()+facet_grid(~Read)library(BSgenome.Hsapiens.UCSC.hg38)
mainChromosomes <- paste0("chr",c(1:21,"X","Y","M"))
mainChrSeq <- lapply(mainChromosomes,
function(x)BSgenome.Hsapiens.UCSC.hg38[[x]])
names(mainChrSeq) <- mainChromosomes
mainChrSeqSet <- DNAStringSet(mainChrSeq)
writeXStringSet(mainChrSeqSet,
"BSgenome.Hsapiens.UCSC.hg38.mainChrs.fa")
library(Rsubread)
buildindex("BSgenome.Hsapiens.UCSC.hg38.mainChrs",
"BSgenome.Hsapiens.UCSC.hg38.mainChrs.fa",
indexSplit = TRUE,
memory = 5000)
read1 <- "~/Downloads/ENCFF175VOD.fastq.gz"
read2 <- "~/Downloads/ENCFF447BGX.fastq.gz"
outBAM <- "TcellReg_ATAC.bam"
align("BSgenome.Hsapiens.UCSC.hg38.mainChrs",
readfile1=read1,readfile2=read2,
output_file = outBAM,
nthreads=2,type=1,
unique=TRUE,maxFragLength = 2000)
library(Rsamtools)
sortBam(outBAM,"Sorted_TcellReg_ATAC")
indexBam("Sorted_TcellReg_ATAC.bam")library(Rsamtools)
library(ggplot2)
indexBam("~/Downloads/ENCFF053CGD.bam")
library(GenomicAlignments)
toReviewLenOfChrom <- idxstatsBam("~/Downloads/ENCFF053CGD.bam")
myParam=ScanBamParam(flag=scanBamFlag(isProperPair =TRUE),
what=c("qname","mapq","isize"),
which=GRanges("chr10", IRanges(1,130694993)))
myPairs <- readGAlignmentPairs("~/Downloads/ENCFF053CGD.bam",param = myParam)
IS <- abs(mcols(second(myPairs))$isize)
tableOfIS <- table(IS)
toPlot <- data.frame(InsertSize=as.numeric(names(tableOfIS)),
Count=as.numeric(tableOfIS))
fragLenPlot <- ggplot(toPlot,aes(x=InsertSize,y=Count))+geom_line()
fragLenPlot + scale_y_continuous(trans='log2')+theme_minimal()atacReads_Open <- myPairs[IS < 100,]
atacReads_MonoNuc <- myPairs[IS > 180 &
IS < 247,]
atacReads_diNuc <- myPairs[IS > 315 &
IS < 437,]
toPlot <- data.frame(Fraction=c("NucleosomeFree","MonoNucleosome","DiNucleosome"),Total=c(length(atacReads_Open),
length(atacReads_MonoNuc),
length(atacReads_diNuc)))
ggplot(toPlot,aes(x=Fraction,y=Total,fill=Fraction))+geom_bar(stat="identity")+theme_bw()6 Create a bigwig for the nucleosome free (< 100bp), mono-nucleosome (180bp-247bp) and di-nucleosome (315-437) fractions and visualise in IGV.
atacFragments_Open <- granges(atacReads_Open)
myCoverage_Open <- coverage(atacFragments_Open)
export.bw(myCoverage_Open,"NucleosomeFree.bw")
atacFragments_MonoNuc <- granges(atacReads_MonoNuc)
myCoverage_MonoNuc <- coverage(atacFragments_MonoNuc)
export.bw(myCoverage_MonoNuc,"MonoNucleosome.bw")
atacFragments_diNuc <- granges(atacReads_diNuc)
myCoverage_diNuc <- coverage(atacFragments_diNuc)
export.bw(myCoverage_diNuc,"DiNucleosome.bw")