These are answers for exercises that cover the FastQ sections of Introduction to Bioconductor.
In these exercises we will review some of the ShortRead packages functionality with handling fastQ files.
The example data can be found in the data directory.
data/heart.bodyMap.fq
library(ShortRead)
<- "data/heart.bodyMap.fq"
FileName <- readFastq(FileName)
my_fastq <- my_fastq[as.character(id(my_fastq)) == "HWI-BRUNOP16X_0001:7:66:3246:157000#0/1"]
fastqRead <- sread(fastqRead)
fastqReadSeq <- alphabetFrequency(fastqReadSeq)
alpFreq <- alpFreq[,c("A","C","G","T","N")]
alpFreq5 <- data.frame(bases=names(alpFreq5),Freq=alpFreq5)
toPlot library(ggplot2)
ggplot(toPlot,aes(x=bases,y=Freq,fill=bases))+geom_bar(stat="identity") + theme_bw()
<- quality(my_fastq[1:10000,])
allQuals <- as(allQuals,"matrix")
forBox colnames(forBox) <- paste0("Cycle",1:ncol(forBox))
boxplot(forBox)
<- alphabetByCycle(sread(my_fastq))
alpByCyle <- alpByCyle[c("A","G","C","T","N"),]
alpByCyleFilt <- data.frame(Base="A",Freq=alpByCyleFilt["A",],Cycle=1:75)
AbyCycFrame <- data.frame(Base="C",Freq=alpByCyleFilt["C",],Cycle=1:75)
CbyCycFrame <- data.frame(Base="T",Freq=alpByCyleFilt["T",],Cycle=1:75)
TbyCycFrame <- data.frame(Base="G",Freq=alpByCyleFilt["G",],Cycle=1:75)
GbyCycFrame <- data.frame(Base="N",Freq=alpByCyleFilt["N",],Cycle=1:75)
NbyCycFrame <- rbind(AbyCycFrame,CbyCycFrame,TbyCycFrame,GbyCycFrame,NbyCycFrame)
myFrame ggplot(myFrame,aes(x=Cycle,y=Freq,colour=Base))+geom_line()+theme_bw()
length(my_fastq)
## [1] 132604
<- FastqSampler("data/heart.bodyMap.fq",n=10000)
mySampledFile <- yield(mySampledFile)
my_fastq_10000 my_fastq_10000
## class: ShortReadQ
## length: 10000 reads; width: 50 75 cycles
<- srduplicated(my_fastq_10000)
dupLogical <- table(dupLogical)
numberOfDups numberOfDups
## dupLogical
## FALSE TRUE
## 4010 5990
<- my_fastq_10000[dupLogical] nonDups
<- trimTails(nonDups,10,"5") trimmedNonDups
hist(width(trimmedNonDups),xlab="Trimmed Length",main="Histogram of trimmed lengths")
8.Filter out reads less than 32 bases and write to file
<- trimmedNonDups[width(trimmedNonDups) >= 32]
filteredFastq writeFastq(filteredFastq,file="filteredFastq.fq.gz")
unlink("filteredFastq.fq.gz")