params <-
list(isSlides = "no")
## ----include=FALSE------------------------------------------------------------
suppressPackageStartupMessages(require(knitr))
knitr::opts_chunk$set(echo = TRUE, tidy = T)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides != "yes"){
cat("# Epigenomics
---
"
)
}
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Set Up
---
"
)
}else{
cat("# Set Up
---
"
)
}
## ----setwd_introtoR,eval=F----------------------------------------------------
# setwd("/PathToMyDownload/ATAC.Cut-Run.ChIP-master/r_course")
# # e.g. setwd("~/Downloads/ATAC.Cut-Run.ChIP-master/r_course")
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Epigenomic Approaches
---
"
)
}else{
cat("# Epigenomic Approaches
---
"
)
}
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Fastq QC
---
"
)
}else{
cat("# Fastq QC
---
"
)
}
## ----shortreada,include=FALSE-------------------------------------------------
library(ShortRead)
## ----shortread, warning=F, message=F------------------------------------------
library(ShortRead)
## ----echo=F,eval=F------------------------------------------------------------
# fqSample <- FastqSampler("~/Downloads/SOX9CNR_D0_rep1_R1.fastq.gz",n=10^6)
# fastq <- yield(fqSample)
#
# writeFastq(fastq,file = "../data/SOX9CNR_D0_rep1_R1_subsample.fastq.gz",mode = "w")
#
## ----eval=F, echo=F-----------------------------------------------------------
# fastq <- readFastq(dirPath = "data/SOX9CNR_D0_rep1_R1_subsample.fastq.gz")
## ----mycRep1Reads,echo=T,eval=F-----------------------------------------------
# fqSample <- FastqSampler("~/Downloads/SOX9CNR_D0_rep1_R1.fastq.gz",n=10^6)
# fastq <- yield(fqSample)
## ----eval=T, echo=T-----------------------------------------------------------
fastq <- readFastq(dirPath = "data/SOX9CNR_D0_rep1_R1_subsample.fastq.gz")
## ----mycRep1ReadsShortReadQ---------------------------------------------------
fastq
## ----mycRep1ReadsAccessor-----------------------------------------------------
readSequences <- sread(fastq)
readQuality <- quality(fastq)
readIDs <- id(fastq)
readSequences
## ----mycRep1ReadsAlpFreq------------------------------------------------------
readSequences <- sread(fastq)
readSequences_AlpFreq <- alphabetFrequency(readSequences)
readSequences_AlpFreq[1:3,]
## ----mycRep1ReadsAlpFreqSum---------------------------------------------------
summed__AlpFreq <- colSums(readSequences_AlpFreq)
summed__AlpFreq[c("A","C","G","T","N")]
## ----mycRep1ReadsAlpByCycle---------------------------------------------------
readSequences_AlpbyCycle <- alphabetByCycle(readSequences)
readSequences_AlpbyCycle[1:4,1:10]
## ----mycRep1ReadsAlpByCyclePlot-----------------------------------------------
AFreq <- readSequences_AlpbyCycle["A",]
CFreq <- readSequences_AlpbyCycle["C",]
GFreq <- readSequences_AlpbyCycle["G",]
TFreq <- readSequences_AlpbyCycle["T",]
toPlot <- data.frame(Count=c(AFreq,CFreq,GFreq,TFreq),
Cycle=rep(1:40,4),
Base=rep(c("A","C","G","T"),each=40))
## ----mycRep1ReadsAlpByCyclePlot2,cache=TRUE,eval=FALSE,dependson="mycRep1ReadsAlpByCyclePlot",fig.height=4,fig.width=8----
# library(ggplot2)
# ggplot(toPlot, aes(y=Count,x=Cycle,colour=Base)) + geom_line() +
# theme_bw()
## ----mycRep1ReadsAlpByCyclePlot3, warning=F, message=F, fig.width=6, fig.height=4----
library(ggplot2)
ggplot(toPlot,aes(y=Count,x=Cycle,colour=Base)) + geom_line() + ylim(150000,350000) +
theme_bw()
## ----mycRep1ReadsQScores------------------------------------------------------
readQuality <- quality(fastq)
readQualities <- alphabetScore(readQuality)
readQualities[1:10]
## ----mycRep1ReadsQScoresPlot, warning=F, message=F, fig.width=6, fig.height=4----
toPlot <- data.frame(ReadQ=readQualities)
ggplot(toPlot,aes(x=ReadQ))+geom_histogram()+theme_minimal()
## ----eval=T-------------------------------------------------------------------
library(Rfastp)
## ----eval=F-------------------------------------------------------------------
#
# rfastp_res <- rfastp(read1="/Users/mattpaul/Downloads/SRR20110418_1.fastq.gz",
# read2 = "/Users/mattpaul/Downloads/SRR20110418_2.fastq.gz",
# outputFastq = "SOX9CNR_D0_rep1_filtered")
## ----eval=FALSE, echo=F-------------------------------------------------------
# save(rfastp_res, file = "data/fastp_res.RData")
## -----------------------------------------------------------------------------
load("data/fastp_res.RData")
## -----------------------------------------------------------------------------
qcSummary(rfastp_res)
## -----------------------------------------------------------------------------
curvePlot(rfastp_res, curve = "content_curves")
## -----------------------------------------------------------------------------
rfastp_res <- rfastp(read1 = "data/ERR458755.fastq.gz", outputFastq = "ERR458755_rfastp")
## -----------------------------------------------------------------------------
curvePlot(rfastp_res)
## -----------------------------------------------------------------------------
curvePlot(rfastp_res, curve="content_curves")
## -----------------------------------------------------------------------------
json_report <- rfastp(read1 = "data/ERR458755.fastq.gz", outputFastq = "ERR458755_rfastp", trimFrontRead1 = 10)
## -----------------------------------------------------------------------------
curvePlot(json_report)
## -----------------------------------------------------------------------------
curvePlot(json_report, curve="content_curves")