params <-
list(isSlides = "no")
## ----include=FALSE------------------------------------------------------------
suppressPackageStartupMessages(require(knitr))
knitr::opts_chunk$set(echo = TRUE, tidy = T)
# condaSalmon <- CondaSysReqs::install_CondaTools("salmon","salmon")
# pathToSalmon <- file.path(dirname(dirname(condaSalmon$pathToConda)),"envs",condaSalmon$environment,"bin","salmon")
## ----neQants,include=FALSE,eval=FALSE-----------------------------------------
##
## salmonExec <- paste0(pathToSalmon," quant")
## fq <- "ENCFF070QMF.fastq.gz"
## outDir <- "TReg_2_Quant"
## salmonQuantCmd <- paste(salmonExec,
## "-i",indexName,
## "-o",outDir,
## "-l A",
## "-r",fq)
## salmonQuantCmd
## system(salmonQuantCmd, wait = TRUE)
##
## salmonExec <- paste0(pathToSalmon," quant")
## fq <- "ENCFF144YYI.fastq.gz"
## outDir <- "TReg_act_1_Quant"
## salmonQuantCmd <- paste(salmonExec,
## "-i",indexName,
## "-o",outDir,
## "-l A",
## "-r",fq)
## salmonQuantCmd
## system(salmonQuantCmd, wait = TRUE)
##
## salmonExec <- paste0(pathToSalmon," quant")
## fq <- "ENCFF042XBW.fastq.gz"
## outDir <- "TReg_act_2_Quant"
## salmonQuantCmd <- paste(salmonExec,
## "-i",indexName,
## "-o",outDir,
## "-l A",
## "-r",fq)
## salmonQuantCmd
## system(salmonQuantCmd, wait = TRUE)
##
## salmonExec <- paste0(pathToSalmon," quant")
## fq <- "ENCFF053CFZ.fastq.gz"
## outDir <- "TReg_act_3_Quant"
## salmonQuantCmd <- paste(salmonExec,
## "-i",indexName,
## "-o",outDir,
## "-l A",
## "-r",fq)
## salmonQuantCmd
## system(salmonQuantCmd, wait = TRUE)
##
##
## ----setup, include=FALSE-----------------------------------------------------
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(GenomicAlignments)
library(DESeq2)
library(tximport)
library(org.Mm.eg.db)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides != "yes"){
cat("# RNAseq (part 2)
---
"
)
}
## ---- eval=F------------------------------------------------------------------
## setwd("Path/to/Download/RU_RNAseq-master")
##
##
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Counting with multiple RNAseq datasets
---
"
)
}else{
cat("# Counting with multiple RNAseq datasets
---
"
)
}
## ----eval=FALSE,echo=TRUE-----------------------------------------------------
## library(Rsamtools)
## bamFilesToCount <- c("Sorted_Treg_1.bam","Sorted_Treg_2.bam",
## "Sorted_Treg_act_1.bam","Sorted_Treg_act_2.bam",
## "Sorted_Treg_act_3.bam")
## names(bamFilesToCount) <- c("Sorted_Treg_1","Sorted_Treg_2",
## "Sorted_Treg_act_1","Sorted_Treg_act_2",
## "Sorted_Treg_act_3")
## myBams <- BamFileList(bamFilesToCount,yieldSize = 10000)
##
## ----eval=FALSE,echo=TRUE-----------------------------------------------------
## library(TxDb.Mmusculus.UCSC.mm10.knownGene)
## library(GenomicAlignments)
## geneExons <- exonsBy(TxDb.Mmusculus.UCSC.mm10.knownGene,by="gene")
## geneCounts <- summarizeOverlaps(geneExons,myBams,
## ignore.strand = TRUE)
## geneCounts
## ----gC1,eval=TRUE,echo=FALSE,cache=TRUE--------------------------------------
load("data/GeneCounts.RData")
geneCounts
## ----bp,eval=FALSE,echo=TRUE,cache=TRUE---------------------------------------
## library(BiocParallel)
## ----bp1,eval=FALSE,echo=TRUE,cache=TRUE,dependson="bp"-----------------------
## paramMulti <- MulticoreParam(workers=2)
## paramSerial <- SerialParam()
## register(paramSerial)
## ----eval=F-------------------------------------------------------------------
## load("data/GeneCounts.RData")
##
## ----gC2,eval=TRUE,echo=TRUE,cache=TRUE,dependson="gC1"-----------------------
assay(geneCounts)[1:2,]
## ----gC3,eval=TRUE,echo=TRUE,cache=TRUE,dependson="gC1"-----------------------
rowRanges(geneCounts)[1:2,]
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Differential gene expression analysis
---
"
)
}else{
cat("# Differential gene expression analysis
---
"
)
}
## ----de1,eval=TRUE,echo=TRUE,cache=TRUE,dependson="gC1"-----------------------
metaData <- data.frame(Group=c("Naive","Naive","Act","Act","Act"),
row.names = colnames(geneCounts))
metaData
## ----de22,eval=TRUE,echo=TRUE,cache=TRUE,dependson="de1", warning=F-----------
countMatrix <- assay(geneCounts)
countGRanges <- rowRanges(geneCounts)
dds <- DESeqDataSetFromMatrix(countMatrix,
colData = metaData,
design = ~Group,
rowRanges=countGRanges)
dds
## ----de2,eval=TRUE,echo=TRUE,cache=TRUE,dependson="de1"-----------------------
colData(geneCounts)$Group <- metaData$Group
geneCounts
## ----de3,eval=TRUE,echo=TRUE,cache=TRUE,dependson="de1", warning=F------------
dds <- DESeqDataSet(geneCounts,design = ~Group)
dds
## ----de4,eval=TRUE,echo=TRUE,cache=TRUE,dependson="de3"-----------------------
dds <- DESeq(dds)
## ----de5,eval=TRUE,echo=TRUE,cache=TRUE,dependson="de4"-----------------------
normCounts <- counts(dds, normalized=TRUE)
normCounts[1:2,]
## ----de6,eval=TRUE,echo=TRUE,cache=TRUE,dependson="de4",fig.width=6,fig.height=4----
plotDispEsts(dds)
## ----de7,eval=TRUE,echo=TRUE,cache=TRUE,dependson="de4"-----------------------
myRes <-results(dds,contrast = c("Group","Act","Naive"))
myRes <- myRes[order(myRes$pvalue),]
myRes[1:3,]
## ----drssaas,eval=FALSE,echo=TRUE,cache=FALSE---------------------------------
## summary(myRes)
## ----dr1,eval=TRUE,echo=FALSE,cache=TRUE,dependson=c("de7")-------------------
DESeq2::summary(myRes)
## ----drccd1a,eval=FALSE,echo=TRUE---------------------------------------------
## plotMA(myRes)
## ----dr1a,eval=TRUE,echo=FALSE,cache=TRUE,dependson=c("de7"),fig.height=4,fig.width=7----
DESeq2::plotMA(myRes)
## ----dr1b,eval=TRUE,echo=TRUE,cache=TRUE,dependson=c("de7"),fig.height=4,fig.width=7,message=FALSE,warning=FALSE----
myRes_lfc <- lfcShrink(dds, coef = "Group_Naive_vs_Act")
DESeq2::plotMA(myRes_lfc)
## ----dr1c,eval=TRUE,echo=FALSE,cache=TRUE,dependson=c("dr1b")-----------------
myRes <-results(dds,contrast = c("Group","Act","Naive"))
## ----dr2,eval=TRUE,echo=TRUE,cache=TRUE,dependson=c("de7")--------------------
myResAsDF <- as.data.frame(myRes)
myResAsDF[1:2,]
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Significance and Multiple Testing
---
"
)
}else{
cat("# Significance and Multiple Testing
---
"
)
}
## ----dr22a,eval=TRUE,echo=TRUE,cache=TRUE,dependson=c("de7")------------------
table(is.na(myResAsDF$padj))
## ----dr22b,eval=TRUE,echo=TRUE,cache=TRUE,dependson=c("de7")------------------
myResAsDF$newPadj <- p.adjust(myResAsDF$pvalue)
myResAsDF[1:3,]
## ----dr22,eval=TRUE,echo=TRUE,cache=TRUE,dependson=c("de7")-------------------
myResAsDF <- myResAsDF[!is.na(myResAsDF$padj),]
myResAsDF <- myResAsDF[order(myResAsDF$pvalue), ]
myResAsDF[1:3,]
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# DEseq2 and Salmon
---
"
)
}else{
cat("# DEseq2 and Salmon
---
"
)
}
## ----tx1,eval=TRUE,echo=TRUE,cache=TRUE,dependson="de4"-----------------------
library(tximport)
## ----tx2,eval=TRUE,echo=TRUE,cache=TRUE,dependson="tx1"-----------------------
temp <- read.delim("data/Salmon/TReg_2_Quant/quant.sf")
temp[1:3,]
## ----tx3a,eval=TRUE,echo=TRUE,cache=TRUE,dependson="tx2",warning=FALSE,message=FALSE----
Tx2Gene <- select(TxDb.Mmusculus.UCSC.mm10.knownGene,
keys = as.vector(temp[,1]),
keytype = "TXNAME",
columns = c("GENEID","TXNAME"))
Tx2Gene <- Tx2Gene[!is.na(Tx2Gene$GENEID),]
Tx2Gene[1:10,]
## ----tx4,eval=TRUE,echo=TRUE,cache=TRUE,dependson="tx3a"----------------------
salmonQ <- dir("data/Salmon/",recursive = T,
pattern = "quant.sf",full.names = T)
salmonCounts <- tximport(salmonQ,
type="salmon",
tx2gene = Tx2Gene)
## ----tx5,eval=TRUE,echo=TRUE,cache=TRUE,dependson="tx4"-----------------------
salmonCounts$abundance[1:2,]
salmonCounts$counts[1:2,]
## ----tx6,eval=TRUE,echo=TRUE,cache=TRUE,dependson="tx5", warning=F------------
ddsSalmon <- DESeqDataSetFromTximport(salmonCounts,
colData = metaData,
design = ~Group)
## ----tx7,eval=TRUE,echo=TRUE,cache=TRUE,dependson=c("tx6"),message=FALSE,warning=FALSE----
ddsSalmon <- DESeq(ddsSalmon)
myResS <-results(ddsSalmon,contrast = c("Group","Act","Naive"))
myResS <- myResS[order(myResS$pvalue),]
myResS[1:3,]
## ----dr1ss,eval=TRUE,echo=FALSE,cache=TRUE,dependson=c("tx7","de7")-----------
swde <- merge(as.data.frame(myRes),as.data.frame(myResS),by=0,all=FALSE)
toCompare <- cbind(abs(swde$log2FoldChange.x) > 1 & swde$padj.x < 0.05 & !is.na(swde$padj.x) & !is.na(swde$padj.y), abs(swde$log2FoldChange.y) > 1 & swde$padj.y < 0.05 & !is.na(swde$padj.y) & !is.na(swde$padj.x))
colnames(toCompare) <- c("summarizeOverlaps","Salmon")
limma::vennDiagram(toCompare)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Adding Annotation
---
"
)
}else{
cat("# Adding Annotation
---
"
)
}
## ----anno,eval=TRUE,echo=TRUE,cache=TRUE,dependson=c("tx7","de7"),message=FALSE,warning=FALSE,tidy=FALSE----
library(org.Mm.eg.db)
eToSym <- select(org.Mm.eg.db,
keys = rownames(myResAsDF),
keytype = "ENTREZID",
columns="SYMBOL")
eToSym[1:10,]
## ----anno2,eval=TRUE,echo=TRUE,cache=TRUE,dependson="anno",tidy=FALSE---------
annotatedRes <- merge(eToSym,myResAsDF,
by.x=1,
by.y=0,
all.x=FALSE,
all.y=TRUE)
annotatedRes <- annotatedRes[order(annotatedRes$pvalue),]
annotatedRes[1:3,]