params <-
list(isSlides = "no")
## ----setup, include=FALSE-----------------------------------------------------
suppressPackageStartupMessages(require(knitr))
suppressPackageStartupMessages(require(memes))
knitr::opts_chunk$set(echo = TRUE, tidy = T)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides != "yes"){
cat("# Epigenomics (part 6)
---
"
)
}
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Motif Databases
---
"
)
}else{
cat("# Motif Databases
---
"
)
}
## ----eval=TRUE----------------------------------------------------------------
library(MotifDb)
MotifDb
## ----eval=TRUE----------------------------------------------------------------
class(MotifDb)
## ----eval=TRUE----------------------------------------------------------------
length(MotifDb)
MotifNames <- names(MotifDb)
MotifNames[1:10]
## ----eval=TRUE----------------------------------------------------------------
MotifDb[1]
## ----eval=TRUE----------------------------------------------------------------
MotifDb[[1]]
colSums(MotifDb[[1]])
## ----eval=TRUE----------------------------------------------------------------
values(MotifDb)[1:2,]
## ----eval=TRUE----------------------------------------------------------------
Sox9Motifs <- query(MotifDb,"Sox9")
Sox9Motifs
## ----eval=TRUE----------------------------------------------------------------
Sox9Motifs <- query(MotifDb,c("Sox9","hsapiens","jaspar2022"))
Sox9Motifs
## ----eval=TRUE----------------------------------------------------------------
library(JASPAR2024)
library(RSQLite)
JASPAR2024 <- JASPAR2024()
sq24 <- RSQLite::dbConnect(RSQLite::SQLite(), db(JASPAR2024))
## ----eval=TRUE, message=F, warning=F------------------------------------------
library(TFBSTools)
?getMatrixByID
## ----eval=TRUE,echo=TRUE------------------------------------------------------
SOX9mat <- getMatrixByName(sq24,"SOX9")
class(SOX9mat)
## ----eval=TRUE,echo=TRUE------------------------------------------------------
SOX9mat2 <- getMatrixByID(sq24,"MA0077.1")
## ----eval=TRUE,echo=TRUE------------------------------------------------------
ID(SOX9mat)
## ----eval=TRUE----------------------------------------------------------------
myMatrix <- Matrix(SOX9mat)
myMatrixToo <- as.matrix(myMatrix)
myMatrix
## ----eval=TRUE----------------------------------------------------------------
opts <- list()
opts[["collection"]] <- "CORE"
opts[["tax_group"]] <- "vertebrates"
opts[["all_versions"]] <- FALSE
motifList <- getMatrixSet(sq24, opts)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Visualizing motifs
---
"
)
}else{
cat("# Visualizing motifs
---
"
)
}
## ----eval=TRUE,echo=FALSE,warning=FALSE,message=FALSE, fig.height=4,fig.width=6----
require(seqLogo)
Sox9Motifs <- query(MotifDb,"Sox9")
seqLogo::seqLogo(Sox9Motifs[[1]],ic.scale = FALSE)
## ----eval=TRUE, fig.height=4,fig.width=6--------------------------------------
library(seqLogo)
Sox9Motifs <- query(MotifDb,"Sox9")
seqLogo::seqLogo(Sox9Motifs[[1]], ic.scale = FALSE)
## ----eval=TRUE, fig.height=4,fig.width=6--------------------------------------
seqLogo::seqLogo(Sox9Motifs[[1]])
## ----eval=TRUE----------------------------------------------------------------
myMatrix
## ----eval=TRUE,echo=TRUE, fig.height=4,fig.width=6----------------------------
ppm <- myMatrix/colSums(myMatrix)
ppm
## ----eval=TRUE,echo=TRUE, fig.height=4,fig.width=6----------------------------
seqLogo::seqLogo(ppm)
## ----eval=TRUE,echo=TRUE, fig.height=4,fig.width=6----------------------------
Sox9_IC <- toICM(SOX9mat)
TFBSTools::seqLogo(Sox9_IC)
## ----eval=TRUE,echo=TRUE, fig.height=4,fig.width=6----------------------------
library(ggseqlogo)
library(ggplot2)
ggseqlogo(myMatrix)+theme_minimal()
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Motif enrichment analysis
---
"
)
}else{
cat("# Motif enrichment analysis
---
"
)
}
## ----eval=F-------------------------------------------------------------------
# library(Herper)
# install_CondaTools("meme","motif_analysis", channels="bioconda", pathToMiniConda = "miniconda")
## -----------------------------------------------------------------------------
opts <- list()
opts[["tax_group"]] <- "vertebrates"
opts[["collection"]] <- "CORE"
opts[["all_versions"]] <- FALSE
jaspar <- JASPAR2024()
sq24 <- RSQLite::dbConnect(RSQLite::SQLite(), db(jaspar))
## -----------------------------------------------------------------------------
library(universalmotif)
motifs <- getMatrixSet(sq24, opts)
motifs_um <- convert_motifs(motifs)
class(motifs_um)
class(motifs_um[[1]])
## -----------------------------------------------------------------------------
library(rtracklayer)
UpinW6_peaks <- rtracklayer::import("data/UpinW6.bed")
## ----eval=T-------------------------------------------------------------------
library(Biostrings)
library(BSgenome.Mmusculus.UCSC.mm10)
peaksSequences <- getSeq(BSgenome.Mmusculus.UCSC.mm10, UpinW6_peaks)
names(peaksSequences) <- paste0("peak_", 1:length(UpinW6_peaks))
## ----eval=F, echo=F, warning=F, message=F-------------------------------------
# library(Biostrings)
# mm10 <- readDNAStringSet("~/Downloads/mm10.fa.gz")
#
# peaksSequences <- getSeq(mm10, UpinW6_peaks)
# names(peaksSequences) <- paste0("peak_", 1:length(UpinW6_peaks))
## ----eval=T-------------------------------------------------------------------
HC_peaks <- rtracklayer::import("data/HC_Peaks.bed")
background_peaksSequences <- getSeq(BSgenome.Mmusculus.UCSC.mm10, HC_peaks)
names(background_peaksSequences) <- paste0("peak_", 1:length(HC_peaks))
## ----eval=F, echo=F, warning=F, message=F-------------------------------------
# HC_peaks <- rtracklayer::import("data/HC_Peaks.bed")
# background_peaksSequences <- getSeq(mm10, HC_peaks)
# names(background_peaksSequences) <- paste0("peak_", 1:length(HC_peaks))
## ----eval=F-------------------------------------------------------------------
# library(memes)
#
# meme_path_id="miniconda/envs/motif_analysis/bin"
## ----eval=F, echo=FALSE-------------------------------------------------------
#
# meme_path_id ="/Users/mattpaul/Documents/RU/Software/mini/envs/motif_analysis/bin"
#
## ----eval=F-------------------------------------------------------------------
# ame_results <- runAme(peaksSequences,
# control = background_peaksSequences,
# outdir = "ame_diff",
# meme_path = meme_path_id,
# database = motifs_um)
## ----eval=F, echo=F-----------------------------------------------------------
#
# save(ame_results, file = "data/ame_results.RData")
#
## -----------------------------------------------------------------------------
load(file = "data/ame_results.RData")
class(ame_results)
## -----------------------------------------------------------------------------
ame_results %>%
dplyr::filter(rank %in% 1:10) %>%
plot_ame_heatmap() +
ggtitle("Top 10 AME Hits")
## -----------------------------------------------------------------------------
top_res_motif <- getMatrixByID(sq24,ame_results$motif_alt_id[1])
top_res_motif_mat <- Matrix(top_res_motif)
ggseqlogo(top_res_motif_mat)+theme_minimal()
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# De novo motif analysis
---
"
)
}else{
cat("# De novo motif analysis
---
"
)
}
## ----eval=F-------------------------------------------------------------------
# dreme_results <- runDreme(peaksSequences,
# control = background_peaksSequences,
# outdir = "dreme_diff",
# meme_path = meme_path_id)
## ----eval=F, echo=F-----------------------------------------------------------
#
# save(dreme_results, file = "data/dreme_results.RData")
#
## -----------------------------------------------------------------------------
load(file = "data/dreme_results.RData")
class(dreme_results)
## -----------------------------------------------------------------------------
dreme_results[1:3,] %>%
to_list() %>%
view_motifs()
## ----eval=F-------------------------------------------------------------------
#
# tomtom_results <- runTomTom(dreme_results,
# meme_path = meme_path_id,
# database = motifs_um,
# outdir = "tomtom_from_dreme"
# )
#
## ----eval=F, echo=F-----------------------------------------------------------
#
# save(tomtom_results, file = "data/tomtom_results.RData")
#
## -----------------------------------------------------------------------------
load(file = "data/tomtom_results.RData")
view_motifs(tomtom_results$best_match_motif[1:3])
## -----------------------------------------------------------------------------
unlist(lapply(tomtom_results$tomtom[[1]]$match_motif, function(x) x@name))
## ----eval=F-------------------------------------------------------------------
#
# tomtom_results[1,] %>%
# view_tomtom_hits(top_n = 3)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Finding Motifs
---
"
)
}else{
cat("# Finding Motifs
---
"
)
}
## -----------------------------------------------------------------------------
opts <- list()
opts[["tax_group"]] <- "vertebrates"
opts[["collection"]] <- "CORE"
opts[["all_versions"]] <- FALSE
jaspar <- JASPAR2024()
sq24 <- RSQLite::dbConnect(RSQLite::SQLite(), db(jaspar))
motifs <- getMatrixSet(sq24, opts)
## -----------------------------------------------------------------------------
peaksSequences
## -----------------------------------------------------------------------------
library(motifmatchr)
motifs_subset <- motifs[names(motifs) %in% ame_results$motif_alt_id[1:3]]
motif_positions <- matchMotifs(motifs_subset, peaksSequences[1:100],out="positions")
class(motif_positions)
length(motif_positions)
## -----------------------------------------------------------------------------
motif_positions$MA1152.2
## -----------------------------------------------------------------------------
MA1152.2hits <- motif_positions$MA1152.2
names(MA1152.2hits) <- names(peaksSequences[1:100])
unlist(MA1152.2hits, use.names = TRUE)
## -----------------------------------------------------------------------------
motifHits <- matchMotifs(motifs, peaksSequences, out="matches")
class(motifHits)
motifHits
## -----------------------------------------------------------------------------
mmMatrix <- motifMatches(motifHits)
dim(mmMatrix)
mmMatrix[1:8,1:8]
## ----eval=F, echo=T-----------------------------------------------------------
# totalMotifOccurence <- colSums(mmMatrix)
# totalMotifOccurence[1:4]
# totalMotifOccurence["MA1152.2"]
## ----eval=T, echo=F-----------------------------------------------------------
my_mat<-apply(data.matrix(mmMatrix),2,sum)
names(my_mat)<-colnames(mmMatrix)
my_mat[1:4]
my_mat["MA1152.2"]
## ----eval=T-------------------------------------------------------------------
UpinW6_peaksWithMA1152.2 <- UpinW6_peaks[mmMatrix[,"MA1152.2"] == 1]
UpinW6_peaksWithMA1152.2