These exercises are about motifs.
Exercise 1 - Motif Databases
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): input string 6084 is invalid
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): input string 6140 is invalid
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): input string 6281 is invalid
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): input string 6598 is invalid
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): input string 6622 is invalid
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## Warning in grep(queryString, mcols(object)[, colname], ignore.case =
## ignore.case): unable to translate 'Three-zinc finger Kr<c3><bc>ppel-related
## factors' to a wide string
## [1] 14
## 1 2 3 4 5 6 7
## A 0.282316 0.341654 0.399523 0.3650856 0.2433208 0.0298729 0.00494519
## C 0.413385 0.135011 0.173081 0.1963598 0.1778388 0.8084791 0.81546493
## G 0.114930 0.267943 0.168845 0.2118558 0.2242058 0.0363090 0.04063490
## T 0.189369 0.255392 0.258551 0.2266988 0.3546346 0.1253390 0.13895499
## 8 9 10 11 12 13 14
## A 0.934557140 0.00509888 0.969303806 0.008189473 0.66957954 0.130511 0.198982
## C 0.058944109 0.00399771 0.020594196 0.984816325 0.01218231 0.171241 0.289660
## G 0.005027481 0.98614801 0.001929150 0.004198781 0.28215323 0.334041 0.252392
## T 0.001471270 0.00475540 0.008172848 0.002795421 0.03608493 0.364207 0.258966
## 15 16 17
## A 0.4231716 0.213519 0.371956
## C 0.1909778 0.287388 0.285061
## G 0.1644108 0.294414 0.150191
## T 0.2214398 0.204679 0.192792
library(JASPAR2024)
library(RSQLite)
library(TFBSTools)
JASPAR2024 <- JASPAR2024()
sq24 <- RSQLite::dbConnect(RSQLite::SQLite(), db(JASPAR2024))
SMAD3mat <- getMatrixByName(sq24,"SMAD3")
SMAD3mat
## An object of class PFMatrix
## ID: MA0795.1
## Name: SMAD3
## Matrix Class: SMAD/NF-1 DNA-binding domain factors
## strand: +
## Tags:
## $comment
## [1] "Data is from Taipale HTSELEX DBD (2013)"
##
## $family
## [1] "SMAD factors"
##
## $medline
## [1] "12582250"
##
## $remap_tf_name
## [1] "SMAD3"
##
## $source
## [1] "23332764"
##
## $tax_group
## [1] "vertebrates"
##
## $type
## [1] "HT-SELEX"
##
## $unibind
## [1] "1"
##
## $collection
## [1] "CORE"
##
## $species
## 9606
## "Homo sapiens"
##
## $acc
## [1] "P84022"
##
## Background:
## A C G T
## 0.25 0.25 0.25 0.25
## Matrix:
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## A 554 20 103 9 0 3491 0 3491 20 3491
## C 1829 16 29 3491 46 30 10 199 3491 434
## G 87 3491 83 11 31 22 3491 5 10 316
## T 1662 18 3491 6 3491 0 3 56 20 758
Exercise 2 - Visualizing motifs
- Lets compare this to the motif from JASPAR2024. Are these equivalent
motifs?
## Warning in makePWM(pwm): Columns of PWM must add up to 1.0
Exercise 3 - Motif enrichment analysis
library(Herper)
install_CondaTools("meme","motif_analysis", channels="bioconda", pathToMiniConda = "miniconda")
library(universalmotif)
opts <- list()
opts[["tax_group"]] <- "vertebrates"
opts[["collection"]] <- "CORE"
opts[["all_versions"]] <- FALSE
jaspar <- JASPAR2024()
sq24 <- RSQLite::dbConnect(RSQLite::SQLite(), db(jaspar))
motifs <- getMatrixSet(sq24, opts)
motifs_um <- convert_motifs(motifs)
class(motifs_um)
## [1] "list"
## [1] "universalmotif"
## attr(,"package")
## [1] "universalmotif"
##
## Attaching package: 'rio'
## The following object is masked from 'package:MotifDb':
##
## export
## The following object is masked from 'package:BSgenome':
##
## export
## The following objects are masked from 'package:rtracklayer':
##
## export, import
## The following objects are masked from 'package:BiocIO':
##
## export, import
library(Biostrings)
library(BSgenome.Mmusculus.UCSC.mm10)
dge <- rio::import("data/ATAC_W6MinusD0.xlsx")
dge_up <- dge[dge$padj < 0.05 & dge$log2FoldChange > 0,]
dge_up_gr <- GRanges(dge_up$seqnames, IRanges(dge_up$start, dge_up$end))
peaksSequences <- getSeq(BSgenome.Mmusculus.UCSC.mm10, dge_up_gr)
names(peaksSequences) <- paste0("peak_", 1:length(dge_up_gr))
HC_peaks <- rtracklayer::import("data/ATAC_HC_Peaks.bed")
background_peaksSequences <- getSeq(BSgenome.Mmusculus.UCSC.mm10, HC_peaks)
names(background_peaksSequences) <- paste0("peak_", 1:length(HC_peaks))
library(memes)
meme_path_id="miniconda/envs/motif_analysis/bin"
ame_results <- runAme(peaksSequences,
control = background_peaksSequences,
outdir = "ATAC_ame_diff",
meme_path = meme_path_id,
database = motifs_um)
load(file = "data/ATAC_ame_results.RData")
ame_results %>%
dplyr::filter(rank %in% 1:10) %>%
plot_ame_heatmap() +
ggtitle("Top 10 AME Hits")
top_res_motif <- getMatrixByID(sq24,ame_results$motif_alt_id[1])
top_res_motif_mat <- Matrix(top_res_motif)
ggseqlogo(top_res_motif_mat)+theme_minimal()
top_res_motif <- getMatrixByID(sq24,ame_results$motif_alt_id[2])
top_res_motif_mat <- Matrix(top_res_motif)
ggseqlogo(top_res_motif_mat)+theme_minimal()
top_res_motif <- getMatrixByID(sq24,ame_results$motif_alt_id[3])
top_res_motif_mat <- Matrix(top_res_motif)
ggseqlogo(top_res_motif_mat)+theme_minimal()
Exercise 4 - De novo motif analysis
dreme_results <- runDreme(peaksSequences,
control = background_peaksSequences,
outdir = "ATAC_dreme",
meme_path = meme_path_id)
tomtom_results <- runTomTom(dreme_results[2:nrow(dreme_results),],
meme_path = meme_path_id,
database = motifs_um,
outdir = "ATAC_tomtom_from_dreme"
)
## $m02_AKTCA
## $m39_AAT
## $m32_CAG
Exercise 5 - Finding Motifs
library(motifmatchr)
opts <- list()
opts[["tax_group"]] <- "vertebrates"
opts[["collection"]] <- "CORE"
opts[["all_versions"]] <- FALSE
jaspar <- JASPAR2024()
sq24 <- RSQLite::dbConnect(RSQLite::SQLite(), db(jaspar))
motifs <- getMatrixSet(sq24, opts)
motifs_subset <- motifs[names(motifs) %in% ame_results$motif_alt_id[1]]
motifHits <- matchMotifs(motifs, peaksSequences, out="matches")
motifHits
## class: SummarizedExperiment
## dim: 12987 879
## metadata(0):
## assays(1): motifMatches
## rownames: NULL
## rowData names(0):
## colnames(879): MA0004.1 MA0069.1 ... MA1602.2 MA1722.2
## colData names(1): name
mmMatrix <- motifMatches(motifHits)
mat_sums <- colSums(mmMatrix)
hist(mat_sums)
mat_sums[which.max(mat_sums)]
# meis1 = MA0498.3