params <- list(isSlides = "no") ## ----include=FALSE------------------------------------------------------------ suppressPackageStartupMessages(require(knitr)) knitr::opts_chunk$set(echo = TRUE, tidy = T) require(SummarizedExperiment) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides != "yes"){ cat("# ATACseq (part 2) --- " ) } ## ----setup, include=FALSE----------------------------------------------------- library(motifmatchr) library(MotifDb) library(JASPAR2020) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Motif Databases

--- " ) }else{ cat("# Motif Databases --- " ) } ## ----eval=TRUE---------------------------------------------------------------- library(MotifDb) MotifDb ## ----eval=TRUE---------------------------------------------------------------- class(MotifDb) ## ----eval=TRUE---------------------------------------------------------------- length(MotifDb) MotifNames <- names(MotifDb) MotifNames[1:10] ## ----eval=TRUE---------------------------------------------------------------- MotifDb[1] ## ----eval=TRUE---------------------------------------------------------------- MotifDb[[1]] colSums(MotifDb[[1]]) ## ----eval=TRUE---------------------------------------------------------------- values(MotifDb)[1:2,] ## ----eval=TRUE---------------------------------------------------------------- CTCFMotifs <- query(MotifDb,"CTCF") CTCFMotifs ## ----eval=TRUE---------------------------------------------------------------- CTCFMotifs <- query(MotifDb,c("CTCF","hsapiens","jaspar2018")) CTCFMotifs ## ----eval=TRUE---------------------------------------------------------------- library(JASPAR2020) JASPAR2020 ## ----eval=TRUE, message=F, warning=F------------------------------------------ library(TFBSTools) ?getMatrixByID ## ----eval=TRUE,echo=TRUE------------------------------------------------------ GATA2mat <- getMatrixByName(JASPAR2020,"GATA2") class(GATA2mat) ## ----eval=TRUE,echo=TRUE------------------------------------------------------ GATA2mat <- getMatrixByID(JASPAR2020,"MA0036.2") ## ----eval=TRUE,echo=TRUE------------------------------------------------------ ID(GATA2mat) ## ----eval=TRUE---------------------------------------------------------------- myMatrix <- Matrix(GATA2mat) myMatrixToo <- as.matrix(myMatrix) myMatrix ## ----eval=TRUE---------------------------------------------------------------- opts <- list() opts[["collection"]] <- "CORE" opts[["tax_group"]] <- "vertebrates" motifList <- getMatrixSet(JASPAR2020, opts) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Visualizing motifs

--- " ) }else{ cat("# Visualizing motifs --- " ) } ## ----eval=TRUE,echo=FALSE,warning=FALSE,message=FALSE, fig.height=4,fig.width=6---- require(seqLogo) CTCFMotifs <- query(MotifDb,"CTCF") seqLogo::seqLogo(CTCFMotifs[[1]],ic.scale = FALSE) ## ----eval=TRUE, fig.height=4,fig.width=6-------------------------------------- library(seqLogo) CTCFMotifs <- query(MotifDb,"CTCF") seqLogo::seqLogo(CTCFMotifs[[1]],ic.scale = FALSE) ## ----eval=TRUE, fig.height=4,fig.width=6-------------------------------------- seqLogo::seqLogo(CTCFMotifs[[1]]) ## ----eval=TRUE---------------------------------------------------------------- myMatrix ## ----eval=TRUE,echo=TRUE, fig.height=4,fig.width=6---------------------------- ppm <- myMatrix/colSums(myMatrix) ppm ## ----eval=TRUE,echo=TRUE, fig.height=4,fig.width=6---------------------------- seqLogo::seqLogo(ppm) ## ----eval=TRUE,echo=TRUE, fig.height=4,fig.width=6---------------------------- GATA2_IC <- toICM(GATA2mat) TFBSTools::seqLogo(GATA2_IC) ## ----eval=TRUE,echo=TRUE, fig.height=4,fig.width=6---------------------------- library(ggseqlogo) library(ggplot2) ggseqlogo(myMatrix)+theme_minimal() ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Identifying Motifs in ATAC

--- " ) }else{ cat("# Identifying Motifs in ATAC --- " ) } ## ----------------------------------------------------------------------------- opts <- list() opts[["tax_group"]] <- "vertebrates" opts[["collection"]] <- "CORE" opts[["all_versions"]] <- FALSE motifsToScan <- getMatrixSet(JASPAR2020,opts) ## ----------------------------------------------------------------------------- load("data/myCounts.RData") myCounts ## ----------------------------------------------------------------------------- peakRanges <- rowRanges(myCounts) peakRanges[1,] ## ----------------------------------------------------------------------------- library(BSgenome.Mmusculus.UCSC.mm10) peakRangesCentered <- resize(peakRanges,fix = "center",width = 100) peakSeqs <- getSeq(BSgenome.Mmusculus.UCSC.mm10,peakRangesCentered) names(peakSeqs) <- as.character(peakRangesCentered) peakSeqs ## ----------------------------------------------------------------------------- motif_positions <- matchMotifs(motifsToScan[1:4], peakSeqs[1:100],out="positions") class(motif_positions) length(motif_positions) ## ----------------------------------------------------------------------------- motif_positions$MA0029.1 ## ----------------------------------------------------------------------------- MA0029hits <- motif_positions$MA0029.1 names(MA0029hits) <- names(peakSeqs[1:100]) unlist(MA0029hits, use.names = TRUE) ## ----------------------------------------------------------------------------- motifHits <- matchMotifs(motifsToScan, peakSeqs, out="matches") class(motifHits) motifHits ## ----------------------------------------------------------------------------- mmMatrix <- motifMatches(motifHits) dim(mmMatrix) mmMatrix[1:8,1:8] ## ----eval=F, echo=T----------------------------------------------------------- ## totalMotifOccurence <- colSums(mmMatrix) ## totalMotifOccurence[1:4] ## ----eval=T, echo=F----------------------------------------------------------- my_mat<-apply(data.matrix(mmMatrix),2,sum) names(my_mat)<-colnames(mmMatrix) my_mat[1:4] ## ----eval=T------------------------------------------------------------------- peaksWithMA0912 <- peakRangesCentered[mmMatrix[,"MA0912.2"] == 1] peaksWithMA0912 ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # summarizing ATAC signal to Motifs

--- " ) }else{ cat("# summarizing ATAC signal to Motifs --- " ) } ## ----------------------------------------------------------------------------- library(chromVAR) ## ----------------------------------------------------------------------------- myCounts <- myCounts[rowSums(assay(myCounts)) > 5,] ## ----------------------------------------------------------------------------- myCounts <- addGCBias(myCounts, genome = BSgenome.Mmusculus.UCSC.mm10) ## ----------------------------------------------------------------------------- motif_ix <- matchMotifs(motifsToScan, myCounts, genome = BSgenome.Mmusculus.UCSC.mm10) motif_ix ## ----eval=FALSE--------------------------------------------------------------- ## deviations <- computeDeviations(object = myCounts, annotations = motif_ix) ## variability_Known <- computeVariability(deviations) ## ----include=FALSE------------------------------------------------------------ load("data/deviations.RData") load("data/variability.RData") ## ----------------------------------------------------------------------------- devZscores <- deviationScores(deviations) devZscores[1:2,] ## ----------------------------------------------------------------------------- variability_Known <- variability_Known[order(variability_Known$p_value),] variability_Known[1:10,] ## ----------------------------------------------------------------------------- topVariable <- variability_Known[1:20,] devTop <- merge(topVariable[,1,drop=FALSE],devZscores,by=0) devTop[1:2,] ## ----------------------------------------------------------------------------- devToPlot <- as.matrix(devTop[,-c(1:2)]) rownames(devToPlot) <- devTop[,2] library(pheatmap) pheatmap(devToPlot) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Identifying Motifs *de novo* in ATAC

--- " ) }else{ cat("# Identifying Motifs *de novo* in ATAC --- " ) } ## ----------------------------------------------------------------------------- require(DESeq2) load("data/myCounts.RData") Group <- factor(c("HindBrain","HindBrain","Kidney","Kidney", "Liver","Liver")) colData(myCounts) <- DataFrame(data.frame(Group,row.names=colnames(myCounts))) dds <- DESeqDataSet(myCounts,design=~Group) dds <- DESeq(dds) ## ----------------------------------------------------------------------------- myRes <- results(dds,contrast = c("Group","Liver","Kidney"),format = "GRanges") myRes <- myRes[order(myRes$padj),] upRegions <- myRes[myRes$log2FoldChange > 0][1:1000] downRegions <- myRes[myRes$log2FoldChange < 0,][1:1000] upRegions ## ----------------------------------------------------------------------------- upRegions <- resize(upRegions,fix = "center",width = 100) downRegions <- resize(downRegions,fix = "center",width = 100) ## ----------------------------------------------------------------------------- library(BSgenome.Mmusculus.UCSC.mm10) upStrings <- getSeq(BSgenome.Mmusculus.UCSC.mm10,upRegions) downStrings <- getSeq(BSgenome.Mmusculus.UCSC.mm10,downRegions) names(upStrings) <- as.character(upRegions) names(downStrings) <- as.character(downRegions) writeXStringSet(upStrings,file="UpRegions.fa") writeXStringSet(downStrings,file="DownStrings.fa") ## ----------------------------------------------------------------------------- library(universalmotif) ## ----------------------------------------------------------------------------- memeMotifs <- read_meme("data/memeResult_LiverVsBrain/combined.meme") memeMotifs ## ----------------------------------------------------------------------------- memeMotifsTFBStools <- convert_motifs(memeMotifs,"TFBSTools-PWMatrix") memeMotifsTFBStools ## ----eval=FALSE,echo=FALSE---------------------------------------------------- ## require(tidyverse) ## require(DESeq2) ## load("/Volumes/TomBackup/Temp/rui_ATAC_IL12combined_hg38/salmon/Antibody_ATAC_fromMACSisBlacklisted/ATAC/dds.RData") ## ddsFilt <- dds[,colData(dds)$Group %in% c("ATAC_Control","ATAC_Tbet_mm")] ## colData(ddsFilt) <- colData(dds)[colData(dds)$Group %in% c("ATAC_Control","ATAC_Tbet_mm"),] ## colData(ddsFilt)$Group <- droplevels(colData(ddsFilt)$Group) ## dds <- DESeq(ddsFilt) ## myRes <- results(dds,contrast = c("Group","ATAC_Control","ATAC_Tbet_mm")) ## myRes <- myRes[order(myRes$padj),] ## upRegions <- rownames(myRes)[myRes$log2FoldChange > 0][1:500] %>% gsub("ID:","",.) %>% gsub(":","-",.) %>% sub("-",":",.) %>% GRanges ## # upRegions <- rownames(myRes)[myRes$log2FoldChange > 0 & myRes$padj < 0.05 & !is.na(myRes$padj)] %>% gsub("ID:","",.) %>% gsub(":","-",.) %>% sub("-",":",.) %>% GRanges ## downRegions <- rownames(myRes)[myRes$log2FoldChange < 0][1:500] %>% gsub("ID:","",.) %>% gsub(":","-",.) %>% sub("-",":",.) %>% GRanges ## ## # DownRegions <- rownames(myRes)[myRes$log2FoldChange < 0 & myRes$padj < 0.05 & !is.na(myRes$padj)] %>% gsub("ID:","",.) %>% gsub(":","-",.) %>% sub("-",":",.) %>% GRanges ## ## ## library(BSgenome.Hsapiens.UCSC.hg38) ## upStrings <- getSeq(BSgenome.Hsapiens.UCSC.hg38,resize(upRegions,fix = "center",width = 100)) ## downStrings <- getSeq(BSgenome.Hsapiens.UCSC.hg38,resize(downRegions,fix = "center",width = 100)) ## names(upStrings) <- as.character(resize(upRegions,fix = "center",width = 100)) ## names(downStrings) <- as.character(resize(downRegions,fix = "center",width = 100)) ## writeXStringSet(upStrings,file="UpRegions.fa") ## writeXStringSet(downStrings,file="DownStrings.fa") ## ## ## require(motifmatchr) ## require(JASPAR2018) ## ## ## db <- file.path(system.file("extdata", package="JASPAR2018"), ## "JASPAR2018.sqlite") ## opts <- list() ## opts[["tax_group"]] <- "vertebrates" ## opts[["collection"]] <- "CORE" ## opts[["all_versions"]] <- FALSE ## require(TFBSTools) ## motifs <- getMatrixSet(db,opts) ## ## ## ## motif_ixUp <- matchMotifs(motifs, upStrings) ## motif_ixDown <- matchMotifs(motifs, downStrings) ## totalsUp <- colSums(assay(motif_ixUp)) ## totalsDow <- colSums(assay(motif_ixDown)) ## total <- data.frame(totalsUp,totalsDow) ## total[total == 0] <- 1 ## total$Diff <- total$totalsUp/total$totalsDow ## total[order(total$Diff,decreasing = TRUE),] ## ## IDtoGRanges <- function(IDs,asIGV=FALSE){ ## if(!asIGV){ ## IDmat <- data.frame(matrix(unlist(strsplit(IDs,":")),ncol = 4,byrow = T)[,-1]) ## newSe <- GRanges(IDmat$X1,IRanges(as.numeric(as.vector(IDmat$X2)),as.numeric(as.vector(IDmat$X3)))) ## } ## newSe ## } ## ## load("data/myCounts.RData") ## # newDDS <- SummarizedExperiment(counts(dds,normalized=TRUE), ## # rowData=NULL, ## # rowRanges=IDtoGRanges(rownames(counts(dds))), ## # colData= colData(dds) %>% ## # as.data.frame %>% ## # cbind(data.frame(depth=colSums(counts(dds,normalized=FALSE)))) %>% ## # dplyr::select(-sizeFactor) ## # ) ## ## newDDS <- myCounts ## Group <- factor(c("HindBrain","HindBrain","Kidney","Kidney", ## "Liver","Liver")) ## colData(newDDS) <- DataFrame(data.frame(Group,row.names=colnames(myCounts))) ## require(DESeq2) ## require(tidyverse) ## atacDDS <- DESeqDataSetFromMatrix(assay(myCounts), ## colData(newDDS), ## ~Group, ## rowRanges=rowRanges(myCounts)) ## atacDDS <- DESeq(atacDDS) ## myRes <- results(atacDDS,contrast = c("Group","Liver","Kidney"),format = "GRanges") ## myRes <- myRes[order(myRes$padj),] ## upRegions <- myRes[myRes$log2FoldChange > 0][1:1000] ## downRegions <- myRes[myRes$log2FoldChange < 0,][1:1000] ## ## library(BSgenome.Mmusculus.UCSC.mm10) ## upStrings <- getSeq(BSgenome.Mmusculus.UCSC.mm10,resize(upRegions,fix = "center",width = 100)) ## downStrings <- getSeq(BSgenome.Mmusculus.UCSC.mm10,resize(downRegions,fix = "center",width = 100)) ## names(upStrings) <- as.character(resize(upRegions,fix = "center",width = 100)) ## names(downStrings) <- as.character(resize(downRegions,fix = "center",width = 100)) ## writeXStringSet(upStrings,file="UpRegions.fa") ## writeXStringSet(downStrings,file="DownStrings.fa") ## ## # names(sxs) <- gsub("\\s.*","",names(sxs)) ## assayNames(newDDS) <- "counts" ## require(chromVAR) ## newDDS <- newDDS[rowSums(assay(newDDS)) > 5,] ## require(BSgenome.Mmusculus.UCSC.mm10) ## newDDS <- addGCBias(newDDS, ## genome = BSgenome.Mmusculus.UCSC.mm10) ## ## motif_ix <- matchMotifs(motifs, newDDS, ## genome = BSgenome.Mmusculus.UCSC.mm10) ## dev_Known <- computeDeviations(object = newDDS, annotations = motif_ix) ## ## variability_Known <- computeVariability(dev_Known) %>% arrange(p_value) ## ## plotVariability(variability_Known, use_plotly = FALSE) ## datatable(variability_Known) ## ## temp <- deviationScores(dev_Known) ## colnames(temp) <- paste0("Var_",colnames(temp)) ## mapOfIDs <- data.frame(id=names(rowData(dev_Known)$name),name=unname(rowData(dev_Known)$name)) ## sevew <- merge(mapOfIDs,temp,by.x=1,by.y=0) ## sevew <- merge(variability_Known,sevew,by.x=1,by.y=2) ## sigZ <- sevew %>% arrange(p_value) %>% ## head(n=20) %>% ## mutate(newName=name) %>% ## dplyr::select(newName,starts_with("Var_")) %>% ## tibble::column_to_rownames("newName") ## pheatmap(sigZ)