In this exercise we will work with some ChIP-seq data for the Encode experiment ENCSR000ERN.
Due to bigwig libraries only being available on unix machines, the exercises below will only work on Mac and Linux operating systems.
This data can be found at the Encode experiment here or at the direct link here.
suppressPackageStartupMessages(library(rtracklayer))
suppressPackageStartupMessages(library(GenomicRanges))
library(rtracklayer)
<- GRanges(seqnames="chr1",
regionToImport ranges=IRanges(133040000,133149400))
<- BigWigSelection(regionToImport)
mySelection <- import.bw("ENCFF940MBK.bigWig",
bigWigRegion selection=mySelection,
as="RleList")
<- max(bigWigRegion$chr1[133040000:133149400])
myMax <- max(bigWigRegion)["chr1"] myMax
<- seq(133131166-500,133131166+500)
TSSpos <- as.data.frame(bigWigRegion$chr1[TSSpos])
tssDF $position <- TSSpos
tssDFlibrary(ggplot2)
ggplot(tssDF,aes(y=value,x=position))+geom_area()+theme_minimal()+xlab("Genome")+ylab("Score")+theme(axis.text.x = element_text(angle = 30, hjust = 1),axis.title.y = element_text(angle = 0))
<- bigWigRegion
newBigWigRegion <- log2(newBigWigRegion) newBigWigRegion
export.bw(newBigWigRegion,"log2.bw")
library(rtracklayer)
library(GenomicRanges)
<- read.delim("data/Myc_Ch12_1_withInput_Input_Ch12_peaks.xls",sep="\t",comment.char = "#")
MYCpeaks <- GRanges(MYCpeaks$chr,IRanges(MYCpeaks$start,MYCpeaks$end),
MYCgranges FE=MYCpeaks$fold_enrichment,
minuslog10Pval=MYCpeaks$X.log10.pvalue.)
<- MYCgranges[seqnames(MYCgranges) %in% "chr1"] MYCgrangesChr1
library(rtracklayer)
library(GenomicRanges)
<- BigWigSelection(MYCgrangesChr1)
mySelection <- import.bw("ENCFF940MBK.bigWig",
bigWigPeaks selection=mySelection,
as="RleList")
<- read.delim("data/mm10_GenePosForIGV.txt",sep="\t")
IGVGenePositions
<- GRanges(IGVGenePositions$seqnames,
genePos IRanges(IGVGenePositions$start,IGVGenePositions$end),
strand = IGVGenePositions$strand)
names(genePos) <- IGVGenePositions$gene_id
<- resize(genePos,width = 1,fix = "start")
tssPos <- resize(tssPos,width = 2000,fix = "end")
promoterPos
<- MYCgrangesChr1[MYCgrangesChr1 %over% promoterPos]
promoterPeaks <- MYCgrangesChr1[!MYCgrangesChr1 %over% promoterPos] nonPromoterPeaks
<- bigWigPeaks[promoterPeaks]
PeakCoveragePromoter <- sum(PeakCoveragePromoter)
PeakMaxs_Promoter
<- bigWigPeaks[nonPromoterPeaks]
PeakCoverageNonPromoter <- sum(PeakCoverageNonPromoter)
PeakMaxs_NonPromoter
<- data.frame(Sum=PeakMaxs_Promoter,PromoterOrNot="Promoter")
df_promoter <- data.frame(Sum=PeakMaxs_NonPromoter,PromoterOrNot="NotPromoter")
df_notPromoter <- rbind(df_promoter,df_notPromoter)
toPlot_SumSignal
ggplot(toPlot_SumSignal,aes(x=PromoterOrNot,y=Sum,fill=PromoterOrNot))+geom_violin()+scale_y_log10()+scale_fill_brewer(palette = "Pastel2")+theme_minimal()
<- bigWigPeaks
toExport <- median(c(sum(PeakCoveragePromoter),sum(PeakCoverageNonPromoter)))
peakSumMedian <- promoterPeaks[PeakMaxs_Promoter > peakSumMedian*2]
filteredPeaks export.bed(filteredPeaks,"filteredPeaks.bed")