In this exercise we will work with some ChIP-seq data for the Encode experiment ENCSR000ERN.
Due to bigwig libraries only being available on unix machines, the exercises below will only work on Mac and Linux operating systems.
This data can be found at the Encode experiment here or at the direct link here.
suppressPackageStartupMessages(library(rtracklayer))
suppressPackageStartupMessages(library(GenomicRanges))library(rtracklayer)
regionToImport <- GRanges(seqnames="chr1",
ranges=IRanges(133040000,133149400))
mySelection <- BigWigSelection(regionToImport)
bigWigRegion <- import.bw("ENCFF940MBK.bigWig",
selection=mySelection,
as="RleList")myMax <- max(bigWigRegion$chr1[133040000:133149400])
myMax <- max(bigWigRegion)["chr1"]TSSpos <- seq(133131166-500,133131166+500)
tssDF <- as.data.frame(bigWigRegion$chr1[TSSpos])
tssDF$position <- TSSpos
library(ggplot2)
ggplot(tssDF,aes(y=value,x=position))+geom_area()+theme_minimal()+xlab("Genome")+ylab("Score")+theme(axis.text.x = element_text(angle = 30, hjust = 1),axis.title.y = element_text(angle = 0))newBigWigRegion <- bigWigRegion
newBigWigRegion <- log2(newBigWigRegion)export.bw(newBigWigRegion,"log2.bw")library(rtracklayer)
library(GenomicRanges)
MYCpeaks <- read.delim("data/Myc_Ch12_1_withInput_Input_Ch12_peaks.xls",sep="\t",comment.char = "#")
MYCgranges <- GRanges(MYCpeaks$chr,IRanges(MYCpeaks$start,MYCpeaks$end),
FE=MYCpeaks$fold_enrichment,
minuslog10Pval=MYCpeaks$X.log10.pvalue.)
MYCgrangesChr1 <- MYCgranges[seqnames(MYCgranges) %in% "chr1"]library(rtracklayer)
library(GenomicRanges)
mySelection <- BigWigSelection(MYCgrangesChr1)
bigWigPeaks <- import.bw("ENCFF940MBK.bigWig",
selection=mySelection,
as="RleList")IGVGenePositions <- read.delim("data/mm10_GenePosForIGV.txt",sep="\t")
genePos <- GRanges(IGVGenePositions$seqnames,
IRanges(IGVGenePositions$start,IGVGenePositions$end),
strand = IGVGenePositions$strand)
names(genePos) <- IGVGenePositions$gene_id
tssPos <- resize(genePos,width = 1,fix = "start")
promoterPos <- resize(tssPos,width = 2000,fix = "end")
promoterPeaks <- MYCgrangesChr1[MYCgrangesChr1 %over% promoterPos]
nonPromoterPeaks <- MYCgrangesChr1[!MYCgrangesChr1 %over% promoterPos]PeakCoveragePromoter <- bigWigPeaks[promoterPeaks]
PeakMaxs_Promoter <- sum(PeakCoveragePromoter)
PeakCoverageNonPromoter <- bigWigPeaks[nonPromoterPeaks]
PeakMaxs_NonPromoter <- sum(PeakCoverageNonPromoter)
df_promoter <- data.frame(Sum=PeakMaxs_Promoter,PromoterOrNot="Promoter")
df_notPromoter <- data.frame(Sum=PeakMaxs_NonPromoter,PromoterOrNot="NotPromoter")
toPlot_SumSignal <- rbind(df_promoter,df_notPromoter)
ggplot(toPlot_SumSignal,aes(x=PromoterOrNot,y=Sum,fill=PromoterOrNot))+geom_violin()+scale_y_log10()+scale_fill_brewer(palette = "Pastel2")+theme_minimal()toExport <- bigWigPeaks
peakSumMedian <- median(c(sum(PeakCoveragePromoter),sum(PeakCoverageNonPromoter)))
filteredPeaks <- promoterPeaks[PeakMaxs_Promoter > peakSumMedian*2]
export.bed(filteredPeaks,"filteredPeaks.bed")