In this exercise we will work with some ChIP-seq peak calls for the Encode experiment ENCSR000ERN
install.packages("BiocManager")
require(BiocManager)
install("GenomicRanges",update = FALSE)
install("rtracklayer",update = FALSE)
suppressPackageStartupMessages(library(rtracklayer))
suppressPackageStartupMessages(library(GenomicRanges))
"%in%" <- BiocGenerics::"%in%"
library(rtracklayer)
library(GenomicRanges)
<- read.delim("data/Myc_Ch12_1_withInput_Input_Ch12_peaks.xls",sep="\t",comment.char = "#")
MYCpeaks <- GRanges(MYCpeaks$chr,IRanges(MYCpeaks$start,MYCpeaks$end),
MYCgranges FE=MYCpeaks$fold_enrichment,
minuslog10Pval=MYCpeaks$X.log10.pvalue.)
<- as.data.frame(MYCgranges)
MYCDF library(ggplot2)
ggplot(MYCDF,aes(x=seqnames,y=FE,color=seqnames))+geom_boxplot()+theme_bw()
<- MYCgranges[MYCgranges$FE > 10 & MYCgranges$minuslog10Pval > -log10(0.0001)]
MYCfilteredGRanges <- MYCfilteredGRanges[seqnames(MYCfilteredGRanges) %in% "chr1"]
MYCfilteredGRangesChr1 export.bed(MYCfilteredGRangesChr1,"filteredMyc.bed")
<- read.delim("data/mm10_GenePosForIGV.txt",sep="\t")
IGVGenePositions
<- GRanges(IGVGenePositions$seqnames,
genePos IRanges(IGVGenePositions$start,IGVGenePositions$end),
strand = IGVGenePositions$strand)
names(genePos) <- IGVGenePositions$gene_id
export.bed(genePos,con="Genes.bed")
<- resize(genePos,width = 1,fix = "start") tssPos
<- resize(tssPos,width = 1000,fix = "center") tssPosExt
<- MYCfilteredGRangesChr1[MYCfilteredGRangesChr1 %over% tssPosExt]
MYCforBED export.bed(MYCforBED,con="filteredMycOnTSS.bed")
Download the signal p-value bigwig from the encode portal for replicate 1 from experiment ENCSR000ERN and capture an image of bigWig signal over our BED intervals over igfbp2 gene.
10. Import the data/Myc_Ch12_1_withInput_Input_Ch12_summits.bed BED file to a GRanges. The 5th column in file represents summit height.
<- import.bed("data/Myc_Ch12_1_withInput_Input_Ch12_summits.bed") mySummits
$Overlap <- ifelse(mySummits %over% tssPosExt,"TSS","Not_TSS")
mySummits<- as.data.frame(mySummits)
mySummitsDF ggplot(mySummitsDF,aes(x=score,fill=Overlap))+geom_density(alpha=0.5)+
facet_wrap(~seqnames)+theme_bw()
<- mySummits[order(mySummits$score,decreasing = TRUE)][1:500,] orderSummits500
<- resize(orderSummits500,width = 50,fix="center")
centredSummits library(BSgenome.Mmusculus.UCSC.mm10)
<- getSeq(BSgenome.Mmusculus.UCSC.mm10,centredSummits)
centredSummitsSeq writeXStringSet(centredSummitsSeq,filepath = "top500bySummit.fa")