In these exercises we will review some of the functionality for summarizing counts and signal across genomes and within regions.
The example data can be found in the data directory
data/heart.bodyMap.bam data/liver.bodyMap.bam
library(GenomicAlignments)
library(Rsamtools)
sortBam("data/liver.bodyMap.bam","Liver")## [1] "Liver.bam"
sortBam("data/heart.bodyMap.bam","Heart")## [1] "Heart.bam"
indexBam("Liver.bam")## Liver.bam
## "Liver.bam.bai"
indexBam("Heart.bam")## Heart.bam
## "Heart.bam.bai"
liverMapped <- idxstatsBam("Liver.bam")
heartMapped <- idxstatsBam("Heart.bam")
totalLiver <- sum(liverMapped[,"mapped"])
totalHeart <- sum(heartMapped[,"mapped"])
liverCoverageNorm <- coverage("Liver.bam",weight = 1/totalLiver)
heartCoverageNorm <- coverage("Heart.bam",weight = 1/totalHeart)heartCoverageNorm12 <- heartCoverageNorm[["chr12"]]
signalDepthHeart <- heartCoverageNorm12[98592587:98607803]
signalDepthScaled <- data.frame(Sample="Heart",
Position=98592587:98607803,
Signal=signalDepthHeart)
liverCoverageNorm12 <- liverCoverageNorm[["chr12"]]
signalDepthLiver <- liverCoverageNorm12[98592587:98607803]
signalDepthScaled2 <- data.frame(Sample="Liver",
Position=98592587:98607803,
Signal=signalDepthLiver)
toPlot <- rbind(signalDepthScaled,signalDepthScaled2)
library(ggplot2)
ggplot(toPlot,aes(x=Position,y=Signal,colour=Sample))+
geom_line()+theme_minimal()+ggtitle("Coverage over Chr12 98,986,183-98,998,558 for liver and heart samples")library(org.Hs.eg.db)
myIds <- AnnotationDbi::select(org.Hs.eg.db, keys = c("SLC25A3","TMPO","IKBIP"), keytype = "SYMBOL",
columns = c("SYMBOL", "GENENAME","ENTREZID"))## 'select()' returned 1:1 mapping between keys and columns
entrezIDforGenes <- myIds[,"ENTREZID"]
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
hg38Genes <- genes(TxDb.Hsapiens.UCSC.hg38.knownGene)## 228 genes were dropped because they have exons located on both strands
## of the same reference sequence or on more than one reference sequence,
## so cannot be represented by a single genomic range.
## Use 'single.strand.genes.only=FALSE' to get all the genes in a
## GRangesList object, or use suppressMessages() to suppress this message.
myPromoters <- promoters(hg38Genes,500,500)
prms <- myPromoters[myPromoters$gene_id %in% entrezIDforGenes,]
liverSums <- sum(liverCoverageNorm[prms])
heartSums <- sum(heartCoverageNorm[prms])
lFfra <- data.frame(Sample="Liver",ENTREZID=prms$gene_id,Coverage=liverSums)
hFfra <- data.frame(Sample="Heart",ENTREZID=prms$gene_id,Coverage=heartSums)
covFrame <- rbind(lFfra,hFfra)
toPlot <- merge(myIds,covFrame,by="ENTREZID")
ggplot(toPlot,aes(x=SYMBOL,y=Coverage,fill=Sample))+geom_bar(stat="identity",position = "dodge")+theme_bw()library(org.Hs.eg.db)
myIds <- AnnotationDbi::select(org.Hs.eg.db, keys = c("SLC25A3","TMPO","IKBIP"), keytype = "SYMBOL",
columns = c("SYMBOL", "GENENAME","ENTREZID"))## 'select()' returned 1:1 mapping between keys and columns
entrezIDforGenes <- myIds[,"ENTREZID"]
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
hg38Genes <- exonsBy(TxDb.Hsapiens.UCSC.hg38.knownGene,by="gene")
exByGenes <- hg38Genes[names(hg38Genes) %in% entrezIDforGenes,]
sortBam("data/liver.bodyMap.bam","Liver")## [1] "Liver.bam"
sortBam("data/heart.bodyMap.bam","Heart")## [1] "Heart.bam"
indexBam("Liver.bam")## Liver.bam
## "Liver.bam.bai"
indexBam("Heart.bam")## Heart.bam
## "Heart.bam.bai"
liverMapped <- idxstatsBam("Liver.bam")
heartMapped <- idxstatsBam("Heart.bam")
totalLiver <- sum(liverMapped[,"mapped"])
totalHeart <- sum(heartMapped[,"mapped"])
newCounts <- summarizeOverlaps(exByGenes,c("Liver.bam","Heart.bam"))
cMatrix <- assay(newCounts)
cMatrix[,"Liver.bam"] <- (cMatrix[,"Liver.bam"]/totalLiver)*(10^6)
cMatrix[,"Heart.bam"] <- (cMatrix[,"Heart.bam"]/totalHeart)*(10^6)
toPlot <- data.frame(ENTREZID=rownames(cMatrix),mean=rowMeans(cMatrix),log2FC=log2(cMatrix[,"Liver.bam"]/cMatrix[,"Heart.bam"]))
toPlot <- merge(myIds,toPlot,by="ENTREZID")
ggplot(toPlot,aes(x=mean,y=log2FC))+geom_point()+theme_bw()+ylim(-3,3)+ggtitle("Liver_Minus_Heart")+geom_text(label=toPlot$SYMBOL)## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_text).