In these exercises we will review some of the functionality for summarizing counts and signal across genomes and within regions.
The example data can be found in the data directory
data/heart.bodyMap.bam data/liver.bodyMap.bam
library(GenomicAlignments)
library(Rsamtools)
sortBam("data/liver.bodyMap.bam","Liver")
## [1] "Liver.bam"
sortBam("data/heart.bodyMap.bam","Heart")
## [1] "Heart.bam"
indexBam("Liver.bam")
## Liver.bam
## "Liver.bam.bai"
indexBam("Heart.bam")
## Heart.bam
## "Heart.bam.bai"
<- idxstatsBam("Liver.bam")
liverMapped <- idxstatsBam("Heart.bam")
heartMapped <- sum(liverMapped[,"mapped"])
totalLiver <- sum(heartMapped[,"mapped"])
totalHeart
<- coverage("Liver.bam",weight = 1/totalLiver)
liverCoverageNorm <- coverage("Heart.bam",weight = 1/totalHeart) heartCoverageNorm
<- heartCoverageNorm[["chr12"]]
heartCoverageNorm12 <- heartCoverageNorm12[98592587:98607803]
signalDepthHeart <- data.frame(Sample="Heart",
signalDepthScaled Position=98592587:98607803,
Signal=signalDepthHeart)
<- liverCoverageNorm[["chr12"]]
liverCoverageNorm12 <- liverCoverageNorm12[98592587:98607803]
signalDepthLiver <- data.frame(Sample="Liver",
signalDepthScaled2 Position=98592587:98607803,
Signal=signalDepthLiver)
<- rbind(signalDepthScaled,signalDepthScaled2)
toPlot
library(ggplot2)
ggplot(toPlot,aes(x=Position,y=Signal,colour=Sample))+
geom_line()+theme_minimal()+ggtitle("Coverage over Chr12 98,986,183-98,998,558 for liver and heart samples")
library(org.Hs.eg.db)
<- AnnotationDbi::select(org.Hs.eg.db, keys = c("SLC25A3","TMPO","IKBIP"), keytype = "SYMBOL",
myIds columns = c("SYMBOL", "GENENAME","ENTREZID"))
## 'select()' returned 1:1 mapping between keys and columns
<- myIds[,"ENTREZID"]
entrezIDforGenes library(TxDb.Hsapiens.UCSC.hg38.knownGene)
<- genes(TxDb.Hsapiens.UCSC.hg38.knownGene) hg38Genes
## 228 genes were dropped because they have exons located on both strands
## of the same reference sequence or on more than one reference sequence,
## so cannot be represented by a single genomic range.
## Use 'single.strand.genes.only=FALSE' to get all the genes in a
## GRangesList object, or use suppressMessages() to suppress this message.
<- promoters(hg38Genes,500,500)
myPromoters <- myPromoters[myPromoters$gene_id %in% entrezIDforGenes,]
prms
<- sum(liverCoverageNorm[prms])
liverSums <- sum(heartCoverageNorm[prms])
heartSums
<- data.frame(Sample="Liver",ENTREZID=prms$gene_id,Coverage=liverSums)
lFfra <- data.frame(Sample="Heart",ENTREZID=prms$gene_id,Coverage=heartSums)
hFfra <- rbind(lFfra,hFfra)
covFrame
<- merge(myIds,covFrame,by="ENTREZID")
toPlot ggplot(toPlot,aes(x=SYMBOL,y=Coverage,fill=Sample))+geom_bar(stat="identity",position = "dodge")+theme_bw()
library(org.Hs.eg.db)
<- AnnotationDbi::select(org.Hs.eg.db, keys = c("SLC25A3","TMPO","IKBIP"), keytype = "SYMBOL",
myIds columns = c("SYMBOL", "GENENAME","ENTREZID"))
## 'select()' returned 1:1 mapping between keys and columns
<- myIds[,"ENTREZID"]
entrezIDforGenes library(TxDb.Hsapiens.UCSC.hg38.knownGene)
<- exonsBy(TxDb.Hsapiens.UCSC.hg38.knownGene,by="gene")
hg38Genes
<- hg38Genes[names(hg38Genes) %in% entrezIDforGenes,]
exByGenes
sortBam("data/liver.bodyMap.bam","Liver")
## [1] "Liver.bam"
sortBam("data/heart.bodyMap.bam","Heart")
## [1] "Heart.bam"
indexBam("Liver.bam")
## Liver.bam
## "Liver.bam.bai"
indexBam("Heart.bam")
## Heart.bam
## "Heart.bam.bai"
<- idxstatsBam("Liver.bam")
liverMapped <- idxstatsBam("Heart.bam")
heartMapped <- sum(liverMapped[,"mapped"])
totalLiver <- sum(heartMapped[,"mapped"])
totalHeart
<- summarizeOverlaps(exByGenes,c("Liver.bam","Heart.bam"))
newCounts <- assay(newCounts)
cMatrix
"Liver.bam"] <- (cMatrix[,"Liver.bam"]/totalLiver)*(10^6)
cMatrix[,"Heart.bam"] <- (cMatrix[,"Heart.bam"]/totalHeart)*(10^6)
cMatrix[,
<- data.frame(ENTREZID=rownames(cMatrix),mean=rowMeans(cMatrix),log2FC=log2(cMatrix[,"Liver.bam"]/cMatrix[,"Heart.bam"]))
toPlot <- merge(myIds,toPlot,by="ENTREZID")
toPlot ggplot(toPlot,aes(x=mean,y=log2FC))+geom_point()+theme_bw()+ylim(-3,3)+ggtitle("Liver_Minus_Heart")+geom_text(label=toPlot$SYMBOL)
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_text).