Â
These exercises are about consensus peaks and differentials.
Exercise 1 - Consensus ATAC Peaks
library(rtracklayer)
peakFiles <- dir("data/atac_peaks",pattern="narrowPeak",
full.names = TRUE)
peakFiles
## [1] "data/atac_peaks/D0_ATAC_rep1_macs_peaks.narrowPeak"
## [2] "data/atac_peaks/D0_ATAC_rep2_macs_peaks.narrowPeak"
## [3] "data/atac_peaks/W6_ATAC_rep1_macs_peaks.narrowPeak"
## [4] "data/atac_peaks/W6_ATAC_rep2_macs_peaks.narrowPeak"
macsPeaks_GRL <- GRangesList(macsPeaks_list)
names(macsPeaks_GRL) <- c("D0_rep1","D0_rep2","W6_rep1","W6_rep2")
W6_rep1_unique.Peaks <- macsPeaks_GRL[[3]][!macsPeaks_GRL[[3]] %over% macsPeaks_GRL[[4]]]
W6_rep2_unique.Peaks <- macsPeaks_GRL[[4]][!macsPeaks_GRL[[4]] %over% macsPeaks_GRL[[3]]]
my_tots <- c(length(W6_rep1_unique.Peaks),
length(W6_rep2_unique.Peaks),
length(macsPeaks_GRL[[3]]) - length(W6_rep1_unique.Peaks))
names(my_tots) <- c("Rep1_Unique", "Rep2_Unique", "Common")
my_tots
## Rep1_Unique Rep2_Unique Common
## 35572 14103 53001
rtracklayer::export(W6_rep1_unique.Peaks, "ATAC_W6_rep1_unique.Peaks.bed")
rtracklayer::export(W6_rep2_unique.Peaks, "ATAC_W6_rep2_unique.Peaks.bed")
allPeaksSet_Overlapping <- unlist(macsPeaks_GRL)
allPeaksSet_nR <- reduce(allPeaksSet_Overlapping)
allPeaksSet_nR
## GRanges object with 130138 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 3398990-3399195 *
## [2] chr1 3426076-3426243 *
## [3] chr1 3455860-3455997 *
## [4] chr1 3583575-3583721 *
## [5] chr1 3611242-3611457 *
## ... ... ... ...
## [130134] chrUn_GL456368 1054-1363 *
## [130135] chrUn_GL456368 16472-16664 *
## [130136] chrUn_GL456367 8397-8711 *
## [130137] chrUn_GL456367 11398-12675 *
## [130138] chrUn_GL456370 18753-19115 *
## -------
## seqinfo: 39 sequences from an unspecified genome; no seqlengths
overlap <- list()
for(i in 1:length(macsPeaks_GRL)){
overlap[[i]] <- allPeaksSet_nR %over% macsPeaks_GRL[[i]]
}
overlapMatrix <- do.call(cbind,overlap)
colnames(overlapMatrix) <- names(macsPeaks_GRL)
mcols(allPeaksSet_nR) <- overlapMatrix
allPeaksSet_nR[1:3,]
## GRanges object with 3 ranges and 4 metadata columns:
## seqnames ranges strand | D0_rep1 D0_rep2 W6_rep1 W6_rep2
## <Rle> <IRanges> <Rle> | <logical> <logical> <logical> <logical>
## [1] chr1 3398990-3399195 * | TRUE FALSE FALSE FALSE
## [2] chr1 3426076-3426243 * | FALSE FALSE FALSE TRUE
## [3] chr1 3455860-3455997 * | FALSE TRUE FALSE FALSE
## -------
## seqinfo: 39 sequences from an unspecified genome; no seqlengths
HC_Peaks <- allPeaksSet_nR[
rowSums(as.data.frame(
mcols(allPeaksSet_nR)[,c("D0_rep1","D0_rep2")])) >= 2 |
rowSums(as.data.frame(
mcols(allPeaksSet_nR)[,c("W6_rep1","W6_rep2")])) >= 2
]
export.bed(HC_Peaks,"ATAC_HC_Peaks.bed")
Exercise 2 - Generate counts
You can grab the bam files from DropBox
library(Rsamtools)
bams <- c("ATAC_D0_rep1.bam",
"ATAC_D0_rep2.bam",
"ATAC_W6_rep1.bam",
"ATAC_W6_rep2.bam")
bamFL <- BamFileList(bams,yieldSize = 5000000)
bamFL
HC_Peaks <- rtracklayer::import("data/ATAC_HC_Peaks.bed")
library(GenomicAlignments)
MyCounts <- summarizeOverlaps(HC_Peaks,
reads = bamFL,
ignore.strand = TRUE)
save(MyCounts,file="ATAC_Counts.RData")
Exercise 3 - Differentials with DESeq2
## [1] "RangedSummarizedExperiment"
## attr(,"package")
## [1] "SummarizedExperiment"
## class: RangedSummarizedExperiment
## dim: 57433 4
## metadata(0):
## assays(1): counts
## rownames: NULL
## rowData names(2): name score
## colnames(4): D0_ATAC_rep1_sorted.bam D0_ATAC_rep2_sorted.bam
## W6_ATAC_rep1_sorted.bam W6_ATAC_rep2_sorted.bam
## colData names(0):
metaDataFrame <- data.frame(DevStage=c("D0","D0","W6","W6"))
rownames(metaDataFrame) <- colnames(MyCounts)
metaDataFrame
## DevStage
## D0_ATAC_rep1_sorted.bam D0
## D0_ATAC_rep2_sorted.bam D0
## W6_ATAC_rep1_sorted.bam W6
## W6_ATAC_rep2_sorted.bam W6
library(DESeq2)
HC_Peaks <- rtracklayer::import("data/ATAC_HC_Peaks.bed")
dds <- DESeqDataSetFromMatrix(countData = assay(MyCounts),
colData = metaDataFrame,
design = ~DevStage,
rowRanges= HC_Peaks)
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
W6MinusD0 <- results(dds,
contrast = c("DevStage","W6","D0"),
format="GRanges")
W6MinusD0 <- W6MinusD0[order(W6MinusD0$padj),]
W6MinusD0
## GRanges object with 57433 ranges and 6 metadata columns:
## seqnames ranges strand | baseMean log2FoldChange
## <Rle> <IRanges> <Rle> | <numeric> <numeric>
## [1] chr4 97465816-97466559 * | 624.313 5.14453
## [2] chr3 154696026-154696867 * | 506.128 5.83443
## [3] chr19 21882105-21883278 * | 641.165 3.87490
## [4] chr1 177385925-177386486 * | 390.054 5.04621
## [5] chr16 76851339-76852034 * | 356.948 6.03987
## ... ... ... ... . ... ...
## [57429] chr2 76057296-76057422 * | 61.8615 2.28036e-04
## [57430] chr8 126685200-126685821 * | 892.5875 -6.53726e-05
## [57431] chr7 73595414-73595544 * | 80.2626 8.32260e-05
## [57432] chr6 57839762-57840115 * | 70.6482 -5.62486e-05
## [57433] chr11 117999734-117999888 * | 54.4867 -1.81022e-05
## lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric>
## [1] 0.237520 21.6593 4.96320e-104 2.85052e-99
## [2] 0.307708 18.9609 3.58843e-80 1.03047e-75
## [3] 0.211921 18.2847 1.09614e-74 2.09849e-70
## [4] 0.285292 17.6879 5.20147e-70 7.46840e-66
## [5] 0.352288 17.1447 6.88455e-66 7.90801e-62
## ... ... ... ... ...
## [57429] 0.548252 4.15932e-04 0.999668 0.999738
## [57430] 0.172098 -3.79856e-04 0.999697 0.999749
## [57431] 0.470401 1.76926e-04 0.999859 0.999894
## [57432] 0.493346 -1.14014e-04 0.999909 0.999926
## [57433] 0.560431 -3.23005e-05 0.999974 0.999974
## -------
## seqinfo: 34 sequences from an unspecified genome; no seqlengths
## using ntop=500 top features by variance
library(EnhancedVolcano)
W6MinusD0.Filt <- W6MinusD0[!is.na(W6MinusD0$pvalue) | !is.na(W6MinusD0$padj)]
EnhancedVolcano(as.data.frame(W6MinusD0.Filt),
lab=paste0(seqnames(W6MinusD0.Filt), ranges(W6MinusD0.Filt)),
x = "log2FoldChange",
y="padj", selectLab = "")