In this practical we will investigate the motifs and pathways associated with the Ebf1 transcription factor in proB-cells and B-cells.
Pro-B-Cell Ebf1 data has been extracted from GSM499030.
B-Cell Ebf1 data has been extracted from GSE35857.
Data/ebf1_proB.bed
Data/ebf1_B.bed
suppressPackageStartupMessages(library(rtracklayer))
library(rtracklayer)
<- import.bed("data/ebf1_proB.bed")
ebf1_proB <- import.bed("data/ebf1_B.bed") ebf1_B
<- ebf1_B[ebf1_B %over% ebf1_proB] ebf1_BCommon
library(BSgenome.Mmusculus.UCSC.mm10)
<- resize(ebf1_BCommon,100,fix="center")
toMotif <- getSeq(BSgenome.Mmusculus.UCSC.mm10,
peaksSequences
toMotif)names(peaksSequences) <- paste0(seqnames(toMotif),":",
start(toMotif),
"-",
end(toMotif))
writeXStringSet(peaksSequences,file="commonEBFinB.fa")
Results files from Meme-ChIP can be found here
library(ChIPseeker)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
<- annotatePeak(ebf1_BCommon, tssRegion=c(-500, 500),
peakAnno_Ebf1B TxDb=TxDb.Mmusculus.UCSC.mm10.knownGene,
annoDb="org.Mm.eg.db")
## >> preparing features information... 2021-08-02 03:26:34 PM
## >> identifying nearest features... 2021-08-02 03:26:35 PM
## >> calculating distance from peak to TSS... 2021-08-02 03:26:35 PM
## >> assigning genomic annotation... 2021-08-02 03:26:35 PM
## >> adding gene annotation... 2021-08-02 03:26:54 PM
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths 2021-08-02 03:26:54 PM
## >> done... 2021-08-02 03:26:54 PM
<- as.data.frame(peakAnno_Ebf1B)
myAnnotation <- as.vector(myAnnotation$annotation)
simpleAnnotation !simpleAnnotation %in% c("Promoter", "Distal Intergenic")] <- "other"
simpleAnnotation[<- data.frame(Annotation=simpleAnnotation)
forPlot library(ggplot2)
ggplot(forPlot,aes(x=Annotation,fill=Annotation))+geom_bar()+coord_flip()+theme_bw()+ggtitle("Simple distribution of B-cell Ebf1 binding sites")
library(clusterProfiler)
library(org.Mm.eg.db)
<- myAnnotation[
myAnnotation_TSS $annotation == "Promoter",]
myAnnotation<- unique(myAnnotation_TSS$geneId)
genesWithPeakInTSS
<- genes(TxDb.Mmusculus.UCSC.mm10.knownGene) allGeneGR
## 66 genes were dropped because they have exons located on both strands
## of the same reference sequence or on more than one reference sequence,
## so cannot be represented by a single genomic range.
## Use 'single.strand.genes.only=FALSE' to get all the genes in a
## GRangesList object, or use suppressMessages() to suppress this message.
<- allGeneGR$gene_id
allGeneIDs
<- enrichGO(gene = genesWithPeakInTSS,
GO_result universe = allGeneIDs,
OrgDb = org.Mm.eg.db,
ont = "BP")
dotplot(GO_result)
library(enrichplot)
<- pairwise_termsim(GO_result)
GO_result_plot emapplot(GO_result_plot, showCategory = 20, cex_label_category=0.5 )+ theme(text = element_text(size = 10))
write.csv(as.data.frame(GO_result), "ebf1_B_vs_ebf1_proB_diff_promoter_peaks_GOterm.csv")
<- ebf1_B[!ebf1_B %over% ebf1_proB]
ebf1_Bunique<- ebf1_proB[!ebf1_proB %over% ebf1_B] ebf1_proBunique
8 Using rGREAT identify the MSigDB Pathways enriched in Ebf1 peaks only in Bcells and Ebf1 peaks only in proB cells.
library(rGREAT)
<- submitGreatJob(ebf1_Bunique,species="mm10",request_interval=1,version = "3.0.0")
great_Job availableCategories(great_Job)
## [1] "GO" "Phenotype Data and Human Disease"
## [3] "Pathway Data" "Gene Expression"
## [5] "Regulatory Motifs" "Gene Families"
= getEnrichmentTables(great_Job,
great_ResultTable category="Pathway Data")
## The default enrichment tables contain no associated genes for the input
## regions. You can set `download_by = 'tsv'` to download the complete
## table, but note only the top 500 regions can be retreived. See the
## following link:
##
## https://great-help.atlassian.net/wiki/spaces/GREAT/pages/655401/Export#Export-GlobalExport
names(great_ResultTable)
## [1] "PANTHER Pathway" "BioCyc Pathway" "MSigDB Pathway"
<- great_ResultTable[["MSigDB Pathway"]]
msigPath_Bcells
1:10,] msigPath_Bcells[
## ID
## 1 KEGG_B_CELL_RECEPTOR_SIGNALING_PATHWAY
## 2 PID_BCR_5PATHWAY
## 3 REACTOME_INTERFERON_ALPHA_BETA_SIGNALING
## 4 REACTOME_DOWNSTREAM_SIGNALING_EVENTS_OF_B_CELL_RECEPTOR_BCR
## 5 REACTOME_SIGNALLING_BY_NGF
## 6 REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR
## 7 REACTOME_SIGNALING_BY_SCF_KIT
## 8 PID_RAC1_REG_PATHWAY
## 9 REACTOME_IMMUNE_SYSTEM
## 10 REACTOME_PIP3_ACTIVATES_AKT_SIGNALING
## name
## 1 B cell receptor signaling pathway
## 2 BCR signaling pathway
## 3 Genes involved in Interferon alpha/beta signaling
## 4 Genes involved in Downstream Signaling Events Of B Cell Receptor (BCR)
## 5 Genes involved in Signalling by NGF
## 6 Genes involved in Signaling by the B Cell Receptor (BCR)
## 7 Genes involved in Signaling by SCF-KIT
## 8 Regulation of RAC1 activity
## 9 Genes involved in Immune System
## 10 Genes involved in PIP3 activates AKT signaling
## Binom_Genome_Fraction Binom_Expected Binom_Observed_Region_Hits
## 1 0.005806883 12.531250 37
## 2 0.006121690 13.210610 38
## 3 0.001747780 3.771710 19
## 4 0.006124732 13.217170 37
## 5 0.020280090 43.764440 83
## 6 0.009296625 20.062120 48
## 7 0.006275704 13.542970 37
## 8 0.004501446 9.714120 30
## 9 0.056859840 122.703500 182
## 10 0.002103194 4.538692 19
## Binom_Fold_Enrichment Binom_Region_Set_Coverage Binom_Raw_PValue
## 1 2.952618 0.017145510 1.434094e-08
## 2 2.876477 0.017608900 1.799178e-08
## 3 5.037503 0.008804449 1.988724e-08
## 4 2.799389 0.017145510 5.366261e-08
## 5 1.896517 0.038461540 5.825394e-08
## 6 2.392569 0.022242820 7.137937e-08
## 7 2.732045 0.017145510 9.693786e-08
## 8 3.088288 0.013901760 1.258219e-07
## 9 1.483250 0.084337350 1.415786e-07
## 10 4.186228 0.008804449 3.277231e-07
## Binom_Adjp_BH Hyper_Total_Genes Hyper_Expected Hyper_Observed_Gene_Hits
## 1 8.743757e-06 74 9.909689 25
## 2 8.743757e-06 63 8.436627 22
## 3 8.743757e-06 48 6.427906 11
## 4 1.536739e-05 90 12.052320 27
## 5 1.536739e-05 209 27.988170 57
## 6 1.569156e-05 119 15.935850 35
## 7 1.826586e-05 73 9.775774 25
## 8 2.074489e-05 38 5.088759 13
## 9 2.074913e-05 792 106.060500 141
## 10 4.075300e-05 27 3.615697 14
## Hyper_Fold_Enrichment Hyper_Gene_Set_Coverage Hyper_Term_Gene_Coverage
## 1 2.522784 0.008523696 0.3378378
## 2 2.607677 0.007500852 0.3492063
## 3 1.711288 0.003750426 0.2291667
## 4 2.240232 0.009205592 0.3000000
## 5 2.036574 0.019434030 0.2727273
## 6 2.196306 0.011933170 0.2941176
## 7 2.557342 0.008523696 0.3424658
## 8 2.554650 0.004432322 0.3421053
## 9 1.329431 0.048073640 0.1780303
## 10 3.872006 0.004773270 0.5185185
## Hyper_Raw_PValue Hyper_Adjp_BH
## 1 6.065365e-06 3.200087e-04
## 2 1.184083e-05 4.338349e-04
## 3 4.924903e-02 1.840212e-01
## 4 3.089307e-05 8.489158e-04
## 5 7.317380e-08 2.244281e-05
## 6 3.668240e-06 2.556315e-04
## 7 4.597072e-06 3.031769e-04
## 8 8.932663e-04 1.132902e-02
## 9 2.077171e-04 4.005688e-03
## 10 2.083329e-06 1.962794e-04
<- submitGreatJob(ebf1_proBunique,species="mm10",request_interval=1,version = "3.0.0")
great_Job availableCategories(great_Job)
## [1] "GO" "Phenotype Data and Human Disease"
## [3] "Pathway Data" "Gene Expression"
## [5] "Regulatory Motifs" "Gene Families"
= getEnrichmentTables(great_Job,
great_ResultTable category="Pathway Data")
## The default enrichment tables contain no associated genes for the input
## regions. You can set `download_by = 'tsv'` to download the complete
## table, but note only the top 500 regions can be retreived. See the
## following link:
##
## https://great-help.atlassian.net/wiki/spaces/GREAT/pages/655401/Export#Export-GlobalExport
names(great_ResultTable)
## [1] "PANTHER Pathway" "BioCyc Pathway" "MSigDB Pathway"
<- great_ResultTable[["MSigDB Pathway"]]
msigPath_proBcells
1:10,] msigPath_proBcells[
## ID
## 1 REACTOME_METABOLISM_OF_PORPHYRINS
## 2 PID_PDGFRBPATHWAY
## 3 KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY
## 4 REACTOME_HEMOSTASIS
## 5 REACTOME_IMMUNE_SYSTEM
## 6 PID_PI3KCIPATHWAY
## 7 PID_BCR_5PATHWAY
## 8 KEGG_TASTE_TRANSDUCTION
## 9 KEGG_FC_GAMMA_R_MEDIATED_PHAGOCYTOSIS
## 10 REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE
## name
## 1 Genes involved in Metabolism of porphyrins
## 2 PDGFR-beta signaling pathway
## 3 Natural killer cell mediated cytotoxicity
## 4 Genes involved in Hemostasis
## 5 Genes involved in Immune System
## 6 Class I PI3K signaling events
## 7 BCR signaling pathway
## 8 Taste transduction
## 9 Fc gamma R-mediated phagocytosis
## 10 Genes involved in Deposition of New CENPA-containing Nucleosomes at the Centromere
## Binom_Genome_Fraction Binom_Expected Binom_Observed_Region_Hits
## 1 0.0003517876 1.589376 28
## 2 0.0120526400 54.453810 144
## 3 0.0066675330 30.123920 91
## 4 0.0357002700 161.293800 284
## 5 0.0568598400 256.892800 399
## 6 0.0037282650 16.844300 62
## 7 0.0061216900 27.657790 82
## 8 0.0021354530 9.647976 45
## 9 0.0072730230 32.859520 88
## 10 0.0014112860 6.376189 35
## Binom_Fold_Enrichment Binom_Region_Set_Coverage Binom_Raw_PValue
## 1 17.616970 0.006197432 2.831283e-25
## 2 2.644443 0.031872510 2.554682e-24
## 3 3.020856 0.020141660 2.290614e-19
## 4 1.760762 0.062859670 3.066842e-19
## 5 1.553177 0.088313410 1.308965e-17
## 6 3.680770 0.013722890 1.835537e-17
## 7 2.964806 0.018149620 3.742359e-17
## 8 4.664191 0.009960159 1.186513e-16
## 9 2.678067 0.019477640 9.529404e-16
## 10 5.489173 0.007746791 2.642269e-15
## Binom_Adjp_BH Hyper_Total_Genes Hyper_Expected Hyper_Observed_Gene_Hits
## 1 3.734462e-22 14 3.011962 8
## 2 1.684813e-21 127 27.322800 67
## 3 1.007107e-16 101 21.729160 36
## 4 1.011291e-16 424 91.219430 157
## 5 3.453050e-15 792 170.391000 250
## 6 4.035122e-15 48 10.326730 26
## 7 7.051674e-15 63 13.553830 40
## 8 1.956263e-14 26 5.593644 8
## 9 1.396587e-13 92 19.792900 43
## 10 3.485153e-13 51 10.972150 5
## Hyper_Fold_Enrichment Hyper_Gene_Set_Coverage Hyper_Term_Gene_Coverage
## 1 2.6560760 0.001697793 0.57142860
## 2 2.4521640 0.014219020 0.52755910
## 3 1.6567600 0.007640068 0.35643560
## 4 1.7211250 0.033319190 0.37028300
## 5 1.4672140 0.053056030 0.31565660
## 6 2.5177380 0.005517827 0.54166670
## 7 2.9511950 0.008488964 0.63492060
## 8 1.4301950 0.001697793 0.30769230
## 9 2.1724970 0.009125637 0.46739130
## 10 0.4556993 0.001061121 0.09803922
## Hyper_Raw_PValue Hyper_Adjp_BH
## 1 3.888215e-03 2.182364e-02
## 2 1.008313e-14 1.329965e-11
## 3 7.711774e-04 6.826731e-03
## 4 1.334306e-13 8.799748e-11
## 5 1.298131e-11 4.280587e-09
## 6 7.359506e-07 4.044662e-05
## 7 7.825095e-13 3.440433e-10
## 8 1.788232e-01 3.428311e-01
## 9 6.382067e-08 6.475343e-06
## 10 9.916507e-01 1.000000e+00