ChIPseq functional annotation and motif identification.

In this practical we will investigate the motifs and pathways associated with the Ebf1 transcription factor in proB-cells and B-cells.

  1. Load the ebf1 peaks for proB and B cells into R.
suppressPackageStartupMessages(library(rtracklayer))

library(rtracklayer)
ebf1_proB <- import.bed("data/ebf1_proB.bed")
ebf1_B <- import.bed("data/ebf1_B.bed")
  1. Find peaks in B cells which we also present in proB cells.
ebf1_BCommon <- ebf1_B[ebf1_B %over% ebf1_proB]
  1. Extract the sequence 100bp (+/-50bp) around the geometric centre of these peaks, write to FASTA and submit to Meme-ChIP.
library(BSgenome.Mmusculus.UCSC.mm10)
toMotif <- resize(ebf1_BCommon,100,fix="center")
peaksSequences <- getSeq(BSgenome.Mmusculus.UCSC.mm10,
                         toMotif)
names(peaksSequences) <- paste0(seqnames(toMotif),":",
                                         start(toMotif),
                                         "-",
                                         end(toMotif))
writeXStringSet(peaksSequences,file="commonEBFinB.fa")

Results files from Meme-ChIP can be found here

  1. Annotate common B cell peaks to genes with TSS set as +/- 500bp
library(ChIPseeker)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
peakAnno_Ebf1B <- annotatePeak(ebf1_BCommon, tssRegion=c(-500, 500), 
                         TxDb=TxDb.Mmusculus.UCSC.mm10.knownGene, 
                         annoDb="org.Mm.eg.db")
## >> preparing features information...      2021-08-02 03:26:34 PM 
## >> identifying nearest features...        2021-08-02 03:26:35 PM 
## >> calculating distance from peak to TSS...   2021-08-02 03:26:35 PM 
## >> assigning genomic annotation...        2021-08-02 03:26:35 PM 
## >> adding gene annotation...          2021-08-02 03:26:54 PM
## 'select()' returned 1:many mapping between keys and columns
## >> assigning chromosome lengths           2021-08-02 03:26:54 PM 
## >> done...                    2021-08-02 03:26:54 PM
  1. Create a barplot of the occurrence of differing annotations (“Promoter”, “Distal Intergenic” and “other”).
myAnnotation <- as.data.frame(peakAnno_Ebf1B)
simpleAnnotation <- as.vector(myAnnotation$annotation)
simpleAnnotation[!simpleAnnotation %in% c("Promoter", "Distal Intergenic")] <- "other"
forPlot <- data.frame(Annotation=simpleAnnotation)
library(ggplot2)
ggplot(forPlot,aes(x=Annotation,fill=Annotation))+geom_bar()+coord_flip()+theme_bw()+ggtitle("Simple distribution of B-cell Ebf1 binding sites")

  1. Perform a geneset enrichment test of genes with peaks in promoter against BP GO terms using clusterProfiler. Make a dotplot and an enrichment map of the results. Finally export the result.
library(clusterProfiler)
library(org.Mm.eg.db)

myAnnotation_TSS <- myAnnotation[
  myAnnotation$annotation == "Promoter",]
genesWithPeakInTSS <- unique(myAnnotation_TSS$geneId)

allGeneGR <- genes(TxDb.Mmusculus.UCSC.mm10.knownGene)
##   66 genes were dropped because they have exons located on both strands
##   of the same reference sequence or on more than one reference sequence,
##   so cannot be represented by a single genomic range.
##   Use 'single.strand.genes.only=FALSE' to get all the genes in a
##   GRangesList object, or use suppressMessages() to suppress this message.
allGeneIDs <- allGeneGR$gene_id

GO_result <- enrichGO(gene = genesWithPeakInTSS, 
                      universe = allGeneIDs,
                      OrgDb = org.Mm.eg.db,
                      ont = "BP")

dotplot(GO_result)

library(enrichplot)
GO_result_plot <- pairwise_termsim(GO_result)
emapplot(GO_result_plot, showCategory = 20, cex_label_category=0.5 )+ theme(text = element_text(size = 10)) 

write.csv(as.data.frame(GO_result), "ebf1_B_vs_ebf1_proB_diff_promoter_peaks_GOterm.csv")
  1. Identify Ebf1 peaks only in Bcells and Ebf1 peaks only in proB cells.
ebf1_Bunique<- ebf1_B[!ebf1_B %over% ebf1_proB]
ebf1_proBunique<- ebf1_proB[!ebf1_proB %over% ebf1_B]

8 Using rGREAT identify the MSigDB Pathways enriched in Ebf1 peaks only in Bcells and Ebf1 peaks only in proB cells.

library(rGREAT)
great_Job <- submitGreatJob(ebf1_Bunique,species="mm10",request_interval=1,version = "3.0.0")
availableCategories(great_Job)
## [1] "GO"                               "Phenotype Data and Human Disease"
## [3] "Pathway Data"                     "Gene Expression"                 
## [5] "Regulatory Motifs"                "Gene Families"
great_ResultTable = getEnrichmentTables(great_Job,
                                        category="Pathway Data")
## The default enrichment tables contain no associated genes for the input
## regions. You can set `download_by = 'tsv'` to download the complete
## table, but note only the top 500 regions can be retreived. See the
## following link:
## 
## https://great-help.atlassian.net/wiki/spaces/GREAT/pages/655401/Export#Export-GlobalExport
names(great_ResultTable)
## [1] "PANTHER Pathway" "BioCyc Pathway"  "MSigDB Pathway"
msigPath_Bcells <- great_ResultTable[["MSigDB Pathway"]]

msigPath_Bcells[1:10,]
##                                                             ID
## 1                       KEGG_B_CELL_RECEPTOR_SIGNALING_PATHWAY
## 2                                             PID_BCR_5PATHWAY
## 3                     REACTOME_INTERFERON_ALPHA_BETA_SIGNALING
## 4  REACTOME_DOWNSTREAM_SIGNALING_EVENTS_OF_B_CELL_RECEPTOR_BCR
## 5                                   REACTOME_SIGNALLING_BY_NGF
## 6                REACTOME_SIGNALING_BY_THE_B_CELL_RECEPTOR_BCR
## 7                                REACTOME_SIGNALING_BY_SCF_KIT
## 8                                         PID_RAC1_REG_PATHWAY
## 9                                       REACTOME_IMMUNE_SYSTEM
## 10                       REACTOME_PIP3_ACTIVATES_AKT_SIGNALING
##                                                                      name
## 1                                       B cell receptor signaling pathway
## 2                                                   BCR signaling pathway
## 3                       Genes involved in Interferon alpha/beta signaling
## 4  Genes involved in Downstream Signaling Events Of B Cell Receptor (BCR)
## 5                                     Genes involved in Signalling by NGF
## 6                Genes involved in Signaling by the B Cell Receptor (BCR)
## 7                                  Genes involved in Signaling by SCF-KIT
## 8                                             Regulation of RAC1 activity
## 9                                         Genes involved in Immune System
## 10                         Genes involved in PIP3 activates AKT signaling
##    Binom_Genome_Fraction Binom_Expected Binom_Observed_Region_Hits
## 1            0.005806883      12.531250                         37
## 2            0.006121690      13.210610                         38
## 3            0.001747780       3.771710                         19
## 4            0.006124732      13.217170                         37
## 5            0.020280090      43.764440                         83
## 6            0.009296625      20.062120                         48
## 7            0.006275704      13.542970                         37
## 8            0.004501446       9.714120                         30
## 9            0.056859840     122.703500                        182
## 10           0.002103194       4.538692                         19
##    Binom_Fold_Enrichment Binom_Region_Set_Coverage Binom_Raw_PValue
## 1               2.952618               0.017145510     1.434094e-08
## 2               2.876477               0.017608900     1.799178e-08
## 3               5.037503               0.008804449     1.988724e-08
## 4               2.799389               0.017145510     5.366261e-08
## 5               1.896517               0.038461540     5.825394e-08
## 6               2.392569               0.022242820     7.137937e-08
## 7               2.732045               0.017145510     9.693786e-08
## 8               3.088288               0.013901760     1.258219e-07
## 9               1.483250               0.084337350     1.415786e-07
## 10              4.186228               0.008804449     3.277231e-07
##    Binom_Adjp_BH Hyper_Total_Genes Hyper_Expected Hyper_Observed_Gene_Hits
## 1   8.743757e-06                74       9.909689                       25
## 2   8.743757e-06                63       8.436627                       22
## 3   8.743757e-06                48       6.427906                       11
## 4   1.536739e-05                90      12.052320                       27
## 5   1.536739e-05               209      27.988170                       57
## 6   1.569156e-05               119      15.935850                       35
## 7   1.826586e-05                73       9.775774                       25
## 8   2.074489e-05                38       5.088759                       13
## 9   2.074913e-05               792     106.060500                      141
## 10  4.075300e-05                27       3.615697                       14
##    Hyper_Fold_Enrichment Hyper_Gene_Set_Coverage Hyper_Term_Gene_Coverage
## 1               2.522784             0.008523696                0.3378378
## 2               2.607677             0.007500852                0.3492063
## 3               1.711288             0.003750426                0.2291667
## 4               2.240232             0.009205592                0.3000000
## 5               2.036574             0.019434030                0.2727273
## 6               2.196306             0.011933170                0.2941176
## 7               2.557342             0.008523696                0.3424658
## 8               2.554650             0.004432322                0.3421053
## 9               1.329431             0.048073640                0.1780303
## 10              3.872006             0.004773270                0.5185185
##    Hyper_Raw_PValue Hyper_Adjp_BH
## 1      6.065365e-06  3.200087e-04
## 2      1.184083e-05  4.338349e-04
## 3      4.924903e-02  1.840212e-01
## 4      3.089307e-05  8.489158e-04
## 5      7.317380e-08  2.244281e-05
## 6      3.668240e-06  2.556315e-04
## 7      4.597072e-06  3.031769e-04
## 8      8.932663e-04  1.132902e-02
## 9      2.077171e-04  4.005688e-03
## 10     2.083329e-06  1.962794e-04
great_Job <- submitGreatJob(ebf1_proBunique,species="mm10",request_interval=1,version = "3.0.0")
availableCategories(great_Job)
## [1] "GO"                               "Phenotype Data and Human Disease"
## [3] "Pathway Data"                     "Gene Expression"                 
## [5] "Regulatory Motifs"                "Gene Families"
great_ResultTable = getEnrichmentTables(great_Job,
                                        category="Pathway Data")
## The default enrichment tables contain no associated genes for the input
## regions. You can set `download_by = 'tsv'` to download the complete
## table, but note only the top 500 regions can be retreived. See the
## following link:
## 
## https://great-help.atlassian.net/wiki/spaces/GREAT/pages/655401/Export#Export-GlobalExport
names(great_ResultTable)
## [1] "PANTHER Pathway" "BioCyc Pathway"  "MSigDB Pathway"
msigPath_proBcells <- great_ResultTable[["MSigDB Pathway"]]


msigPath_proBcells[1:10,]
##                                                                           ID
## 1                                          REACTOME_METABOLISM_OF_PORPHYRINS
## 2                                                          PID_PDGFRBPATHWAY
## 3                             KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY
## 4                                                        REACTOME_HEMOSTASIS
## 5                                                     REACTOME_IMMUNE_SYSTEM
## 6                                                          PID_PI3KCIPATHWAY
## 7                                                           PID_BCR_5PATHWAY
## 8                                                    KEGG_TASTE_TRANSDUCTION
## 9                                      KEGG_FC_GAMMA_R_MEDIATED_PHAGOCYTOSIS
## 10 REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE
##                                                                                  name
## 1                                          Genes involved in Metabolism of porphyrins
## 2                                                        PDGFR-beta signaling pathway
## 3                                           Natural killer cell mediated cytotoxicity
## 4                                                        Genes involved in Hemostasis
## 5                                                     Genes involved in Immune System
## 6                                                       Class I PI3K signaling events
## 7                                                               BCR signaling pathway
## 8                                                                  Taste transduction
## 9                                                    Fc gamma R-mediated phagocytosis
## 10 Genes involved in Deposition of New CENPA-containing Nucleosomes at the Centromere
##    Binom_Genome_Fraction Binom_Expected Binom_Observed_Region_Hits
## 1           0.0003517876       1.589376                         28
## 2           0.0120526400      54.453810                        144
## 3           0.0066675330      30.123920                         91
## 4           0.0357002700     161.293800                        284
## 5           0.0568598400     256.892800                        399
## 6           0.0037282650      16.844300                         62
## 7           0.0061216900      27.657790                         82
## 8           0.0021354530       9.647976                         45
## 9           0.0072730230      32.859520                         88
## 10          0.0014112860       6.376189                         35
##    Binom_Fold_Enrichment Binom_Region_Set_Coverage Binom_Raw_PValue
## 1              17.616970               0.006197432     2.831283e-25
## 2               2.644443               0.031872510     2.554682e-24
## 3               3.020856               0.020141660     2.290614e-19
## 4               1.760762               0.062859670     3.066842e-19
## 5               1.553177               0.088313410     1.308965e-17
## 6               3.680770               0.013722890     1.835537e-17
## 7               2.964806               0.018149620     3.742359e-17
## 8               4.664191               0.009960159     1.186513e-16
## 9               2.678067               0.019477640     9.529404e-16
## 10              5.489173               0.007746791     2.642269e-15
##    Binom_Adjp_BH Hyper_Total_Genes Hyper_Expected Hyper_Observed_Gene_Hits
## 1   3.734462e-22                14       3.011962                        8
## 2   1.684813e-21               127      27.322800                       67
## 3   1.007107e-16               101      21.729160                       36
## 4   1.011291e-16               424      91.219430                      157
## 5   3.453050e-15               792     170.391000                      250
## 6   4.035122e-15                48      10.326730                       26
## 7   7.051674e-15                63      13.553830                       40
## 8   1.956263e-14                26       5.593644                        8
## 9   1.396587e-13                92      19.792900                       43
## 10  3.485153e-13                51      10.972150                        5
##    Hyper_Fold_Enrichment Hyper_Gene_Set_Coverage Hyper_Term_Gene_Coverage
## 1              2.6560760             0.001697793               0.57142860
## 2              2.4521640             0.014219020               0.52755910
## 3              1.6567600             0.007640068               0.35643560
## 4              1.7211250             0.033319190               0.37028300
## 5              1.4672140             0.053056030               0.31565660
## 6              2.5177380             0.005517827               0.54166670
## 7              2.9511950             0.008488964               0.63492060
## 8              1.4301950             0.001697793               0.30769230
## 9              2.1724970             0.009125637               0.46739130
## 10             0.4556993             0.001061121               0.09803922
##    Hyper_Raw_PValue Hyper_Adjp_BH
## 1      3.888215e-03  2.182364e-02
## 2      1.008313e-14  1.329965e-11
## 3      7.711774e-04  6.826731e-03
## 4      1.334306e-13  8.799748e-11
## 5      1.298131e-11  4.280587e-09
## 6      7.359506e-07  4.044662e-05
## 7      7.825095e-13  3.440433e-10
## 8      1.788232e-01  3.428311e-01
## 9      6.382067e-08  6.475343e-06
## 10     9.916507e-01  1.000000e+00