In this exercise, we will practice handling variants from multiple samples. An MAF file of breast carcinoma (BRCA) was fetched from TCGA. The file is available at “data/tcga_brca.maf”; load it and answer the following questions.
## -Reading
## -Validating
## --Removed 319 duplicated variants
## -Summarizing
## --Possible FLAGS among top ten genes:
## TTN
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 10.0s elapsed (9.964s cpu)
## [1] 976 10
#
# Stash each sample's total variant count in a vector named by barcode;
# the `total` column is dropped from `sample_sum` before reshaping.
var_to <- setNames(sample_sum$total, sample_sum$Tumor_Sample_Barcode)
sample_sum <- dplyr::select(sample_sum, -total)

# Long format: one row per sample / variant-class combination.
melt_dat <- reshape2::melt(sample_sum, id.vars = "Tumor_Sample_Barcode")

# Look the per-sample total back up by barcode, then express every count
# as a fraction of that sample's total.
melt_dat$totalVar <- var_to[
  match(melt_dat$Tumor_Sample_Barcode, names(var_to))
]
melt_dat$prop <- with(melt_dat, value / totalVar)
head(melt_dat)
## Tumor_Sample_Barcode variable value totalVar prop
## 1 TCGA-AN-A046 Frame_Shift_Del 5 4352 0.001148897
## 2 TCGA-AC-A23H Frame_Shift_Del 14 3452 0.004055620
## 3 TCGA-A8-A0A6 Frame_Shift_Del 3 2052 0.001461988
## 4 TCGA-A2-A0T5 Frame_Shift_Del 2 1078 0.001855288
## 5 TCGA-BH-A18G Frame_Shift_Del 159 1055 0.150710900
## 6 TCGA-A8-A09Z Frame_Shift_Del 129 1000 0.129000000
#
# Stacked bars of log10 variant counts per sample, coloured by variant class.
# NOTE(review): a count of 0 becomes -Inf under log10(); ggplot2 appears to
# drop those rows and emit a "Removed ... rows" warning.
ggplot(
  data = melt_dat,
  mapping = aes(
    x = Tumor_Sample_Barcode,
    y = log10(value),
    fill = variable
  )
) +
  geom_bar(stat = "identity", position = "stack") +
  labs(x = "", y = "Mutations", fill = "") +
  # Sample barcodes are hidden on the x axis.
  theme(axis.text.x = element_blank())
## Warning: Removed 3506 rows containing missing values (geom_bar).
## Hugo_Symbol Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins
## 1: PIK3CA 0 2 4 0
## 2: TP53 40 12 5 0
## 3: TTN 10 1 2 1
## 4: CDH1 24 30 0 2
## 5: GATA3 16 50 0 0
## ---
## 15232: ZSCAN9 0 0 0 0
## 15233: ZSWIM1 0 0 0 0
## 15234: ZW10 0 0 0 0
## 15235: ZXDA 0 0 0 0
## 15236: ZYX 0 0 0 0
## Pathway N n_affected_genes fraction_affected Mutated_samples
## 1: NRF2 3 3 1.0000000 10
## 2: TP53 6 6 1.0000000 326
## 3: TGF-Beta 7 7 1.0000000 37
## 4: MYC 13 11 0.8461538 34
## 5: Cell_Cycle 15 13 0.8666667 52
## 6: PI3K 29 26 0.8965517 403
## 7: Hippo 38 36 0.9473684 186
## 8: WNT 68 54 0.7941176 152
## 9: NOTCH 71 58 0.8169014 217
## 10: RTK-RAS 85 76 0.8941176 253
## Fraction_mutated_samples
## 1: 0.01024590
## 2: 0.33401639
## 3: 0.03790984
## 4: 0.03483607
## 5: 0.05327869
## 6: 0.41290984
## 7: 0.19057377
## 8: 0.15573770
## 9: 0.22233607
## 10: 0.25922131
library(BSgenome.Hsapiens.UCSC.hg19, quietly = TRUE)
# Build the per-sample trinucleotide-context mutation matrix from the MAF
# object, using the hg19 BSgenome package as the reference sequence.
# NOTE(review): `prefix = "chr"` with `add = TRUE` presumably prepends "chr"
# to the MAF contig names so they match the UCSC naming -- confirm against
# the maftools documentation.
brca.tnm <- trinucleotideMatrix(
  maf = brca,
  prefix = "chr",
  add = TRUE,
  ref_genome = "BSgenome.Hsapiens.UCSC.hg19"
)
## -Extracting 5' and 3' adjacent bases
## -Extracting +/- 20bp around mutated bases for background C>T estimation
## -Estimating APOBEC enrichment scores
## --Performing one-way Fisher's test for APOBEC enrichment
## ---APOBEC related mutations are enriched in 29.527 % of samples (APOBEC enrichment score > 2 ; 287 of 972 samples)
## -Creating mutation matrix
## --matrix of dimension 975x96
#
library('NMF')
# Run NMF over a range of factorization ranks on the trinucleotide matrix
# to help choose the number of signatures.
# NOTE(review): `parallel = 1` presumably restricts the run to one core.
brca.sign <- estimateSignatures(
  mat = brca.tnm,
  nTry = 10,
  # pConstant = 0.1,  # left disabled, as in the original call
  parallel = 1
)
## -Running NMF for 10 ranks
## Compute NMF rank= 2 ... + measures ... OK
## Compute NMF rank= 3 ... + measures ... OK
## Compute NMF rank= 4 ... + measures ... OK
## Compute NMF rank= 5 ... + measures ... OK
## Compute NMF rank= 6 ... + measures ... OK
## Compute NMF rank= 7 ... + measures ... OK
## Compute NMF rank= 8 ... + measures ... OK
## Compute NMF rank= 9 ... + measures ... OK
## Compute NMF rank= 10 ... + measures ... OK
## -Finished in 00:23:56 elapsed (00:24:00 cpu)
## -Running NMF for factorization rank: 6
## -Finished in12.9s elapsed (12.9s cpu)
#
# Plot the signatures held in `brca.sig.ext`, with titles shown.
# NOTE(review): `sig_db = "legacy"` presumably selects the legacy COSMIC
# signature set for annotating the titles -- confirm in the maftools docs.
plotSignatures(
  nmfRes = brca.sig.ext,
  title_size = 1.2,
  contributions = FALSE,
  show_title = TRUE,
  sig_db = "legacy"
)
## Warning in signatureEnrichment(maf = brca, sig_res = brca.sig.ext): !!Do not use
## this function. This will be removed in future!!
## Running k-means for signature assignment..
## Performing pairwise and groupwise comparisions..
## Sample size per factor in Signature:
##
## Signature_1 Signature_2 Signature_3 Signature_4 Signature_5 Signature_6
## 75 198 126 126 110 221
## Estimating mutation load and signature exposures..