params <- list(isSlides = "no") ## ----setup_varManS2, include=FALSE-------------------------------------------- knitr::opts_chunk$set(echo = TRUE,cache = TRUE,cache.lazy = FALSE, tidy = T) # AsSlides <- TRUE # suppressPackageStartupMessages(library(VariantAnnotation)) suppressPackageStartupMessages(library(DT)) suppressPackageStartupMessages(library(BSgenome.Hsapiens.UCSC.hg19)) suppressPackageStartupMessages(library(TxDb.Hsapiens.UCSC.hg19.knownGene)) suppressPackageStartupMessages(library(SNPlocs.Hsapiens.dbSNP144.GRCh37)) suppressPackageStartupMessages(library(ggplot2)) vcf <- readVcf("data/SAMN01882168_filt.vcf.gz","hg19") #vcf <- readVcf("../data/SAMN01882168_filt.vcf.gz","hg19") rd <- rowRanges(vcf) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides != "yes"){ cat("# Genomic Variants (part 2) --- " ) } ## ----annoRS_varMan------------------------------------------------------------ library(BSgenome.Hsapiens.UCSC.hg19) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(SNPlocs.Hsapiens.dbSNP144.GRCh37) ## ----chrSub,eval=TRUE,tidy=FALSE,echo=TRUE------------------------------------ names(vcf)[1:2] grepl(names(vcf),pattern = "chr1:")[1:2] vcf_chr1 <- vcf[grepl(names(vcf),pattern = "chr1:")] rd_chr1 <- rowRanges(vcf_chr1) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Annotating Variants

--- " ) }else{ cat("# Annotating Variants --- " ) } ## ----dbSNPv_varMan, cache=F--------------------------------------------------- all_snps <- SNPlocs.Hsapiens.dbSNP144.GRCh37 all_snps ## ----dbSNPv_varMan_load_fail,eval=FALSE--------------------------------------- ## tar_chr <- as.vector(seqnames(rd_chr1)@values) ## my_snps <- snpsBySeqname(all_snps,c(tar_chr)) ## ----dbSNPv_varMan_load_fail2,eval=TRUE,echo=FALSE,fig.align="center",out.width="75%"---- knitr::include_graphics("imgs/vcfMan_fig5r.png") ## ----check_seqLvl------------------------------------------------------------- # seqlevels ~ rd seqlevels(rd_chr1) # seqlevels ~ dbSNP seqlevels(all_snps) ## ----check_seqStyle----------------------------------------------------------- seqlevelsStyle(rd_chr1) seqlevelsStyle(all_snps) ## ----check_genome------------------------------------------------------------- genome(rd_chr1) genome(all_snps) ## ----dbSNPv_varMan_load, cache=F---------------------------------------------- tar_chr <- as.vector(seqnames(rd_chr1)@values) tar_chr <- gsub("chr","",tar_chr) tar_chr[grepl(tar_chr,pattern = "M")] <- "MT" my_snps <- snpsBySeqname(all_snps,c(tar_chr)) my_snps[1:2] ## ----change_seqlvl, cache=F--------------------------------------------------- # change seqlevelsStyle seqlevelsStyle(my_snps) <- "UCSC" # change genome genome(my_snps) <- "hg19" ## ----makeTab_varMan, cache=F-------------------------------------------------- snp_ID <- data.frame( posIDX=paste0(seqnames(my_snps),":",pos(my_snps)), rsID=my_snps$RefSNP_id,stringsAsFactors = FALSE) head(snp_ID) ## ----dbSNPV_varMan_tbl_1, cache=F--------------------------------------------- matV1 <- data.frame(Variant=names(rd_chr1),stringsAsFactors = FALSE) matV1[1:2,] ## ----dbSNPV_varMan_tbl_2, cache=F--------------------------------------------- matV1$chromosome <- gsub("(.*):(.*)_(.*)/(.*)","\\1",matV1$Variant) matV1$start <- gsub("(.*):(.*)_(.*)/(.*)","\\2",matV1$Variant) matV1$end <- gsub("(.*):(.*)_(.*)/(.*)","\\2",matV1$Variant) matV1$ref_allele <- gsub("(.*):(.*)_(.*)/(.*)","\\3",matV1$Variant) matV1$alt_allele <- gsub("(.*):(.*)_(.*)/(.*)","\\4",matV1$Variant) matV1$posIDX <- gsub("(.*)_(.*)","\\1",matV1$Variant) matV1[1:2,] ## ----dbSNPV_varMan_tbl_3,tidy=FALSE------------------------------------------- matS <- merge(matV1,snp_ID,all.x=TRUE,by="posIDX") matS <- dplyr::select(matS,-posIDX) matS[1:2,] ## ----dbSNPV_varMan_muCt,tidy=FALSE-------------------------------------------- taC2 <- table(!is.na(matS$rsID)) taC2_dat <- as.data.frame(taC2) taC2 ## ----dbSNPv_varMan_muCt_disp1,tidy=FALSE,eval=FALSE,echo=TRUE----------------- ## ggplot(taC2_dat,aes(x=Var1,y=Freq,fill=Var1))+ ## geom_bar(stat='Identity')+ ## labs(x="",y="Counts",fill="in_dbSNP")+ ## theme(legend.position = "none") ## ----dbSNPv_varMan_muCt_disp2,tidy=FALSE,echo=FALSE,eval=TRUE,fig.align="center"---- ggplot(taC2_dat,aes(x=Var1,y=Freq,fill=Var1))+ geom_bar(stat='Identity')+ labs(x="",y="Counts",fill="in_dbSNP")+ theme(legend.position = "none") ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Variants and Amino Acid Changes

--- " ) }else{ cat("# Variants and Amino Acid Changes --- " ) } ## ----aaCh_varMan-------------------------------------------------------------- txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene ## ----aaCh_varMan_txdb--------------------------------------------------------- txdb ## ----aaCh_varMan_pred--------------------------------------------------------- coding <- predictCoding(vcf_chr1, txdb, seqSource=Hsapiens) ## ----aaCh_varMan_pres2-------------------------------------------------------- coding[1] ## ----aaCh_varMan_frame,eval=TRUE,tidy=FALSE,echo=TRUE------------------------- matA <- data.frame(Variant=names(coding), chromosome=seqnames(coding), start=start(coding),end=end(coding), ref_allele=as.character(coding$REF), alt_allele=unlist(lapply(lapply( coding$ALT,`[[`,1),as.character)), GeneID=coding$GENEID, TxID=coding$TXID, Protein_posi=unlist(lapply(lapply( coding$PROTEINLOC,`[[`,1),as.integer)), ref_AA=as.character(coding$REFAA), alt_AA=as.character(coding$VARAA), Type=coding$CONSEQUENCE) matA$aaChange <- paste0("p.",matA$ref_AA,matA$Protein_posi,matA$alt_AA) matA <- dplyr::select(matA,-Protein_posi,-ref_AA,-alt_AA) ## ----aaCh_varMan_tbl---------------------------------------------------------- matA[1:2,] ## ----aaCh_varMan_muCt--------------------------------------------------------- var_in_coding <- data.frame(varName=names(vcf_chr1), in_coding=names(vcf_chr1) %in% matA$Variant, stringsAsFactors = FALSE) table(var_in_coding$in_coding) ## ----aaCh_varMan_muType------------------------------------------------------- taC <- table(matA$Type) taC_dat <- as.data.frame(taC) taC ## ----aaCh_varMan_muType_disp1,tidy=FALSE,echo=TRUE,eval=FALSE----------------- ## ggplot(taC_dat,aes(x=Var1,y=Freq,fill=Var1))+ ## geom_bar(stat='Identity')+ ## labs(x="",y="Counts",fill="")+ ## theme(legend.position = "none") ## ----aaCh_varMan_muType_disp2,tidy=FALSE,echo=FALSE,eval=TRUE,fig.align="center"---- ggplot(taC_dat,aes(x=Var1,y=Freq,fill=Var1))+ geom_bar(stat='Identity')+ labs(x="",y="Counts",fill="")+ theme(legend.position = "none") ## ----comb_varMan-------------------------------------------------------------- matS$GeneID <- matA$GeneID[match(matS$Variant,matA$Variant)] matS$AAChange <- matA$GeneID[match(matS$Variant,matA$Variant)] matS[1:2,]