params <-
list(isSlides = "no")
## ----setup, include=FALSE-----------------------------------------------------
suppressPackageStartupMessages(require(knitr))
suppressPackageStartupMessages(require(Seurat))
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(bioMart))
suppressPackageStartupMessages(require(SeuratWrappers))
suppressPackageStartupMessages(require(ggplot2))
knitr::opts_chunk$set(echo = TRUE, tidy = T)
## ----warning=F, message=F, echo=F, eval=F-------------------------------------
# library(RCurl)
# if(!url.exists("https://cf.10xgenomics.com/samples/cell-exp/2.1.0/pbmc8k/pbmc8k_filtered_gene_bc_matrices.tar.gz")){stop("Download path broken")}
#
## ----eval=F-------------------------------------------------------------------
# download.file("https://cf.10xgenomics.com/samples/cell-exp/2.1.0/pbmc8k/pbmc8k_filtered_gene_bc_matrices.tar.gz","pbmc8k_filtered_gene_bc_matrices.tar.gz")
## ----eval=F-------------------------------------------------------------------
# untar("pbmc8k_filtered_gene_bc_matrices.tar.gz")
#
## ----eval=F-------------------------------------------------------------------
# dir()
#
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Cell Ranger to Seurat
---
"
)
}else{
cat("# Cell Ranger to Seurat
---
"
)
}
## ----echo=T,eval=TRUE---------------------------------------------------------
mtx_dir <- "filtered_gene_bc_matrices/GRCh38"
## ----load_data,include=TRUE,eval=F--------------------------------------------
# library(Seurat)
#
# mtx <- Seurat::Read10X(mtx_dir)
#
## ----eval=F, echo=F-----------------------------------------------------------
# a <- is(mtx)
# b <- head(mtx)
# save(a,b, file="data/pbmc8k_mtx_res.RData")
## ----eval=F, echo=F-----------------------------------------------------------
# library(Seurat)
# save(mtx, file = "data/pbmc8k_seurat_read.RData")
# load("data/pbmc8k_seurat_read.RData")
## ----eval=T, echo=F-----------------------------------------------------------
library(Seurat)
load("data/pbmc8k_mtx_res.RData")
## ----eval=F, echo=T-----------------------------------------------------------
# is(mtx)
## ----eval=T, echo=F-----------------------------------------------------------
a
## ----eval=F, echo=T-----------------------------------------------------------
# head(mtx)
## ----eval=T, echo=F-----------------------------------------------------------
b
## ----load_h5,include=TRUE,eval=FALSE------------------------------------------
# h5_file <- "path to matrix h5 file"
# h5_file <- "~/Downloads/pbmc8k_raw_gene_bc_matrices_h5.h5"
# mtx <- Seurat::Read10X_h5(h5_file)
## ----load_CreateOBJ,include=TRUE,eval=TRUE------------------------------------
sample_id <- "PBMC_8k" # sample name
min_gene <- 200
min_cell <- 10
## ----eval=F-------------------------------------------------------------------
# seu_obj <- Seurat::CreateSeuratObject(mtx, project=sample_id, min.cells=min_cell, min.features=min_gene)
## ----eval=F, echo=F-----------------------------------------------------------
# save(seu_obj, file="data/pbmc8k_seu_obj_raw.RData")
## ----eval=T, echo=F-----------------------------------------------------------
load("data/pbmc8k_seu_obj_raw.RData")
## -----------------------------------------------------------------------------
seu_obj[["dset"]] <- sample_id # Create a category for sample
seu_obj <- Seurat::RenameCells(seu_obj, add.cell.id=sample_id) # add sample name in front of cell barcode
## ----laod_CreatOBJ_pres,include=TRUE,eval=TRUE--------------------------------
seu_obj
## -----------------------------------------------------------------------------
head(seu_obj,2)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Normalization, feature selection, and data scaling
---
"
)
}else{
cat("# Normalization, feature selection, and data scaling
---
"
)
}
## ----norm_log,include=TRUE,eval=TRUE------------------------------------------
seu_obj <- NormalizeData(seu_obj, normalization.method="LogNormalize")
## -----------------------------------------------------------------------------
seu_obj <- FindVariableFeatures(seu_obj, select.method="vst", nfeatures=3000)
## -----------------------------------------------------------------------------
seu_obj <- ScaleData(seu_obj)
## ----norm_plotHVF,include=TRUE, eval=F----------------------------------------
# top10 <- head(VariableFeatures(seu_obj), 10)
#
# plot1 <- VariableFeaturePlot(seu_obj)
# plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
# plot2
## ----echo=F,fig.height=4,fig.width=7------------------------------------------
top10 <- head(VariableFeatures(seu_obj), 10)
plot1 <- VariableFeaturePlot(seu_obj)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot2
## ----eval=F, echo=F-----------------------------------------------------------
# download.file("https://cf.10xgenomics.com/samples/cell-exp/2.1.0/pbmc8k/pbmc8k_filtered_gene_bc_matrices.tar.gz","pbmc8k_filtered_gene_bc_matrices.tar.gz")
# untar("pbmc8k_filtered_gene_bc_matrices.tar.gz")
## ----include=F,echo=T,eval=F--------------------------------------------------
# library(Seurat)
#
# sample_id <- "PBMC_8k"
# min_gene <- 200
# min_cell <- 10
#
# mtx_dir <- "filtered_gene_bc_matrices/GRCh38"
# seu_obj <- Seurat::Read10X(mtx_dir)
# seu_obj <- Seurat::CreateSeuratObject(seu_obj, project=sample_id, min.cells=min_cell, min.features=min_gene)
# seu_obj <- NormalizeData(seu_obj, normalization.method="LogNormalize")
# seu_obj <- FindVariableFeatures(seu_obj, select.method="vst", nfeatures=3000)
# seu_obj <- ScaleData(seu_obj)
## ----eval=F, echo=F-----------------------------------------------------------
# unlink("pbmc8k_filtered_gene_bc_matrices.tar.gz", recursive=TRUE)
# unlink("filtered_feature_bc_matrix/GRCh38", recursive=TRUE)
## ----norm_sct, include=TRUE, eval=F-------------------------------------------
# seu_obj <- SCTransform(seu_obj, variable.features.n = 3000)
# seu_obj
## ----eval=F, echo=F-----------------------------------------------------------
# SCT_assay <- seu_obj@assays$SCT
# save(SCT_assay, file="data/pbmc8k_SCT_assay.RData")
## ----eval=T, echo=F-----------------------------------------------------------
load("data/pbmc8k_SCT_assay.RData")
seu_obj@assays$SCT <- SCT_assay
DefaultAssay(seu_obj) <- "SCT"
seu_obj
## ----include=TRUE, eval=T-----------------------------------------------------
DefaultAssay(seu_obj) <- "RNA"
DefaultAssay(seu_obj) <- "SCT"
## ----include=F,warning=F, message=F, echo=FALSE-------------------------------
rm(SCT_assay)
gc()
## ----norm_comp,include=TRUE,eval=F--------------------------------------------
# log_mat <- GetAssayData(seu_obj,assay="RNA",slot="data")
# log_mat <- as.matrix(log_mat)
# log_avgExp <- rowMeans(log_mat)
#
# sct_mat <- GetAssayData(seu_obj,assay="SCT",slot="data")
# sct_mat <- as.matrix(sct_mat)
# sct_avgExp <- rowMeans(sct_mat)
## ----eval=F, echo=F-----------------------------------------------------------
#
# exp_sct <- list(log_avgExp,sct_avgExp)
# names(exp_sct) <- c("logNorm","SCTransform")
# save(exp_sct, file="data/pbmc8k_exp_sct.RData")
#
## ----eval=T, echo=F-----------------------------------------------------------
load("data/pbmc8k_exp_sct.RData")
log_avgExp <- exp_sct$logNorm
sct_avgExp <- exp_sct$SCTransform
## ----eval=F-------------------------------------------------------------------
# library(ggplot2)
#
# dat <- data.frame(logNorm=log_avgExp, SCT=sct_avgExp)
# cor_val <- cor.test(log_avgExp,sct_avgExp,method = "spearman")
#
# ggplot(dat,aes(x=logNorm,y=SCT))+geom_point()+geom_smooth()+
# labs(x="Log_Normalization",y="SCTransform",subtitle = paste0("rho=",round(cor_val$estimate,3),"; p-value=",cor_val$p.value[1]))+
# theme_classic()
## ----echo=F, fig.height=4,fig.width=7-----------------------------------------
dat <- data.frame(logNorm=log_avgExp, SCT=sct_avgExp)
cor_val <- cor.test(log_avgExp,sct_avgExp,method = "spearman")
ggplot(dat,aes(x=logNorm,y=SCT))+geom_point()+geom_smooth()+
labs(x="Log_Normalization",y="SCTransform",subtitle = paste0("rho=",round(cor_val$estimate,3),"; p-value=",cor_val$p.value[1]))+
theme_classic()
## ----echo=F, eval=T, warning=F, message=FALSE, include=F----------------------
rm(dat, log_mat, sct_mat, plot1, plot2, SCT_assay)
DefaultAssay(seu_obj) <- "RNA"
gc()
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Dimension Reduction
---
"
)
}else{
cat("# Dimension Reduction
---
"
)
}
## ----dimRed_pca,include=TRUE,eval=T-------------------------------------------
set.seed(1001)
DefaultAssay(seu_obj) <- "RNA"
seu_obj <- RunPCA(seu_obj, assay = "RNA", npcs = 50)
## ----dimRed_elbow,include=TRUE,eval=TRUE, fig.height=4,fig.width=7------------
ElbowPlot(seu_obj, ndims = 50, reduction = "pca")
## ----dimRed_elbow2,include=TRUE,eval=T----------------------------------------
library(dplyr)
# Determine percent of variation associated with each PC
pct <- seu_obj[["pca"]]@stdev / sum(seu_obj[["pca"]]@stdev) * 100
# Calculate cumulative percents for each PC
cumu <- cumsum(pct)
## ----include=TRUE,eval=T------------------------------------------------------
# Determine which PC exhibits cumulative percent greater than 90% and % variation associated with the PC as less than 5
co1 <- which(cumu > 90 & pct < 5)[1]
# Determine the difference between variation of PC and subsequent PC, last point where change of % of variation is more than 0.1%.
co2 <- sort(which((pct[1:length(pct) - 1] - pct[2:length(pct)]) > 0.1), decreasing = T)[1] + 1
pc <- min(co1, co2)
pc
## ----dimRed_elbow2_rep,include=TRUE,eval=F------------------------------------
# plot_df <- data.frame(pct = pct, cumu = cumu,rank = 1:length(pct))
#
# ggplot(plot_df, aes(cumu, pct, label = rank, color = rank > pc)) + geom_text() +
# geom_vline(xintercept = 90, color = "grey") +
# geom_hline(yintercept = min(pct[pct > 5]), color = "grey") +
# theme_bw()
## ----eval=T, echo=F, fig.height=4,fig.width=7---------------------------------
plot_df <- data.frame(pct = pct, cumu = cumu,rank = 1:length(pct))
ggplot(plot_df, aes(cumu, pct, label = rank, color = rank > pc)) + geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(pct[pct > 5]), color = "grey") +
theme_bw()
## ----dimRed_UMAP,include=TRUE,eval=T------------------------------------------
seu_obj <- FindNeighbors(seu_obj,dims = 1:pc, reduction = "pca")
seu_obj <- RunUMAP(seu_obj,dims = 1:pc, reduction = "pca")
## ----fig.height=4,fig.width=7-------------------------------------------------
DimPlot(seu_obj)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Clustering
---
"
)
}else{
cat("# Clustering
---
"
)
}
## ----clust_default,include=TRUE,eval=T----------------------------------------
seu_obj <- FindClusters(seu_obj, resolution = 0.5)
seu_obj[["cluster_byDefault"]] <- seu_obj$seurat_clusters
## ----fig.height=4,fig.width=7-------------------------------------------------
DimPlot(seu_obj, group.by = "seurat_clusters",label = TRUE,pt.size = 0.2)+NoLegend()
## ----clust_resoEval,include=TRUE,eval=T, warning=FALSE, message=FALSE---------
library(clustree)
reso <- c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1)
reso_res <- lapply(1:length(reso), function(x,seu_obj,reso){
seu_obj <- FindClusters(seu_obj,resolution = reso[x])
clust <- setNames(seu_obj$seurat_clusters,Cells(seu_obj))
return(clust)}, seu_obj, reso)
names(reso_res) <- paste0("k",1:length(reso))
## ----eval=F-------------------------------------------------------------------
# remotes::install_github("thomasp85/tweenr")
## ----include=TRUE,eval=T------------------------------------------------------
k_tab <- do.call(cbind,reso_res)
k_dat <- as.data.frame(k_tab)
head(k_dat,2)
## ----clust_resoEval2,include=TRUE,eval=F--------------------------------------
# clustree(k_dat, prefix = "k", node_colour = "sc3_stability")
## ----include=TRUE,eval=T, echo=F, warning=FALSE, fig.height=4,fig.width=7-----
clustree(k_dat, prefix = "k", node_colour = "sc3_stability")
## ----clust_resoFix,include=TRUE,eval=T, fig.height=4,fig.width=7, warning=FALSE, message=FALSE----
seu_obj <- FindClusters(seu_obj, resolution = 0.6)
## ----fig.height=4,fig.width=7-------------------------------------------------
DimPlot(seu_obj, group.by = "seurat_clusters",label = TRUE,pt.size = 0.2) + NoLegend() + ggtitle("Optimized Clusters")
## ----include=TRUE,eval=T, fig.height=4,fig.width=7----------------------------
DimPlot(seu_obj, group.by = "cluster_byDefault",label = TRUE,pt.size = 0.2) + NoLegend() + ggtitle("Default Clusters")
## ----fig.height=4,fig.width=6-------------------------------------------------
FeaturePlot(seu_obj,features = "LYZ",pt.size = 0)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Droplet processing
---
"
)
}else{
cat("# Droplet processing
---
"
)
}
## input_h5=the_raw_matrix_in_h5_format_from_cellranger #essential
## output_h5=assign_the_h5_file_path_for_the_cellbender_corrected_matrix # essential
## fpr=threshold_of_FALSE_POSITIVE_RATE # default 0.01
## epochs=number_of_epochs_to_train # default 150
## num_train=number_of_times_to_attempt_to_train_the_model # default 1. would speed up while setting greater
##
## cellbender remove-background --input $input_h5 --output $output_h5 --fpr $fpr --epochs $epochs --num-training-tries $num_train --cuda
## ptrepack --complevel 5 celbender_filtered.h5:/matrix celbender_filtered_forseurat.h5:/matrix
##
## ----sec3_dropProc_cbFilt_loadData,include=F,eval=T, echo=FALSE---------------
load("data/pbmc8k_cbFilt_20250131_filtered_mtx.RData")
## ----include = T, eval = F, echo = T------------------------------------------
#
#
# cbFilt_mtx <- Read10X_h5("~/Downloads/cbFilt_PBMC8K_20250131_filtered_forseurat.h5")
#
## ----sec_mergeData_funcUsed_dataProc,include=TRUE-----------------------------
data_proc <- function(seu){
seu <- NormalizeData(seu, normalization.method="LogNormalize", scale.factor=10000)
seu <- FindVariableFeatures(seu, select.method="vst", nfeatures=2000)
return(seu)}
## ----sec3_mergeData_funcUsed_quickClust,include=TRUE--------------------------
quick_clust <- function(seu){
set.seed(42)
seu <- RunPCA(seu, npcs=30, verbose=FALSE)
seu <- RunUMAP(seu, reduction = "pca", dims = 1:10, verbose=FALSE)
seu <- FindNeighbors(seu, reduction = "pca", dims = 1:10, verbose=FALSE)
seu <- FindClusters(seu, resolution = 0.5, verbose=FALSE)
return(seu)}
## ----sec3_dropProc_cb_Filt_dataPRoc,include=TRUE,eval=T, message=F, warning=F----
message("processing matrix from CellBender")
seu <- CreateSeuratObject(cbFilt_mtx)
seu <- data_proc(seu)
seu <- ScaleData(seu)
seu <- quick_clust(seu)
seu_cbFilt <- seu
## ----echo=F, eval=T, warning=F, message=FALSE, include=F----------------------
rm(seu)
gc()
## ----fig.height=4,fig.width=6-------------------------------------------------
DimPlot(seu_obj ,group.by = "seurat_clusters",pt.size = 0.1,label = TRUE)+NoLegend()
## ----fig.height=4,fig.width=6-------------------------------------------------
DimPlot(seu_cbFilt,group.by = "seurat_clusters",pt.size = 0.1,label = TRUE)+NoLegend()
## ----fig.height=4,fig.width=7-------------------------------------------------
mark_gene <- c("LYZ","HLA-DRA")
FeaturePlot(seu_obj,features = mark_gene,pt.size = 0)
## ----fig.height=3,fig.width=7-------------------------------------------------
VlnPlot(seu_obj,features = mark_gene,group.by = "seurat_clusters",pt.size = 0)
## ----fig.height=4,fig.width=7-------------------------------------------------
FeaturePlot(seu_cbFilt,features = mark_gene,pt.size = 0)
## ----fig.height=3,fig.width=7-------------------------------------------------
VlnPlot(seu_cbFilt,features = mark_gene,group.by = "seurat_clusters",pt.size = 0)
## ----echo=F, warning=F, message=FALSE-----------------------------------------
rm(cbFilt_mtx, filt_mtx, seu_obj)
gc()
## -----------------------------------------------------------------------------
sample_id <- "PBMC_8k"
seu_cbFilt[["dset"]] <- sample_id # Create a category for sample
seu_cbFilt <- Seurat::RenameCells(seu_cbFilt, add.cell.id=sample_id) # add sample name in front of cell barcode
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Mitochondrial Proportion
---
"
)
}else{
cat("# Mitochondrial Proportion
---
"
)
}
## -----------------------------------------------------------------------------
mt_gene <- c("MT-ND1","MT-ND2","MT-ND3","MT-ND4","MT-ND4L","MT-ND5","MT-ND6",
"MT-CO1","MT-CO2","MT-CO3","MT-ATP6","MT-ATP8","MT-CYB")
mt_gene_det <- mt_gene[mt_gene %in% rownames(seu_cbFilt)]
seu_cbFilt[["percent.mt"]] <- PercentageFeatureSet(seu_cbFilt, features = mt_gene_det)
summary(seu_cbFilt$percent.mt)
## ----load_estMT,include=TRUE,eval=TRUE----------------------------------------
seu_cbFilt[["percent.mt2"]] <- PercentageFeatureSet(seu_cbFilt, pattern = "^MT-")
summary(seu_cbFilt$percent.mt2)
## ----eval =F------------------------------------------------------------------
# library(ggplot2)
# dat <- data.frame(byPattern=seu_cbFilt$percent.mt, byGene=seu_cbFilt$percent.mt2,stringsAsFactors = FALSE)
# cor_val <- cor.test(dat$byPattern,dat$byGene,method = "spearman")
# ggplot(dat,aes(x=byPattern,y=byGene))+geom_point()+geom_smooth()+
# labs(x="% of MT, est by genes",y="% of MT,est by pattern",
# subtitle = paste0("rho=",round(cor_val$estimate,3),"; p-value=",cor_val$p.value[1]))+
# theme_classic()
## ----echo =F, fig.height=4,fig.width=7----------------------------------------
library(ggplot2)
dat <- data.frame(byPattern=seu_cbFilt$percent.mt, byGene=seu_cbFilt$percent.mt2,stringsAsFactors = FALSE)
cor_val <- cor.test(dat$byPattern,dat$byGene,method = "spearman")
ggplot(dat,aes(x=byPattern,y=byGene))+geom_point()+geom_smooth()+
labs(x="% of MT, est by genes",y="% of MT,est by pattern",
subtitle = paste0("rho=",round(cor_val$estimate,3),"; p-value=",cor_val$p.value[1]))+
theme_classic()
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Cell Cycle Phases
---
"
)
}else{
cat("# Cell Cycle Phases
---
"
)
}
## ----ccPhase_Seurat,include=TRUE,eval=TRUE------------------------------------
feat_s <- cc.genes$s.genes
feat_g2m <- cc.genes$g2m.genes
feat_s
feat_g2m
## ----ccPhase_plot_Seurat,include=TRUE,eval=TRUE-------------------------------
DefaultAssay(seu_cbFilt) <- "RNA"
seu_cbFilt <- CellCycleScoring(seu_cbFilt, s.features = feat_s, g2m.features = feat_g2m)
## -----------------------------------------------------------------------------
dat_s <- data.frame(cell_id=Cells(seu_cbFilt), cat="S_Score", Phase=seu_cbFilt$Phase, score=seu_cbFilt$S.Score)
dat_g2m <- data.frame(cell_id=Cells(seu_cbFilt), cat="G2M_Score", Phase=seu_cbFilt$Phase, score=seu_cbFilt$G2M.Score)
dat <- rbind(dat_s, dat_g2m)
dat$Phase <- factor(dat$Phase, levels = c("G1","S","G2M"))
## ----fig.height=4,fig.width=7-------------------------------------------------
ggplot(dat,aes(x=Phase, y=score, fill=Phase))+geom_violin()+
labs(x="",y="Score",fill="Phase")+
facet_wrap(~cat)+theme_classic()
## ----ccPhase_cyclon_prep,include=TRUE,eval=TRUE, warning=F, message=F---------
library(scran)
sce <- as.SingleCellExperiment(seu_cbFilt, assay = "RNA")
rowData(sce)$SYMBOL <- rownames(sce)
## -----------------------------------------------------------------------------
sce
## -----------------------------------------------------------------------------
load("data/ccGene_mouse_human_geneSymbol_ensemblID_20220817.RData")
ccGene_hs <- ccGene_mm_hs$human_symbol
lapply(ccGene_hs, function(x){head(x,2)})
## ----ccPhase_cyclon_proc,include=TRUE,eval=FALSE------------------------------
# assignments <- cyclone(sce, ccGene_hs, gene.names=rowData(sce)$SYMBOL)
#
## ----echo=F, eval=F-----------------------------------------------------------
# save(assignments, file="data/pbmc8k_cyclone.RData")
## ----echo=F-------------------------------------------------------------------
load("data/pbmc8k_cyclone.RData")
## -----------------------------------------------------------------------------
lapply(assignments, head)
## -----------------------------------------------------------------------------
seu_cbFilt[["cyclon_Phase"]] <- assignments$phases
seu_cbFilt[["cyclon_G1Score"]] <- assignments$scores$G1
seu_cbFilt[["cyclon_SScore"]] <- assignments$scores$S
seu_cbFilt[["cyclon_G2MScore"]] <- assignments$scores$G2M
## ----ccPhase_cyclon_boxPlot,include=TRUE,eval=T-------------------------------
dat_g1 <- data.frame(cell_id=Cells(seu_cbFilt), cat="cyclon_G1Score", Phase=seu_cbFilt$cyclon_Phase, score=seu_cbFilt$cyclon_G1Score)
dat_s <- data.frame(cell_id=Cells(seu_cbFilt), cat="cyclon_SScore", Phase=seu_cbFilt$cyclon_Phase, score=seu_cbFilt$cyclon_SScore)
dat_g2m <- data.frame(cell_id=Cells(seu_cbFilt), cat="cyclon_G2MScore", Phase=seu_cbFilt$cyclon_Phase, score=seu_cbFilt$cyclon_G2MScore)
dat <- rbind(dat_g1,dat_s,dat_g2m)
dat$Phase <- factor(dat$Phase,levels = c("G1","S","G2M"))
dat$cat <- factor(dat$cat,levels = c("cyclon_G1Score","cyclon_SScore","cyclon_G2MScore"))
## ----fig.height=4,fig.width=7-------------------------------------------------
ggplot(dat,aes(x=Phase,y=score,fill=Phase))+geom_violin()+
labs(x="",y="Score",fill="Phase")+
facet_wrap(~cat)+theme_classic()
## -----------------------------------------------------------------------------
table(seu_cbFilt$Phase)
## -----------------------------------------------------------------------------
table(seu_cbFilt$cyclon_Phase)
## -----------------------------------------------------------------------------
table(seu_cbFilt$Phase,seu_cbFilt$cyclon_Phase)
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Detecting Doublets
---
"
)
}else{
cat("# Detecting Doublets
---
"
)
}
## ----eval=F, echo=FALSE-------------------------------------------------------
# library(Herper)
#
# conda_install <- install_CondaTools("scrublet", "scRNA", pathToMiniConda = "../mini")
#
# Sys.setenv('RETICULATE_PYTHON'=file.path(conda_install$pathToEnvBin, "python"))
## ----eval=F-------------------------------------------------------------------
# library(reticulate)
# reticulate::py_install("scrublet")
#
# scr <- reticulate::import("scrublet")
#
## ----det_doublet_est,include=TRUE,eval=F--------------------------------------
#
# mat <- GetAssayData(seu_cbFilt, assay = "RNA", slot = "counts")
# mat <- as.matrix(mat)
#
## ----eval=F-------------------------------------------------------------------
# # Loading the scrublet library
# scr <- import("scrublet")
# # Run scrublet
# scrub <- scr$Scrublet(t(mat))
# # Extract scrublet results
# doublet <- scrub$scrub_doublets()
# names(doublet) <- c("doublet_score","doublet")
## ----eval=F, echo=F-----------------------------------------------------------
# save(doublet,file = "data/pbmc8k_doublet.RData")
#
## ----eval=T, echo=F-----------------------------------------------------------
load("data/pbmc8k_doublet.RData")
## -----------------------------------------------------------------------------
summary(doublet$doublet_score)
## -----------------------------------------------------------------------------
table(doublet$doublet)
## ----det_doublet_pres,include=TRUE,eval=TRUE----------------------------------
seu_cbFilt[["doublet_score"]] <- doublet$doublet_score
seu_cbFilt[["doublet"]] <- doublet$doublet
## ----fig.height=4,fig.width=7-------------------------------------------------
VlnPlot(seu_cbFilt, group.by = "doublet",
features = c("doublet_score", "nCount_RNA","nFeature_RNA"),
pt.size = 0)
## ----fig.height=4,fig.width=7-------------------------------------------------
FeatureScatter(seu_cbFilt,feature1 = "nCount_RNA",feature2 = "nFeature_RNA",pt.size = 0.1,group.by = "doublet")
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Quality Assessment
---
"
)
}else{
cat("# Quality Assessment
---
"
)
}
## ----qcPlot_vlnPlot_pres1,include=TRUE,eval=F---------------------------------
# VlnPlot(seu_cbFilt, group.by = "dset",
# features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
# pt.size = 0)
## ----include=TRUE,echo=F, fig.height=4,fig.width=7----------------------------
VlnPlot(seu_cbFilt, group.by = "dset",
features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
pt.size = 0)
## ----include=TRUE,eval=T, echo=F, fig.height=3,fig.width=6--------------------
FeatureScatter(seu_cbFilt,feature1 = "nCount_RNA",feature2 = "nFeature_RNA",
group.by = "doublet")
## ----include=TRUE,eval=F------------------------------------------------------
# FeatureScatter(seu_cbFilt, feature1 = "nCount_RNA", feature2 = "percent.mt")
## ----qcPlot_scatter_pres2,include=TRUE,eval=TRUE, echo=F, fig.height=4,fig.width=7----
FeatureScatter(seu_cbFilt, feature1 = "nCount_RNA", feature2 = "percent.mt")
## ----include=TRUE,eval=F------------------------------------------------------
# RidgePlot(seu_cbFilt,group.by = "doublet",features = c("doublet_score"))
## ----qcPlot_ridgePlot_pres,include=TRUE,eval=TRUE, echo=F, fig.height=3,fig.width=6----
RidgePlot(seu_cbFilt,group.by = "doublet",features = c("doublet_score"))
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Filtering debris and doublets
---
"
)
}else{
cat("# Filtering debris and doublets
---
"
)
}
## ----filtCell_pres,include=TRUE,eval=TRUE-------------------------------------
table(seu_cbFilt$doublet=="TRUE" | seu_cbFilt$percent.mt >= 10)
## ----eval=T-------------------------------------------------------------------
seu_filt <- subset(seu_cbFilt, subset=doublet=="FALSE" &
percent.mt < 10)
## ----eval = T, echo = F-------------------------------------------------------
VlnPlot(seu_cbFilt, group.by = "dset",
features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
pt.size = 0)
## ----eval = T, echo = F-------------------------------------------------------
VlnPlot(seu_filt, group.by = "dset",
features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
pt.size = 0)
## ----echo=F, eval=TRUE--------------------------------------------------------
#save(seu_filt,file="data/pbmc8k_seu_filt.RData")
rm(doublet)
rm(seu_cbFilt)
#load("data/pbmc8k_seu_filt.RData")
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Regress out confounders
---
"
)
}else{
cat("# Regress out confounders
---
"
)
}
## ----filtCell_scaleData,include=TRUE,eval=F-----------------------------------
#
# #First let's replace NA values in the cyclon phase columns because ScaleData doesn't like this
# cyclone_cols_numeric <- c("cyclon_G1Score", "cyclon_G2MScore", "cyclon_SScore")
# # For each numeric column, replace NA with 0
# for (col_name in cyclone_cols_numeric) {
# seu_filt@meta.data[[col_name]][is.na(seu_filt@meta.data[[col_name]])] <- 0
# }
#
# pot_conf <- c("percent.mt","doublet_score","cyclon_G1Score","cyclon_SScore","cyclon_G2MScore")
# seu_filt <- ScaleData(seu_filt, vars.to.regress = pot_conf)
#
## ----fig.height=3,fig.width=7-------------------------------------------------
seu_filt <- quick_clust(seu_filt)
DimPlot(seu_filt)
## ----eval=F, echo=F-----------------------------------------------------------
#
# save(seu_filt, file="data/pbmc8k_SCT2.RData")
# saveRDS(seu_filt, file="data/pbmc8k_SCT2.rds")
# save(sce, file="data/pbmc8k_sce.RData")
#
## ----sceload, eval=T, echo=F--------------------------------------------------
load("data/pbmc8k_sce.RData")
## ----sctload, eval=T, echo=F--------------------------------------------------
load("data/pbmc8k_SCT2.RData")
#seu_filt <- readRDS("data/pbmc8k_SCT2.rds")
## ----markGene_cal,include=TRUE,eval=F-----------------------------------------
# markers <- FindAllMarkers(seu_filt, only.pos = TRUE,
# min.pct = 0.25, logfc.threshold = 0.25)
## ----eval=F, echo=F-----------------------------------------------------------
# save(markers, file="data/pbmc8k_markers.RData")
## ----echo=F, eval=T-----------------------------------------------------------
load("data/pbmc8k_markers.RData")
## -----------------------------------------------------------------------------
head(markers)
## ----top2Mark,include=TRUE,eval=T---------------------------------------------
top_genes <- markers %>% group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)
head(top_genes)
## ----fig.height=4,fig.width=7-------------------------------------------------
DoHeatmap(seu_filt, features = top_genes$gene) + NoLegend()
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Evaluate known marker genes expression
---
"
)
}else{
cat("# Evaluate known marker genes expression
---
"
)
}
## ----resEval_knownMarker_allMark,include=TRUE,eval=T,fig.height=4,fig.width=7----
known_marker <- c("IL7R","CCR7")
FeaturePlot(seu_filt, features = known_marker)
## ----include=TRUE,eval=T,fig.height=4,fig.width=7-----------------------------
known_marker <- c("GNLY","NKG7")
FeaturePlot(seu_filt, features = known_marker)
## ----resEval_knowMarker_heatmap,include=TRUE,eval=T---------------------------
known_marker <- c("IL7R","CCR7","S100A4","CD8A",
"MS4A1",
"CD14","LYZ","FCGR3A","MS4A7",
"FCER1A","CST3",
"GNLY","NKG7","PPBP")
mat <- GetAssayData(seu_filt,assay = "RNA",slot = "data")
mat <- mat[known_marker,]
mat <- as.matrix(mat)
## -----------------------------------------------------------------------------
clust <- unique(seu_filt$seurat_clusters)
clust <- as.character(clust)
avgExp_byClust <- lapply(clust,function(clust, seu, known){
sub <- subset(seu, subset=seurat_clusters==clust)
mat <- GetAssayData(sub,assay="RNA",slot = "data")
mat <- mat[known,]
mat <- as.matrix(mat)
avg <- rowMeans(mat)
return(avg)}, seu_filt, known_marker)
names(avgExp_byClust) <- paste0("C",clust)
## ----fig.height=4,fig.width=7-------------------------------------------------
library(pheatmap)
avgExp_mat <- do.call(cbind, avgExp_byClust)
pheatmap(avgExp_mat,scale = "row")
## ----echo=F, eval=T, warning=F, message=FALSE, include=F----------------------
rm(avgExp_mat, avgExp_byClust, clust, mat, marker)
gc()
## ----sec2_resEval_cellTypeAsign,include=TRUE,eval=T---------------------------
seu_filt[["cellType_byClust"]] <- NA
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(0)] <- "Naive CD4+ T cells"
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(4)] <- "Memory CD4+ T cells"
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(3)] <- "CD8+ T cells"
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(7)] <- "NK cells"
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(9,11)] <- "DC"
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(8)] <- "FCGR3A+ Monocytes"
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(2)] <- "B cells"
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(1,6)] <- "CD14+ Monocytes"
seu_filt$cellType_byClust[seu_filt$seurat_clusters %in% c(12)] <- "Platelets"
seu_filt$cellType_byClust[is.na(seu_filt$cellType_byClust)] <- "Unspecified"
## -----------------------------------------------------------------------------
table(seu_filt$cellType_byClust)
## ----fig.height=4,fig.width=7-------------------------------------------------
DimPlot(seu_filt, group.by = c("seurat_clusters","cellType_byClust"),label = TRUE,pt.size = 0.2)+NoLegend()
## ----results='asis',include=TRUE,echo=FALSE-----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Loupe Browser
---
"
)
}else{
cat("# Loupe Browser
---
"
)
}
## ----eval=F-------------------------------------------------------------------
# remotes::install_github("10xGenomics/loupeR")
# loupeR::setup()
## ----eval=F-------------------------------------------------------------------
# library(loupeR)
# create_loupe_from_seurat(seu_filt,
# output_dir = "loupe",
# output_name = "seu_filt")
#