params <-
list(isSlides = "no")
## ----setup, include=FALSE-----------------------------------------------------
suppressPackageStartupMessages(require(knitr))
suppressPackageStartupMessages(require(Seurat))
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(bioMart))
suppressPackageStartupMessages(require(SeuratWrappers))
knitr::opts_chunk$set(echo = TRUE, tidy = T)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Create Seurat object and Generare QC plots
---
"
)
}else{
cat("# Create Seurat object and Generare QC plots
---
"
)
}
## ----loadCR2Seurat,echo=TRUE,eval=FALSE,include=TRUE--------------------------
## tar_dir <- "~/path/to/raw/data"
## ----loadCR2Seurat2,echo=TRUE,eval=FALSE,include=TRUE-------------------------
## library(Seurat)
## samID <- "CTRL"
## X10_file <- Seurat::Read10X(tar_dir)
## obj <- CreateSeuratObject(counts = X10_file, project = samID, min.cells = 5,min.features = 200)
## obj$dset <- samID
## obj <- Seurat::RenameCells(obj,add.cell.id=samID)
## #
## obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^mt-")
## ----loadCR2Seurat3,echo=TRUE,eval=TRUE,include=FALSE-------------------------
obj <- readRDS("data/scSeq_CTRL_1kCell_ori.rds")
obj
## ----objInfo_cell,echo=TRUE,eval=TRUE,include=TRUE----------------------------
head(obj@meta.data)
## ----objInfo_count,echo=TRUE,eval=TRUE,include=TRUE---------------------------
head(obj@assays$RNA@counts)
## ----objInfo_count2,echo=TRUE,eval=TRUE,include=TRUE--------------------------
head(obj@assays$RNA@data)
## ----eval_readCount_geneCount_mitoCont,echo=TRUE,eval=TRUE,include=TRUE,fig.height=3,fig.width=9----
VlnPlot(obj,features = c("nCount_RNA","nFeature_RNA","percent.mt"),pt.size = 0.2)
## ----eval_readCount_geneCount,echo=TRUE,eval=TRUE,include=TRUE,fig.width=5,fig.height=5,fig.align='center'----
FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
## ----eval_readCount_mitoCont,echo=TRUE,eval=TRUE,include=TRUE,fig.width=5,fig.height=5,fig.align='center'----
FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
## ----rmMito,echo=TRUE,eval=TRUE,include=TRUE----------------------------------
summary(obj@meta.data$percent.mt)
mt_cutH <- 10
obj_unfiltered <- obj
obj <- subset(obj,subset = percent.mt < mt_cutH)
## ----rmMito_vln1,echo=TRUE,eval=TRUE,include=TRUE-----------------------------
VlnPlot(obj_unfiltered,features = c("nCount_RNA","nFeature_RNA","percent.mt"),pt.size = 0.2)
## ----rmMito_vln2,echo=TRUE,eval=TRUE,include=TRUE-----------------------------
VlnPlot(obj,features = c("nCount_RNA","nFeature_RNA","percent.mt"),pt.size = 0.2)
## ----rmMito_scatter1,echo=TRUE,eval=TRUE,include=TRUE-------------------------
FeatureScatter(obj_unfiltered, feature1 = "nCount_RNA", feature2 = "percent.mt")
## ----rmMito_scatter2,echo=TRUE,eval=TRUE,include=TRUE-------------------------
FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Cell cycle phase determination
---
"
)
}else{
cat("# Cell cycle phase determination
---
"
)
}
## ----cellCycle_est1,echo=TRUE,eval=TRUE,include=TRUE--------------------------
convertHumanGeneList <- function(x){
require("biomaRt")
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl",host = "http://useast.ensembl.org")
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl",host="http://useast.ensembl.org")
genesV2 = getLDS(attributes = c("hgnc_symbol"),
filters = "hgnc_symbol",
values = x ,
mart = human,
attributesL = c("mgi_symbol"),
martL = mouse, uniqueRows=T)
humanx <- unique(genesV2[, 2])
return(humanx)}
#
s_gene <- convertHumanGeneList(cc.genes$s.genes)
g2m_gene <- convertHumanGeneList(cc.genes$g2m.genes)
## ----cellCycle_est2,echo=TRUE,eval=TRUE,include=TRUE--------------------------
obj <- CellCycleScoring(obj, s.features = s_gene,
g2m.features = g2m_gene, set.ident = TRUE)
obj@meta.data[1:2,]
## ----cellCycle_plot1,echo=TRUE,eval=TRUE,include=TRUE-------------------------
yd_dat <- as.data.frame(table(obj@meta.data$dset,obj@meta.data$Phase))
head(yd_dat)
## ----cellCycle_plot2,echo=TRUE,eval=TRUE,include=TRUE,fig.width=5,fig.height=5,fig.align='center'----
library(ggplot2)
ggplot(yd_dat,aes(x=Var1,y=Freq,fill=Var2))+geom_bar(stat="identity",position="stack")+labs(x="",y="Counts",fill="Phase")+theme_classic()
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Regression and clustering
---
"
)
}else{
cat("# Regression and clustering
---
"
)
}
## ----regress,echo=TRUE,eval=TRUE,include=TRUE---------------------------------
obj <- ScaleData(obj,
vars.to.regress = c("percent.mt","S.score","G2M.score","Phase"))
## ----prcomp,echo=TRUE,eval=TRUE,include=TRUE----------------------------------
set.seed(1000)
obj <- RunPCA(obj, npcs = 30, verbose = FALSE)
## ----pcSel,echo=TRUE,eval=TRUE,include=TRUE,fig.width=5,fig.height=5,fig.align='center'----
ElbowPlot(obj,ndims=30)
#
numPC <- 15
## ----clust,echo=TRUE,eval=TRUE,include=TRUE-----------------------------------
set.seed(1000)
maxPC <- numPC
obj <- FindNeighbors(obj, reduction = "pca", dims = 1:maxPC)
obj <- FindClusters(obj, resolution = 0.5)
## ----clust2,echo=TRUE,eval=TRUE,include=TRUE----------------------------------
obj <- RunUMAP(obj, reduction = "pca", dims = 1:maxPC)
obj <- RunTSNE(obj, reduction = "pca", dims = 1:maxPC)
## ----seurat_UMAP,echo=TRUE,eval=TRUE,include=TRUE-----------------------------
DimPlot(obj,reduction="umap") # default it "umap"
## ----seurat_tSNE,echo=TRUE,eval=TRUE,include=TRUE-----------------------------
DimPlot(obj,reduction = "tsne")
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Identify marker genes for each cluster
---
"
)
}else{
cat("# Identify marker genes for each cluster
---
"
)
}
## ----markGene,echo=TRUE,eval=TRUE,include=TRUE--------------------------------
obj <- SetIdent(obj,value = "seurat_clusters")
clust.markers <- FindAllMarkers(obj,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25)
head(clust.markers)
## ----markGene_sub,echo=TRUE,eval=TRUE,include=TRUE----------------------------
topG <- clust.markers %>%
group_by(cluster) %>%
top_n(n = 5, wt = avg_log2FC)
head(topG)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
if(params$isSlides == "yes"){
cat("class: inverse, center, middle
# Advanced plots
---
"
)
}else{
cat("# Advanced plots
---
"
)
}
## ----dimPlot_ptSize,echo=TRUE,eval=TRUE,include=TRUE--------------------------
DimPlot(obj,pt.size = 0.2)
## ----dimPlot_label,echo=TRUE,eval=TRUE,include=TRUE---------------------------
DimPlot(obj,pt.size = 0.2,label=TRUE)+NoLegend()
## ----markGene_heatmap,echo=TRUE,eval=F,include=TRUE---------------------------
## DoHeatmap(obj, features = topG$gene) + NoLegend()
## ----markGene_heatmap2,echo=F,eval=TRUE,include=TRUE--------------------------
suppressWarnings(DoHeatmap(obj, features = topG$gene) + NoLegend())
## ----makGene_featurePlot,echo=TRUE,eval=TRUE,include=TRUE---------------------
gene_marker <- c("Krt1","Pthlh","Krt14","Cenpa","Shh")
FeaturePlot(obj,features = gene_marker,pt.size = 0.2)
## ----makGene_RidgePlott,echo=TRUE,eval=F,include=TRUE-------------------------
## RidgePlot(obj,features = gene_marker)
## ----makGene_RidgePlott2,echo=F,eval=TRUE,include=TRUE------------------------
suppressMessages(print(RidgePlot(obj,features = gene_marker)))
## ----cellcycle_cluster,echo=TRUE,eval=TRUE,include=TRUE-----------------------
tbl <- table(obj@meta.data$seurat_clusters,obj@meta.data$Phase)
tbl_dat <- as.data.frame(tbl)
to <- rowSums(tbl)
names(to) <- rownames(tbl)
tbl_dat$to <- to[match(names(to),tbl_dat$Var1)]
tbl_dat$prop <- tbl_dat$Freq / tbl_dat$to
tbl_dat[1:2,]
## ----cellcycle_cluster_plot2,echo=FALSE,eval=TRUE,include=TRUE----------------
ggplot(tbl_dat,aes(x=Var1,y=prop,fill=Var2))+
geom_bar(stat="identity",position="stack")+
labs(x="Seurat_clusters",y="Proportion",fill="Phase")+
theme_classic()
## ----saveRDS_surat,echo=TRUE,eval=TRUE,include=TRUE---------------------------
cellID <- rownames(obj@meta.data)[!obj@meta.data$seurat_clusters==1]
obj_sub <- obj[,cellID]
saveRDS(obj_sub,"scSeq_Seurat_clean.rds")