Â
These exercises are about manipulate single-cell data with Seurat. Please download the counting matrix from DropBox and loading them into a Seurat object. Or you may also used the rds file data/scSeq_CKO_1kCell_ori.rds.
library(Seurat)
<- readRDS("data/scSeq_CKO_1kCell_ori.rds")
obj "percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^mt-")
obj[[head(obj@meta.data)
## orig.ident nCount_RNA nFeature_RNA dset percent.mt
## CKO_AGACGTTCAGCTGGCT-1 CKO 2679 1200 CKO 0.1119821
## CKO_TTAACTCGTAGTACCT-1 CKO 813 476 CKO 14.3911439
## CKO_GAGGTGAGTCTAGTGT-1 CKO 4852 1759 CKO 10.9233306
## CKO_CTCGTACAGCTAAGAT-1 CKO 1750 672 CKO 46.0000000
## CKO_AGGGAGTTCAAACCAC-1 CKO 4632 1585 CKO 5.3108808
## CKO_ATTATCCTCAACGGCC-1 CKO 2498 1177 CKO 3.8030424
VlnPlot(obj,features = c("nCount_RNA","nFeature_RNA","percent.mt"),pt.size = 0.2)
FeatureScatter(obj,feature1="nCount_RNA",feature2="nFeature_RNA")
FeatureScatter(obj,feature1="nCount_RNA",feature2="percent.mt")
<- 10
mt_cutH <- obj
obj_unfiltered <- subset(obj,subset = percent.mt < mt_cutH)
obj obj
## An object of class Seurat
## 14353 features across 619 samples within 1 assay
## Active assay: RNA (14353 features, 0 variable features)
<- function(x){
convertHumanGeneList require("biomaRt")
= useMart("ensembl", dataset = "hsapiens_gene_ensembl",host = "http://useast.ensembl.org")
human = useMart("ensembl", dataset = "mmusculus_gene_ensembl",host = "http://useast.ensembl.org")
mouse = getLDS(attributes = c("hgnc_symbol"),
genesV2 filters = "hgnc_symbol",
values = x ,
mart = human,
attributesL = c("mgi_symbol"),
martL = mouse, uniqueRows=T)
<- unique(genesV2[, 2])
humanx return(humanx)}
#
<- convertHumanGeneList(cc.genes$s.genes)
s_gene <- convertHumanGeneList(cc.genes$g2m.genes)
g2m_gene #
<- CellCycleScoring(obj, s.features = s_gene,
obj g2m.features = g2m_gene, set.ident = TRUE)
table(obj@meta.data$Phase)
##
## G1 G2M S
## 260 108 251
<- FindVariableFeatures(obj)
obj <- ScaleData(obj,vars.to.regress = c("percent.mt","S.score","G2M.score","Phase")) obj
## Warning: Requested variables to regress not in object: S.score, G2M.score
## Regressing out percent.mt, Phase
## Centering and scaling data matrix
set.seed(0)
<- RunPCA(obj, npcs = 30, verbose = FALSE)
obj ElbowPlot(obj,ndims = 30)
#
<- 10
numPC <- numPC
maxPC <- FindNeighbors(obj, reduction = "pca", dims = 1:maxPC) obj
## Computing nearest neighbor graph
## Computing SNN
<- FindClusters(obj, resolution = 0.5) obj
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 619
## Number of edges: 17846
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8145
## Number of communities: 6
## Elapsed time: 0 seconds
#
<- RunUMAP(obj, reduction = "pca", dims = 1:maxPC) obj
## 16:31:11 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:31:11 Read 619 rows and found 10 numeric columns
## 16:31:11 Using Annoy for neighbor search, n_neighbors = 30
## 16:31:11 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:31:11 Writing NN index file to temp file /tmp/RtmpMsibtn/file1769ffaf6b2
## 16:31:11 Searching Annoy index using 1 thread, search_k = 3000
## 16:31:12 Annoy recall = 100%
## 16:31:12 Commencing smooth kNN distance calibration using 1 thread
## 16:31:14 Initializing from normalized Laplacian + noise
## 16:31:14 Commencing optimization for 500 epochs, with 23934 positive edges
## 16:31:16 Optimization finished
<- RunTSNE(obj, reduction = "pca", dims = 1:maxPC)
obj #
DimPlot(obj,reduction = "umap")
<- SetIdent(obj,value = "seurat_clusters")
obj <- FindAllMarkers(obj,
clust.markers only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Warning in FindMarkers.default(object = data.use, slot = data.slot, counts =
## counts, : No features pass logfc.threshold threshold; returning empty data.frame
## Calculating cluster 4
## Calculating cluster 5
head(clust.markers)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## Dcn 5.248586e-28 3.6236022 0.832 0.434 7.533296e-24 0 Dcn
## Tnfrsf19 9.893203e-16 0.6278188 0.621 0.271 1.419971e-11 0 Tnfrsf19
## Kremen2 1.517087e-15 1.6636156 0.703 0.377 2.177475e-11 0 Kremen2
## Ifitm3 1.519477e-13 3.6505185 0.871 0.638 2.180906e-09 0 Ifitm3
## Lef1 9.500305e-13 1.7603907 0.513 0.222 1.363579e-08 0 Lef1
## Postn 2.547487e-12 1.6586465 0.599 0.320 3.656408e-08 0 Postn
#
<- clust.markers %>%
topG group_by(cluster) %>%
top_n(n = 5, wt = avg_log2FC)
#
DoHeatmap(obj, features = topG$gene) + NoLegend()
## Warning in DoHeatmap(obj, features = topG$gene): The following features were
## omitted as they were not found in the scale.data slot for the RNA assay: Gpx1,
## Rpl41, Rps13, Rps19, Rps27a, Rps28, Zfos1
<- clust.markers %>%
topG group_by(cluster) %>%
top_n(n = 1, wt = avg_log2FC)
<- topG$gene
top_gene #
FeaturePlot(obj,features = top_gene,pt.size = 0.2)
RidgePlot(obj,features = top_gene)
## Picking joint bandwidth of 0.766
## Picking joint bandwidth of 0.927
## Picking joint bandwidth of 2.08
## Picking joint bandwidth of 0.12
## Picking joint bandwidth of 0.167