params <- list(isSlides = "no") ## ----setup, include=FALSE----------------------------------------------------- suppressPackageStartupMessages(require(knitr)) suppressPackageStartupMessages(require(DropletUtils)) suppressPackageStartupMessages(require(SingleCellExperiment)) suppressPackageStartupMessages(require(scran)) suppressPackageStartupMessages(require(scater)) suppressPackageStartupMessages(require(scuttle)) suppressPackageStartupMessages(require(scDblFinder)) knitr::opts_chunk$set(echo = TRUE, tidy = T) ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Loading data and empty droplets

--- " ) }else{ cat("# Loading data and empty drops --- " ) } ## ----loadSCE,eval=FALSE,include=TRUE,echo=TRUE-------------------------------- ## library(DropletUtils) ## library(DropletTestFiles) ## fname <- "~/filepath/toCellRanger/results" ## sce <- read10xCounts(fname, col.names=TRUE) ## cellID <- colData(sce)$Barcode ## ## ----loadSCE_io,include=TRUE,echo=FALSE,eval=TRUE----------------------------- sce <- readRDS("data/scSeq_CTRL_sceSub.rds") ## ----loadSCE_pres,include=TRUE,echo=TRUE,eval=TRUE---------------------------- sce ## ----loadSCE_pres2,include=TRUE,echo=TRUE,eval=TRUE--------------------------- # cell information colData(sce)[1:2,] # gene information rowData(sce)[1:2,] ## ----rankUMI,eval=TRUE-------------------------------------------------------- bcrank <- barcodeRanks(counts(sce)) bcrank[1:2,] ## ----rankUMI_knee,eval=FALSE,echo=TRUE,include=TRUE--------------------------- ## uniq <- !duplicated(bcrank$rank) ## # ## plot(bcrank$rank[uniq], bcrank$total[uniq], log="xy", ## xlab="Rank", ylab="Total UMI count", cex.lab=1.2) ## ## abline(h=metadata(bcrank)$inflection, col="darkgreen", lty=2) ## abline(h=metadata(bcrank)$knee, col="dodgerblue", lty=2) ## ## legend("bottomleft", legend=c("Inflection", "Knee"), ## col=c("darkgreen", "dodgerblue"), lty=2, cex=1.2) ## ----rankUMI_knee2,eval=TRUE,echo=FALSE,include=TRUE-------------------------- uniq <- !duplicated(bcrank$rank) # plot(bcrank$rank[uniq], bcrank$total[uniq], log="xy", xlab="Rank", ylab="Total UMI count", cex.lab=1.2) abline(h=metadata(bcrank)$inflection, col="darkgreen", lty=2) abline(h=metadata(bcrank)$knee, col="dodgerblue", lty=2) legend("bottomleft", legend=c("Inflection", "Knee"), col=c("darkgreen", "dodgerblue"), lty=2, cex=1.2) ## ----estDroplet,eval=TRUE,echo=TRUE,include=TRUE------------------------------ set.seed(100) limit <- 100 e.out <- emptyDrops(counts(sce),lower=limit, test.ambient=TRUE) # e.out ## ----estDroplet2,eval=TRUE,echo=TRUE,include=TRUE----------------------------- # Testeed by FDR summary(e.out$FDR <= 0.001) # Concordance by testing with FDR and limited table(Sig=e.out$FDR <= 0.001, Limited=e.out$Limited) ## ----estDroplet3,eval=TRUE,echo=TRUE,include=TRUE----------------------------- hist(e.out$PValue[e.out$Total <= limit & e.out$Total > 0], xlab="P-value", main="", col="grey80") ## ----estDroplet4,eval=TRUE,echo=TRUE,include=TRUE----------------------------- sce2 <- sce[,which(e.out$FDR <= 0.001)] ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Normalization and clustering

--- " ) }else{ cat("# Normalization and clustering --- " ) } ## ----countNorm,eval=TRUE,echo=TRUE,include=TRUE------------------------------- library(scran) library(scuttle) library(scater) clusters <- quickCluster(sce2) sce2 <- computeSumFactors(sce2, cluster=clusters) sce2 <- logNormCounts(sce2) sce2 ## ----featureIdent,eval=TRUE,echo=TRUE,include=TRUE---------------------------- set.seed(1000) # modeling variables dec.pbmc <- modelGeneVarByPoisson(sce2) # calcualte top features top.pbmc <- getTopHVGs(dec.pbmc, prop=0.1) ## ----plotUMAP,eval=TRUE,echo=TRUE,include=TRUE-------------------------------- set.seed(1000) # Evaluate PCs sce2 <- denoisePCA(sce2, subset.row=top.pbmc, technical=dec.pbmc) # make TSNE plot sce2 <- runTSNE(sce2, dimred="PCA") # make UMAP plot sce2 <- runUMAP(sce2, dimred="PCA") ## ----clust,eval=TRUE,echo=TRUE,include=TRUE----------------------------------- g <- buildSNNGraph(sce2, k=10, use.dimred = 'PCA') clust <- igraph::cluster_walktrap(g)$membership colLabels(sce2) <- factor(clust) # colData(sce2) ## ----plotUMAP_2,eval=TRUE,echo=TRUE,include=TRUE------------------------------ plotUMAP(sce2,colour_by="label") ## ----plotTSNE,eval=TRUE,echo=TRUE,include=TRUE-------------------------------- plotTSNE(sce2,colour_by ="label") ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Removing Ambient RNA

--- " ) }else{ cat("# Removing Ambient RNA --- " ) } ## ----estAmb1,eval=TRUE,echo=TRUE,include=TRUE--------------------------------- # extrat potential ambient RNA and thee estimated score amb <- metadata(e.out)$ambient[,1] head(amb) ## ----estAmb2,eval=TRUE,echo=TRUE,include=TRUE--------------------------------- library(scater) stripped <- sce2[names(amb),] out <- removeAmbience(counts(stripped), ambient=amb,groups = colLabels(stripped)) ## ----recalAmb,eval=TRUE,echo=TRUE,include=TRUE-------------------------------- counts(stripped, withDimnames=FALSE) <- out stripped <- logNormCounts(stripped) ## ----compRMAmb,eval=FALSE,echo=TRUE,include=TRUE------------------------------ ## ensmbl_id <- rowData(sce2)$ID[rowData(sce2)$Symbol=="Hba-a1"] ## plotExpression(sce2, x="label", colour_by="label", features=ensmbl_id) + ## ggtitle("Before") ## ## plotExpression(stripped, x="label", colour_by="label", features=ensmbl_id) + ## ggtitle("After") ## ----compRMAmb_pres1,eval=TRUE,echo=FALSE,include=TRUE------------------------ ensmbl_id <- rowData(sce2)$ID[rowData(sce2)$Symbol=="Hba-a1"] plotExpression(sce2, x="label", colour_by="label", features=ensmbl_id) + ggtitle("Before") ## ----compRMAmb_pres2,eval=TRUE,echo=FALSE,include=TRUE------------------------ plotExpression(stripped, x="label", colour_by="label", features=ensmbl_id) + ggtitle("After") ## ----compRMAmb2,eval=FALSE,echo=TRUE,include=TRUE----------------------------- ## ensmbl_id <- rowData(sce2)$ID[rowData(sce2)$Symbol=="Krt17"] ## plotExpression(sce2, x="label", colour_by="label", features=ensmbl_id) + ## ggtitle("Before") ## plotExpression(stripped, x="label", colour_by="label", features=ensmbl_id) + ## ggtitle("After") ## ----compRMAmb2_pres,eval=TRUE,echo=FALSE,include=TRUE------------------------ ensmbl_id <- rowData(sce2)$ID[rowData(sce2)$Symbol=="Krt17"] plotExpression(sce2, x="label", colour_by="label", features=ensmbl_id) + ggtitle("Before") ## ----compRMAmb2_pres2,eval=TRUE,echo=FALSE,include=TRUE----------------------- plotExpression(stripped, x="label", colour_by="label", features=ensmbl_id) + ggtitle("After") ## ----normClust,eval=FALSE,echo=TRUE,include=TRUE------------------------------ ## dec <- modelGeneVar(stripped) ## hvgs <- getTopHVGs(dec,n=1000) ## stripped <- runPCA(stripped, ncomponents=10, subset_row=hvgs) ## stripped <- runUMAP(stripped, dimred="PCA") ## g <- buildSNNGraph(stripped, k=10, use.dimred = 'PCA') ## clust <- igraph::cluster_walktrap(g)$membership ## colLabels(stripped) <- factor(clust) ## plotUMAP(stripped,colour_by="label") ## ----normClust2,eval=TRUE,echo=FALSE,include=TRUE,fig.height=4,fig.width=4---- dec <- modelGeneVar(stripped) hvgs <- getTopHVGs(dec,n=1000) stripped <- runPCA(stripped, ncomponents=10, subset_row=hvgs) stripped <- runUMAP(stripped, dimred="PCA") g <- buildSNNGraph(stripped, k=10, use.dimred = 'PCA') clust <- igraph::cluster_walktrap(g)$membership colLabels(stripped) <- factor(clust) plotUMAP(stripped,colour_by="label") ## ----rmAmb_store,eval=TRUE,echo=TRUE,include=TRUE----------------------------- saveRDS(stripped,"data/scSeq_CTRL_sceSub_rmAmbRNA.rds") ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # Removing Doublets

--- " ) }else{ cat("# Removing Doublets --- " ) } ## ----estDoublet,eval=TRUE,echo=TRUE,include=TRUE------------------------------ dbl.dens <- computeDoubletDensity(stripped, #subset.row=top.mam, d=ncol(reducedDim(stripped)),subset.row=hvgs) summary(dbl.dens) stripped$DoubletScore <- dbl.dens ## ----doublerScore,eval=TRUE,echo=TRUE,include=TRUE---------------------------- plotUMAP(stripped,colour_by="DoubletScore") ## ----doublerScorebyClust,eval=TRUE,echo=TRUE,include=TRUE,fig.height=3.5,fig.width=5---- plotColData(stripped, x="label", y="DoubletScore", colour_by="label")+ geom_hline(yintercept = quantile(colData(stripped)$DoubletScore,0.95),lty="dashed",color="red") ## ----rmDoublet,eval=TRUE,echo=TRUE,include=TRUE------------------------------- cut_off <- quantile(stripped$DoubletScore,0.95) stripped$isDoublet <- c("no","yes")[factor(as.integer(stripped$DoubletScore>=cut_off),levels=c(0,1))] table(stripped$isDoublet) sce_clean <- stripped[stripped$isDoublet=="no",] saveRDS(sce_clean,"data/scSeq_CTRL_sceSub_rmAmbRNA_rmDoublet.rds") ## ---- results='asis',include=TRUE,echo=FALSE---------------------------------- if(params$isSlides == "yes"){ cat("class: inverse, center, middle # QC plots after clearance

--- " ) }else{ cat("# QC plots after clearance --- " ) } ## ----evaQC_cal,eval=TRUE,echo=TRUE,include=TRUE------------------------------- library(scater) mtGene <- rowData(sce_clean)$ID[grepl(rowData(sce_clean)$Symbol,pattern = "mt-")] is.mito <- names(sce_clean) %in% mtGene sce_clean <- addPerCellQC(sce_clean, subsets=list(Mito=is.mito)) ## ----qc_mrgr,eval=TRUE,echo=TRUE,include=TRUE,fig.height=4,fig.width=4-------- plotColData(sce_clean,x="label", y="sum", colour_by="label")+ggtitle("read counts") ## ----qc_mrg2r,eval=TRUE,echo=TRUE,include=TRUE,fig.height=4,fig.width=4------- plotColData(sce_clean,x="label", y="detected", colour_by="label")+ggtitle("gene counts") ## ----qc_mrg3r,eval=TRUE,echo=TRUE,include=TRUE,fig.height=4,fig.width=4------- plotColData(sce_clean,x="label", y="subsets_Mito_percent", colour_by="label")+ggtitle("mitocondrial content") ## ----qc_complex,eval=TRUE,echo=TRUE,include=TRUE,fig.height=4,fig.width=4----- plotColData(sce_clean,x="sum",y="subsets_Mito_percent",colour_by="label")+ggtitle("is.mito vs read counts") ## ----qc_complex2,eval=TRUE,echo=TRUE,include=TRUE,fig.height=4,fig.width=4---- plotColData(sce_clean,x="sum",y="detected",colour_by="label")+ggtitle("gene counts vs read counts") ## ----varExp,eval=T,echo=TRUE,include=TRUE------------------------------------- vars <- getVarianceExplained(sce_clean, variables=c("DoubletScore","label","sum","detected","subsets_Mito_percent")) ## ----varExp2,eval=F,echo=TRUE,include=TRUE------------------------------------ ## plotExplanatoryVariables(vars) ## ----varExp3,eval=TRUE,echo=TRUE,include=TRUE,fig.height=4,fig.width=4-------- plotExplanatoryVariables(vars)