# Render-time parameters; isSlides is compared against "yes" further down.
params <- list(isSlides = "no")
## ----setup, include=FALSE-----------------------------------------------------
library(org.Hs.eg.db)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
# Default chunk option for the whole document: echo = TRUE.
knitr::opts_chunk$set(echo = TRUE)
#knitr::opts_knit$set(root.dir = "RU_tidyverse/inst/extdata/")
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
# Document title, emitted for every value of isSlides other than "yes".
if (params$isSlides != "yes") {
  cat("# Making R easier with Tidyverse\n---\n")
}
## -----------------------------------------------------------------------------
# Restore the saved objects; the data frames previewed below are presumably
# among them.
load("data/my_tidy.Rdata")
## -----------------------------------------------------------------------------
# Preview the first rows of each example data frame.
head(df1)
## -----------------------------------------------------------------------------
head(df2)
## -----------------------------------------------------------------------------
head(df3a)
## -----------------------------------------------------------------------------
head(df3b)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
# Section heading; when isSlides is "yes" a slide class line is written first.
cat(
  if (params$isSlides == "yes") "class: inverse, center, middle\n",
  "# Reading and Tibbles\n---\n",
  sep = ""
)
## -----------------------------------------------------------------------------
library(tidyverse)
## -----------------------------------------------------------------------------
# Base R reader, giving a plain data.frame.
untidy_counts_base <- read.csv(file = "data/hemato_rnaseq_counts.csv")
untidy_counts_base
## -----------------------------------------------------------------------------
# readr reader with no column types supplied.
read_csv("data/hemato_rnaseq_counts.csv")
## -----------------------------------------------------------------------------
# Same file, with the type of every column stated explicitly.
untidy_counts <- read_csv(
  "data/hemato_rnaseq_counts.csv",
  col_types = cols(
    ENTREZ  = col_character(),
    CD34_1  = col_integer(),
    ORTHO_1 = col_integer(),
    CD34_2  = col_integer(),
    ORTHO_2 = col_integer()
  )
)
untidy_counts
## -----------------------------------------------------------------------------
# First row.
untidy_counts[1, ]
## -----------------------------------------------------------------------------
# First column, matrix-style indexing.
untidy_counts[, 1]
## -----------------------------------------------------------------------------
# First column, list-style indexing.
untidy_counts[1]
## -----------------------------------------------------------------------------
# Contents of the first column.
untidy_counts[[1]]
## -----------------------------------------------------------------------------
# Contents of the ENTREZ column, by name.
untidy_counts$ENTREZ
## -----------------------------------------------------------------------------
# Convert the base data.frame read earlier into a tibble (printed only).
as_tibble(untidy_counts_base)
## -----------------------------------------------------------------------------
# Store the tibble, then make ENTREZ character. A plain mutate() replaces the
# superseded mutate_at(vars(ENTREZ), as.character) and does the same conversion.
untidy_counts_base <- as_tibble(untidy_counts_base)
untidy_counts_base <- mutate(untidy_counts_base, ENTREZ = as.character(ENTREZ))
untidy_counts_base
## -----------------------------------------------------------------------------
# And back to a base data.frame, showing the first 12 rows.
as.data.frame(untidy_counts_base) %>% head(n = 12)
## -----------------------------------------------------------------------------
# Let's load in some packages
library(org.Hs.eg.db)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
# Gene ranges from the hg19 knownGene annotation.
hg19_genes <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
keys <- hg19_genes$gene_id
# Namespace-qualified so this is AnnotationDbi's select(), not dplyr's.
symbols <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = keys,
  columns = c("SYMBOL"),
  keytype = "ENTREZID"
)
## -----------------------------------------------------------------------------
# LENGTH is the span of each gene range, which is what width() reports;
# lengths() counts elements and does not measure the ranges themselves.
counts_metadata <- tibble(
  ID = symbols$ENTREZID,
  SYMBOL = symbols$SYMBOL,
  LENGTH = width(hg19_genes)
)
counts_metadata
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
# Section heading; when isSlides is "yes" a slide class line is written first.
cat(
  if (params$isSlides == "yes") "class: inverse, center, middle\n",
  "# Tidying up your data\n---\n",
  sep = ""
)
## -----------------------------------------------------------------------------
untidy_counts
## -----------------------------------------------------------------------------
untidy_counts
## -----------------------------------------------------------------------------
# Wide to long: every column except ENTREZ becomes a Sample/counts pair.
tidier_counts <- pivot_longer(
  untidy_counts,
  cols = -ENTREZ,
  names_to = "Sample",
  values_to = "counts"
)
tidier_counts
## -----------------------------------------------------------------------------
# Long back to wide, one column per Sample value.
pivot_wider(tidier_counts, names_from = Sample, values_from = counts)
## -----------------------------------------------------------------------------
tidier_counts
## -----------------------------------------------------------------------------
tidier_counts
## -----------------------------------------------------------------------------
# Split Sample on "_" into CellType and Rep; Sample itself is removed.
tidy_counts <- separate(
  tidier_counts,
  col = Sample,
  into = c("CellType", "Rep"),
  sep = "_",
  remove = TRUE
)
tidy_counts
## -----------------------------------------------------------------------------
# Rebuild Sample from CellType and Rep, keeping both source columns.
unite(tidy_counts, col = Sample, CellType, Rep, remove = FALSE)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
# Section heading; when isSlides is "yes" a slide class line is written first.
cat(
  if (params$isSlides == "yes") "class: inverse, center, middle\n",
  "# Piping with Magrittr\n---\n",
  sep = ""
)
## -----------------------------------------------------------------------------
# The same reshape written three ways. Variant 1: nested calls.
tidy_counts <- separate(
  pivot_longer(untidy_counts, names_to = "Sample", values_to = "counts", cols = c(-ENTREZ)),
  Sample,
  sep = "_", into = c("CellType", "Rep"), remove = FALSE
)
## -----------------------------------------------------------------------------
# Variant 2: one step at a time with an intermediate object.
tidier_counts <- pivot_longer(untidy_counts, names_to = "Sample", values_to = "counts", cols = c(-ENTREZ))
tidy_counts <- separate(tidier_counts, Sample, sep = "_", into = c("CellType", "Rep"), remove = FALSE)
## -----------------------------------------------------------------------------
# Variant 3: piped. Uses pivot_longer() like the two variants above, in place of
# the superseded gather(), so all three produce the same rows in the same order.
tidy_counts <- untidy_counts %>%
  pivot_longer(names_to = "Sample", values_to = "counts", cols = c(-ENTREZ)) %>%
  separate(Sample, sep = "_", into = c("CellType", "Rep"), remove = FALSE)
tidy_counts
## -----------------------------------------------------------------------------
# `.` stands for the piped value: subset it by its own counts column.
tidier_counts %>% .[.$counts > 0, ]
## -----------------------------------------------------------------------------
library(magrittr)
# %<>% pipes tidier_counts through the subset and assigns the result back.
tidier_counts %<>% .[.$counts > 0, ]
tidier_counts
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
# Section heading; when isSlides is "yes" a slide class line is written first.
cat(
  if (params$isSlides == "yes") "class: inverse, center, middle\n",
  "# Joining tibbles together\n---\n",
  sep = ""
)
## -----------------------------------------------------------------------------
tidy_counts
## -----------------------------------------------------------------------------
counts_metadata
## -----------------------------------------------------------------------------
# Inner join: rows whose ENTREZ matches a metadata ID.
tidy_counts_meta <- inner_join(
  tidy_counts,
  counts_metadata,
  by = c(ENTREZ = "ID")
)
## -----------------------------------------------------------------------------
# Left join: every row of tidy_counts, matched or not.
left_join(
  tidy_counts,
  counts_metadata,
  by = c(ENTREZ = "ID")
)
## -----------------------------------------------------------------------------
# Right join: every row of counts_metadata, matched or not.
tidy_counts_expressed <- right_join(
  tidy_counts,
  counts_metadata,
  by = c(ENTREZ = "ID")
)
tail(tidy_counts_expressed)
## -----------------------------------------------------------------------------
# Metadata rows that have a match in tidy_counts.
semi_join(
  counts_metadata,
  tidy_counts,
  by = c(ID = "ENTREZ")
)
## -----------------------------------------------------------------------------
# Metadata rows that have no match in tidy_counts.
anti_join(
  counts_metadata,
  tidy_counts,
  by = c(ID = "ENTREZ")
)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
# Section heading; when isSlides is "yes" a slide class line is written first.
cat(
  if (params$isSlides == "yes") "class: inverse, center, middle\n",
  "# Quickly manipulate data with dplyr\n---\n",
  sep = ""
)
## -----------------------------------------------------------------------------
# select(): choose columns by name.
tidy_counts_meta %>% select(counts)
## -----------------------------------------------------------------------------
tidy_counts_meta %>% select(counts, ENTREZ)
## -----------------------------------------------------------------------------
# A leading minus drops the column instead.
tidy_counts_meta %>% select(-Sample)
## -----------------------------------------------------------------------------
# A colon selects the run of columns from CellType through SYMBOL.
tidy_counts_meta %>% select(CellType:SYMBOL)
## -----------------------------------------------------------------------------
# filter(): keep rows where the condition holds.
tidy_counts_meta %>% filter(Sample == "CD34_1")
## -----------------------------------------------------------------------------
tidy_counts_meta %>% filter(Sample %in% c("CD34_1", "ORTHO_1"))
## -----------------------------------------------------------------------------
tidy_counts_meta %>% filter(counts > 0)
## -----------------------------------------------------------------------------
# arrange(): sort rows, ascending unless wrapped in desc().
tidy_counts_meta %>% arrange(counts)
## -----------------------------------------------------------------------------
tidy_counts_meta %>% arrange(CellType, desc(counts))
## -----------------------------------------------------------------------------
# mutate(): add a column; left unnamed it is labelled with the expression.
tidy_counts_meta %>% mutate(scale(counts))
## -----------------------------------------------------------------------------
tidy_counts_meta %>% mutate(count_zscore = scale(counts))
## -----------------------------------------------------------------------------
# Rows with zero counts.
filter(tidy_counts_meta, counts == 0)
## -----------------------------------------------------------------------------
# The same rows, now grouped by Sample.
filter(tidy_counts_meta, counts == 0) %>%
  group_by(Sample)
## -----------------------------------------------------------------------------
# Number of zero-count rows per Sample.
filter(tidy_counts_expressed, counts == 0) %>%
  group_by(Sample) %>%
  summarise(n())
## -----------------------------------------------------------------------------
# Mean count per gene across all samples.
tidy_counts_meta %>%
  group_by(ENTREZ) %>%
  summarise(counts_mean = mean(counts))
## -----------------------------------------------------------------------------
# Mean count per gene within each cell type.
tidy_counts %>%
  group_by(ENTREZ, CellType) %>%
  summarise(counts_mean = mean(counts))
## -----------------------------------------------------------------------------
# Three highest-count rows in each Sample. row_number(desc(counts)) ranks each
# row within its group. order() returns the positions that would sort the
# vector, not per-row ranks, so comparing its output to 3 picks the wrong rows.
tidy_counts_meta %>%
  group_by(Sample) %>%
  filter(row_number(desc(counts)) <= 3)
## -----------------------------------------------------------------------------
# Non-zero rows grouped by cell type and gene.
tidy_counts_meta %>%
  filter(counts != 0) %>%
  group_by(CellType, ENTREZ)
## -----------------------------------------------------------------------------
# Keep groups with more than one non-zero row.
tidy_counts_meta %>%
  filter(counts != 0) %>%
  group_by(CellType, ENTREZ) %>%
  filter(n() > 1)
## -----------------------------------------------------------------------------
# Mean count per gene and cell type, spread to one column per cell type and
# drawn as a CD34 versus ORTHO scatter plot.
p <- tidy_counts_meta %>%
  group_by(ENTREZ, CellType) %>%
  summarise(counts_mean = mean(counts)) %>%
  pivot_wider(
    names_from = CellType,
    values_from = counts_mean
  ) %>%
  ggplot(aes(x = CD34, y = ORTHO)) +
  geom_point()
## -----------------------------------------------------------------------------
p
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
# Section heading; when isSlides is "yes" a slide class line is written first.
cat(
  if (params$isSlides == "yes") "class: inverse, center, middle\n",
  "# Outputting your tidy data\n---\n",
  sep = ""
)
## -----------------------------------------------------------------------------
# Two ways of writing the joined table as comma-separated text. Both target the
# same path, so the second call overwrites the first.
write_delim(
  tidy_counts_meta,
  "../counts_with_metadata.csv",
  delim = ","
)
write_csv(
  tidy_counts_meta,
  "../counts_with_metadata.csv"
)
## ---- results='asis',include=TRUE,echo=FALSE----------------------------------
# Section heading; when isSlides is "yes" a slide class line is written first.
cat(
  if (params$isSlides == "yes") "class: inverse, center, middle\n",
  "# Pattern matching with strings\n---\n",
  sep = ""
)
## -----------------------------------------------------------------------------
brc <- c("Tom", "Ji-Dung", "Matthew", "Wei", "Doug")
# Characters 1 to 3 of each name.
brc %>% str_sub(1, 3)
## -----------------------------------------------------------------------------
# Negative positions count from the end: second to second-to-last character.
brc %>% str_sub(2, -2)
## -----------------------------------------------------------------------------
# The assignment form replaces that span in place, so brc itself is modified.
str_sub(brc, 2, -2) <- 'X'
brc
## -----------------------------------------------------------------------------
# brc was overwritten above and no longer contains "Matthew" or "u". Restore
# the names so the replacements below have something to match.
brc <- c("Tom", "Ji-Dung", "Matthew", "Wei", "Doug")
str_replace_all(brc, 'Matthew', 'Matt')
str_replace_all(brc, 'u', 'z')
## -----------------------------------------------------------------------------
brc2 <- c("Tom ", " Ji -Dung", "Matt ", "Wei", "D o u g")
# Removes every space, including those inside a name.
str_replace_all(brc2, ' ', '')
## -----------------------------------------------------------------------------
# Removes leading and trailing whitespace only.
str_trim(brc2)
## -----------------------------------------------------------------------------
# Pads on the left to a width of 10 characters.
str_pad(brc2, width = 10, side = 'left')
## -----------------------------------------------------------------------------
# First few gene symbols, as stored and then in title case.
pull(tidy_counts_meta, SYMBOL) %>%
  head()
pull(tidy_counts_meta, SYMBOL) %>%
  str_to_title() %>%
  head()
## -----------------------------------------------------------------------------
# The same conversion stored as a new column.
tidy_counts_meta %>%
  mutate(SYMBOL2 = str_to_title(SYMBOL))
## -----------------------------------------------------------------------------
# Title case, then back to upper case in a third column.
tidy_counts_meta %>%
  mutate(SYMBOL2 = str_to_title(SYMBOL)) %>%
  mutate(SYMBOL3 = str_to_upper(SYMBOL2))
## -----------------------------------------------------------------------------
# Logical vector: does each symbol contain "GAP"?
pull(tidy_counts_meta, SYMBOL) %>%
  str_detect("GAP") %>%
  head()
# Feed that logical vector to filter() through the `.` placeholder.
pull(tidy_counts_meta, SYMBOL) %>%
  str_detect("GAP") %>%
  filter(tidy_counts_meta, .)
## -----------------------------------------------------------------------------
# Only the symbols that contain "GAP".
pull(tidy_counts_meta, SYMBOL) %>%
  str_subset("GAP")
## -----------------------------------------------------------------------------
# Match counts per symbol, first 100 entries.
pull(tidy_counts_meta, SYMBOL) %>%
  str_count("GAP") %>%
  .[1:100]
pull(tidy_counts_meta, SYMBOL) %>%
  str_count("A") %>%
  .[1:100]