These exercises cover the sections of Data wrangling with tidy.
All files can be found in the “dataset” directory.

Exercise 10
# Bin each gene by LENGTH: below 3000 is 'short', above 6000 is 'long',
# and anything in between (bounds included) is 'medium'.
tidy_counts_expressed_norm %>%
  mutate(
    length_cat = case_when(
      LENGTH < 3000 ~ 'short',
      LENGTH > 6000 ~ 'long',
      # Explicit range rather than a catch-all, so a missing LENGTH stays NA.
      LENGTH >= 3000 & LENGTH <= 6000 ~ 'medium'
    )
  )
## # A tibble: 376 x 12
## # Groups: Sample [4]
## ENTREZ Sample CellType Rep counts count_total CPM SYMBOL CHR LENGTH TPM length_cat
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <chr> <chr> <int> <dbl> <chr>
## 1 350 CD34_1 CD34 1 204 307 1362. APOH chr17 1201 3069. short
## 2 350 ORTHO_1 ORTHO 1 0 307 0 APOH chr17 1201 0 short
## 3 350 CD34_2 CD34 2 103 307 975. APOH chr17 1201 2027. short
## 4 350 ORTHO_2 ORTHO 2 0 307 0 APOH chr17 1201 0 short
## 5 351 CD34_1 CD34 1 15586 26580 104068. APP chr21 4480 62851. medium
## 6 351 ORTHO_1 ORTHO 1 479 26580 5825. APP chr21 4480 3333. medium
## 7 351 CD34_2 CD34 2 10476 26580 99125. APP chr21 4480 55281. medium
## 8 351 ORTHO_2 ORTHO 2 39 26580 1185. APP chr21 4480 703. medium
## 9 353 CD34_1 CD34 1 842 2471 5622. APRT chr16 807 18849. short
## 10 353 ORTHO_1 ORTHO 1 355 2471 4317. APRT chr16 807 13711. short
## # … with 366 more rows
# Bin genes by LENGTH, then store the category as a factor whose levels are
# ordered long, medium, short.
tidy_counts_expressed_norm %>%
  mutate(
    length_cat = if_else(
      LENGTH < 3000, 'short',
      if_else(LENGTH > 6000, 'long', 'medium')
    ),
    # fct_relevel() takes the levels to move as unnamed arguments in `...`;
    # it has no `levels` argument, and a named vector there triggers
    # "Outer names are only allowed for unnamed scalar atomic inputs".
    length_cat = fct_relevel(as_factor(length_cat), 'long', 'medium', 'short')
  )
## # A tibble: 376 x 12
## # Groups: Sample [4]
## ENTREZ Sample CellType Rep counts count_total CPM SYMBOL CHR LENGTH TPM length_cat
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <chr> <chr> <int> <dbl> <fct>
## 1 350 CD34_1 CD34 1 204 307 1362. APOH chr17 1201 3069. short
## 2 350 ORTHO_1 ORTHO 1 0 307 0 APOH chr17 1201 0 short
## 3 350 CD34_2 CD34 2 103 307 975. APOH chr17 1201 2027. short
## 4 350 ORTHO_2 ORTHO 2 0 307 0 APOH chr17 1201 0 short
## 5 351 CD34_1 CD34 1 15586 26580 104068. APP chr21 4480 62851. medium
## 6 351 ORTHO_1 ORTHO 1 479 26580 5825. APP chr21 4480 3333. medium
## 7 351 CD34_2 CD34 2 10476 26580 99125. APP chr21 4480 55281. medium
## 8 351 ORTHO_2 ORTHO 2 39 26580 1185. APP chr21 4480 703. medium
## 9 353 CD34_1 CD34 1 842 2471 5622. APRT chr16 807 18849. short
## 10 353 ORTHO_1 ORTHO 1 355 2471 4317. APRT chr16 807 13711. short
## # … with 366 more rows
# Box plot of TPM for each gene-length category, with a log2 y-axis.
tidy_counts_expressed_norm %>%
  mutate(
    length_cat = if_else(
      LENGTH < 3000, 'short',
      if_else(LENGTH > 6000, 'long', 'medium')
    ),
    # fct_relevel() takes the levels to move as unnamed arguments in `...`;
    # it has no `levels` argument, and a named vector there triggers
    # "Outer names are only allowed for unnamed scalar atomic inputs".
    length_cat = fct_relevel(as_factor(length_cat), 'long', 'medium', 'short')
  ) %>%
  ggplot(aes(x = length_cat, y = TPM)) +
  geom_boxplot() +
  # TPM values of 0 are non-finite on a log2 scale, so ggplot drops those
  # rows and reports it in the warnings below.
  scale_y_continuous(trans = 'log2')
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 46 rows containing non-finite values (stat_boxplot).