These exercises are about the factors and data frames sections of Introduction to R.
Exercise 1 - Factors
## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK
## Levels: DC1 DC2 Mono NK
## [1] DC1 DC1 Neu NK NK Mono Mono DC2 NK
## Levels: DC1 DC2 Mono NK Neu
# Cell-type labels as a factor. The level set is deliberately wider than the
# labels observed here, so "Neu", "Bcell" and "Tcell" are valid levels even
# though no observation carries them yet.
CellType2 <- factor(
  rep(c("DC1", "NK", "Mono", "DC2", "NK"), times = c(3, 2, 2, 1, 1)),
  levels = c("DC1", "DC2", "Mono", "NK", "Neu", "Bcell", "Tcell")
)
CellType2
## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK
## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
# Append four further observations to CellType2.
# The labels are combined as character strings and the factor is rebuilt
# against the existing level set: calling c() directly on factors returns a
# factor only from R 4.1.0 onwards, while older versions return the underlying
# integer codes instead.
CellType2 <- factor(
  c(as.character(CellType2), "Neu", "Neu", "Bcell", "DC1"),
  levels = levels(CellType2)
)
CellType2
## [1] DC1 DC1 DC1 NK NK Mono Mono DC2 NK Neu Neu Bcell
## [13] DC1
## Levels: DC1 DC2 Mono NK Neu Bcell Tcell
## DC1 DC2 Mono NK Neu Bcell Tcell
## 4 1 2 3 2 1 0
## Bcell DC1 DC2 Mono Neu NK Tcell
## 4 1 2 3 2 1 0
# Ordered factor of height categories. The explicit level order
# low < mid < high is what makes comparisons such as Height > "low" valid.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Height <- factor(
  c("high", "low", "mid", "low", "mid", "low", "mid", "high", "mid", "high"),
  levels = c("low", "mid", "high"),
  ordered = TRUE
)
Height
## [1] high low mid low mid low mid high mid high
## Levels: low < mid < high
## [1] high mid mid mid high mid high
## Levels: low < mid < high
# Re-level Height with an additional top category "veryHigh", overwrite the
# last observation with that category, then keep only the observations that
# rank strictly above "mid".
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
newFactor <- factor(
  Height,
  levels = c("low", "mid", "high", "veryHigh"),
  ordered = TRUE
)
newFactor[length(newFactor)] <- "veryHigh"
newFactor[newFactor > "mid"]
## [1] high high veryHigh
## Levels: low < mid < high < veryHigh
Exercise 2 - Data frames
# Gene annotation table: one row per gene, with its Ensembl identifier,
# pathway membership and gene length.
Annotation <- data.frame(
  geneNames   = paste0("Gene_", 1:5),
  ensembl     = c("Ens001", "Ens003", "Ens006", "Ens007", "Ens010"),
  pathway     = rep(c("Glycolysis", "TGFb"), length.out = 5),
  geneLengths = c(100, 3000, 200, 1000, 1200)
)
Annotation
## geneNames ensembl pathway geneLengths
## 1 Gene_1 Ens001 Glycolysis 100
## 2 Gene_2 Ens003 TGFb 3000
## 3 Gene_3 Ens006 Glycolysis 200
## 4 Gene_4 Ens007 TGFb 1000
## 5 Gene_5 Ens010 Glycolysis 1200
## geneNames ensembl pathway geneLengths
## 4 Gene_4 Ens007 TGFb 1000
## 5 Gene_5 Ens010 Glycolysis 1200
## [1] "character"
## [1] "character"
## [1] "character"
## [1] "numeric"
## geneNames ensembl pathway geneLengths
## 1 Gene_1 Ens001 Glycolysis 100
## 2 Gene_2 Ens003 TGFb 3000
## 3 Gene_3 Ens006 Glycolysis 200
## 4 Gene_4 Ens007 TGFb 1000
## 5 Gene_5 Ens010 Glycolysis 1200
## [1] "factor"
# Expression values for sample 1, keyed by Ensembl identifier.
# This sample has no entry for Ens007.
Sample1 <- data.frame(
  ensembl    = c("Ens001", "Ens003", "Ens006", "Ens010"),
  expression = c(1, 3, 10, 5) * 1000
)
Sample1
## ensembl expression
## 1 Ens001 1000
## 2 Ens003 3000
## 3 Ens006 10000
## 4 Ens010 5000
# Expression values for sample 2, keyed by Ensembl identifier.
Sample2 <- data.frame(
  ensembl    = c("Ens001", "Ens003", "Ens006", "Ens007", "Ens010"),
  expression = c(1.5, 1.5, 17, 0.5, 10) * 1000
)
Sample2
## ensembl expression
## 1 Ens001 1500
## 2 Ens003 1500
## 3 Ens006 17000
## 4 Ens007 500
## 5 Ens010 10000
# Inner-join the annotation with both samples on the Ensembl identifier.
# The key is given by column name rather than by position, so the join does
# not silently pick a different column if the column order ever changes.
# all = FALSE keeps only identifiers present in both tables of each merge;
# the two "expression" columns come out as expression.x (Sample1) and
# expression.y (Sample2) through merge()'s default suffixes.
AnnoSample1 <- merge(Annotation, Sample1, by = "ensembl", all = FALSE)
AnnoSample1And2 <- merge(AnnoSample1, Sample2, by = "ensembl", all = FALSE)
AnnoSample1And2
## ensembl geneNames pathway geneLengths expression.x expression.y
## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
## 2 Ens003 Gene_2 TGFb 3000 3000 1500
## 3 Ens006 Gene_3 Glycolysis 200 10000 17000
## 4 Ens010 Gene_5 Glycolysis 1200 5000 10000
# Sort the merged table by gene length, longest first.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
lengthRank <- order(AnnoSample1And2$geneLengths, decreasing = TRUE)
AnnoSample1And2 <- AnnoSample1And2[lengthRank, ]
AnnoSample1And2
## ensembl geneNames pathway geneLengths expression.x expression.y
## 2 Ens003 Gene_2 TGFb 3000 3000 1500
## 4 Ens010 Gene_5 Glycolysis 1200 5000 10000
## 3 Ens006 Gene_3 Glycolysis 200 10000 17000
## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
# Length-normalised expression: each sample's expression divided by the
# gene length, stored as two new columns at the end of the table.
AnnoSample1And2[["Sample1_lne"]] <- with(AnnoSample1And2, expression.x / geneLengths)
AnnoSample1And2[["Sample2_lne"]] <- with(AnnoSample1And2, expression.y / geneLengths)
AnnoSample1And2
## ensembl geneNames pathway geneLengths expression.x expression.y
## 2 Ens003 Gene_2 TGFb 3000 3000 1500
## 4 Ens010 Gene_5 Glycolysis 1200 5000 10000
## 3 Ens006 Gene_3 Glycolysis 200 10000 17000
## 1 Ens001 Gene_1 Glycolysis 100 1000 1500
## Sample1_lne Sample2_lne
## 2 1.000000 0.500000
## 4 4.166667 8.333333
## 3 50.000000 85.000000
## 1 10.000000 15.000000
# Use the Ensembl identifiers as row names so rows can be looked up by ID,
# then average the two length-normalised expression values for Ens006.
row.names(AnnoSample1And2) <- AnnoSample1And2[["ensembl"]]
mean(unlist(AnnoSample1And2["Ens006", c("Sample1_lne", "Sample2_lne")]))
## [1] 67.5
# Per-gene log2 fold change of sample 2 over sample 1, computed on the
# length-normalised expression values and labelled with the gene names.
log2FoldChange <- with(
  AnnoSample1And2,
  setNames(log2(Sample2_lne) - log2(Sample1_lne), geneNames)
)
log2FoldChange
## Gene_2 Gene_5 Gene_3 Gene_1
## -1.0000000 1.0000000 0.7655347 0.5849625
## [1] 1500