These exercises cover the "Reading and writing data" sections of Introduction to R.
Exercise 1
Have a look at all the files in Notepad or Excel before attempting to read them into R. All files can be found in the “data” directory.
[1] "/Users/mattpaul"
## [1] "data.frame" "list" "oldClass" "vector"
## [5] "list_OR_List" "vector_OR_factor" "vector_OR_Vector"
# Mean expression of each gene (one value per row of the expression table).
# rowMeans() works for any number of rows and accepts either a numeric matrix
# or an all-numeric data frame, so the rows no longer have to be listed by hand.
sampleMeans <- rowMeans(geneExpression)
# Label each mean with the corresponding row (gene) name.
names(sampleMeans) <- rownames(geneExpression)
sampleMeans
## Gene_a Gene_b Gene_c Gene_d Gene_e Gene_f Gene_g Gene_h
## 4.660569 4.379796 4.259824 5.849420 5.850658 6.732781 10.405203 10.201357
# Switch to a data frame so that a non-numeric annotation column can be stored
# next to the numeric expression values.
geneExpression <- as.data.frame(geneExpression)
# One tissue label per row, given in row order and stored as a factor.
geneExpression[["tissue_localisation"]] <- factor(
  c(
    "Kidney", "Adrenal", "Liver", "Adrenal",
    "Kidney", "Liver", "Liver", "Kidney"
  )
)
# Attach the per-row means held in sampleMeans as an extra column.
geneExpression[["mean_expr"]] <- sampleMeans
geneExpression
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
## Gene_c 3.083392 3.414723 3.706069 4.535536 5.104273
## Gene_d 4.726498 3.023746 3.033173 8.017895 8.098800
## Gene_e 9.909185 9.174323 9.957153 2.053501 3.276533
## Gene_f 10.680459 9.951243 8.985412 3.360963 3.566663
## Gene_g 10.516534 10.176163 9.778173 11.781520 9.005437
## Gene_h 9.017020 9.342291 9.895636 12.046704 11.003240
## Sample_1.low tissue_localisation mean_expr
## Gene_a 5.9350948 Kidney 4.660569
## Gene_b 2.6313925 Adrenal 4.379796
## Gene_c 5.7149521 Liver 4.259824
## Gene_d 8.1964109 Adrenal 5.849420
## Gene_e 0.7332521 Kidney 5.850658
## Gene_f 3.8519471 Liver 6.732781
## Gene_g 11.1733928 Liver 10.405203
## Gene_h 9.9032500 Kidney 10.201357
# Sort rows from highest to lowest mean expression.
orderedExpression <- geneExpression[
  order(geneExpression$mean_expr, decreasing = TRUE),
]
# Keep only the rows whose mean expression is above 5.
filteredExpression <- orderedExpression[orderedExpression$mean_expr > 5, ]
# The table is written with row.names = FALSE below, so copy the row names
# into a regular first column ("geneNames") to keep them in the output file.
expressionDF <- cbind(
  geneNames = rownames(filteredExpression),
  filteredExpression
)
# Write a comma-separated table with a header line and no row names.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
write.table(
  expressionDF,
  "orderedExpression.txt",
  sep = ",",
  col.names = TRUE,
  row.names = FALSE
)
# Read the tab-separated expression table: skip the first 3 lines of the file,
# treat the next line as the header, and use the first column as row names.
# `header = TRUE` is spelled out in full rather than relying on partial
# argument matching (`h`) and the reassignable shorthand `T`.
geneExpression <- read.table(
  "data/GeneExpressionWithMethods.txt",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  skip = 3
)
geneExpression
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
## Gene_c 3.083392 3.414723 3.706069 4.535536 5.104273
## Gene_d 4.726498 3.023746 3.033173 8.017895 8.098800
## Gene_e 9.909185 9.174323 9.957153 2.053501 3.276533
## Gene_f 10.680459 9.951243 8.985412 3.360963 3.566663
## Gene_g 10.516534 10.176163 9.778173 11.781520 9.005437
## Gene_h 9.017020 9.342291 9.895636 12.046704 11.003240
## Sample_1.low
## Gene_a 5.9350948
## Gene_b 2.6313925
## Gene_c 5.7149521
## Gene_d 8.1964109
## Gene_e 0.7332521
## Gene_f 3.8519471
## Gene_g 11.1733928
## Gene_h 9.9032500
# Read the tab-separated expression table, using the first column as row names
# and the first non-comment line as the header. Setting comment.char to ">"
# makes read.table() ignore text from a ">" character to the end of the line.
# `header = TRUE` is spelled out in full rather than relying on partial
# argument matching (`h`) and the reassignable shorthand `T`.
geneExpression <- read.table(
  "data/GeneExpressionWithNotes.txt",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  comment.char = ">"
)
geneExpression
## Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a 5.742510 3.214303 4.116820 3.212353 5.742333
## Gene_b 6.444368 5.896076 2.592581 5.089549 3.624812
## Gene_c 3.083392 3.414723 3.706069 4.535536 5.104273
## Gene_d 4.726498 3.023746 3.033173 8.017895 8.098800
## Gene_e 9.909185 9.174323 9.957153 2.053501 3.276533
## Gene_f 10.680459 9.951243 8.985412 3.360963 3.566663
## Gene_g 10.516534 10.176163 9.778173 11.781520 9.005437
## Gene_h 9.017020 9.342291 9.895636 12.046704 11.003240
## Sample_1.low
## Gene_a 5.9350948
## Gene_b 2.6313925
## Gene_c 5.7149521
## Gene_d 8.1964109
## Gene_e 0.7332521
## Gene_f 3.8519471
## Gene_g 11.1733928
## Gene_h 9.9032500