These exercises cover the reading and writing data sections of Introduction to R.

Exercise 1

Have a look at all the files in Notepad or Excel before attempting to read them into R. All files can be found in the “data” directory.

# Show the current working directory; relative file paths are resolved against it.
getwd()
[1] "/Users/mattpaul"
# Move into the course directory so the relative "data/..." paths resolve.
setwd("~/Downloads/Intro_To_R_1Day-master/r_course")
# Read the tab-separated table: the first line supplies the column names and
# the first column supplies the row names.
# Arguments are spelled out in full (header = TRUE rather than h = T) so the
# call does not depend on partial argument matching or on the reassignable T.
geneExpression <- read.table(
  "data/GeneExpression.txt",
  header = TRUE,
  sep = "\t",
  row.names = 1
)
# List the classes the object belongs to.
is(geneExpression)
## [1] "data.frame"       "list"             "oldClass"         "vector"          
## [5] "list_OR_List"     "vector_OR_factor" "vector_OR_Vector"
# Convert to a matrix so that each row can be extracted as a plain numeric
# vector.
geneExpression <- as.matrix(geneExpression)
# Take the mean of every row (MARGIN = 1), however many rows the table has,
# instead of writing out one mean() call per row. apply() names the result
# after the matrix row names.
sampleMeans <- apply(geneExpression, 1, mean)
sampleMeans
##    Gene_a    Gene_b    Gene_c    Gene_d    Gene_e    Gene_f    Gene_g    Gene_h 
##  4.660569  4.379796  4.259824  5.849420  5.850658  6.732781 10.405203 10.201357
# Alternatively we could use the rowMeans() function.
# rowMeans(geneExpression)
# Go back to a data frame so that columns of different types can be added.
geneExpression <- as.data.frame(geneExpression)
# Attach one tissue label per row, stored as a factor.
geneExpression[["tissue_localisation"]] <- factor(
  c(
    "Kidney", "Adrenal", "Liver", "Adrenal",
    "Kidney", "Liver", "Liver", "Kidney"
  )
)
# Attach the per-row means computed earlier.
geneExpression[["mean_expr"]] <- sampleMeans
print(geneExpression)
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
## Gene_c    3.083392    3.414723    3.706069     4.535536     5.104273
## Gene_d    4.726498    3.023746    3.033173     8.017895     8.098800
## Gene_e    9.909185    9.174323    9.957153     2.053501     3.276533
## Gene_f   10.680459    9.951243    8.985412     3.360963     3.566663
## Gene_g   10.516534   10.176163    9.778173    11.781520     9.005437
## Gene_h    9.017020    9.342291    9.895636    12.046704    11.003240
##        Sample_1.low tissue_localisation mean_expr
## Gene_a    5.9350948              Kidney  4.660569
## Gene_b    2.6313925             Adrenal  4.379796
## Gene_c    5.7149521               Liver  4.259824
## Gene_d    8.1964109             Adrenal  5.849420
## Gene_e    0.7332521              Kidney  5.850658
## Gene_f    3.8519471               Liver  6.732781
## Gene_g   11.1733928               Liver 10.405203
## Gene_h    9.9032500              Kidney 10.201357
# Sort the rows from highest to lowest mean expression.
orderedExpression <- geneExpression[
  order(geneExpression$mean_expr, decreasing = TRUE),
]
# Keep only the rows whose mean expression is greater than 5.
filteredExpression <- orderedExpression[orderedExpression$mean_expr > 5, ]
# Row names are not written to the file below, so copy them into a leading
# "geneNames" column first.
expressionDF <- cbind(
  geneNames = rownames(filteredExpression),
  filteredExpression
)
# Write a comma-separated file with a header line and without row names.
write.table(
  expressionDF,
  "orderedExpression.txt",
  sep = ",",
  col.names = TRUE,
  row.names = FALSE
)
# skip = 3 makes read.table() ignore the first three lines of the file before
# it starts reading; the header line is then taken from the fourth line.
geneExpression <- read.table(
  "data/GeneExpressionWithMethods.txt",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  skip = 3
)
geneExpression
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
## Gene_c    3.083392    3.414723    3.706069     4.535536     5.104273
## Gene_d    4.726498    3.023746    3.033173     8.017895     8.098800
## Gene_e    9.909185    9.174323    9.957153     2.053501     3.276533
## Gene_f   10.680459    9.951243    8.985412     3.360963     3.566663
## Gene_g   10.516534   10.176163    9.778173    11.781520     9.005437
## Gene_h    9.017020    9.342291    9.895636    12.046704    11.003240
##        Sample_1.low
## Gene_a    5.9350948
## Gene_b    2.6313925
## Gene_c    5.7149521
## Gene_d    8.1964109
## Gene_e    0.7332521
## Gene_f    3.8519471
## Gene_g   11.1733928
## Gene_h    9.9032500
# comment.char = ">" tells read.table() to treat ">" as the comment character,
# so everything from a ">" to the end of that line is ignored.
geneExpression <- read.table(
  "data/GeneExpressionWithNotes.txt",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  comment.char = ">"
)
geneExpression
##        Sample_1.hi Sample_2.hi Sample_3.hi Sample_4.low Sample_5.low
## Gene_a    5.742510    3.214303    4.116820     3.212353     5.742333
## Gene_b    6.444368    5.896076    2.592581     5.089549     3.624812
## Gene_c    3.083392    3.414723    3.706069     4.535536     5.104273
## Gene_d    4.726498    3.023746    3.033173     8.017895     8.098800
## Gene_e    9.909185    9.174323    9.957153     2.053501     3.276533
## Gene_f   10.680459    9.951243    8.985412     3.360963     3.566663
## Gene_g   10.516534   10.176163    9.778173    11.781520     9.005437
## Gene_h    9.017020    9.342291    9.895636    12.046704    11.003240
##        Sample_1.low
## Gene_a    5.9350948
## Gene_b    2.6313925
## Gene_c    5.7149521
## Gene_d    8.1964109
## Gene_e    0.7332521
## Gene_f    3.8519471
## Gene_g   11.1733928
## Gene_h    9.9032500