These exercises cover the Facets, Scales and Transformations of ggplot2 for Plotting in R.

Exercise 1 - Facets

1.1 - Read in the cleaned patients dataset that we saw earlier in the ggplot2 course (“patients_clean_ggplot2.txt”).

# Load the cleaned patients table from its tab-delimited text file.
patients_clean <- read.delim(
  file = "data/patients_clean_ggplot2.txt",
  sep = "\t"
)

1.2 - Generate a scatter plot of BMI vs Weight and split (or facet) the data into a grid of plots separated by Smoking status and Sex.

library(ggplot2)
# Base scatter plot of Weight against BMI; the facets are layered on at print time.
plot <- ggplot(patients_clean, aes(x = BMI, y = Weight)) +
  geom_point()
# Grid of panels: Sex defines the rows and Smokes defines the columns.
plot + facet_grid(rows = vars(Sex), cols = vars(Smokes))

1.3 - Make a boxplot of BMI that is grouped by smoker and coloured by sex, then include a separate facet for each age (using the Age column).

# BMI boxplots with Smokes on the x axis and box outlines coloured by Sex.
plot <- ggplot(patients_clean, aes(x = Smokes, y = BMI, colour = Sex))
# Add the boxplot layer and wrap into one panel per Age value.
plot <- plot +
  geom_boxplot() +
  facet_wrap(vars(Age))
plot

1.4 - Produce a similar boxplot of BMIs but this time group data by Sex, colour by Age and facet by Smoking status.

HINT - Discrete values such as in factors are used for categorical data.

# BMI boxplots per Sex. Age is wrapped in factor() so it is treated as a
# discrete colour grouping rather than a continuous value.
plot <- ggplot(patients_clean, aes(x = Sex, y = BMI, colour = factor(Age)))
# Add the boxplot layer and wrap into one panel per Smokes value.
plot <- plot +
  geom_boxplot() +
  facet_wrap(vars(Smokes))
plot

1.5 - Regenerate the previous solution from exercise 1.4, but this time using a violin plot.

# Same mappings as the boxplot in 1.4 (Sex on x, BMI on y, colour by Age as a
# factor), drawn as violins instead.
plot <- ggplot(patients_clean, aes(x = Sex, y = BMI, colour = factor(Age)))
# Add the violin layer and wrap into one panel per Smokes value.
plot <- plot +
  geom_violin() +
  facet_wrap(vars(Smokes))
plot

1.6 - Generate a separate density plot of BMI coloured by sex for each Grade.

# Density curves of BMI, filled by Sex and drawn half-transparent (alpha = 0.5).
plot <- ggplot(patients_clean, aes(x = BMI)) +
  geom_density(mapping = aes(fill = Sex), alpha = 0.5)
# Draw one density panel per Grade.
plot + facet_wrap(vars(Grade))

Exercise 2 - Scales

2.1 - Using the patient dataset, generate a scatter plot of BMI versus Weight

library(ggplot2)

# Reload the cleaned patients table from its tab-delimited text file.
patients_clean <- read.delim(
  file = "data/patients_clean_ggplot2.txt",
  sep = "\t"
)

# Plain scatter plot of Weight against BMI.
plot <- ggplot(patients_clean, aes(x = BMI, y = Weight)) +
  geom_point()
plot

2.2 - With the plot from exercise 2.1, adjust the BMI axis to show only the labels 20, 30 and 40, and the Weight axis to show breaks from 60 to 100 in steps of 5. Also specify the units in the y-axis label.

# Scatter plot of Weight against BMI with hand-picked axis breaks.
plot <- ggplot(patients_clean, aes(x = BMI, y = Weight)) +
  geom_point()
# `labels` is spelled out in full: the previous `label =` only worked through
# partial argument matching.
# NOTE: limits = c(20, 40) also drops points whose BMI lies outside 20-40
# (the scale's default out-of-bounds handling censors them), not just labels.
plot +
  scale_x_continuous(
    breaks = c(20, 30, 40),
    labels = c(20, 30, 40),
    limits = c(20, 40)
  ) +
  scale_y_continuous(
    name = "Weight (kilos)",
    breaks = seq(60, 100, by = 5),
    labels = seq(60, 100, by = 5)
  )

2.3 - Create a violin plot of BMI by Age where violins are filled using a sequential colour palette.

# Violin of BMI for each Age (as a factor), filled with the "Blues" brewer
# palette; na.value = "black" is the fill used for missing values.
plot <- ggplot(patients_clean, aes(x = factor(Age), y = BMI)) +
  geom_violin(mapping = aes(fill = factor(Age))) +
  scale_fill_brewer(palette = "Blues", na.value = "black")
plot

2.4 - Create a scatterplot of BMI versus Weight and add a continuous colour scale for the height. Make the colour scale with a midpoint (set to mean point) colour of gray and extremes of blue (low) and yellow (high).

library(ggplot2)

# Scatter plot of Weight against BMI, coloured by Height on a diverging scale:
# blue (low) through grey at the mean Height to yellow (high).
plot <- ggplot(patients_clean, aes(x = BMI, y = Weight, colour = Height)) +
  geom_point() +
  scale_colour_gradient2(
    low = "blue",
    mid = "grey",
    high = "yellow",
    midpoint = mean(patients_clean[["Height"]])
  )
plot

2.5 - Adjust the plot from exercise 2.4 using scales to remove values greater than 180.

library(ggplot2)

# Same diverging Height colour scale as in 2.4, but with the scale limits
# capped at 180. Heights outside the limits get na.value = NA as their colour.
plot <- ggplot(patients_clean, aes(x = BMI, y = Weight, colour = Height)) +
  geom_point() +
  scale_colour_gradient2(
    low = "blue",
    mid = "grey",
    high = "yellow",
    midpoint = mean(patients_clean[["Height"]]),
    limits = c(min(patients_clean[["Height"]]), 180),
    na.value = NA
  )
plot
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

2.6 - Adjust the colour scale legend of the plot from exercise 2.4 to show only the minimum, median and 75th percentile values.

library(ggplot2)

# Legend breaks: minimum, median and 75th percentile of Height. They are
# computed once with explicit probabilities (instead of indexing the default
# quantile() output by position) and reused for the labels, so breaks and
# labels cannot drift apart. unname() drops the "0%"/"50%"/"75%" names.
height_breaks <- unname(
  quantile(patients_clean$Height, probs = c(0, 0.5, 0.75))
)

# Scatter plot of Weight against BMI, coloured by Height on the diverging
# blue-grey-yellow scale, with the legend restricted to the breaks above.
plot <- ggplot(patients_clean, aes(x = BMI, y = Weight, colour = Height)) +
  geom_point() +
  scale_colour_gradient2(
    low = "blue",
    mid = "grey",
    high = "yellow",
    midpoint = mean(patients_clean$Height),
    breaks = height_breaks,
    # Show the break values rounded to three significant digits.
    labels = signif(height_breaks, 3)
  )
plot

2.7 - With the plot from exercise 2.4, create another scatterplot with Count variable mapped to transparency/alpha and size illustrating whether a person is overweight. Is there a better combination of aesthetic mappings?

library(ggplot2)

# Scatter plot of Weight against BMI with three extra mappings: Height to
# colour, Count to transparency and Overweight to point size.
plot <- ggplot(
  patients_clean,
  aes(
    x = BMI,
    y = Weight,
    colour = Height,
    alpha = Count,
    size = Overweight
  )
) +
  geom_point()
plot
## Warning: Using size for a discrete variable is not advised.

Exercise 3 - Statistics

3.1 - Recreate the scatterplot of BMI versus Weight. Colour by Age group and add fitted lines (but no SE lines) for each Age group.

library(ggplot2)

# Scatter plot of Weight against BMI, coloured by Age (as a factor), with one
# linear-model fit per colour group. se = FALSE hides the confidence band;
# it is written out in full because `F` is an ordinary, reassignable variable.
plot <- ggplot(
  patients_clean,
  aes(x = BMI, y = Weight, colour = factor(Age))
) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)

plot
## `geom_smooth()` using formula = 'y ~ x'

3.2 - Show the equation for the fitted line made for each age group in exercise 3.1

library(ggpubr)
# Scatter plot with per-Age linear fits, as in exercise 3.1. se = FALSE is
# written out in full because `F` is an ordinary, reassignable variable.
plot <- ggplot(
  patients_clean,
  aes(x = BMI, y = Weight, colour = factor(Age))
) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  # One equation label per Age value (43, 44, 45): each layer is given only
  # that age's rows and its own label.y position.
  stat_regline_equation(
    data = patients_clean[patients_clean$Age == 43, ],
    label.y = 95,
    aes(label = after_stat(eq.label)),
    formula = y ~ x
  ) +
  stat_regline_equation(
    data = patients_clean[patients_clean$Age == 44, ],
    label.y = 93,
    aes(label = after_stat(eq.label)),
    formula = y ~ x
  ) +
  stat_regline_equation(
    data = patients_clean[patients_clean$Age == 45, ],
    label.y = 91,
    aes(label = after_stat(eq.label)),
    formula = y ~ x
  )

plot
## `geom_smooth()` using formula = 'y ~ x'

3.3 - Make a boxplot of BMI that is grouped by smoker and coloured by sex. On the plot, include the adjusted p-values for the comparisons of BMI between sexes within each smoking group.

library(rstatix)
library(ggpubr) # provides stat_pvalue_manual(), used below

# Within each Smokes group, t-test BMI between the Sex levels, then adjust
# the resulting p-values with the Benjamini-Hochberg ("BH") method.
grouped_data <- group_by(patients_clean, Smokes)
stat.test <- t_test(grouped_data, formula = BMI ~ Sex)
stat.test <- adjust_pvalue(stat.test, method = "BH")
# Add the plotting coordinates for the p-value annotations, with Smokes on
# the x axis and a dodge width of 0.8.
stat.test <- add_xy_position(stat.test, x = "Smokes", dodge = 0.8)

# BMI boxplots per Smokes group coloured by Sex, annotated with the adjusted
# p-values. inherit.aes = FALSE is written out in full because `F` is an
# ordinary, reassignable variable.
plot <- ggplot(patients_clean, aes(x = Smokes, y = BMI, colour = Sex)) +
  geom_boxplot() +
  stat_pvalue_manual(stat.test, label = "p = {p.adj}", inherit.aes = FALSE) +
  scale_y_continuous(expand = expansion(mult = 0.1))

plot