Peter Haschke



Back to the Index

Correlation Matrix Plot

I was creating some correlation tables for my dissertation and realized that plots are vastly more intuitive. Below is an example of how to plot a correlation matrix using ggplot2.

## The Data (Motor Trend Car Road Tests)

# Load the Motor Trend road-test data into the workspace.
data(mtcars)

# Keep only the six variables used below. The row names are reset so the
# result carries plain integer row names rather than the car models.
dat <- mtcars[, c("mpg", "cyl", "disp", "hp", "wt", "gear")]
rownames(dat) <- NULL
summary(dat)
##       mpg            cyl            disp             hp       
##  Min.   :10.4   Min.   :4.00   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.4   1st Qu.:4.00   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.2   Median :6.00   Median :196.3   Median :123.0  
##  Mean   :20.1   Mean   :6.19   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.8   3rd Qu.:8.00   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.9   Max.   :8.00   Max.   :472.0   Max.   :335.0  
##        wt            gear     
##  Min.   :1.51   Min.   :3.00  
##  1st Qu.:2.58   1st Qu.:3.00  
##  Median :3.33   Median :4.00  
##  Mean   :3.22   Mean   :3.69  
##  3rd Qu.:3.61   3rd Qu.:4.00  
##  Max.   :5.42   Max.   :5.00
## Computing the correlation matrix

# Pairwise Spearman rank correlations between all columns of `dat`,
# rounded to two decimals for display.
cor.matrix <- cor(dat, method = "spearman", use = "pairwise.complete.obs")
cor.matrix <- round(cor.matrix, digits = 2)
cor.matrix
##        mpg   cyl  disp    hp    wt  gear
## mpg   1.00 -0.91 -0.91 -0.89 -0.89  0.54
## cyl  -0.91  1.00  0.93  0.90  0.86 -0.56
## disp -0.91  0.93  1.00  0.85  0.90 -0.59
## hp   -0.89  0.90  0.85  1.00  0.77 -0.33
## wt   -0.89  0.86  0.90  0.77  1.00 -0.68
## gear  0.54 -0.56 -0.59 -0.33 -0.68  1.00
## Setting duplicates to NA and taking the absolute value

# The matrix is symmetric, so every cell below the diagonal duplicates one
# above it. Blank out the strict lower triangle (the diagonal is kept) with
# lower.tri() instead of listing rows and columns by hand, so this works for
# any number of variables.
cor.matrix[lower.tri(cor.matrix)] <- NA

# Only the strength of each association is kept, not its direction.
cor.matrix <- abs(cor.matrix)
cor.matrix
##      mpg  cyl disp   hp   wt gear
## mpg    1 0.91 0.91 0.89 0.89 0.54
## cyl   NA 1.00 0.93 0.90 0.86 0.56
## disp  NA   NA 1.00 0.85 0.90 0.59
## hp    NA   NA   NA 1.00 0.77 0.33
## wt    NA   NA   NA   NA 1.00 0.68
## gear  NA   NA   NA   NA   NA 1.00
## Turning it all into a dataframe and removing duplicates

library(reshape)

# Reshape the matrix to long format: one row per (X1, X2) cell with its
# correlation in `value`. melt() on a matrix already yields a data frame, so
# no further conversion is needed.
cor.dat <- melt(cor.matrix)

# Drop the cells that were blanked out as NA. A logical mask is used rather
# than -which(...): if nothing were NA, which() would return integer(0) and
# the negative index would select zero rows, silently emptying the data frame.
cor.dat <- cor.dat[!is.na(cor.dat$value), ]
cor.dat
##      X1   X2 value
## 1   mpg  mpg  1.00
## 7   mpg  cyl  0.91
## 8   cyl  cyl  1.00
## 13  mpg disp  0.91
## 14  cyl disp  0.93
## 15 disp disp  1.00
## 19  mpg   hp  0.89
## 20  cyl   hp  0.90
## 21 disp   hp  0.85
## 22   hp   hp  1.00
## 25  mpg   wt  0.89
## 26  cyl   wt  0.86
## 27 disp   wt  0.90
## 28   hp   wt  0.77
## 29   wt   wt  1.00
## 31  mpg gear  0.54
## 32  cyl gear  0.56
## 33 disp gear  0.59
## 34   hp gear  0.33
## 35   wt gear  0.68
## 36 gear gear  1.00
## Renaming the variables and ordering the factor levels (which sets the axis order)

library(reshape)

# Display labels keyed by the original variable codes, defined once so the
# two axes cannot drift apart.
var.labels <- c(
  mpg = "Miles per Gallon", cyl = "# of Cylinders", disp = "Displacement",
  hp = "Horsepower", wt = "Weight", gear = "# of Gears"
)

# Relabel both variable columns and fix their level order. X2 gets the
# reversed order of X1. factor(levels =, labels =) yields the same levels as
# assigning a list through levels<-, and also works if the columns arrive as
# character vectors rather than factors.
cor.dat$X1 <- factor(cor.dat$X1, levels = names(var.labels),
  labels = unname(var.labels))
cor.dat$X2 <- factor(cor.dat$X2, levels = rev(names(var.labels)),
  labels = rev(unname(var.labels)))

## Plotting

library(ggplot2)
library(ggthemes)

# Make the Solarized theme the default for the plots that follow.
theme_set(theme_solarized())

# Tile plot of the absolute Spearman correlations: one tile per remaining
# variable pair, shaded by strength and labelled with the rounded value.
ggplot(cor.dat, aes(X2, X1, fill = value)) +
  geom_tile() +
  # x and y are inherited from the plot-level aes(); only the label is added.
  geom_text(aes(label = value), color = "#073642", size = 4) +
  # The scale starts at 0.3 because the weakest correlation shown is 0.33.
  scale_fill_gradient(
    name = expression("Spearman" * ~ rho),
    low = "#fdf6e3", high = "steelblue",
    breaks = seq(0, 1, by = 0.2), limits = c(0.3, 1)
  ) +
  # No padding around the discrete axes.
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "", y = "") +
  # The colour-bar guide is specified once; the original chain added the
  # identical guides() call a second time after theme().
  guides(fill = guide_colorbar(
    barwidth = 7, barheight = 1,
    title.position = "top", title.hjust = 0.5
  )) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    legend.justification = c(1, 0),
    # NOTE(review): a numeric legend.position is deprecated in recent ggplot2
    # releases in favour of legend.position.inside - confirm against the
    # installed version before changing.
    legend.position = c(0.9, 0.7),
    legend.direction = "horizontal"
  )

center

This post is filed under category R, and contains the following tags: R, ggplot2, plots.

Back to the Blog-Index