Plotting.Rmd
One can quickly plot a histogram for a set of values via the ‘plot_histogram’ function, which by default uses the Freedman-Diaconis rule for determining bin size (which works somewhat better than base R’s default, Sturges’ rule):
# Draw 100 values from a standard normal distribution and plot them
x <- rnorm(n = 100)
plot_histogram(x, main = "Normal distribution", new = FALSE)
# Exponentiate a second set of standard normal draws to obtain
# log-normally distributed values, then plot them
y <- exp(rnorm(n = 100))
plot_histogram(y, main = "Log-normal distribution", new = FALSE)
A common figure type in psychology is a plot of a measure of central tendency and its variation (e.g., means and 95% confidence intervals), shown over a grouping factor. Such a figure can be created quickly using the ‘draw_dots’ function:
# Example data examining effect of diet on early growth of chicks
data("ChickWeight")
# Create descriptive summary of weight by Diet for the final day (Time == 21)
dtf_obs <- stats_by_group(
  ChickWeight[ChickWeight$Time == 21, ],
  "weight", "Diet",
  # Sample size, mean, standard error of the mean,
  # and associated uncertainty intervals
  statistics = c("N", "M", "SE", "UI")
)
# X-axis position for each row of the summary; seq_len() is used rather
# than 1:nrow() so an empty summary yields an empty index instead of c(1, 0)
dtf_obs$X <- seq_len(nrow(dtf_obs))
# Plot means and 95% confidence intervals for weight
# First create blank plot with the desired x- and y-axis limits
xl <- c(0.5, 4.5)
yl <- c(120, 340)
plot_blank(xl, yl)
# Add estimates (column 'M') and error bars (columns 'UI_LB' to 'UI_UB')
# at the x-axis positions stored in column 'X'
draw_dots(dtf_obs, columns = c("X", "M", "UI_LB", "UI_UB"))
# Add borders, labels, and axes
draw_borders_and_labels(
  xl, yl,
  labels = c("Diet", "Weight at 21 days (gm)")
)
# Y-axis ticks every 40 units, starting from the lower y-axis limit
draw_axes(seq(yl[1], yl[2], by = 40), side = 2, line = -1.25, cex = 1)
# X-axis ticks at positions 1 to 4, labeled 'Diet 1' to 'Diet 4'
draw_axes(1:4, "Diet " %p% 1:4, side = 1, line = -1.25, cex = 1)
A useful variant of figures summarizing estimates and error bars is the forest plot, used commonly to summarize the results of a meta-analysis. In its most basic form, a forest plot reports a set of estimates and associated error bars for different variables:
# Average of the per-diet means in 'dtf_obs' (the summary by diet
# created for the previous figure); used as the reference value
overall_m <- mean(dtf_obs$M)
# P-value based on two-tailed one-sample t-test comparing each
# diet's mean against the overall mean (N - 1 degrees of freedom)
dtf_obs$P_value <- pt(
  abs(dtf_obs$M - overall_m) / dtf_obs$SE,
  dtf_obs$N - 1,
  lower.tail = FALSE
) * 2
# Identify significant differences
dtf_obs$Significant <- dtf_obs$P_value < .05
# Create nicely formatted results of the form
# 'mean [lower, upper]; p = 0.000'
dtf_obs$Results <-
  round(dtf_obs$M) %p% " [" %p%
  round(dtf_obs$UI_LB) %p% ", " %p% round(dtf_obs$UI_UB) %p%
  "]; p = " %p% format(round(dtf_obs$P_value, 3), nsmall = 3)
plot_forest(
  dtf_obs[, c("M", "UI_LB", "UI_UB")],
  # X-axis
  xlim = c(140, 340),
  labels_x = seq(140, 340, by = 40),
  title_x = "Estimated mean",
  # Y-axis
  labels_y = "Diet " %p% 1:4,
  # Add results next to each error bar
  labels_estimates = dtf_obs$Results,
  labels_estimates_limit = overall_m,
  # Show overall mean
  vert_grid = overall_m,
  # Indicate which mean significantly differs: point type 19 when
  # 'Significant' is FALSE and 21 when it is TRUE. The full constants
  # FALSE/TRUE are used because the shorthands F/T can be reassigned
  point_type = replace_cases(
    dtf_obs$Significant, c(FALSE, TRUE), c(19, 21)
  ),
  # Size of points, x/y-axis labels, and title
  text_size = c(1.25, .8, 1),
  # Specify margin (in inches) to ensure nice visibility
  margin = c(.5, .5, .25, 1.5),
  new = FALSE
)
Another common figure in psychology is a line plot displaying change in a variable over time. We can quickly create such a figure using the ‘draw_lines’ function:
# Summarize weight (mean and uncertainty interval) at each
# time point, pooling over all diets
dtf_obs <- stats_by_group(
  ChickWeight,
  "weight", "Time",
  statistics = c("M", "UI")
)
# Plot means and 95% confidence intervals for weight
# Start from an empty plotting region with the chosen axis limits
xl <- c(-0.5, 21.5)
yl <- c(0, 250)
plot_blank(xl, yl)
# Connect the means over time and draw grey error bars
draw_lines(
  dtf_obs,
  columns = c("Time", "M", "UI_LB", "UI_UB"),
  col.eb = "grey"
)
# Finish with the plot borders, axis titles, and tick labels
draw_borders_and_labels(
  xl, yl,
  labels = c("Day", "Weight (gm)")
)
draw_axes(seq(yl[1], yl[2], by = 50), side = 2, line = -1.25, cex = 1)
draw_axes(seq(0, 20, by = 5), side = 1, line = -1.25, cex = 1)
Often we will need to plot multiple trajectories over time for separate groups. The ‘draw_by_group’ function streamlines the process of plotting separate lines for different groups.
# Summarize weight (mean and uncertainty interval) for every
# combination of time point and diet
# (see the package 'dplyr' for a concise way to create such summaries)
dtf_obs <- stats_by_group(
  ChickWeight,
  "weight", c("Time", "Diet"),
  statistics = c("M", "UI")
)
# X-axis position: the time point plus a diet-specific offset
# (-.6, -.2, .2, .6 for diets 1 to 4)
dtf_obs$X <- dtf_obs$Time +
  replace_cases(dtf_obs$Diet, 1:4, c(-.6, -.2, .2, .6))
# Color for each row, chosen by diet
dtf_obs$col <-
  replace_cases(dtf_obs$Diet, 1:4, palettes(index = 1:4))
# Plot means and 95% confidence intervals for weight
# Start from an empty plotting region with the chosen axis limits
xl <- c(-1, 22)
yl <- c(0, 350)
plot_blank(xl, yl)
# Draw one line (with error bars) per diet, taking the line, error bar,
# and point-fill colors from the 'col' column
draw_by_group(
  dtf_obs, "Diet", 1:4,
  draw_fun = draw_lines,
  columns = c("X", "M", "UI_LB", "UI_UB"),
  arrow = TRUE,
  pch = 21,
  aes = c(col = "col", col.eb = "col", bg = "col")
)
# Finish with the plot borders, axis titles, and tick labels
draw_borders_and_labels(
  xl, yl,
  labels = c("Day", "Weight (gm)")
)
draw_axes(seq(yl[1], yl[2], by = 50), side = 2, line = -1.25, cex = 1)
draw_axes(seq(0, 20, by = 5), side = 1, line = -1.25, cex = 1)
# Legend identifying the color used for each diet
legend(
  x = 0, y = 340,
  legend = "Diet " %p% 1:4,
  fill = palettes(1:4),
  bty = "n"
)
The function ‘plot_correlations’ is a quick way to create a figure summarizing the set of correlations over multiple variables along with useful information on the magnitude and statistical significance of each relationship:
# Covariance matrix for three variables with unit variances,
# filled row by row
Sigma <- matrix(
  c(
    1.0, 0.2, 0.5,
    0.2, 1.0, 0.1,
    0.5, 0.1, 1.0
  ),
  nrow = 3, byrow = TRUE
)
# Draw 100 observations from a zero-mean multivariate normal
# distribution with that covariance matrix
x <- MASS::mvrnorm(n = 100, mu = rep(0, 3), Sigma = Sigma)
# Name the columns V1 to V3 and convert to a data frame
colnames(x) <- "V" %p% 1:3
x <- data.frame(x)
# Summarize the pairwise correlations in a single figure
plot_correlations(
  x,
  labels = list("Variable " %p% 1:3, "V" %p% 1:3),
  new = FALSE
)