7
votes

I have a data.frame (which I melted using the melt function), from which I produce multiple scatter plots and fit a regression line using the following:

ggplot(dat, aes(id, value)) + geom_point() + geom_smooth(method="lm", se=FALSE) + facet_wrap(variable~var1, scales="free")

I would like to add the regression equation and the R^2 in each of these scatter plots for the relevant regression (i.e. the one produced by geom_smooth in each scatter plot).


var1 above is just the name of one of the id columns of the melted data and I am facing the same question with facet_grid instad of facet_wrap.

2
Yes, but I am not able to generalize it so the multiple scatter plots...StephQ
Use ddply and the function from Ramnath's answer in that other question to create a data frame with both your faceting variables, x and y variables (locations for eqn in each panel) and a character variable for the eqn itself. Then just pass that data frame to geom_text.joran

2 Answers

10
votes

I actually solved this, please see below a worked out example where the dependent variable is var1. This was a time series dataset, please ignore the date part if not relevant for your problem.

library(plyr)
library(ggplot2)

rm(dat)
dat <- read.table("data.txt", header = TRUE, sep = ",")
dat <- transform(dat, date = as.POSIXct(strptime(date, "%Y-%m-%dT%H:%M:%OS")))

rm(dat.m)
dat.m <- melt(dat, id = c('ccy','date','var1'))

lm_eqn = function(df){
  m = lm(var1 ~ value, df);
  eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
                   list(a = format(coef(m)[1], digits = 2), 
                        b = format(coef(m)[2], digits = 2), 
                        r2 = format(summary(m)$r.squared, digits = 3)))
  as.character(as.expression(eq));                 
}

mymax = function(df){
  max(df$value)
}

rm(regs)
regs <- ddply(dat.m, .(ccy,variable), lm_eqn)
regs.xpos <- ddply(dat.m, .(variable), function(df) (min(df$value)+max(df$value))/2)
regs.ypos <- ddply(dat.m, .(ccy,variable), function(df) min(df$var1) + 0.05*(max(df$var1)-min(df$var1)))

regs$y <- regs.ypos$V1
regs$x <- regs.xpos$V1

rm(gp)
gp <- ggplot(data=dat.m, aes(value, var1)) + geom_point(size = 1, alpha=0.75) + geom_smooth() + geom_smooth(method="lm", se=FALSE, color="red") + geom_text(data=regs, size=3, color="red", aes(x=x, y=y, label=V1), parse=TRUE) + facet_grid(ccy~variable, scales="free")
ggsave("data.png", gp, scale=1.5, width=11, height=8)
0
votes

Nice solution. I'm surprised ggplot doesn't have a function built in to do this... I needed to display equations and R2 values from polynomial fits (generated by the ns(x,order) function in the splines package), and have expanded your lm_eqn function to accomodate polynomials of varying orders.

Disclaimer: I'm still quite new to R coding, and I'm aware that this code is very messy. There must be a nicer way to do it, and I'm going to start another thread to ask people to refine the code, and possibly expand it to other fit models... You can follow it here: https://groups.google.com/forum/?fromgroups#!forum/ggplot2

lm_eqn = function(df,x.var,y.var,signif.figs,eq.plot=T,model.type,order){
  if(missing(x.var) | missing(y.var) | class(x.var)!='character' | class(y.var)!='character') stop('x.var and y.var must be the names of the columns you want to use as x and y as a character string.' )
  if(missing(model.type)) stop("model.type must be 'lin' (linear y~x model) or 'poly' (polynomial y~ns(x,order) model, generated by splines package).")
  if(model.type=='poly' & missing(order)) stop("order must be specified if poly method is used.")

  if(eq.plot==T) {
    # Linear y=mx+c equation
    if(model.type=='lin') {
      fit = lm(df[[y.var]] ~ df[[x.var]]);
      eq <- substitute(italic(y) == c + m %.% italic(x)*","~~italic(r)^2~"="~r2, 
                       list(c = signif(coef(fit)[1], signif.figs), 
                            m = signif(coef(fit)[2], signif.figs), 
                            r2 = signif(summary(fit)$r.squared, signif.figs)))
      as.character(as.expression(eq));
    }
    # polynomial expression generated with the ns(x,order) function [splines package]
    if(model.type=='poly') {
      fit = lm(df[[y.var]] ~ ns(df[[x.var]],order));

      base = gsub('!c!',signif(coef(fit)[1],signif.figs),"italic(y) == !c! + ")
      element.1 = "!m! %.% italic(x)~"
      element.2 = " + !m! %.% italic(x)^!o!~"
      element.r2 = gsub('!r2!',signif(summary(fit)$r.squared,signif.figs),"~~italic(r)^2~\"=\"~!r2!")
      eq=""

      for(o in 1:(order)) {
        if(o==1) {
          if(coef(fit)[(o+1)]<0) tmp=gsub("[+]","",base) else tmp=base
          eq=paste(tmp,gsub('!m!',signif(coef(fit)[(o+1)],signif.figs),element.1),sep="")
        }
        if(o>1) {
          if(coef(fit)[(o+1)]<0) tmp=gsub("[+]","",element.2) else tmp=element.2
          eq=paste(eq,gsub('!o!',o,gsub('!m!',signif(coef(fit)[(o+1)],signif.figs),tmp)),sep="")
        }
        if(o==(order)) eq=paste(eq,"\",\"",element.r2,sep="")
      }
    }
  }
  if(eq.plot==F) {
    # Linear y=mx+c equations
    if(model.type=='lin') {
      fit = lm(df[[y.var]] ~ df[[x.var]]);
      eq <- substitute(italic(r)^2~"="~r2, 
                       list(r2 = signif(summary(fit)$r.squared, signif.figs)))
      as.character(as.expression(eq));
    }
    # polynomial expression generated with the ns() function [splines package]
    if(model.type=='poly') {
      fit = lm(df[[y.var]] ~ ns(df[[x.var]],order));

      eq = gsub('!r2!',signif(summary(fit)$r.squared,signif.figs),"italic(r)^2~\"=\"~!r2!")
    }
  }
  return(eq)
}