0
votes

I'm interested in bias corrected bootstrap confidence intervals for my model predictions. I've written some code to perform the bootstrap, show below:

library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.0.2
#> Warning: package 'tibble' was built under R version 4.0.2
#> Warning: package 'tidyr' was built under R version 4.0.2
#> Warning: package 'dplyr' was built under R version 4.0.2
library(boot)


x = rnorm(25)
eta = -0.5 + 0.2*x
y = rpois(25, exp(eta))
d = tibble(x, y)



predictions<-function(data, ind){
  model = glm(y ~ x, data = data[ind,])
  newdata = tibble(x = seq(-2,2, 0.01))
  
  eta = predict(model, newdata = newdata)
  exp(eta)
}


bt = boot(d, predictions, 1000)

boot.ci(bt, type = 'bca')
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#> 
#> CALL : 
#> boot.ci(boot.out = bt, type = "bca")
#> 
#> Intervals : 
#> Level       BCa          
#> 95%   ( 0.654,  6.095 )  
#> Calculations and Intervals on Original Scale
#> Some BCa intervals may be unstable

The bootstrapping works as anticipated, however the boot.ci call only computes a single confidence interval.

My desired output would be a dataframe which has:

  • One column for bootstrap predictions (essentially, the mean of the bootstraps)
  • One column for the lower 100*(1-a) confidence limit, and
  • One column for the upper 100*(1-a) confidence limit.

How can I use boot to obtain the desired output?

2

2 Answers

1
votes

You could use replicate for the bootstrap.

predictions <- function(X) {
  model <- glm(y ~ x, data=X)
  newdata <- data.frame(x=seq(-2, 2, 0.01))
  eta <- predict(model, newdata=newdata)
  exp(eta)
}

set.seed(42)
b <- replicate(200, predictions(d[sample(1:nrow(d), nrow(d), replace=T), ]))
res <- cbind(estimate=colMeans(b), matrixStats::colQuantiles(b, probs=c(.025, .975)))
head(res)
#      estimate     2.5%    97.5%
# [1,] 2.913660 1.324121 5.363724
# [2,] 2.432935 1.327170 3.990526
# [3,] 2.608683 1.221961 4.715235
# [4,] 2.681802 1.395430 4.537477
# [5,] 2.592316 1.947494 3.357226
# [6,] 3.705157 1.606632 7.012669

Data:

d <- structure(list(x = c(-0.733089270649064, 0.843270552215953, -0.860309462865639, 
1.57992224055388, 0.606460750363077, -1.60987545170912, 0.116723009723847, 
-1.35220761859743, -0.721654356711471, -0.614137997795221, -0.0900374904672829, 
1.20655915421524, 1.46948958927721, -0.210496187862193, -1.21763978187878, 
-0.566774409165728, 2.49263604822452, 1.37169581528637, -0.407181458046919, 
-0.247944038851957, -0.703358091631417, -0.311797310571164, -0.990496073627748, 
0.515047211912022, 0.355009786755435), y = c(0L, 1L, 0L, 0L, 
1L, 2L, 0L, 1L, 1L, 2L, 3L, 1L, 0L, 2L, 1L, 0L, 0L, 0L, 1L, 2L, 
0L, 1L, 1L, 1L, 2L)), class = "data.frame", row.names = c(NA, 
-25L))
0
votes

You can pass the index= argument to get the column of interest:

res = lapply(1:ncol(bt$t),function(i){
ci = boot.ci(bt,index=i,type = 'bca')
data.frame(
var = i,
mean = mean(bt$t[,i]),
lower = ci$bca[4],
upper = ci$bca[5]
)
})

res = do.call(rbind,res)
head(res)
  var     mean     lower    upper
1   1 2.736563 0.9131014 5.768826
2   2 2.729717 0.9155394 5.743819
3   3 2.722903 0.9174191 5.709087
4   4 2.716120 0.9215527 5.706216

Not very easy to see with your example, we can see that it works with iris:

bo = boot(iris,function(d,i)colMeans(d[i,-5]),1000)
data.frame(
var=colnames(iris)[1:4],
mean = colMeans(bo$t),
t(sapply(1:4,function(i)boot.ci(bo,index=i,type="bca")$bca[4:5]))
)

           var     mean       X1       X2
1 Sepal.Length 5.847009 5.712667 5.969333
2  Sepal.Width 3.055873 2.982667 3.128428
3 Petal.Length 3.767644 3.487473 4.029977
4  Petal.Width 1.203107 1.083937 1.313888