When you do coef(cvfit, s = "lambda.min") you are taking the lambda that minimizes the cross-validated error, whereas the default, coef(cvfit, s = "lambda.1se"), takes the largest lambda whose error is within one standard error of that minimum; see this discussion. Because the fold assignment is random, these lambdas vary from run to run, so you can average the MSEs across different CV runs:
library(glmnet)
library(mlbench)

data(BostonHousing)
X = as.matrix(BostonHousing[,-c(4,14)])  # drop chas (a factor) and the response medv
Y = BostonHousing[,14]                   # medv, the response
nfolds = 5
nreps = 10

# repeat cross-validation nreps times, keeping the whole error curve each time
res = lapply(1:nreps,function(i){
  fit = cv.glmnet(x=X,y=Y,nfolds=nfolds)
  data.frame(MSE_mean=fit$cvm,lambda=fit$lambda,se=fit$cvsd)
})
res = do.call(rbind,res)
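Note that grouping by lambda below assumes every repetition was evaluated on the same lambda sequence. That holds here because glmnet derives its default lambda path from X and Y alone, not from the fold assignment; a quick sanity check (my addition, not in the original code):

# every run should contribute exactly one row per lambda value
length(unique(res$lambda)) * nreps == nrow(res)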
We can summarize the results. Here the standard error is approximated by simply averaging the per-run values; if you want to be precise, you might have to look into the formula for the pooled standard deviation (a variant is sketched after the code below):
library(dplyr)

# average the MSE curve (and, approximately, its standard error) over the repetitions
summarized_res = res %>%
  group_by(lambda) %>%
  summarise(MSE=mean(MSE_mean),se=mean(se)) %>%
  arrange(desc(lambda))
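If you prefer the pooled estimate mentioned above, one option (assuming the runs can be weighted equally, which is my assumption here) is to average the variances rather than the standard errors; the rest of the answer keeps using summarized_res:

# alternative summary pooling the variances (se^2) before taking the square root
summarized_res_pooled = res %>%
  group_by(lambda) %>%
  summarise(MSE=mean(MSE_mean),se=sqrt(mean(se^2))) %>%
  arrange(desc(lambda))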
# lambda with the smallest averaged MSE (the analogue of cv.glmnet's lambda.min)
idx = which.min(summarized_res$MSE)
lambda.min = summarized_res$lambda[idx]
lambda.min
[1] 0.019303

# largest lambda whose averaged MSE is within one standard error of that minimum
# (lambda is sorted in decreasing order, so the first match is the largest value)
index_1se = with(summarized_res,which(MSE < MSE[idx]+se[idx])[1])
lambda_1se = summarized_res$lambda[index_1se]
lambda_1se
[1] 0.3145908
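To tie this back to coef(): once you have the averaged lambdas, you can fit glmnet on the full data and extract coefficients at either value (a minimal sketch; for an s that is not exactly on the fitted path, coef() interpolates along it by default):

final_fit = glmnet(x=X,y=Y)
coef(final_fit,s=lambda.min)  # coefficients at the averaged lambda.min
coef(final_fit,s=lambda_1se)  # coefficients at the averaged 1-SE lambda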
We can plot this (note that the vertical lines have to be placed at log(lambda), since that is the x axis):
library(ggplot2)
ggplot(res,aes(x=log(lambda),y=MSE_mean)) +
  stat_summary(fun=mean,size=2,geom="point") +
  geom_vline(xintercept=log(c(lambda.min,lambda_1se)))
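If you also want to see how much the curve varies between the repeated runs, one variation (my own, not part of the answer above) is to add error bars for the spread across the nreps runs:

ggplot(res,aes(x=log(lambda),y=MSE_mean)) +
  stat_summary(fun=mean,size=2,geom="point") +
  stat_summary(fun.data=mean_se,geom="errorbar",width=0.1) +  # SE across the repeated runs
  geom_vline(xintercept=log(c(lambda.min,lambda_1se)))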
