Here is what I use to automate this process:
data = iris
formula = Sepal.Length ~ Species + Sepal.Width + Petal.Length
# get the coefficient terms, including expanded dummy columns for factors
formula_terms = colnames(
model.matrix(
formula,
data=data
)
)
# remove intercept (it can specified separately)
formula_terms = setdiff(formula_terms, '(Intercept)')
# create named vectors mapping some of the coefficients to priors
priors_location = c(
'Speciesversicolor'=-1,
'Speciesvirginica'=-1.5,
'Petal.Length'=1
)
priors_scale = c(
'Speciesversicolor'=5,
'Speciesvirginica'=5,
'Petal.Length'=8
)
Now, define a helper function, which can be put elsewhere:
priors_or_default_for = function(formula_terms, priors, default) {
as.array(
sapply(
formula_terms,
function(term) ifelse(term %in% names(priors), priors[term], default)
)
)
}
And use it to create the priors vector:
location = priors_or_default_for(formula_terms, priors_location, default=0)
scale = priors_or_default_for(formula_terms, priors_scale, default=10)
fit = rstanarm::stan_glm(
formula,
data=data,
prior=normal(
location=location,
scale=scale
)
)
# (recommended) check that the priors were properly assigned
parameters::parameters(fit)
Parameter |
Median |
89% CI |
pd |
% in ROPE |
Rhat |
ESS |
Prior |
---|
(Intercept) |
2.40 |
[ 1.97, 2.79] |
100% |
0% |
1.000 |
2556.00 |
Normal (5.84 +- 2.07) |
Speciesversicolor |
-0.95 |
[-1.31, -0.61] |
100% |
0% |
1.002 |
1274.00 |
Normal (-1.00 +- 5.00) |
Speciesvirginica |
-1.39 |
[-1.86, -0.93] |
100% |
0% |
1.003 |
1285.00 |
Normal (-1.50 +- 5.00) |
Sepal.Width |
0.43 |
[ 0.29, 0.55] |
100% |
0% |
1.001 |
1803.00 |
Normal (0.00 +- 10.00) |
Petal.Length |
0.77 |
[ 0.67, 0.88] |
100% |
0% |
1.002 |
1359.00 |
Normal (1.00 +- 8.00) |