1
votes

I have a dataset that combines several surveys, with different countries, over years. My dependent variable (lrparty) is the ideological position of a party (ranging from 0 to 10) according to survey's respondents. I have several independent variables such as the age, sex, education, partisanship, and income of respondents.

Then, for each party and each survey, I would like to plot the predicted values of lrparty according to the modal individual (e.g., respondent with age = 31, female = 1, education = 2, income = 2, and partisan = 1) over time. So, the graph would look like: x-axis = years; y-axis = predicted values of lrparty according to the modal individual.

In sum, these are the stages of what I am trying to do: 1. Estimate the model: Ordered logit of placement of the party (lrparty), regressing on gender, age, education, income, and partisanship of respondents.

  1. Take posterior draws.

  2. Predict placement of the party for the modal respondent (e.g., 500 draws)

  3. Then, I am expecting to have a dataset that should look like: Year, Survey, Country, Party (cmp code), %missing placements, x1:x500 (from the draws)

  4. From that data set I would generate my plots. For example, for UK, according to survey CSES.

In order to figure it out the code, I started using only one survey (cses), one country (uk), and one party (conservatives) as you can see in my code below. But I don't know how to get from where I am in the code to the plot I want (above described).

    library(rstan)
    library(tidyverse)
    library(brms)
    library(ggplot2)
    library(ggthemes)
    library(ggmcmc)

    ## Data:
    load("pbrands.RData")

    ## Keeping only country = uk; survey = cses; party = conservatives
    uk_cses_con = pbrands %>% 
    select(lrparty, female, age, education, income, partisan, year, survey,                                 
    country, cmp, party_name_short, party_name_english, lrs) %>% 
    filter(survey == "cses") %>% 
    filter(country == "uk") %>% 
    filter(cmp == 51620)

    ## Conducting a Bayesian ordered logit model
    fit <- brm(lrparty ~ age + income + education + female + partisan, 
       data = uk_cses_con, family = "cumulative", chains = 4, iter = 1000)

    ## Trace and Density Plots for MCMC Samples
    plot(fit)

    ## Posterior Predictive Checks
    pp_check(fit)

    ## Getting variables' modes: 
    getmode <- function(v) {
    uniqv <- unique(v)
    uniqv[which.max(tabulate(match(v, uniqv)))]
    }

    getmode(uk_cses_con$age)
    getmode(uk_cses_con$female)
    getmode(uk_cses_con$education)
    getmode(uk_cses_con$income)
    getmode(uk_cses_con$partisan)

    ## Creating the data frame for the modal individual 
    newavg <- data.frame(age = 31, female = 1, education = 2, income = 2,              
    partisan = 0, years = uk_cses_con$year)

    ## predict response for new data
    pred <- predict(fit, newdata = newavg)

    # extract posterior samples of population-level effects
    samples1 <- posterior_samples(fit)

    ## Display marginal effects of predictors
    marginal <- marginal_effects(fit)

    ## Plot predicted lrparty (my dependent variable) over time (with error:         
    confidence interval) based on the modal respondent (age = 31, female = 1,                 
    education = 0, income = 0, partisan = 0)
    ##?

Thanks in advance!

1

1 Answers

1
votes

Ok. After several trial and error attempts, I figured out the code. As it might be of interest for someone else, I am posting the code below.

    ## Packages
    install.packages(c("bmrs", "coda", "mvtnorm", "devtools"))
    library(devtools)
    library(tidyverse)
    library(brms)

    ## Loading the data
    load('~/Data/mydata.RData')

    ## Keeping the variables of our interest
    mydata = mydata %>% 
    select(lrparty, female, age, education, income, partisan, year, survey, 
     country, cmp, party_name_short, party_name_english, lrs) 

    ## Function for calculating modes
    getmode <- function(v) {
    uniqv = unique(v)
    uniqv[which.max(tabulate(match(v, uniqv)))]
    }

    ## Finding Modal respondents by country, survey, and party:

    ## Modes by country 
    mode_by_country = mydata %>% 
    group_by(country) %>% 
    mutate(modal_age = getmode(na.omit(age))) %>% 
    mutate(modal_inc = getmode(na.omit(income))) %>% 
    mutate(modal_female = getmode(na.omit(female))) %>% 
    mutate(modal_edu = getmode(na.omit(education))) %>% 
    mutate(modal_partisan = getmode(na.omit(partisan))) %>% 
    filter(!duplicated(country))

    mode_by_country = mode_by_country[ , c(9, 14:18)]

    mode_by_country = mode_by_country[-40, ]

    ## Build object to receive the information we want to store
    runner <- c()
    pred = matrix(NA, 2000, 11)
    yhat = matrix(NA, 2000, 1)

    ###### Conducting the model for UK with two parties #########
    uk = mydata %>% 
    select(lrparty, female, age, education, income, partisan, year, survey,               
    country, cmp, party_name_short, party_name_english, lrs) %>% 
    filter(survey == "cses") %>% 
    filter(country == "uk") %>% 
    filter(cmp == 51320 | cmp == 51620)

    ## Finding how many regressions will be conducted
    reglength <- length(unique(uk$survey)) * length(unique(uk$year)) *  length(unique(uk$cmp))

    ## Creating our modal British individual based on mode_by_country
    mode_by_country[mode_by_country$country == "uk", c(2:6)]

    newavg <- data.frame(age = 35, income = 2, female = 1, education = 2,  partisan = 0)

    ## Loop to conduct the ordered logit in rstan, using iter=1000, and           chains=4

    for(p in na.omit(unique(uk$cmp))){

    hold <- uk[uk$cmp == p, ]
    country <- hold$country[1]

    for(s in na.omit(unique(hold$survey))){

        hold1 <- hold[hold$survey == s, ]

        for(y in na.omit(unique(hold1$year))){

            mod <- brm(lrparty ~ age + female + education + income + partisan, data = hold1[hold1$year == y, ], family = "cumulative", chains = 4, iter = 1000)

            for(i in 1:2000) {
              pred[i,] <- predict(mod, newdata = newavg, probs = c(0.025, 0.975), summary=TRUE) 
              yhat[i] <- sum(pred[i, ] * c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))
            }

              newData <- data.frame(country, p, s, y, pred, yhat)
              newData$m <- mean(newData$yhat)
              newData$sd <- sd(newData$yhat)
              newData$lower <- newData$m - 1.96*newData$sd 
              newData$upper <- newData$m + 1.96*newData$sd   

            runner <- rbind(runner, newData)
        }
      }
   }

    ## Keeping unique values within dataset
    uniqdata = runner %>% 
    filter(!duplicated(m))

    ## Creating the Figure
    uniqdata2 <- uniqdata[, c(1:4, 17:20)]

    uniqdata3 <- uniqdata2 %>% 
    gather(variable, value, -(y:p)) %>%
    unite(temp, p, variable) %>%
    spread(temp, value)

    uniqdata3 = uniqdata3[ , -c(3,6,8,11)]

    names(uniqdata3)[3:8] = c("lower_lab", "m_lab", "upper_lab", "lower_con", "m_con", "upper_con")

    uniqdata3[3:8] = as.numeric(unlist(uniqdata3[3:8]))

    ## Plot: Predicted Party Ideological Placement for Modal British Respondent

    ggplot(uniqdata3, aes(x = (y))) + geom_line(aes(y = m_lab, colour = "Labor")) + geom_ribbon(aes(ymin = lower_lab,ymax = upper_lab,
              linetype=NA), alpha = .25) +
    geom_line(aes(y = m_con, color = "Conservatives")) +
    geom_ribbon(aes(ymin = lower_con,
              ymax = upper_con,
              linetype=NA), alpha = .25) +
    theme_bw() + 
    theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5)) + labs(title = "Predicted Party Ideological Placement for Modal British Respondent \n Survey = CSES") + theme(plot.title = element_text(color="black", size=20, face="bold.italic"), axis.title.x = element_text(color="black", size=15, face="italic"), axis.title.y = element_text(color="black", size=15, face="italic")) + 
    theme(legend.title = element_blank()) +
    theme(axis.text.x = element_text(color="black", size= 12.5), axis.text.y = element_text(color="black", size=12.5)) + theme(legend.text = element_text(size=15)) + scale_x_continuous(name="Year", breaks=seq(1997, 2005, 2)) + scale_y_continuous(name="Left-Right Party Position", limits=c(0, 10))