2
votes

I want to produce a 3D scatterplot and add a surface fitted with a linear regression, using plotly. My data:

structure(list(political_trust = c(1, 6, 7, 5, 0, 2, 1, 3, 5, 
0, 2, 5, 5, 6, 6, 3, 3, 2, 5, 8, 3, 7, 3, 4, 5, 4, 5, 0, 0, 4, 
6, 1, 0, 4, 0, 5, 5, 6, 7, 3, 5, 4, 5, 2, 4, 4, 7, 6, 7, 5, 4, 
6, 7, 5, 7, 3, 3, 3, 2, 5, 2, 7, 3, 2, 7, 2, 3, 0, 7, 5, 7, 3, 
0, 7, 2, 6, 3, 8, 7, 2, 2, 5, 0, 1, 6, 3, 6, 5, 1, 3, 4, 4, 5, 
3, 3, 0, 2, 4, 9, 6, 3, 3, 2, 3, 4, 5, 8, 0, 4, 1, 5, 0, 4, 0, 
5, 6, 3, 2, 7, 5, 4, 3, 8, 3, 4, 0, 3, 6, 7, 7, 2, 3, 5, 5, 5, 
0, 3, 2, 1, 7, 5, 0, 4, 0, 2, 7, 3, 0, 8, 3, 2, 4, 5, 5, 3, 2, 
3, 8, 6, 5, 6, 7, 0, NA, 7, 7, 2, 0, 3, 4, 7, 2, 1, 2, 0, 0, 
4, 3, 3, 6, 6, 1, 4, 0, 4, 0, 0, 7, 6, 4, 4, 6, 5, 4, 3, 3, 0, 
NA, 2, 5), political_interest = c(2, 0, 3, 3, 2, 1, 2, 2, 2, 
2, 2, 2, 3, 3, 3, 3, 2, 2, 3, 2, 1, 2, 2, 2, 2, 0, 2, 1, 3, 1, 
1, 1, 1, 1, 2, 3, 2, 2, 2, 1, 3, 3, 2, 3, 2, 1, 3, 2, 0, 3, 1, 
1, 2, 1, 2, 2, 1, 3, 3, 2, 3, 2, 3, 2, 2, 1, 2, 0, 3, 1, 2, 2, 
1, 3, 2, 2, 1, 2, 2, 0, 3, 2, 2, 1, 2, 1, 1, 3, 1, 1, 3, 2, 0, 
2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 0, 1, 1, 2, 2, 2, 2, 
2, 0, 0, 2, 3, 2, 2, 2, 3, 3, 0, 3, 3, 1, 2, 1, 1, 1, 2, 3, 2, 
2, 2, 0, 2, 2, 2, 1, 2, 3, 3, 1, 2, 0, 1, 1, 0, 2, 2, 1, 2, 2, 
2, 2, 3, 2, 1, 2, 2, 0, 0, 3, 2, 2, 2, 1, 2, 3, 0, 1, 2, 3, 2, 
2, 2, 1, 3, 1, 1, 2, 2, 3, 3, 1, 2, 2, 2, 2, 2, 1, 0, 1, 1, 0, 
3, 3), education_level = c(0, 2, 1, 5, 5, 0, 4, 4, 0, 0, 3, 2, 
3, 4, 0, 4, 4, 4, 4, 3, 0, NA, 4, 0, 4, 3, 4, 1, 5, 2, NA, 0, 
0, 4, 3, 3, 5, 3, 4, 0, 4, 4, 0, 4, 5, 4, 2, 2, 0, 5, 3, 0, 4, 
1, 5, 4, 0, 4, 4, 5, 5, 4, 4, 4, 5, 2, 3, 2, 4, 0, 4, 0, 5, 4, 
4, 4, 4, 4, 4, 2, 4, 5, 3, 4, 3, 0, 4, 4, 4, 3, 4, 4, 0, 3, 4, 
2, 3, 3, 0, 4, 4, 4, 5, 4, 0, 4, 4, 4, 0, 3, 1, 4, NA, 4, 0, 
1, 2, 4, 0, 2, 1, 4, 4, 4, 3, NA, 5, 2, 1, 0, 0, 4, 3, 3, 4, 
3, 0, 3, NA, 4, 0, 0, 4, 5, 4, 5, 2, 2, 0, 3, 4, 3, 1, 3, 2, 
3, 5, 0, 4, 5, 0, 5, 2, 0, 3, NA, NA, 2, 4, 3, 4, 3, 2, 2, 4, 
4, 3, 0, 4, 0, 4, 4, 3, 0, 4, 4, 3, 5, 0, 3, 0, 4, 3, 0, 3, 3, 
3, 4, 5, 1)), row.names = c(NA, -200L), class = "data.frame")

I start by defining a list of relevant variables - this is not necessary but basically a consequence of using the code in a Shiny up:

input <- list()
input$x <- "education_level"
input$y <- "political_trust"
input$z <- "political_interest"

Next, creating the surface data:

# Regressing "political_interest" on "education_level" and "political_trust":
lm <- lm(as.formula(paste0(input$z, " ~ ", input$x, " + ", input$y)), data)

# Defining range of values that outcome will be predicted for
axis_x <- seq(min(data[, input$x], na.rm = T),
                    max(data[, input$x], na.rm = T), by = 0.2)
axis_y <- seq(min(data[, input$y], na.rm = T),
                    max(data[, input$y], na.rm = T), by = 0.2)

# Predicting outcome, and getting data into surface format
lm_surface <- expand.grid(x = axis_x, y = axis_y, KEEP.OUT.ATTRS = F)
colnames(lm_surface) <- c(input$x, input$y)
lm_surface <- acast(lm_surface, as.formula(paste0(input$x, " ~ ", input$y)), 
                                     value.var = input$z)

Last, plotting this with plotly:

data %>%
        filter(!is.na(get(input$z))) %>%
        filter(!is.na(get(input$x))) %>%
        filter(!is.na(get(input$y))) %>% 
        plot_ly(., x = ~jitter(get(input$x), factor = 2.5), 
                y = ~jitter(get(input$y), factor = 2.5), 
                z = ~jitter(get(input$z), factor = 2.5),
                       type = "scatter3d", mode = "markers",
                       marker = list(size = 2, color = "#cccccc")) %>%
        add_surface(., z = lm_surface,
                x = axis_x,
                y = axis_y,
                type = "surface")

This gives me the following. As you can see, the surface does not cover the full range of the y-dimension. Note also that the surface plotted is "quadratic" - i.e. same length in x and y - although it should have non-quadratic dimensions.

enter image description here

I can bring plotly to draw larger surface area, e.g. by changing the range of values like below, but it always stays quadratic.

axis_x <- seq(0, 10, by = 0.2)
axis_y <- seq(0, 10, by = 0.2)
1

1 Answers

0
votes

Ok, question solved.

It's important which dimension of the surface matrix (lm_surface) is which. Swapping x and y when applying acast fixes the issue:

lm_surface <- acast(lm_surface, as.formula(paste0(input$y, " ~ ", input$x)),
                                     value.var = input$z)