I want to produce a 3D scatterplot and add a surface fitted with a linear regression, using plotly. My data:
structure(list(political_trust = c(1, 6, 7, 5, 0, 2, 1, 3, 5,
0, 2, 5, 5, 6, 6, 3, 3, 2, 5, 8, 3, 7, 3, 4, 5, 4, 5, 0, 0, 4,
6, 1, 0, 4, 0, 5, 5, 6, 7, 3, 5, 4, 5, 2, 4, 4, 7, 6, 7, 5, 4,
6, 7, 5, 7, 3, 3, 3, 2, 5, 2, 7, 3, 2, 7, 2, 3, 0, 7, 5, 7, 3,
0, 7, 2, 6, 3, 8, 7, 2, 2, 5, 0, 1, 6, 3, 6, 5, 1, 3, 4, 4, 5,
3, 3, 0, 2, 4, 9, 6, 3, 3, 2, 3, 4, 5, 8, 0, 4, 1, 5, 0, 4, 0,
5, 6, 3, 2, 7, 5, 4, 3, 8, 3, 4, 0, 3, 6, 7, 7, 2, 3, 5, 5, 5,
0, 3, 2, 1, 7, 5, 0, 4, 0, 2, 7, 3, 0, 8, 3, 2, 4, 5, 5, 3, 2,
3, 8, 6, 5, 6, 7, 0, NA, 7, 7, 2, 0, 3, 4, 7, 2, 1, 2, 0, 0,
4, 3, 3, 6, 6, 1, 4, 0, 4, 0, 0, 7, 6, 4, 4, 6, 5, 4, 3, 3, 0,
NA, 2, 5), political_interest = c(2, 0, 3, 3, 2, 1, 2, 2, 2,
2, 2, 2, 3, 3, 3, 3, 2, 2, 3, 2, 1, 2, 2, 2, 2, 0, 2, 1, 3, 1,
1, 1, 1, 1, 2, 3, 2, 2, 2, 1, 3, 3, 2, 3, 2, 1, 3, 2, 0, 3, 1,
1, 2, 1, 2, 2, 1, 3, 3, 2, 3, 2, 3, 2, 2, 1, 2, 0, 3, 1, 2, 2,
1, 3, 2, 2, 1, 2, 2, 0, 3, 2, 2, 1, 2, 1, 1, 3, 1, 1, 3, 2, 0,
2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 0, 1, 1, 2, 2, 2, 2,
2, 0, 0, 2, 3, 2, 2, 2, 3, 3, 0, 3, 3, 1, 2, 1, 1, 1, 2, 3, 2,
2, 2, 0, 2, 2, 2, 1, 2, 3, 3, 1, 2, 0, 1, 1, 0, 2, 2, 1, 2, 2,
2, 2, 3, 2, 1, 2, 2, 0, 0, 3, 2, 2, 2, 1, 2, 3, 0, 1, 2, 3, 2,
2, 2, 1, 3, 1, 1, 2, 2, 3, 3, 1, 2, 2, 2, 2, 2, 1, 0, 1, 1, 0,
3, 3), education_level = c(0, 2, 1, 5, 5, 0, 4, 4, 0, 0, 3, 2,
3, 4, 0, 4, 4, 4, 4, 3, 0, NA, 4, 0, 4, 3, 4, 1, 5, 2, NA, 0,
0, 4, 3, 3, 5, 3, 4, 0, 4, 4, 0, 4, 5, 4, 2, 2, 0, 5, 3, 0, 4,
1, 5, 4, 0, 4, 4, 5, 5, 4, 4, 4, 5, 2, 3, 2, 4, 0, 4, 0, 5, 4,
4, 4, 4, 4, 4, 2, 4, 5, 3, 4, 3, 0, 4, 4, 4, 3, 4, 4, 0, 3, 4,
2, 3, 3, 0, 4, 4, 4, 5, 4, 0, 4, 4, 4, 0, 3, 1, 4, NA, 4, 0,
1, 2, 4, 0, 2, 1, 4, 4, 4, 3, NA, 5, 2, 1, 0, 0, 4, 3, 3, 4,
3, 0, 3, NA, 4, 0, 0, 4, 5, 4, 5, 2, 2, 0, 3, 4, 3, 1, 3, 2,
3, 5, 0, 4, 5, 0, 5, 2, 0, 3, NA, NA, 2, 4, 3, 4, 3, 2, 2, 4,
4, 3, 0, 4, 0, 4, 4, 3, 0, 4, 4, 3, 5, 0, 3, 0, 4, 3, 0, 3, 3,
3, 4, 5, 1)), row.names = c(NA, -200L), class = "data.frame")
I start by defining a list of relevant variables - this is not necessary but basically a consequence of using the code in a Shiny up:
input <- list()
input$x <- "education_level"
input$y <- "political_trust"
input$z <- "political_interest"
Next, creating the surface data:
# Regressing "political_interest" on "education_level" and "political_trust":
lm <- lm(as.formula(paste0(input$z, " ~ ", input$x, " + ", input$y)), data)
# Defining range of values that outcome will be predicted for
axis_x <- seq(min(data[, input$x], na.rm = T),
max(data[, input$x], na.rm = T), by = 0.2)
axis_y <- seq(min(data[, input$y], na.rm = T),
max(data[, input$y], na.rm = T), by = 0.2)
# Predicting outcome, and getting data into surface format
lm_surface <- expand.grid(x = axis_x, y = axis_y, KEEP.OUT.ATTRS = F)
colnames(lm_surface) <- c(input$x, input$y)
lm_surface <- acast(lm_surface, as.formula(paste0(input$x, " ~ ", input$y)),
value.var = input$z)
Last, plotting this with plotly:
data %>%
filter(!is.na(get(input$z))) %>%
filter(!is.na(get(input$x))) %>%
filter(!is.na(get(input$y))) %>%
plot_ly(., x = ~jitter(get(input$x), factor = 2.5),
y = ~jitter(get(input$y), factor = 2.5),
z = ~jitter(get(input$z), factor = 2.5),
type = "scatter3d", mode = "markers",
marker = list(size = 2, color = "#cccccc")) %>%
add_surface(., z = lm_surface,
x = axis_x,
y = axis_y,
type = "surface")
This gives me the following. As you can see, the surface does not cover the full range of the y-dimension. Note also that the surface plotted is "quadratic" - i.e. same length in x and y - although it should have non-quadratic dimensions.
I can bring plotly to draw larger surface area, e.g. by changing the range of values like below, but it always stays quadratic.
axis_x <- seq(0, 10, by = 0.2)
axis_y <- seq(0, 10, by = 0.2)