2
votes

I'm trying to create a plot showing the CDFs of two different categories of data, with a legend to show which color corresponds to which (Plotly version 4.9.2.1). For some reason, it's a royal pain in the rear to get the legend to show. Below is a toy example with three of my attempts--only the last one works, but it's obnoxiously contrived and makes the resulting data appear misleadingly dense in the plot. Can anyone explain how to do this right?

library(plotly)
library(magrittr)

color.dat <- runif(30)
x.mat <- matrix(0, nrow=500, ncol=30)
for (i in 1:30){
  x.mat[,i] <- rnorm(500, 0, color.dat[i])
}

### Attempt 1, no legend appears at all ###
p <- plot_ly(showlegend=TRUE) 

for (i in 1:30){
  tmp.cdf <- ecdf(x.mat[,i])
  
  p <- p %>%
    add_lines(x=sort(x.mat[,i]), y=tmp.cdf(sort(x.mat[,i])), 
              name=ifelse(color.dat[i] > 0.5, 'A', 'B'),
              showlegend=FALSE,
              line=list(color=ifelse(color.dat[i] > 0.5, 'blue', 'orange')))
}

p <- p %>% 
  add_lines(x=c(0,1), y=c(0,0), name='A', 
            line=list(color='blue'), 
            showlegend=TRUE, visible=FALSE) %>%
  add_lines(x=c(0,1), y=c(0,0), name='B', 
            line=list(color='orange'), 
            showlegend=TRUE, visible=FALSE)

### Attempt 2, legend entry appears only for class B (doesn't appear without invisible traces added at end) ###
p <- plot_ly(showlegend=TRUE) 

a.bool <- TRUE
b.bool <- TRUE

for (i in 1:30){
  tmp.cdf <- ecdf(x.mat[,i])
  
  if (color.dat[i] > 0.5 && a.bool){
    class.bool <- TRUE
    a.bool <- FALSE
  } else {
    class.bool <- FALSE
  }
  if (color.dat[i] < 0.5 && b.bool){
    class.bool <- TRUE
    b.bool <- FALSE
  } else {
    class.bool <- FALSE
  }
  
  p <- p %>%
    add_lines(x=sort(x.mat[,i]), y=tmp.cdf(sort(x.mat[,i])), 
              name=ifelse(color.dat[i] > 0.5, 'A', 'B'),
              showlegend=class.bool,
              line=list(color=ifelse(color.dat[i] > 0.5, 'blue', 'orange')))
}

p <- p %>% 
  add_lines(x=c(0,1), y=c(0,0), name='A', 
            line=list(color='blue'), 
            showlegend=TRUE, visible=FALSE) %>%
  add_lines(x=c(0,1), y=c(0,0), name='B', 
            line=list(color='orange'), 
            showlegend=TRUE, visible=FALSE)

### Attempt 3, both legend entries appear, but plot is misleading and obscures a lot of detail ###
p <- plot_ly(showlegend=TRUE) 

flat.mat.a <- c()
flat.mat.b <- c()
flat.cdf.a <- c()
flat.cdf.b <- c()

for (i in 1:30){
  tmp.cdf <- ecdf(x.mat[,i])
  if (color.dat[i] > 0.5){
    flat.mat.a <- c(flat.mat.a, sort(x.mat[,i]))
    flat.cdf.a <- c(flat.cdf.a, tmp.cdf(sort(x.mat[,i])))
  } else {
    flat.mat.b <- c(flat.mat.b, sort(x.mat[,i]))
    flat.cdf.b <- c(flat.cdf.b, tmp.cdf(sort(x.mat[,i])))
  }
}

p <- p %>%
  add_lines(x=flat.mat.a, y=flat.cdf.a, 
            showlegend=TRUE, name='A',
            line=list(color='blue')) %>%
  add_lines(x=flat.mat.b, y=flat.cdf.b, 
            showlegend=TRUE, name='B',
            line=list(color='orange'))
1

1 Answers

0
votes

My preferred approach to plotting stuff with ploty is to put the data in dataframe.

After the data preparation steps it's just takes two lines of code to get the plot and the legend.

library(plotly)
library(tidyr)
library(dplyr)

set.seed(42)

color.dat <- runif(30)
x.mat <- matrix(0, nrow=500, ncol=30)
for (i in 1:30){
  x.mat[,i] <- rnorm(500, 0, color.dat[i])
}

# Put the data in a dataframe
dfx <- data.frame(x.mat) %>% 
  tidyr::pivot_longer(everything()) %>% 
  arrange(name,  value) %>% 
  mutate(id = as.integer(gsub("^X", "", name)),
         color = color.dat[id],
         color = ifelse(color > 0.5, 'blue', 'orange')) %>% 
  group_by(name) %>% 
  mutate(cdf = ecdf(value)(value)) %>% 
  ungroup()

p <- dfx %>% 
  group_by(name) %>% 
  plot_ly(showlegend=TRUE) %>%
  add_lines(x = ~value, y =~cdf, color = ~color, colors = c(blue = "blue", orange = "orange"))
p

enter image description here