0
votes

I am trying to plot a scatter plot and keep failing at that. My data df[1:10,] looks like this:

# A tibble: 13 x 5
   `Ticket Created` `Ticket Closed` `Case Owner`                              Frequency
   <chr>            <chr>           <fct>                                       <dbl>
 1 NA               NA              Animal_Services                             16395   
 2 NA               NA              Public_Works_Road_And_Bridges_16_60         6090
 3 NA               NA              COM_Code_Enforcement                        4099
 4 NA               2017-02-06      COM_Code_Enforcement                        123
 5 NA               2015-09-07      COM_Code_Enforcement                        96
 6 NA               2015-03-12      Animal_Services                             88
 7 NA               2017-01-06      COM_Code_Enforcement                        88
 8 2014-07-04       2014-07-04      Public_Works_Road_And_Bridges_16_60         78
 9 NA               2014-07-10      COM_Code_Enforcement                        65
10 NA               2014-08-09      COM_Code_Enforcement                        65
11 2013-11-03       2013-11-03      Public_Works_Road_And_Bridges_16_60         60
12 2014-07-01       2014-07-01      Public_Works_Road_And_Bridges_16_60         59
13 NA               2015-12-02      COM_Code_Enforcement                        55

I need a graph where Ticket Created and Ticket Closed are both on the x-axis, in different colours, and Frequency is on the y-axis. This is how I built my ggplot:

# Asker's original attempt, kept verbatim: two geom_point() layers are drawn
# from the same wide data frame `c`, one per date column.
ggplot2::ggplot()+
  # Layer 1: x = `Ticket Created` parsed as a date, y = Frequency. Colour is
  # mapped to destring() of the date string; the warnings quoted further down
  # report "NAs introduced by coercion" from this call.
  geom_point(data= c, aes(lubridate::date(`Ticket Created`), Frequency, 
                          color=destring(`Ticket Created`)))+ 
  # Layer 2: same construction for `Ticket Closed`.
  geom_point(data= c, aes(lubridate::date(`Ticket Closed`), Frequency, 
                          color=destring(`Ticket Closed`)))+ 
  theme_bw()+
  # One axis break per month, labelled as day / abbreviated month / year.
  scale_x_date(date_breaks = "1 month", date_labels =  "%d %b %Y") +
  # y-axis restricted to 0-150. NOTE(review): the first three rows of the
  # printed data have Frequency above 150 and so fall outside these limits.
  ylim(0, 150)+
  # No aes() above maps `alpha`, so this guide setting has nothing to act on.
  scale_alpha(guide = 'none')+
  theme(plot.title = element_text(hjust = 0.5), legend.position = "top", legend.title = element_text(face = "bold.italic"),
        axis.text.x=element_text(angle=60, hjust=1))+
  # One panel per `Case Owner`, stacked in a single column, each with its own
  # y scale.
  facet_wrap(~`Case Owner`, ncol = 1, scales = "free_y")+
  # No aes() above maps `fill` either; `F` is shorthand for FALSE.
  guides(fill= F)+
  labs(x="Day",y= "Freq. of Closing", caption = "**distributed by month-year")+
  ggtitle("Monthly Frequency of Ticket Closing by Case Owners, per Year")

This is the result I got: enter image description here With warning:

Warning messages:
1: In destring(`Ticket Created`) : NAs introduced by coercion
2: In destring(`Ticket Created`) : NAs introduced by coercion
3: In destring(`Ticket Closed`) : NAs introduced by coercion
4: Removed 50 rows containing missing values
(geom_point). 
5: Removed 22 rows containing missing values
(geom_point). 

The reason I am told not to remove NAs from the data is so that we can see which Case Owner opens and closes tickets on the same day. I have tried multiple methods of colouring, including turning the date columns into integers, and have followed these posts:
Scatter plot with ggplot2 colored by dates

Color points by date in ggplot2

The dput() output is below if you want to see the data. I would appreciate it if the two columns could be shown in different colours so the plot is clearer. Any hint would be valuable too!

# dput() output of the asker's data. The expression is not assigned to a name
# here; presumably it is the object the plotting code refers to as `c`.
# It is a tibble (class tbl_df) with 132 rows and four columns:
#   `Ticket Created`, `Ticket Closed` - character strings "YYYY-MM-DD" or NA
#   `Case Owner`                      - factor with three levels
#   Frequency                         - integer counts
structure(list(`Ticket Created` = c(NA, NA, NA, NA, NA, NA, NA, 
"2014-07-04", NA, NA, "2013-11-03", "2014-07-01", NA, "2013-04-04", 
"2013-10-04", NA, "2013-09-01", NA, "2014-10-07", NA, "2013-04-02", 
NA, NA, "2014-07-08", "2013-10-07", "2014-02-06", "2015-11-06", 
"2014-09-07", "2014-11-06", NA, "2015-07-07", NA, "2013-08-05", 
"2014-03-09", "2017-06-04", NA, "2014-01-05", "2014-06-01", NA, 
"2014-03-07", "2013-05-11", "2014-01-07", "2014-11-03", "2015-08-07", 
NA, NA, "2013-02-04", "2014-08-07", NA, NA, "2013-09-09", "2013-11-06", 
NA, NA, NA, "2014-08-04", "2014-10-11", "2014-12-02", "2013-03-06", 
"2013-05-02", NA, "2014-05-03", "2014-05-08", "2014-10-03", "2015-09-07", 
NA, "2013-01-04", "2014-09-01", NA, NA, NA, "2013-06-05", "2013-12-06", 
"2014-02-07", NA, NA, NA, "2013-12-08", "2014-10-01", "2014-11-08", 
"2014-12-02", NA, "2013-04-03", "2013-08-08", "2013-11-02", "2014-01-10", 
"2014-07-07", "2014-12-11", NA, NA, NA, "2014-03-04", "2014-12-09", 
"2015-02-07", NA, NA, "2013-07-08", "2013-11-12", "2014-06-05", 
"2014-10-02", "2014-12-05", "2015-01-09", "2015-09-12", "2016-09-02", 
NA, NA, "2013-01-05", "2013-12-12", "2013-12-12", "2014-12-05", 
"2015-02-09", "2016-05-05", "2016-07-06", "2016-12-04", "2016-12-10", 
"2016-12-12", NA, NA, NA, NA, NA, "2013-01-10", "2013-09-12", 
"2013-12-03", "2014-01-08", "2014-07-05", "2015-05-05", "2016-12-02", 
"2017-07-09", NA, NA, NA), `Ticket Closed` = c(NA, NA, NA, "2017-02-06", 
"2015-09-07", "2015-03-12", "2017-01-06", "2014-07-04", "2014-07-10", 
"2014-08-09", "2013-11-03", "2014-07-01", "2015-12-02", "2013-04-04", 
"2013-10-04", "2016-01-12", "2013-09-01", "2016-05-01", "2014-10-07", 
"2017-08-04", "2013-04-02", "2014-02-09", "2015-02-02", "2014-07-08", 
"2013-10-07", "2014-02-06", "2015-11-06", "2014-09-07", "2014-11-06", 
"2017-08-05", "2015-07-07", "2015-09-03", "2013-08-05", "2014-03-09", 
"2017-06-04", "2015-12-06", "2014-01-05", "2014-06-01", "2017-01-11", 
"2014-03-07", "2013-05-11", "2014-01-07", "2014-11-03", "2015-08-07", 
"2016-05-08", "2018-01-02", "2013-02-04", "2014-08-07", "2014-06-10", 
"2014-12-06", "2013-09-09", "2013-11-06", "2014-03-01", "2014-11-06", 
"2015-06-12", "2014-08-04", "2014-10-11", NA, "2013-03-06", "2013-05-02", 
"2015-10-03", "2014-05-03", "2014-05-08", "2014-10-03", "2015-09-07", 
"2013-04-04", "2013-01-04", "2014-09-01", "2014-03-06", "2014-06-12", 
"2014-08-08", "2013-06-05", NA, "2014-02-07", "2014-07-05", "2016-02-08", 
"2017-09-05", NA, NA, "2014-11-08", "2014-12-02", "2017-01-05", 
"2013-04-03", "2013-08-08", "2013-11-02", "2014-01-10", "2014-07-07", 
NA, "2013-04-09", "2016-08-01", "2017-02-05", NA, NA, "2015-02-07", 
"2014-02-04", "2015-07-04", "2013-07-08", "2013-11-12", "2014-06-05", 
NA, "2014-12-05", "2015-01-09", "2015-09-12", NA, "2014-06-03", 
"2016-04-05", "2013-01-05", "2013-12-12", NA, NA, "2015-02-09", 
NA, NA, NA, NA, NA, "2014-01-05", "2014-05-02", "2015-01-09", 
"2015-02-08", "2017-11-01", "2013-01-10", "2013-11-12", "2013-12-03", 
"2014-08-09", NA, "2015-05-05", NA, NA, "2013-01-08", "2015-03-02", 
"2017-08-12"), `Case Owner` = structure(c(1L, 3L, 2L, 2L, 2L, 
1L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 
3L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 1L, 
2L, 1L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 
2L, 1L, 3L, 1L, 3L, 2L, 2L, 2L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 
3L, 3L, 1L, 2L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 1L, 3L, 
3L, 3L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 2L), .Label = c("Animal_Services", 
"COM_Code_Enforcement", "Public_Works_Road_And_Bridges_16_60"
), class = "factor"), Frequency = c(16395L, 6090L, 4099L, 123L, 
96L, 88L, 88L, 78L, 65L, 65L, 60L, 59L, 55L, 54L, 53L, 51L, 50L, 
50L, 49L, 48L, 47L, 47L, 46L, 45L, 44L, 42L, 42L, 41L, 41L, 41L, 
40L, 40L, 39L, 39L, 39L, 39L, 38L, 37L, 37L, 36L, 35L, 35L, 35L, 
35L, 35L, 35L, 34L, 34L, 34L, 33L, 32L, 32L, 32L, 32L, 32L, 31L, 
31L, 31L, 30L, 30L, 30L, 29L, 29L, 29L, 29L, 29L, 28L, 28L, 28L, 
28L, 28L, 27L, 27L, 27L, 27L, 27L, 27L, 26L, 26L, 26L, 26L, 26L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 24L, 24L, 24L, 24L, 
24L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 22L, 22L, 
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L)), row.names = c(NA, 
-132L), class = c("tbl_df", "tbl", "data.frame"))
1
What should sclae_color_b be in your call to ggplot?Peter
Hi peter, apologies. I made a typo, sorry about that. It's not there. I meant to say scale_color_brewer(), but that doesn't matter for the problem in hand.WannabeSmith

1 Answer

1
votes

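If you reshape the data into long format, so that both date columns are stacked into a single date column alongside a column recording which kind of date each row holds, it should work: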


library(tidyr)    # pivot_longer(); tidyr also re-exports the %>% pipe
library(ggplot2)  # geom_point(), aes(), theme_bw(), ... are used unqualified

# Reshape to long format: the two date columns are stacked into one `date`
# column, and `tick` records which column each value came from
# ("Ticket Created" or "Ticket Closed"). A single discrete column can then be
# mapped to colour, instead of one layer per date column.
c1 <- c %>%
  pivot_longer(
    cols = c(`Ticket Created`, `Ticket Closed`),
    names_to = "tick",
    values_to = "date"
  )

# Print the reshaped data to check it.
c1

# One point layer: x = parsed date, y = Frequency, colour = ticket type.
# Rows whose date is NA cannot be positioned on the x-axis, so geom_point()
# leaves them out (with a warning); the NAs do not need removing beforehand.
ggplot(c1, aes(lubridate::date(date), Frequency, colour = tick)) +
  geom_point() +
  theme_bw() +
  scale_x_date(date_breaks = "1 month", date_labels = "%d %b %Y") +
  # Points with Frequency above 150 fall outside these limits and are not drawn.
  ylim(0, 150) +
  scale_alpha(guide = "none") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "top",
    legend.title = element_text(face = "bold.italic"),
    axis.text.x = element_text(angle = 60, hjust = 1)
  ) +
  # One panel per case owner, stacked vertically, each with its own y scale.
  facet_wrap(~`Case Owner`, ncol = 1, scales = "free_y") +
  # "none" replaces the original `F`: T/F are reassignable, and logical values
  # for guides are deprecated in current ggplot2.
  guides(fill = "none") +
  labs(
    x = "Day",
    y = "Freq. of Closing",
    caption = "**distributed by month-year",
    colour = "Ticket type"
  ) +
  ggtitle("Monthly Frequency of Ticket Closing by Case Owners, per Year")


Which results in:

enter image description here