0
votes

Aloha all,

I'm struggling to build a legend for a plot that mixes several time series from different sources. My code is shown below.

My understanding is that I need to clean my data and put it all in the same data frame, but the time series don't line up well: some are recorded every 15 minutes, others every hour. Is there any way to force a legend for these datasets? I'm not sure what else to post here, since the 5 datasets are quite large.

Plot I'm working on:

enter image description here

# Overlay turbidity and water-level readings from several data frames on one
# time axis. Each source is restricted to rows whose timestamp lies strictly
# between startd and endd. Colours are set as constants (outside aes()), so
# no colour scale -- and therefore no legend -- is produced by this code.
q <- ggplot(
  data = subset(cr200_Auwai1, timedate > startd & timedate < endd),
  mapping = aes(x = timedate, y = Turb_SS)
) +
  geom_point(colour = "coral4") +
  geom_point(
    data = subset(dsloi_wl, timedate > startd & timedate < endd),
    mapping = aes(x = timedate, y = level),
    colour = "blue"
  ) +
  # Disabled layer: flow data
  #geom_point(data=subset(flow_data, mdate>startd & mdate<endd), aes(as.POSIXct(mdate), flow_cfs*1000), color="red")+
  # Lvl_m is multiplied by 1000 (presumably metres -> mm, to match the axis
  # label); alternative colour considered: aquamarine3
  geom_point(
    data = subset(cr300_Wai1, timedate > startd & timedate < endd),
    mapping = aes(x = timedate, y = Lvl_m * 1000),
    colour = "forestgreen",
    size = 1
  ) +
  geom_point(
    data = subset(cr300_Wai1, timedate > startd & timedate < endd),
    mapping = aes(x = timedate, y = Turb_SS),
    colour = "orange"
  ) +
  # Disabled layers: second water-level source and rainfall
  #geom_point(data=subset(hihimanu_wl, timedate>startd & timedate<endd), aes(timedate, level), color="azure4", size=0.1)+
  #geom_point(data=subset(rain_data, timedate>startd & timedate<endd), aes(timedate, rainmm), color="red",size=5)+
  # This source names its timestamp column `datetime`, not `timedate`
  geom_point(
    data = subset(haptuk_ysi, datetime > startd & datetime < endd),
    mapping = aes(x = datetime, y = Turb),
    colour = "pink"
  ) +
  #scale_x_date(breaks=date_breaks("month"), labels = date_format("%b-%y"))+
  labs(x = "Date", y = "Turbidity (NTU) and Water Level (mm)") +
  # Zoom the y axis without dropping points that fall outside the range
  coord_cartesian(ylim = c(0, 1500)) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16, face = "bold"),
    legend.justification = c(1, 1),
    legend.position = c(1, 1),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )

Here is a sample of two of the datasets. Note that the times don't line up at all, since I'm mixing sources.

dsloi_wl:

structure(list(ReceptionTime = c(1533895414.1134, 1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648, 1533896013.87724, 1533896013.8894), d2w = c(776.7, 789.7, 790.2, 777.1, 777.2, 777.7, 778.4, 793.4, 779.6, 794.1, 819.9, 780.7, 794.1, 806.9, 781.9, 781.9, 782.7, 782.8, 783.1, 783.4), timedate = structure(c(1533895414.1134, 1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648, 1533896013.87724, 1533896013.8894), class = c("POSIXct", "POSIXt"), tzone = ""), level = c(723.3, 710.3, 709.8, 722.9, 722.8, 722.3, 721.6, 706.6, 720.4, 705.9, 680.1, 719.3, 705.9, 693.1, 718.1, 718.1, 717.3, 717.2, 716.9, 716.6)), .Names = c("ReceptionTime", "d2w", "timedate", "level"), row.names = c(NA, 20L), class = "data.frame")

cr300_Wai1:

structure(list(RECORD = 73027:73046, Temp_C = c(24.62861, 24.62332, 24.61533, 24.60857, 24.60189, 24.59733, 24.59068, 24.58404, 24.57869, 24.57327, 24.56781, 24.5606, 24.55551, 24.55218, 24.54648, 24.5416, 24.5358, 24.5319, 24.52781, 24.52294), Turb_BS = c(94.50522, 88.65939, 109.354, 57.71527, 134.1903, 46.37191, 78.17719, 52.22319, 58.07111, 96.95719, 51.47488, 44.65616, 70.43825, 99.58217, 93.68374, 87.4787, 175.5395, 167.6757, 110.8119, 132.5971), Turb_SS = c(36.63349, 34.31228, 37.02223, 32.97258, 36.68553, 33.82083, 37.43391, 33.43639, 31.17306, 33.6327, 34.69954, 30.99891, 34.69988, 33.64369, 32.54948, 32.1177, 32.86558, 48.97706, 30.65004, 33.71646), Temp_C_2 = c(24.9014, 24.89474, 24.88837, 24.88279, 24.87574, 24.86852, 24.86357, 24.85751, 24.85236, 24.84759, 24.84091, 24.83577, 24.83192, 24.82713, 24.8229, 24.81832, 24.81237, 24.80821, 24.8051, 24.80015), WD_OBS = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Lvl_m = c(0.6907353, 0.6905226, 0.6896195, 0.6890779, 0.6881586, 0.6878724, 0.6862501, 0.6848835, 0.6844589, 0.6837503, 0.6836612, 0.6831629, 0.6821692, 0.6812283, 0.6799452, 0.6791196, 0.6782504, 0.6772775, 0.6763596, 0.6755115), timedate = structure(c(1533895500, 1533895800, 1533896100, 1533896400, 1533896700, 1533897000, 1533897300, 1533897600, 1533897900, 1533898200, 1533898500, 1533898800, 1533899100, 1533899400, 1533899700, 1533900000, 1533900300, 1533900600, 1533900900, 1533901200), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("RECORD", "Temp_C", "Turb_BS", "Turb_SS", "Temp_C_2", "WD_OBS", "Lvl_m", "timedate"), row.names = c(NA, 20L), class = "data.frame")

1
please share sample of your data using dput() (not str or head or picture/screenshot) so others can help. See more here stackoverflow.com/questions/5963269/… - Tung
I guess a solution is to have a time column with the smallest time step (15 min in your example) and fill in NA where a variable is recorded at a larger time step (1 hour, for example). Then prefer a long-format data frame (see gather from tidyverse, or melt) with 3 columns: time, variable_name and variable_value. Then it's easy to plot: just add color = variable_name inside aes(...). - bVa
@thanhtungmilan I shared a sample of the dataset to help. The problem is the times won't line up on the hour/fifteen etc, since all the clocks are different on all the different sensors. I'll try below and be back. - Kim Falinski
@KimFalinski : data frame cr200_Auwai1 is missing. I edited my answer below with the 2 datasets available. - bVa
great, thank you. success! - Kim Falinski

1 Answer

0
votes

Here is a solution using mock data (next time, please provide a sample of your data):

library(tidyverse)
library(lubridate)
#> 
#> Attachement du package : 'lubridate'
#> The following object is masked from 'package:base':
#> 
#>     date

# Mock data: three time grids over the same 24 hours, sampled every
# 15, 30 and 60 minutes respectively.
time_15m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "15 min"
)
time_30m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "30 min"
)
time_60m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "60 min"
)

# One data frame per grid; each value column is a synthetic signal computed
# from the hour and minute of the timestamp.
data_1 <- data.frame(time = time_15m)
data_1$var_1 <- cos(hour(time_15m) + minute(time_15m))

data_2 <- data.frame(time = time_30m)
data_2$var_2 <- sin(hour(time_30m) + minute(time_30m))

data_3 <- data.frame(time = time_60m)
data_3$var_3 <- cos(1 - hour(time_60m) + minute(time_60m))

# The kind of plot you have now: one point layer per data frame, each with a
# constant colour set outside aes() (prefer the 2nd version).
ggplot(data = data_1, mapping = aes(x = time, y = var_1)) +
  theme_bw() +
  geom_point(colour = "red") +
  geom_point(
    data = data_2,
    mapping = aes(x = time, y = var_2),
    colour = "green"
  ) +
  geom_point(
    data = data_3,
    mapping = aes(x = time, y = var_3),
    colour = "blue"
  )

# Long-format version: put every series on the 15-minute grid, stack the value
# columns with gather(), and map colour to the variable name so ggplot2 builds
# the legend itself.
data_1 %>%
  # data_2 (30 min step) and data_3 (60 min step) have no row for most of the
  # 15-minute timestamps; the join fills those cells with NA
  left_join(data_2) %>%
  left_join(data_3) %>%
  # Collapse var_1, var_2 and var_3 into a name column and a value column
  gather(
    key = "variable_name",
    value = "variable_value",
    var_1, var_2, var_3
  ) %>%
  ggplot(aes(x = time, y = variable_value, colour = variable_name)) +
  geom_point(size = 2) +
  theme_bw()
#> Joining, by = "time"
#> Joining, by = "time"
#> Warning: Removed 120 rows containing missing values (geom_point).

Created on 2018-08-22 by the reprex package (v0.2.0).

EDIT 1 (include provided datasets)

library(tidyverse)
# Combine the two provided sample datasets into one long-format data frame and
# plot them with a colour legend.
dsloi_wl %>%
  # full_join (rather than left_join) keeps the rows of both data frames: the
  # loggers' timestamps do not coincide, so unmatched rows are kept and padded
  # with NA in the other logger's columns
  full_join(cr300_Wai1) %>%
  # Put Lvl_m on the same scale as `level` by using the question's own
  # conversion factor (Lvl_m * 1000; presumably metres -> mm, as on its axis
  # label)
  mutate(Lvl_m = 1000 * Lvl_m) %>%
  # Stack the three plotted columns into a name column and a value column
  gather(variable_name, variable_value, level, Lvl_m, Turb_SS) %>%
  ggplot(aes(x = timedate, y = variable_value, color = variable_name)) +
  geom_point() +
  # Manual scale: keeps the colours used in the question and sets the legend
  # title
  scale_color_manual(
    name = "Legend title",
    values = c(
      "level" = "blue",
      "Lvl_m" = "forestgreen",
      "Turb_SS" = "orange"
    )
  )
#> Joining, by = "timedate"
#> Warning: Removed 60 rows containing missing values (geom_point).

Created on 2018-08-23 by the reprex package (v0.2.0).