1
votes

I am working on visualizing a data set that consists of many time series, each measured at time points 1:2341. A sample of this data contains two vectors, V1 and V2, each of which represents one time series.

dput(df2)
#I deleted some data because of the limited space.
#EDIT: I include only the first 200 rows of the data frame

dput(df2[1:200,])
    structure(list(V1 = c(11489, 11495, 11409, 11441, 11413, 11434, 
11390, 11496, 11388, 11426, 11392, 11428, 11465, 11456, 11445, 
11501, 11543, 11465, 11435, 11430, 11484, 11468, 11503, 11476.25, 
11449.5, 11331, 11368, 11402, 11470, 11512, 11488, 11511, 11529, 
11442, 11483, 11474, 11424, 11504, 11548, 11537, 11464, 11448, 
11464, 11303, 11394, 11269, 11430, 11491, 11468, 11509, 11376, 
11438, 11547, 11452, 11471, 11448, 11425, 11420, 11489, 11457, 
11609, 11450, 11421, 11421, 11474, 11355, 11321, 11292, 11396, 
11320, 11184, 11266, 11335, 11424, 11225, 11279, 11179, 11286, 
11270, 11322, 11333, 11267, 11395, 11243, 11392, 11242, 11131, 
11341, 11285, 11245, 11255, 11257, 11261, 11255, 11293, 11297, 
11348, 11354, 11213, 11180, 11372, 11388, 11192, 11411, 11198, 
11271, 11150, 11268, 11309, 11349, 11330, 11285, 11112, 11328, 
11368, 11405, 11359, 11459, 11443, 11268, 11316, 11392, 11324, 
11366, 11328, 11354, 11305, 11331, 11397, 11305, 11452, 11275, 
11308, 11385, 11375, 11332, 11344, 11401, 11206, 11347, 11388, 
11374, 11349, 11279, 11427, 11455, 11500, 11289, 11330, 11418, 
11388, 11332, 11352, 11284, 11322, 11184, 11423, 11372, 11468, 
11456, 11397, 11567, 11418, 11415, 11386, 11426, 11303, 11351, 
11327, 11424, 11284, 11504, 11321, 11311, 11328, 11456, 11420, 
11511, 11263, 11461, 11491, 11264, 11405, 11358, 11434, 11445, 
11355, 11467, 11403, 11530, 11444, 11378, 11495, 11619, 11652, 
11669, 11590, 11793, 11772, 11744), V2 = c(6846, 6796, 6835, 
6761, 6870, 6798, 6800, 6848, 6824, 6834, 6812, 6820, 6857, 6841, 
6870, 6809, 6835, 6796, 6864, 6862, 6864, 6866, 6752, 6813, 6874, 
6848, 6856, 6816, 6784, 6864, 6821, 6867, 6810, 6835, 6828, 6802, 
6838, 6821, 6849, 6826, 6887, 6820, 6882, 6848, 6840, 6866, 6857, 
6872, 6823, 6873, 6852, 6880, 6806, 6824, 6841, 6844, 6847, 6874, 
6862, 6792, 6802, 6780, 6747, 6784, 6744, 6809, 6825, 6811, 6859, 
6802, 6866, 6853, 6724, 6863, 6830, 6827, 6818, 6885, 6855, 6707, 
6876, 6821, 6828, 6874, 6858, 6871, 6840, 6852, 6866, 6837, 6786, 
6884, 6837, 6831, 6845, 6807, 6878, 6827, 6840, 6850, 6870, 6885, 
6862, 6884, 6867, 6841, 6871, 6884, 6787, 6877, 6817, 6855, 6860, 
6803, 6863, 6779, 6883, 6864, 6866, 6824, 6797, 6867, 6859, 6886, 
6864, 6868, 6848, 6818, 6770, 6851, 6849, 6873, 6879, 6858, 6864, 
6849, 6866, 6880, 6838, 6881, 6828, 6849, 6880, 6838, 6833, 6856, 
6855, 6862, 6868, 6788, 6851, 6871, 6842, 6832, 6871, 6801, 6854, 
6830, 6871, 6887, 6830, 6880, 6833, 6842, 6849, 6857, 6894, 6871, 
6867, 6881, 6895, 6874, 6872, 6857, 6856, 6847, 6899, 6874.5, 
6850, 6902, 6887, 6883.5, 6880, 6862, 6873, 6884, 6879, 6874, 
6873.5, 6873, 6882.5, 6892, 7111, 7114.11111111111, 7117.22222222222, 
7120.33333333333, 7123.44444444444, 7126.55555555556, 7129.66666666667, 
7132.77777777778), Time = 1:200), row.names = c(NA, 200L), class = "data.frame")

Each of the two vectors above is labeled by another vector, called its class, so class1 and class2 are as follows:

#I can't dput class1 and class2 because of the limited space
> str(class1)
 num [1:2341] 5 5 5 5 5 5 5 5 5 5 ...
> summary(class1)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    1.00    1.00    2.51    4.00    8.00 
> str(class2)
 num [1:2341] 2 2 5 2 2 2 5 5 2 2 ...
> summary(class2)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   1.000   4.000   4.922   8.000   8.000 
 

I can successfully plot each time series (one for each of the vectors V1 and V2) and color it according to the label of each point, but only in separate plots.

str(df2)
'data.frame':   2341 obs. of  3 variables:
 $ V1  : num  11489 11495 11409 11441 11413 ...
 $ V2  : num  6846 6796 6835 6761 6870 ...
 $ Time: int  1 2 3 4 5 6 7 8 9 10 ...

 str(class1)
 num [1:2341] 5 5 5 5 5 5 5 5 5 5 ...
 str(class2)
 num [1:2341] 2 2 5 2 2 2 5 5 2 2 ...

 
# Plot each series on its own, colouring the line by the class label of each
# point. class1 and class2 are separate vectors holding one label per row of
# df2. group = 1 keeps all points joined as a single line even though the
# colour changes along it.
ggplot(df2, aes(x = Time, y = V1, colour = factor(class1), group = 1)) +
  geom_line()
ggplot(df2, aes(x = Time, y = V2, colour = factor(class2), group = 1)) +
  geom_line()

My question: How can I combine both plots into a single plot that contains the two curves? Additionally, how can I fix each color to the same class label for both curves, given that both class1 and class2 contain values in 1:8?

NOTE: I know it is possible to plot many series in the same plot using ts.plot(). I didn't use it because I don't know how to set the color according to the class label. Additionally, ts.plot() accepts up to 10 series, but my original data has more!

2
Hi Flore, could you please paste the dput() again? Your current dput() is giving errors! – Duck
Hi @Duck. There is not enough space to include all of the data in the question, so I deleted some of it. – Flore
@Duck now I have included only the first 200 rows of the 2341 rows of the data frame. – Flore

2 Answers

2
votes

One approach is to combine all of the variables into one data.frame and then use pivot_longer from tidyr.

library(tidyverse)

# Attach the per-point class labels to the data, then reshape to long format:
# a column named <prefix><number> contributes its values to the column <prefix>
# (the ".value" part) and its number to the new Series column.
df2 <- df2 %>%
  mutate(class1 = class1,
         class2 = class2) %>%
  pivot_longer(-Time,
               names_to = c(".value", "Series"),
               # Split at the first digit so multi-digit suffixes stay whole:
               # "V10" -> ("V", "10"). A greedy "(.+)([0-9]+$)" would instead
               # give ("V1", "0") once there are ten or more series.
               names_pattern = "^([^0-9]+)([0-9]+)$")

df2
## A tibble: 432 x 4
#    Time Series     V class
#   <int> <chr>  <dbl> <fct>
# 1     1 1      11489 1    
# 2     1 2       6846 1    
# 3     2 1      11495 1    
# 4     2 2       6796 1    
# 5     3 1      11409 1    
# 6     3 2       6835 1    
# 7     4 1      11441 1    
# 8     4 2       6761 1    
# 9     5 1      11413 1    
#10     5 2       6870 1    
## … with 422 more rows

Now each row contains both the V variable and the class variable for a single series at a given time. The series identifier is extracted from the end of each variable name, which is accomplished with the names_pattern argument of tidyr::pivot_longer.

# Draw one line per series; the colour follows the class label of each point.
ggplot(data = df2) +
  geom_line(
    mapping = aes(x = Time, y = V, colour = as.factor(class), group = Series)
  ) +
  ylim(500, 15000)

enter image description here

Sample Data:

# Example labels for 216 time points: class1 steps through 5 equal-width bins
# and class2 through 6; both are factors over the full label set 1..8.
class1 <- factor(cut(seq_len(216), breaks = 5, labels = FALSE), levels = 1:8)
class2 <- factor(cut(seq_len(216), breaks = 6, labels = FALSE), levels = 1:8)
2
votes

I have also added an example with dummy data. You can add your class variables to df2, as @IanCampbell smartly does, so that they will be in the data. Here is some code that may be helpful:

library(dplyr)
library(tidyr)    # pivot_longer() is provided by tidyr, not dplyr
library(ggplot2)
library(ggrepel)

# Data: two dummy series and, for each of them, a class label per time point
df <- data.frame(Time = 1:20,
                 V1 = cumsum(runif(20, 1, 10)),
                 V2 = cumsum(runif(20, 1, 10)),
                 class1 = sample(1:5, 20, replace = TRUE),
                 class2 = sample(1:5, 20, replace = TRUE))

# Code
# Reshape so that every row holds one series value together with the class of
# that same series: V1 is paired with class1 and V2 with class2. Pivoting the
# class columns and the value columns in two separate steps would pair every
# value with both class columns (four rows per time point instead of two).
df %>%
  pivot_longer(-Time,
               names_to = c(".value", "series"),
               names_pattern = "^([^0-9]+)([0-9]+)$") %>%
  # Rebuild the series name ("V1", "V2") and label only the last time point,
  # so each line gets exactly one label at its right-hand end.
  mutate(name = paste0("V", series),
         Label = ifelse(Time == max(Time, na.rm = TRUE), name, NA)) %>%
  ggplot(aes(x = Time, y = V, color = factor(class), group = name)) +
  geom_line() +
  labs(y = "value", color = "class") +
  # Named values tie each colour to a class label, whichever classes occur.
  scale_color_manual(values = c("1" = "magenta", "2" = "pink", "3" = "cyan",
                                "4" = "red", "5" = "blue")) +
  geom_label_repel(aes(label = Label),
                   nudge_x = 1.5,
                   na.rm = TRUE, show.legend = FALSE, color = "black")

Output:

enter image description here