Timeseries and its visualisation

Question

I have a quarter of a million events lieke this:

                 Slot Anzahl Nutzung TimeSlotNr WochenSlots Tag
1 2011-01-01 00:00:00      2   Firma          1         242   1
2 2011-01-01 00:00:00     50  Privat          1         242   1
3 2011-01-01 00:30:00      1   Firma          2         243   1
4 2011-01-01 00:30:00     49  Privat          2         243   1
5 2011-01-01 01:00:00      1   Firma          3         244   1
6 2011-01-01 01:00:00     48  Privat          3         244   1

A slot represents half 30 minutes, "Anzahl" is the number of events in a slot, the first slot starts at 2011-01-01 00:00:00 "WochenSlots" is the TimeSlotNr %% 336 and starts on a saturday 00:00:00. So i want to see the distribution in a week.

What I want to do now is:

show the dates in x-scale (monday 00:00 - sunday 24:00)
show lines (envelopes) which shows the distribution for x% of the events.

And i have no idea, how to do that.

  ggplot(data=PB2) + 
    geom_point(mapping = aes(x = WochenSlots, y = Anzahl, colour = Nutzung), alpha=0.6) +
    scale_y_continuous(labels = scales::percent) +
    facet_wrap(~Nutzung,
               shrink = TRUE,
               nrow = 2,
               scales = "free_y")

dput(PB2[1:100, ]) structure(list(Slot = structure(c(1293840000, 1293840000, 1293841800, 1293841800, 1293843600, 1293843600, 1293845400, 1293845400, 1293847200, 1293847200, 1293849000, 1293849000, 1293850800, 1293850800, 1293852600, 1293852600, 1293854400, 1293854400, 1293856200, 1293856200, 1293858000, 1293858000, 1293859800, 1293859800, 1293861600, 1293861600, 1293863400, 1293863400, 1293865200, 1293865200, 1293867000, 1293867000, 1293868800, 1293868800, 1293870600, 1293870600, 1293872400, 1293872400, 1293874200, 1293874200, 1293876000, 1293876000, 1293877800, 1293877800, 1293879600, 1293879600, 1293881400, 1293881400, 1293883200, 1293883200, 1293885000, 1293885000, 1293886800, 1293886800, 1293888600, 1293888600, 1293890400, 1293890400, 1293892200, 1293892200, 1293894000, 1293894000, 1293895800, 1293895800, 1293897600, 1293897600, 1293899400, 1293899400, 1293901200, 1293901200, 1293903000, 1293903000, 1293904800, 1293904800, 1293906600, 1293906600, 1293908400, 1293908400, 1293910200, 1293910200, 1293912000, 1293912000, 1293913800, 1293913800, 1293915600, 1293915600, 1293917400, 1293917400, 1293919200, 1293919200, 1293921000, 1293921000, 1293922800, 1293922800, 1293924600, 1293924600, 1293926400, 1293926400, 1293928200, 1293928200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Anzahl = c(2L, 50L, 1L, 49L, 1L, 48L, 1L, 43L, 1L, 43L, 1L, 30L, 1L, 27L, 0L, 22L, 0L, 19L, 0L, 20L, 0L, 18L, 0L, 17L, 0L, 17L, 0L, 17L, 0L, 17L, 0L, 18L, 0L, 19L, 2L, 19L, 2L, 19L, 2L, 20L, 2L, 21L, 2L, 21L, 2L, 20L, 2L, 18L, 2L, 22L, 2L, 24L, 3L, 25L, 1L, 28L, 1L, 30L, 1L, 33L, 1L, 32L, 1L, 28L, 2L, 24L, 2L, 25L, 2L, 25L, 2L, 22L, 2L, 20L, 1L, 15L, 2L, 14L, 1L, 13L, 1L, 11L, 1L, 12L, 1L, 11L, 1L, 9L, 1L, 8L, 1L, 7L, 1L, 5L, 1L, 4L, 1L, 3L, 0L, 3L), Nutzung = c("Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat", "Firma", "Privat"), TimeSlotNr = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L, 26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 30L, 30L, 31L, 31L, 32L, 32L, 33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L, 37L, 37L, 38L, 38L, 39L, 39L, 40L, 40L, 41L, 41L, 42L, 42L, 43L, 43L, 44L, 44L, 45L, 45L, 46L, 46L, 47L, 47L, 48L, 48L, 49L, 49L, 50L, 50L), WochenSlots = c(242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 256, 256, 257, 257, 258, 258, 259, 259, 260, 260, 261, 261, 262, 262, 263, 263, 264, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 270, 270, 271, 271, 272, 272, 273, 273, 274, 274, 275, 275, 276, 276, 277, 277, 278, 278, 279, 279, 280, 280, 281, 281, 282, 282, 283, 283, 284, 284, 285, 285, 286, 286, 287, 287, 288, 288, 289, 289, 290, 290, 291, 291), Tag = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L)), .Names = c("Slot", "Anzahl", "Nutzung", "TimeSlotNr", "WochenSlots", "Tag"), row.names = c(NA, 100L), class = "data.frame")

For the first bullet, ensure that Slot is of class POSIXct (PB2$Slot = as.POSIXct(PB2$Slot)) and then use Slot as the x mapping. For the second, do you mean you want a curve that shows, for each x-value, the y-value above which X% of the events occur? — eipi10
For the second, do you mean you want a curve that shows, for each x-value, the y-value above which X% of the events occur? YES — Rüdiger Kladt
Can you provide a data sample using dput? For example, paste into your question the output of dput(PB2[1:100, ]) to provide the first 100 rows of data. — eipi10

eipi10 eipi10 · Accepted Answer · 2017-10-06T05:04:27

It looks like quantile regression might be what you need. The data sample you posted only has one observation at each time point, so I've created some fake data for illustration. In the plot below, we use a flexible spline function for the regression function and we draw regression lines at the 25th and 75th percentiles of the data. Let me know if this is what you had in mind.

library(ggplot2)
library(quantreg)
library(splines)

# Fake data
set.seed(2)
dat = data.frame(x=runif(1e4,0,20))
dat$y = cos(dat$x) + 10 + rnorm(1e4, 2)

ggplot(dat, aes(x,y)) +
  geom_point(alpha=0.1, colour="blue", size=0.5) +
  geom_quantile(formula=y ~ ns(x, 10), quantiles=c(0.25, 0.75),
                colour="red", size=1) +
  theme_classic()

Timeseries and its visualisation

1 Answers