I have some data which looks like:
# A tibble: 6 x 3
Time Date Weather
<chr> <date> <chr>
1 "7:00 " 2010-01-01 Passing clouds
2 "7:30 " 2010-01-01 Passing clouds
3 "8:00 " 2010-01-01 Passing clouds
4 "8:30 " 2010-01-01 Passing clouds
5 "9:00 " 2010-01-01 Partly sunny
6 "9:30 " 2010-01-01 Drizzle Partly sunny
The data has half-hourly observations for each day. I am trying to collapse it into a daily series and create some dummy variables, but not one for every 30-minute interval.
That is, when I currently create dummy variables, too many columns are produced, which is why I am trying to collapse the data based on a condition. The condition is: keep a `Weather` value only if it appears in at least 4 consecutive observations. For example, in the rows shown above `Passing clouds` has 4 consecutive observations, but `Partly sunny` does not, and neither does `Drizzle Partly sunny`.
I currently have the following:
library(dplyr)            # provides %>%, group_by(), arrange(), distinct(), summarise()
library(splitstackshape)  # provides cSplit_e()

# Build one row per Date with a 0/1 dummy column for every distinct Weather
# value seen on that day. No consecutive-run condition is applied here, so
# every unique Weather value gets its own column.
df %>%
  group_by(Date) %>%
  # Order rows by Weather so each day's collapsed string is sorted.
  # arrange() ignores the grouping unless .by_group = TRUE.
  arrange(Weather) %>%
  # On grouped data distinct() keeps the grouping column, leaving the unique
  # Date/Weather pairs.
  distinct(Weather) %>%
  # Collapse each day's Weather values into a single "_"-separated string.
  summarise(text = paste(Weather, collapse = "_")) %>%
  # Expand that string into binary indicator columns; absent values become 0.
  cSplit_e(
    .,
    split.col = "text", sep = "_", type = "character",
    mode = "binary", fixed = TRUE, fill = 0
  )
However, this creates a dummy column for every unique value in the `Weather` column, which gives me too many columns. I am therefore trying to add a condition so that a column is kept only if its weather value has 4 or more consecutive observations.
Data:
# Reproducible sample data (dput-style output): a tibble with 200 rows and
# three columns.
#   Time    - character clock times, padded with a trailing space to
#             5 characters; no AM/PM marker is stored
#   Date    - Date values stored as day counts 14610-14617
#             (14610 is 2010-01-01)
#   Weather - character description of the observed conditions
df <- structure(list(Time = c("7:00 ", "7:30 ", "8:00 ", "8:30 ", "9:00 ",
"9:30 ", "10:00", "10:30", "11:00", "11:30", "12:00", "12:30",
"1:00 ", "1:30 ", "2:00 ", "2:30 ", "3:00 ", "3:30 ", "4:00 ",
"4:30 ", "5:00 ", "5:30 ", "6:00 ", "6:30 ", "7:00 ", "7:00 ",
"7:30 ", "8:00 ", "8:30 ", "9:00 ", "9:30 ", "10:00", "10:30",
"11:00", "11:30", "12:00", "12:30", "1:00 ", "1:30 ", "2:00 ",
"2:30 ", "3:00 ", "3:30 ", "4:00 ", "4:30 ", "5:00 ", "5:30 ",
"6:00 ", "6:30 ", "7:00 ", "7:30 ", "8:00 ", "8:30 ", "9:00 ",
"9:30 ", "10:00", "7:00 ", "7:30 ", "8:00 ", "8:30 ", "9:00 ",
"9:30 ", "10:00", "10:30", "11:00", "11:30", "12:00", "12:30",
"1:00 ", "1:30 ", "2:00 ", "2:30 ", "3:00 ", "3:30 ", "4:00 ",
"4:30 ", "5:00 ", "5:30 ", "6:00 ", "6:30 ", "7:00 ", "7:30 ",
"8:00 ", "8:30 ", "9:00 ", "9:30 ", "10:00", "7:00 ", "7:30 ",
"8:00 ", "8:30 ", "9:00 ", "9:30 ", "10:00", "10:30", "11:00",
"11:30", "12:00", "12:30", "1:00 ", "1:30 ", "2:00 ", "2:30 ",
"3:00 ", "3:30 ", "4:00 ", "4:30 ", "5:00 ", "5:30 ", "6:00 ",
"6:30 ", "7:00 ", "7:30 ", "8:00 ", "8:30 ", "9:00 ", "9:30 ",
"10:00", "7:00 ", "7:30 ", "8:00 ", "8:30 ", "9:00 ", "9:30 ",
"10:00", "10:30", "11:00", "11:30", "12:00", "12:30", "1:00 ",
"1:30 ", "2:00 ", "2:30 ", "3:00 ", "3:30 ", "4:00 ", "4:30 ",
"5:00 ", "5:30 ", "6:00 ", "6:30 ", "7:00 ", "7:30 ", "7:00 ",
"7:30 ", "8:00 ", "8:30 ", "9:00 ", "9:30 ", "10:00", "10:30",
"11:00", "11:30", "12:00", "1:00 ", "1:30 ", "2:00 ", "2:30 ",
"3:00 ", "3:30 ", "4:00 ", "4:30 ", "5:00 ", "5:30 ", "6:00 ",
"6:30 ", "7:00 ", "7:00 ", "7:30 ", "8:00 ", "8:30 ", "9:00 ",
"9:30 ", "10:00", "10:30", "11:00", "11:30", "12:00", "12:30",
"1:00 ", "1:30 ", "2:05 ", "2:30 ", "3:00 ", "3:30 ", "4:00 ",
"4:30 ", "5:00 ", "5:30 ", "6:00 ", "6:30 ", "7:00 ", "7:30 ",
"8:00 ", "8:30 ", "9:00 ", "9:30 ", "10:00", "7:00 "), Date = structure(c(14610,
14610, 14610, 14610, 14610, 14610, 14610, 14610, 14610, 14610,
14610, 14610, 14610, 14610, 14610, 14610, 14610, 14610, 14610,
14610, 14610, 14610, 14610, 14610, 14610, 14611, 14611, 14611,
14611, 14611, 14611, 14611, 14611, 14611, 14611, 14611, 14611,
14611, 14611, 14611, 14611, 14611, 14611, 14611, 14611, 14611,
14611, 14611, 14611, 14611, 14611, 14611, 14611, 14611, 14611,
14611, 14612, 14612, 14612, 14612, 14612, 14612, 14612, 14612,
14612, 14612, 14612, 14612, 14612, 14612, 14612, 14612, 14612,
14612, 14612, 14612, 14612, 14612, 14612, 14612, 14612, 14612,
14612, 14612, 14612, 14612, 14612, 14613, 14613, 14613, 14613,
14613, 14613, 14613, 14613, 14613, 14613, 14613, 14613, 14613,
14613, 14613, 14613, 14613, 14613, 14613, 14613, 14613, 14613,
14613, 14613, 14613, 14613, 14613, 14613, 14613, 14613, 14613,
14614, 14614, 14614, 14614, 14614, 14614, 14614, 14614, 14614,
14614, 14614, 14614, 14614, 14614, 14614, 14614, 14614, 14614,
14614, 14614, 14614, 14614, 14614, 14614, 14614, 14614, 14615,
14615, 14615, 14615, 14615, 14615, 14615, 14615, 14615, 14615,
14615, 14615, 14615, 14615, 14615, 14615, 14615, 14615, 14615,
14615, 14615, 14615, 14615, 14615, 14616, 14616, 14616, 14616,
14616, 14616, 14616, 14616, 14616, 14616, 14616, 14616, 14616,
14616, 14616, 14616, 14616, 14616, 14616, 14616, 14616, 14616,
14616, 14616, 14616, 14616, 14616, 14616, 14616, 14616, 14616,
14617), class = "Date"), Weather = c("Passing clouds", "Passing clouds",
"Passing clouds", "Passing clouds", "Partly sunny", "Drizzle Partly sunny",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Partly sunny", "Partly sunny", "Drizzle Partly sunny", "Drizzle Partly sunny",
"Scattered clouds", "Scattered clouds", "Scattered clouds", "Scattered clouds",
"Passing clouds", "Passing clouds", "Passing clouds", "Fog",
"Passing clouds", "Passing clouds", "Light fog", "Scattered clouds",
"Scattered clouds", "Scattered clouds", "Scattered clouds", "Scattered clouds",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Broken clouds", "Partly cloudy", "Partly cloudy", "Partly cloudy",
"Partly cloudy", "Passing clouds", "Passing clouds", "Passing clouds",
"Passing clouds", "Passing clouds", "Passing clouds", "Passing clouds",
"Passing clouds", "Passing clouds", "Partly sunny", "Partly sunny",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Partly sunny", "Rain Partly sunny", "Rain Partly sunny", "Rain Partly sunny",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Partly sunny", "Passing clouds", "Passing clouds", "Passing clouds",
"Passing clouds", "Passing clouds", "Passing clouds", "Passing clouds",
"Passing clouds", "Drizzle Fog", "Drizzle Fog", "Drizzle Fog",
"Drizzle Fog", "Drizzle Fog", "Drizzle Fog", "Drizzle Fog", "Fog",
"Fog", "Fog", "Fog", "Light rain Fog", "Light rain Fog", "Rain Fog",
"Rain Fog", "Rain Fog", "Rain Fog", "Rain Fog", "Rain Fog", "Fog",
"Partly sunny", "Broken clouds", "Broken clouds", "Passing clouds",
"Passing clouds", "Passing clouds", "Passing clouds", "Light rain Passing clouds",
"Passing clouds", "Passing clouds", "Passing clouds", "Fog",
"Fog", "Fog", "Fog", "Fog", "Fog", "Fog", "Fog", "Fog", "Partly sunny",
"Broken clouds", "Broken clouds", "Broken clouds", "Broken clouds",
"Broken clouds", "Broken clouds", "Broken clouds", "Partly sunny",
"Partly sunny", "Partly sunny", "Partly sunny", "Partly sunny",
"Scattered clouds", "Passing clouds", "Passing clouds", "Passing clouds",
"Passing clouds", "Passing clouds", "Passing clouds", "Partly cloudy",
"Broken clouds", "Scattered clouds", "Scattered clouds", "Scattered clouds",
"Scattered clouds", "Scattered clouds", "Scattered clouds", "Scattered clouds",
"Scattered clouds", "Broken clouds", "Broken clouds", "Broken clouds",
"Scattered clouds", "Scattered clouds", "Scattered clouds", "Scattered clouds",
"Scattered clouds", "Scattered clouds", "Passing clouds", "Passing clouds",
"Rain Low clouds", "Rain Low clouds", "Rain Low clouds", "Rain Low clouds",
"Light rain Mostly cloudy", "Light rain Mostly cloudy", "Light rain Mostly cloudy",
"Light rain Mostly cloudy", "Rain Low clouds", "Light rain Mostly cloudy",
"Light rain Mostly cloudy", "Rain Mostly cloudy", "Snow Mostly cloudy",
"Snow Mostly cloudy", "Snow Ice fog", "Snow Ice fog", "Snow Ice fog",
"Snow Ice fog", "Snow Ice fog", "Snow Ice fog", "Snow Ice fog",
"Snow Ice fog", "Light snow Ice fog", "Light snow Ice fog", "Ice fog",
"Passing clouds", "Partly cloudy", "Passing clouds", "Passing clouds",
"Passing clouds", "Passing clouds", "Passing clouds")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -200L))