0
votes

What I have:

a <- c(49, 55, '2017-11-01', '13:54:57', '2017-11-01', '14:13:46')
b <- c(173, 171, '2017-11-01', '14:16:19', '2017-11-01', '14:23:34')
c <- c(171, 173, '2017-11-01', '14:57:01', '2017-11-01', '15:05:11')
d <- c(55, 49, '2017-11-01', '15:07:07', '2017-11-01', '15:27:30')
e <- as.data.frame(t(data.frame(a, b, c, d)))
colnames(e) <- c('start', 'stop', 'fdate', 'ftime', 'tdate', 'ttime')
e

  start stop      fdate    ftime      tdate    ttime 
     49   55 2017-11-01 13:54:57 2017-11-01 14:13:46
    173  171 2017-11-01 14:16:19 2017-11-01 14:23:34
    171  173 2017-11-01 14:57:01 2017-11-01 15:05:11
     55   49 2017-11-01 15:07:07 2017-11-01 15:27:30

I want to combine rows in one by next condition: if diff between ftime and ttime from previous row less than 30 minutes we have to write one row with:

start from first row, stop from last row, ftime from first row, ttime from last row

What I want for example:

  start stop      fdate    ftime      tdate    ttime
     49  171 2017-11-01 13:54:57 2017-11-01 14:23:34
    171   49 2017-11-01 14:57:01 2017-11-01 15:27:30
2
The last difference is greater than 30 minutes.. (i.e. 15:27:30 - 14:57:01 = 30 mins and 29 secs)Joseph Wood

2 Answers

1
votes

Try the following

library(lubridate)
library(dplyr)

myfun <- function(e) {
    dt_f <- ymd_hms(paste(e$fdate, e$ftime, sep=" "))    # convert to Date
    lag_dt_t <- ymd_hms(paste(dplyr::lag(e$tdate, 1, default=head(e$fdate, 1)), dplyr::lag(e$ttime, 1, default=NA), sep=" "))   # convert to Date
    lgl <- dt_f - lag_dt_t <= make_difftime(minute = 30)   # is difference <= 30 min?
    lgl[1] <- FALSE       # Always set first row to FALSE since there is no previous row to compare to
    return(cumsum(!lgl))    # Returns `grp` values
}

temp <- e
temp$grp <- myfun(temp)
ans <- temp %>%
        group_by(grp) %>%
        summarise(  start = head(start, 1),
                stop = tail(stop, 1),
                fdate = head(fdate, 1),
                ftime = head(ftime, 1),
                tdate = tail(tdate, 1),
                ttime = tail(ttime, 1)) %>%
        ungroup() %>%
        select(-grp)

# A tibble: 2 x 6
   # start   stop      fdate    ftime      tdate    ttime
  # <fctr> <fctr>     <fctr>   <fctr>     <fctr>   <fctr>
# 1     49    171 2017-11-01 13:54:57 2017-11-01 14:23:34
# 2    171     49 2017-11-01 14:57:01 2017-11-01 15:27:30
1
votes

Here is another base R alternative:

# Time differences between rows
d <- as.numeric(tail(as.POSIXct(paste(e$fdate, e$ftime)), -1) -
                  head(as.POSIXct(paste(e$tdate, e$ttime)), -1),
                units = "mins")

# Aggregation
do.call(rbind, by(e, c(0, cumsum(d > 30)), function(x) {
  x[, c("start", "fdate", "ftime")] <- head(x[, c("start", "fdate", "ftime")], 1)
  x[, c("stop", "tdate", "ttime")] <- tail(x[, c("stop", "tdate", "ttime")], 1)
  x[1, ]
}))

#   start stop      fdate    ftime      tdate    ttime
# 0    49  171 2017-11-01 13:54:57 2017-11-01 14:23:34
# 1   171   49 2017-11-01 14:57:01 2017-11-01 15:27:30