Two additional options (with an example dataframe with more than one row to better show the working of the code):
1) with base R:
l <- lapply(split.default(d[-1], cumsum(grepl('Start$', names(d)[-1]))),
setNames, c('DateRangeStart','DateRangeEnd','Value'))
data.frame(ID = d[,1], do.call(rbind, l), row.names = NULL)
which gives:
ID DateRangeStart DateRangeEnd Value
1 1 1/1/90 3/1/90 4.4
2 2 1/2/90 3/2/90 6.1
3 1 4/5/91 6/7/91 6.2
4 2 4/6/91 6/8/91 3.2
5 1 5/5/95 6/6/96 3.3
6 2 5/5/97 6/6/98 1.3
2) with the tidyverse
:
library(dplyr)
library(purrr)
split.default(d[-1], cumsum(grepl('Start$', names(d)[-1]))) %>%
map_dfr(~set_names(., c('DateRangeStart','DateRangeEnd','Value'))) %>%
bind_cols(ID = rep(d$ID, nrow(.)/nrow(d)), .)
3) with the sjmisc
-package:
library(sjmisc)
to_long(d, keys = 'group',
values = c('DateRangeStart','DateRangeEnd','Value'),
c('DateRange1Start','DateRange2Start','DateRange3Start'),
c('DateRange1End','DateRange2End','DateRange3End'),
c('Value1','Value2','Value3'))[,-2]
If you also want a group/time column, you can adapt the approaches above to:
1) with base R:
l <- lapply(split.default(d[-1], cumsum(grepl('Start$', names(d)[-1]))),
setNames, c('DateRangeStart','DateRangeEnd','Value'))
data.frame(ID = d[,1],
group = rep(seq_along(l), each = nrow(d)),
do.call(rbind, l), row.names = NULL)
which gives:
ID group DateRangeStart DateRangeEnd Value
1 1 1 1/1/90 3/1/90 4.4
2 2 1 1/2/90 3/2/90 6.1
3 1 2 4/5/91 6/7/91 6.2
4 2 2 4/6/91 6/8/91 3.2
5 1 3 5/5/95 6/6/96 3.3
6 2 3 5/5/97 6/6/98 1.3
2) with the tidyverse
:
split.default(d[-1], cumsum(grepl('Start$', names(d)[-1]))) %>%
map_dfr(~set_names(., c('DateRangeStart','DateRangeEnd','Value'))) %>%
bind_cols(ID = rep(d$ID, nrow(.)/nrow(d)),
group = rep(1:(nrow(.)/nrow(d)), each = nrow(d)), .)
3) with the sjmisc
-package:
library(sjmisc)
to_long(d, keys = 'group', recode.key = TRUE,
values = c('DateRangeStart','DateRangeEnd','Value'),
c('DateRange1Start','DateRange2Start','DateRange3Start'),
c('DateRange1End','DateRange2End','DateRange3End'),
c('Value1','Value2','Value3'))
Used data:
d <- read.table(text = "ID DateRange1Start DateRange1End Value1 DateRange2Start DateRange2End Value2 DateRange3Start DateRange3End Value3
1 1/1/90 3/1/90 4.4 4/5/91 6/7/91 6.2 5/5/95 6/6/96 3.3
2 1/2/90 3/2/90 6.1 4/6/91 6/8/91 3.2 5/5/97 6/6/98 1.3", header = TRUE, stringsAsFactors = FALSE)
reshape2/melt/recast/tidyr
? (This question makes a better, more general dupe target if not) – smci