2
votes

I have a data frame(df):

group col
a     12
a     15
a     13
b     21
b     23

Desired output is also a data frame(df1):

col1  col2
12    21
15    23
13    0

Namely, I want to split "col" of "df" by "group" into multiple columns, "col1" and "col2".

When the columns do not all have the same length, "0" must be appended to the end of each shorter column until its length reaches the maximum column length.

3

3 Answers

4
votes

How to do it with dplyr...

library(dplyr)
library(tidyr)

# Reshape 'col' into one column per 'group', padding shorter groups with 0
# and naming the result columns col1, col2, ... as the question asks.
df1 %>%
  group_by(group) %>%
  # Number the rows within each group so values line up by position.
  mutate(n = row_number()) %>%
  # Drop the grouping before reshaping: 'group' is about to become the
  # column headers and should not linger as a grouping variable.
  ungroup() %>%
  # pivot_wider() replaces the superseded spread(); values_fill pads the
  # cells that shorter groups lack with 0 (NAs already present in 'col'
  # are left as they are).
  pivot_wider(names_from = group, values_from = col, values_fill = 0) %>%
  select(-n) %>%
  setNames(paste0("col", seq_along(.)))
4
votes

We could use either of the base R functions split or unstack to split 'col' by 'group' into a list, then pad with NA every list element that is shorter than the longest one. Finally, change the column names and replace NA with 0.

  # Split 'col' by 'group'; unstack() gives one element (or column) per group.
  lst <- unstack(df1, col ~ group)
  # Extend every element to the longest length (missing slots become NA).
  # lapply() always returns one list element per group, whereas sapply()
  # collapses to a plain vector when each group has a single value, which
  # would turn into one column instead of one column per group.
  d1 <- as.data.frame(lapply(lst, `length<-`, max(lengths(lst))))
  # Replace the padding NAs with 0.
  d1[is.na(d1)] <- 0
  # seq_len() stays correct even for a zero-column result, unlike 1:ncol(d1).
  colnames(d1) <- paste0('col', seq_len(ncol(d1)))
  d1
 #  col1 col2
 #1   12   21
 #2   15   23
 #3   13    0

Or use stri_list2matrix from stringi

library(stringi)
# Split 'col' by 'group' and let stri_list2matrix() pad the shorter
# elements with the fill value while binding them into a matrix.
padded <- stri_list2matrix(unstack(df1, col~group), fill=0)
d1 <- as.data.frame(padded, stringsAsFactors=FALSE)
# Convert every column to numeric (stringi presumably returns the cells
# as text; verify against the stri_list2matrix documentation).
d1[] <- lapply(d1, as.numeric)

Or using data.table/splitstackshape

library(splitstackshape)
# Reshape 'col' to one column per 'group', filling missing cells with 0L:
#   1. getanID(df1, 'group') presumably adds a within-group counter column
#      named '.id' (it is the row key used in the dcast formula below).
#   2. dcast(.id ~ group, value.var = 'col', fill = 0L) spreads the groups
#      into columns, one row per '.id', padding absent cells with 0L.
#   3. [, .id := NULL] drops the helper key; setnames() renames the
#      remaining columns; the trailing [] makes the result print.
# NOTE(review): paste0('col', 1:2) hard-codes exactly two groups; this will
# not match if df1 has a different number of groups.
setnames(dcast(getanID(df1, 'group'), .id~group, value.var='col',
             fill=0L)[, .id:= NULL], paste0('col', 1:2))[]
#    col1 col2
#1:   12   21
#2:   15   23
#3:   13    0
3
votes

Since you fill with zeroes, another idea:

# Number the rows within each group via ave(..., FUN = seq_along), then
# cross-tabulate 'col' by (within-group index, group). Index/group
# combinations with no data show up as 0 in the table, which is what
# provides the zero padding. The result is a contingency table rather
# than a data frame, and the row dimension is labelled with the ave(...)
# expression text.
xtabs(col ~ ave(DF$col, DF$group, FUN = seq_along) + group, DF)
#                                      group
#ave(DF$col, DF$group, FUN = seq_along)  a  b
#                                     1 12 21
#                                     2 15 23
#                                     3 13  0

Where "DF":

# Example input: five rows, a two-level factor 'group' (three "a", two "b")
# and an integer column 'col'.
DF <- data.frame(
  group = factor(c("a", "a", "a", "b", "b"), levels = c("a", "b")),
  col = c(12L, 15L, 13L, 21L, 23L)
)