8
votes

I have a data.frame that looks like this:

> head(ff.df)
  .id pio caremgmt prev price surveyNum
1   1   2        2    1     2         1
2   1   2        1    2     1         2
3   1   1        1    2     2         3
4   1   2        2    1     5         4
5   1   1        1    1     3         5
6   1   1        2    2     4         6

I'd like to reshape all four non-id variables wide by id. In other words, I want colnames:

surveyNum pio1 pio2 pio3 caremgmt1 caremgmt2 caremgmt3 prev1 prev2 prev3 price1 price2 price3

I can do that for a single variable:

> cast( ff.df, surveyNum~.id, value=c("pio"))
   surveyNum 1 2 3
1          1 2 2 2
2          2 2 1 2
3          3 1 2 1
4          4 2 1 1
5          5 1 2 2
6          6 1 2 1
7          7 1 1 2
8          8 2 2 1
9          9 1 1 2
10        10 1 1 1
11        11 2 2 1
12        12 1 2 2
13        13 1 1 1
14        14 2 1 1
15        15 1 2 1
16        16 2 1 2
17        17 1 2 2
18        18 2 1 2
19        19 1 2 2
20        20 2 2 2
21        21 2 1 1
22        22 1 2 1
23        23 2 1 1
24        24 2 1 2

But when I try it for a few it just fails utterly:

> cast( ff.df, surveyNum~.id, value=c("pio","caremgmt","prev","price"))
Error in data.frame(data[, c(variables), drop = FALSE], result = data$value) : 
  arguments imply differing number of rows: 72, 0
In addition: Warning message:
In names(data) == value :
  longer object length is not a multiple of shorter object length

Any hints? I can use the base (stats) reshape command, but I'm really trying to get away from it as it causes too much manual scalp trauma from hair-pulling....

ff.df <- structure(list(.id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), pio = structure(c(2L, 
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 
1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 
1L, 2L, 2L, 1L, 2L, 1L, 1L), .Label = c("1", "2"), class = "factor"), 
    caremgmt = structure(c(2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 2L), .Label = c("1", "2"), class = "factor"), prev = structure(c(1L, 
    2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 
    2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
    2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("1", 
    "2"), class = "factor"), price = structure(c(2L, 1L, 2L, 
    5L, 3L, 4L, 1L, 5L, 4L, 3L, 1L, 2L, 6L, 6L, 5L, 4L, 6L, 3L, 
    5L, 6L, 3L, 1L, 2L, 4L, 3L, 5L, 2L, 5L, 4L, 5L, 6L, 6L, 4L, 
    6L, 4L, 1L, 2L, 3L, 1L, 2L, 2L, 5L, 1L, 6L, 1L, 3L, 4L, 3L, 
    6L, 5L, 5L, 4L, 4L, 2L, 2L, 2L, 6L, 3L, 1L, 4L, 4L, 5L, 1L, 
    3L, 6L, 1L, 3L, 5L, 1L, 3L, 6L, 2L), .Label = c("1", "2", 
    "3", "4", "5", "6"), class = "factor"), surveyNum = c(1L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
    15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 
    3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
    16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
    17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L)), .Names = c(".id", 
"pio", "caremgmt", "prev", "price", "surveyNum"), row.names = c(NA, 
-72L), class = "data.frame")
3

3 Answers

13
votes

I think the problem is that ff.df is not yet sufficiently molten. Try this:

library(reshape)

# Melt it down
ff.melt <- melt(ff.df, id.var = c("surveyNum", ".id"))

# Note the new "variable" column, which will be combined
# with .id to make each column header
head(ff.melt)

  surveyNum .id variable value
1         1   1      pio     2
2         2   1      pio     2
3         3   1      pio     1
4         4   1      pio     2
5         5   1      pio     1
6         6   1      pio     1

# Cast it out - note that .id comes after variable in the formula;
# I think the only effect of that is that you get "pio_1" instead of "1_pio"
ff.cast <- cast(ff.melt, surveyNum ~ variable + .id)

head(ff.cast)

  surveyNum pio_1 pio_2 pio_3 caremgmt_1 caremgmt_2 caremgmt_3 prev_1 prev_2 prev_3 price_1 price_2 price_3
1         1     2     2     2          2          1          1      1      2      2       2       6       3
2         2     2     1     2          1          2          2      2      2      1       1       5       5
3         3     1     2     1          1          2          1      2      1      2       2       5       2
4         4     2     1     1          2          2          2      1      2      2       5       4       5
5         5     1     2     2          1          2          1      1      1      1       3       4       4
6         6     1     2     1          2          1          1      2      1      1       4       2       5

Does that do the trick for you?

Essentially, when casting, the variables indicated on the right-hand side of the casting formula dictate the columns that will appear in the cast result. By indicating only .id, I believe that you were asking cast to somehow cram all of those vectors of values into just three columns - 1, 2, and 3. Melting the data all the way down creates the variable column, which lets you specify that the combination of the .id and variable vectors should define the columns of the cast data frame.

(Sorry if I'm being repetitious/pedantic! I'm trying to work it out for myself, too)

7
votes

You can do this with the reshape function in base R. The column ordering is a little weird, but can be fixed easily.

 reshape(ff.df, direction = 'wide', idvar = "surveyNum", timevar = '.id')
4
votes

You could do this using dcast from the devel version of data.table i.e. v1.9.5

  library(data.table)
  ff.cast <- dcast(setDT(ff.df), surveyNum~.id, 
        value.var=c('pio', 'caremgmt', 'prev', 'price'))
  head(ff.cast,3)
  #  surveyNum 1_pio 2_pio 3_pio 1_caremgmt 2_caremgmt 3_caremgmt 1_prev 2_prev
  #1:         1     2     2     2          2          1          1      1      2
  #2:         2     2     1     2          1          2          2      2      2
  #3:         3     1     2     1          1          2          1      2      1
  #   3_prev 1_price 2_price 3_price
  #1:      2       2       6       3
  #2:      1       1       5       5
  #3:      2       2       5       2