Reshape vs. tidyr for repeated measures with multiple dependent variables

Question

I have the following sample data of 10 cases with three repeated measures for two dependent variables "Rapport" and "STRS":

structure(list(SubID = structure(1:10, .Label = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", 
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", 
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", 
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", 
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70", 
"71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", 
"82", "83", "84"), class = "factor"), Gender = structure(c(3L, 
2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L), .Label = c("#NULL!", "1", 
"2"), class = "factor"), Age = structure(c(5L, 3L, 2L, 2L, 3L, 
5L, 5L, 2L, 2L, 3L), .Label = c("#NULL!", "10", "11", "8", "9"
), class = "factor"), Rapport.1 = structure(c(22L, 25L, 19L, 
10L, 18L, 19L, 20L, 20L, 21L, 16L), .Label = c("#NULL!", "1.1", 
"1.85", "2.45", "2.5", "2.55", "2.6", "2.75", "2.8", "2.85", 
"2.9", "2.95", "3.2", "3.25", "3.3", "3.35", "3.4", "3.45", "3.5", 
"3.55", "3.6", "3.65", "3.7", "3.75", "3.8", "3.85", "3.9", "3.95"
), class = "factor"), Rapport.2 = structure(c(29L, 31L, 27L, 
17L, 9L, 26L, 24L, 21L, 30L, 32L), .Label = c("#NULL!", "1.25", 
"1.4", "1.6", "1.95", "2.05", "2.3", "2.35", "2.45", "2.5", "2.65", 
"2.7", "2.75", "2.8", "2.85", "3", "3.05", "3.1", "3.15", "3.2", 
"3.35", "3.4", "3.45", "3.5", "3.55", "3.6", "3.65", "3.7", "3.75", 
"3.8", "3.85", "3.9", "3.95", "4"), class = "factor"), Rapport.3 =     structure(c(32L, 
35L, 22L, 22L, 5L, 25L, 30L, 21L, 25L, 34L), .Label = c("#NULL!", 
"1.35", "1.45", "1.6", "1.75", "1.85", "1.9", "1.95", "2.05", 
"2.1", "2.25", "2.3", "2.35", "2.4", "2.45", "2.6", "2.75", "2.8", 
"2.9", "2.95", "3", "3.05", "3.1", "3.2", "3.25", "3.3", "3.35", 
"3.4", "3.45", "3.5", "3.55", "3.6", "3.7", "3.75", "3.8", "3.85"
), class = "factor"), STRS.1 = structure(c(33L, 10L, 8L, 18L, 
29L, 22L, 7L, 28L, 37L, 26L), .Label = c("#NULL!", "100", "102", 
"103", "104", "106", "107", "108", "109", "110", "111", "112", 
"113", "114", "115", "116", "117", "118", "119", "120", "122", 
"123", "124", "125", "126", "127", "128", "129", "132", "133", 
"69", "71", "73", "85", "88", "89", "92", "97", "99"), class = "factor"), 
STRS.2 = structure(c(37L, 19L, 9L, 22L, 21L, 22L, 16L, 16L, 
42L, 31L), .Label = c("#NULL!", "100", "101", "103", "104", 
"105", "106", "107", "108", "110", "111", "113", "114", "115", 
"116", "117", "118", "119", "120", "121", "122", "123", "124", 
"125", "126", "127", "128", "129", "131", "132", "136", "137", 
"138", "139", "158", "63", "76", "80", "91", "94", "95", 
"98", "99"), class = "factor"), STRS.3 = structure(c(31L, 
11L, 19L, 23L, 22L, 13L, 17L, 17L, 34L, 29L), .Label = c("#NULL!", 
"102", "104", "105", "106", "107", "108", "109", "110", "111", 
"112", "114", "117", "118", "119", "120", "122", "123", "124", 
"125", "126", "127", "128", "129", "130", "131", "132", "133", 
"134", "135", "66", "70", "75", "81", "85", "87", "88", "94", 
"98"), class = "factor")), .Names = c("SubID", "Gender", 
"Age", "Rapport.1", "Rapport.2", "Rapport.3", "STRS.1", "STRS.2", 
"STRS.3"), row.names = c(NA, 10L), class = "data.frame")

I tried to use the "melt" function in reshape and the "gather" function in tidyr, but both produce one column with the variable names "Rapport" and "STRS" stacked and another column with their values. I haven't been able to figure out how to produce a single column for the "Rapport" values and another column for the "STRS" values so that I can use a random effects model (note: I left out the other demographic variables and covariates). Any help with these two functions would be much appreciated.

# Attempt with melt(): stack the six measurement columns into long format.
# melt() accepts exactly one name for the key column (variable.name) and one
# for the value column (value.name), so it cannot directly produce separate
# "Rapport" and "STRS" value columns; the result is one stacked column of
# variable names plus one column of values.
# NOTE(review): variable.name/value.name are reshape2-style argument names;
# confirm which package is attached.
teachermelt <- melt(
  TeacherW,
  id.vars = c("SubID", "Gender", "Age"),
  measure.vars = c(
    "Rapport.1", "Rapport.2", "Rapport.3",
    "STRS.1", "STRS.2", "STRS.3"
  ),
  variable.name = "variable",
  value.name = "value"
)

# Attempt with gather(): stack columns Rapport.1 through STRS.3. The key
# column (holding the old column names) is named "Rapport" and the value
# column is named "STRS", so both measures end up in a single value column.
teachertidy <- gather(
  data = TeacherW,
  key = Rapport,
  value = STRS,
  Rapport.1:STRS.3
)

I was finally able to obtain the longform using this "reshape" function, which seems quite simple but I'm not sure if there's anything I need to be aware of when doing it this way:

# Base-R reshape to long format: the measurement columns (positions 4 to 9)
# are passed as `varying`, with "." as the separator between the measure
# name and the occasion number; SubID identifies each subject.
Teacherl <- reshape(
  data = TeacherW,
  direction = "long",
  idvar = "SubID",
  varying = 4:9,
  sep = "."
)
View(Teacherl)

npjc npjc · Accepted Answer · 2015-05-05T08:22:45

It's difficult to be sure if this is what you want, but here's one possible approach.

Starting with df:

  SubID Gender Age Rapport.1 Rapport.2 Rapport.3 STRS.1 STRS.2 STRS.3
1     1      2   9      3.65      3.75       3.6     73     76     66
2     2      1  11       3.8      3.85       3.8    110    120    112
3     3      2  10       3.5      3.65      3.05    108    108    124
4     4      1  10      2.85      3.05      3.05    118    123    128
5     5      2  11      3.45      2.45      1.75    132    122    127

`Tidyr` sol'n:

library(dplyr)
library(tidyr)

df %>%
  # Paste the Rapport and STRS values of each occasion into one
  # "Rapport_STRS" string column. ends_with(".1") anchors on the suffix, so
  # it cannot accidentally pick up other columns whose names merely contain
  # the digit (e.g. a ".10" occasion or an unrelated column with a "1").
  unite(one, ends_with(".1"), sep = "_") %>%
  unite(two, ends_with(".2"), sep = "_") %>%
  unite(three, ends_with(".3"), sep = "_") %>%
  # Stack the three united columns into a key column ("replicate") and a
  # value column ("values").
  gather(replicate, values, one:three) %>%
  # Split "values" back into its two parts at the "_" separator.
  # convert = TRUE turns the resulting strings back into numbers so that the
  # columns can be used directly in a model (numeric columns may print with
  # trailing zeros, e.g. 3.80 instead of 3.8).
  separate(values, c("Rapport", "STRS"), sep = "_", convert = TRUE)

which gives:

   SubID Gender Age replicate Rapport STRS
1      1      2   9       one    3.65   73
2      2      1  11       one     3.8  110
3      3      2  10       one     3.5  108
4      4      1  10       one    2.85  118
5      5      2  11       one    3.45  132
6      1      2   9       two    3.75   76
7      2      1  11       two    3.85  120
8      3      2  10       two    3.65  108
9      4      1  10       two    3.05  123
10     5      2  11       two    2.45  122
11     1      2   9     three     3.6   66
12     2      1  11     three     3.8  112
13     3      2  10     three    3.05  124
14     4      1  10     three    3.05  128
15     5      2  11     three    1.75  127

explanation:

What you are asking for (I think) is to gather both the Rapport and STRS columns while keeping them linked according to their suffixes (.1, .2, .3). To tidy this up you:

unite() the linked variables together into one column for each (forming variables one, two, three). After this you can
gather() these columns according to a key-value pair (here replicate and values). Lastly,
separate() the values variable back into its constituent variables Rapport and STRS.

N.B.

Just to be safe: I think the appropriate "tidy" data structure here would actually be the following, with one row per subject, variable and measurement index:

# Stack every column except the identifiers into key/value pairs, then
# split the key (e.g. "Rapport.1") at the literal dot into the measure
# name ("var") and the occasion index ("idx").
df %>%
  gather(key = "key", value = "value", -SubID, -Gender, -Age) %>%
  separate(col = key, into = c("var", "idx"), sep = "\\.")

Reshape vs. tidyr for repeated measures with multiple dependent variables

1 Answers

Tidyr sol'n:

explanation:

N.B.

`Tidyr` sol'n: