0
votes

I have a tibble with this structure (dput below):

# A tibble: 7 x 3
  Date  CC            Variance_Type
  <fct> <list>        <list>       
1 2-17  <dbl [8 x 2]> <chr [8]>    
2 2-18  <dbl [8 x 2]> <chr [8]>    
3 2-19  <dbl [8 x 2]> <chr [8]>    
4 2-20  <dbl [8 x 2]> <chr [8]>    
5 2-21  <dbl [8 x 2]> <chr [8]>    
6 2-22  <dbl [8 x 2]> <chr [8]>    
7 2-23  <dbl [8 x 2]> <chr [8]>    

My desired output is a 56 row x 4 column tibble where:

  1. The items of the lists have been unnested each to their own row.
  2. The values in Date fill down the column to match the items that are presently in their row.
  3. The columns of the nested tables in column CC are split into separate columns.

I've been trying to do this in the tidyverse with a combination of mutate(), map() and bind_rows() but I can't make it work. Any help is much appreciated.

The tibble:

structure(list(Date = structure(1:7, .Label = c("2-17", "2-18", 
"2-19", "2-20", "2-21", "2-22", "2-23"), class = "factor"), CC = list(
    structure(c(0.0292, 0.1803, 0.1063, 0.3555, 0.0227, 0.0471, 
    0.2588, 1, 2.92, 18.03, 10.63, 35.55, 2.27, 4.71, 25.88, 
    100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0414, 0.2307, 0.1813, 0.3428, 0.0408, 
    0.0137, 0.1494, 1, 4.14, 23.07, 18.13, 34.28, 4.08, 1.37, 
    14.94, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0348, 0.2298, 0.4534, 0.0453, 0.1252, 
    0.0406, 0.0709, 1, 3.48, 22.98, 45.34, 4.53, 12.52, 4.06, 
    7.09, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0693, 0.1241, 0.3104, 0.0885, 0.1706, 
    0.0563, 0.1807, 1, 6.93, 12.41, 31.04, 8.85, 17.06, 5.63, 
    18.07, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0431, 0.1621, 0.3896, 0.2243, 0.1005, 
    -0.0339, 0.1143, 1, 4.31, 16.21, 38.96, 22.43, 10.05, -3.39, 
    11.43, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0656, 0.1021, 0.2638, 0.1299, 0.2102, 
    0.0061, 0.2223, 1, 6.56, 10.21, 26.38, 12.99, 21.02, 0.61, 
    22.23, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0802, 0.1114, 0.6962, 0.0845, 0.1544, 
    -0.0958, -0.0309, 1, 8.02, 11.14, 69.62, 8.45, 15.44, -9.58, 
    -3.09, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    )))), Variance_Type = list(c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -7L))
2

2 Answers

0
votes

Here is another solution based on data.table package:

library(data.table)
df2 <- setDT(df)[, as.data.table(.SD$CC[[1]], .SD[["Variance_Type"]]), Date]
setnames(df2, "rn", "Variance_Type")

df2
#       Date              Variance_Type Coefficient     % Total
#  1:   2-17 Unique to Jsc                   0.0292        2.92
#  2:   2-17 Unique to Uoc                   0.1803       18.03
#  3:   2-17 Unique to FF                    0.1063       10.63
#  4:   2-17 Common to Jsc, and Uoc          0.3555       35.55
#  5:   2-17 Common to Jsc, and FF           0.0227        2.27
#  6:   2-17 Common to Uoc, and FF           0.0471        4.71
#  7:   2-17 Common to Jsc, Uoc, and FF      0.2588       25.88
#  8:   2-17 Total                           1.0000      100.00
#  9:   2-18 Unique to Jsc                   0.0414        4.14
# 10:   2-18 Unique to Uoc                   0.2307       23.07
# 11:   2-18 Unique to FF                    0.1813       18.13
# 12:   2-18 Common to Jsc, and Uoc          0.3428       34.28
# 13:   2-18 Common to Jsc, and FF           0.0408        4.08
# 14:   2-18 Common to Uoc, and FF           0.0137        1.37
# 15:   2-18 Common to Jsc, Uoc, and FF      0.1494       14.94
# 16:   2-18 Total                           1.0000      100.00
# 17:   2-19 Unique to Jsc                   0.0348        3.48
# 18:   2-19 Unique to Uoc                   0.2298       22.98
# 19:   2-19 Unique to FF                    0.4534       45.34
# 20:   2-19 Common to Jsc, and Uoc          0.0453        4.53
# 21:   2-19 Common to Jsc, and FF           0.1252       12.52
# 22:   2-19 Common to Uoc, and FF           0.0406        4.06
# 23:   2-19 Common to Jsc, Uoc, and FF      0.0709        7.09
# ....
0
votes

While this is generally the type of problem that unnest() from tidyr will help with, your CC variable is a matrix instead of a data.frame (or tibble). This causes some problems. Once you convert the matrix to a data.frame, I think you'll find unnest() to be the go-to tool.

My first step was then to loop through the matrices in CC and convert them to data.frames, using a purrr::map loop within mutate(). Then I use unnest() to unnest both variables at one time.

Here's how this works post tidyr version 1.0.0.

library(tidyr)
library(purrr)
library(dplyr)
dat %>%
     mutate(CC = map(CC, as.data.frame) ) %>%
     unnest(cols = c(CC, Variance_Type) )

# A tibble: 56 x 4
   Date  Coefficient `    % Total` Variance_Type               
   <fct>       <dbl>         <dbl> <chr>                       
 1 2-17       0.0292          2.92 "Unique to Jsc             "
 2 2-17       0.180          18.0  "Unique to Uoc             "
 3 2-17       0.106          10.6  "Unique to FF              "
 4 2-17       0.356          35.6  "Common to Jsc, and Uoc    "
 5 2-17       0.0227          2.27 "Common to Jsc, and FF     "
 6 2-17       0.0471          4.71 "Common to Uoc, and FF     "
 7 2-17       0.259          25.9  Common to Jsc, Uoc, and FF  
 8 2-17       1             100    "Total                     "
 9 2-18       0.0414          4.14 "Unique to Jsc             "
10 2-18       0.231          23.1  "Unique to Uoc             "

For previous versions of tidyr, the unnest() code looks slightly different.

dat %>%
     mutate(CC = map(CC, as.data.frame) ) %>%
     unnest(CC, Variance_Type )