0
votes

How do we compute column sums across the elements of a nested list? I have a list of 10 elements, each of which is a sublist of 50 matrices; every matrix has 4 columns, but the number of rows differs. Here is a simpler example. I need to add up the columns across all the lists, i.e. I need the vector colsums.ll <- c(26, 66, 106, 146). I tried do.call("rbind", ll) to collapse the list into one data frame and then apply colSums, but was not successful.

    # Two integer matrices with 4 columns each but different row counts.
    l1 <- matrix(1:8, nrow = 2)
    l2 <- matrix(1:16, nrow = 4)
    # One sublist holding both matrices, repeated to build the nested list.
    l <- list(l1, l2)
    ll <- list(l, l)
     ll
    [[1]]
   [[1]][[1]]
        [,1] [,2] [,3] [,4]
  [1,]    1    3    5    7
  [2,]    2    4    6    8

   [[1]][[2]]
         [,1] [,2] [,3] [,4]
    [1,]    1    5    9   13
    [2,]    2    6   10   14
    [3,]    3    7   11   15
    [4,]    4    8   12   16


    [[2]]
    [[2]][[1]]
         [,1] [,2] [,3] [,4]
    [1,]    1    3    5    7
    [2,]    2    4    6    8

    [[2]][[2]]
        [,1] [,2] [,3] [,4]
    [1,]    1    5    9   13
    [2,]    2    6   10   14
    [3,]    3    7   11   15
    [4,]    4    8   12   16
4
lapply(unlist(ll, recursive = FALSE), colSums) OR lapply(lapply(rapply(ll, enquote, how="unlist"), eval), colSums) - d.b
Thanks. But that just sums first columns of each submatrix. I need a sum of first column of all the elements of the list. In the above case the answer should be c(26, 66, 106, 146). - user24318

4 Answers

2
votes

Here are some ways:

1) Flatten the first level of ll, take the column sums and then take the row sums of the result:

rowSums(sapply(do.call(c, ll), colSums))
## [1]  26  66 106 146

2) Another way is, after flattening, to rbind all the matrices together and then take colSums of the result. Note that, in contrast to (1), this one uses no sapply or lapply.

colSums(do.call(rbind, do.call(c, ll)))
## [1]  26  66 106 146

3) The above do not use any packages but could be translated to use magrittr like this where the first pipeline corresponds to (1) and the second to (2)

library(magrittr)

ll %>% do.call(what = c) %>% sapply(colSums) %>% rowSums
## [1]  26  66 106 146

ll %>% do.call(what = c) %>% do.call(what = rbind) %>% colSums
## [1]  26  66 106 146
1
votes
colSums(do.call(rbind, lapply(lapply(rapply(ll, enquote, how="unlist"), eval), colSums)))
[1]  26  66 106 146
1
votes

Here's a purrr option:

library(purrr)

ll %>% flatten() %>%    # reduce depth to one level
    map(colSums) %>%    # get column sums of each element
    reduce(`+`)         # add respective colSums
#> [1]  26  66 106 146

or reduce it to a single matrix first:

ll %>% flatten() %>% reduce(rbind) %>% colSums()    

or use invoke instead of reduce, which saves calls at the cost of slightly-less-beautiful syntax:

ll %>% flatten() %>% invoke(rbind, .) %>% colSums()

All return the same thing.

1
votes

I like Reduce in base R, so you could do:

Reduce("+", lapply(Reduce(c, ll), colSums))
# [1]  26  66 106 146

BENCHMARKING

# Fix the RNG seed so the sampled matrices are reproducible.
set.seed(100)
# 100,000 sampled values laid out in 1000 columns -> 100 x 1000 integer matrix.
l1 <- matrix(sample(100, 100000, replace = TRUE),ncol=1000)
# 1,000,000 sampled values laid out in 1000 columns -> 1000 x 1000 integer matrix.
l2 <- matrix(sample(100, 1000000, replace = TRUE),ncol=1000)
# One sublist holding both matrices.
l <- list(l1,l2)
# Nested benchmark input: 100 copies of the same two-matrix sublist.
ll <- rep(list(l), 100)
# Inspect the structure of the nested list (recorded output follows).
str(ll)
# List of 100
 # $ :List of 2
  # ..$ : int [1:100, 1:1000] 31 26 56 6 47 49 82 38 55 18 ...
  # ..$ : int [1:1000, 1:1000] 45 22 77 7 85 32 80 71 52 38 ...
 # $ :List of 2
  # ..$ : int [1:100, 1:1000] 31 26 56 6 47 49 82 38 55 18 ...
  # ..$ : int [1:1000, 1:1000] 45 22 77 7 85 32 80 71 52 38 ...
 # $ :List of 2
  # ..$ : int [1:100, 1:1000] 31 26 56 6 47 49 82 38 55 18 ...
  # ..$ : int [1:1000, 1:1000] 45 22 77 7 85 32 80 71 52 38 ...
 # $ :List of 2
  # ..$ : int [1:100, 1:1000] 31 26 56 6 47 49 82 38 55 18 ...
  # ..$ : int [1:1000, 1:1000] 45 22 77 7 85 32 80 71 52 38 ...

library(purrr)
library(microbenchmark)
library(magrittr)

# Base R, approach (1): flatten the outer list one level with do.call(c, ll),
# take colSums() of each element via sapply(), then add the per-element
# results together with rowSums().
# NOTE(review): relies on sapply() simplifying to a matrix, i.e. presumably
# every element is a matrix with the same number (> 1) of columns -- confirm
# before using on other inputs.
f_G.Grothendieck1 <- function(ll) rowSums(sapply(do.call(c, ll), colSums)) 
# Base R, approach (2): flatten the outer list one level, rbind() all the
# matrices into a single matrix, then call colSums() once on the result.
f_G.Grothendieck2 <- function(ll) colSums(do.call(rbind, do.call(c, ll)))
# magrittr pipeline form of approach (1): flatten one level, per-element
# colSums() via sapply(), then rowSums(). Needs `%>%` (magrittr) attached.
f_G.Grothendieck3 <- function(ll) 
    ll %>% do.call(what = c) %>% sapply(colSums) %>% rowSums
# magrittr pipeline form of approach (2): flatten one level, rbind() all
# matrices together, then a single colSums(). Needs `%>%` (magrittr) attached.
f_G.Grothendieck4 <- function(ll) 
    ll %>% do.call(what = c) %>% do.call(what = rbind) %>% colSums
# Base R via rapply(): enquote() is applied to every leaf of the nested list
# and how = "unlist" flattens the result; eval() then turns each quoted leaf
# back into its matrix. colSums() is taken per matrix, the resulting vectors
# are stacked with rbind(), and a final colSums() adds them up.
# NOTE(review): the enquote/eval round trip appears to exist so that
# flattening keeps each matrix as one element instead of unlisting its
# individual values -- confirm.
f_d.b <- function(ll) 
  colSums(do.call(rbind, lapply(lapply(rapply(ll, enquote, how="unlist"), eval), colSums)))
# purrr version: flatten() reduces the list depth by one level, map(colSums)
# gets the column sums of each element, and reduce(`+`) adds the respective
# column sums together. Needs purrr (and `%>%`) attached.
# NOTE(review): flatten() is, as far as I know, superseded in purrr >= 1.0 --
# confirm against the installed version before reuse.
f_alistaire1 <- function(ll) ll %>% flatten() %>%  map(colSums) %>%  reduce(`+`)
# purrr version that builds one big matrix first: flatten() reduces the list
# depth by one level, invoke(rbind, .) binds all elements together with
# rbind, and colSums() is then taken once. Needs purrr (and `%>%`) attached.
# NOTE(review): invoke() and flatten() are, as far as I know, deprecated or
# superseded in purrr >= 1.0 -- confirm against the installed version.
f_alistaire2 <- function(ll) ll %>% flatten() %>% invoke(rbind, .) %>% colSums()
# Base R with Reduce(): Reduce(c, ll) concatenates the sublists into one flat
# list, lapply(..., colSums) computes the column sums of each element, and
# Reduce("+", ...) adds those vectors element-wise.
f_989 <- function(ll) Reduce("+", lapply(Reduce(c, ll), colSums))

# Reference result; each remaining implementation is compared against it.
# (The assignment itself prints nothing.)
# NOTE(review): the checks use exact `==` on numeric sums; presumably fine
# here because all matrix entries are integers -- confirm for other data.
r <- f_G.Grothendieck1(ll)
all(r == f_G.Grothendieck2(ll))
# [1] TRUE
all(r == f_G.Grothendieck3(ll))
# [1] TRUE
all(r == f_G.Grothendieck4(ll))
# [1] TRUE
all(r == f_d.b(ll))
# [1] TRUE
all(r == f_alistaire1(ll))
# [1] TRUE
all(r == f_alistaire2(ll))
# [1] TRUE
all(r == f_989(ll))
# [1] TRUE
# Time all eight implementations on the same nested-list input.
res <- microbenchmark(
f_G.Grothendieck1(ll), f_G.Grothendieck2(ll), f_G.Grothendieck3(ll), 
f_G.Grothendieck4(ll), f_d.b(ll), f_alistaire1(ll), f_alistaire2(ll), f_989(ll))

# Print the timing summary ordered by mean run time (recorded output follows).
print(res, order="mean")
    # Unit: milliseconds
                      # expr        min         lq       mean     median         uq        max neval
                 # f_989(ll)   84.67007   87.05084   87.50351   87.70766   88.25692   91.12715   100
          # f_alistaire1(ll)   85.00209   87.35116   87.83935   87.91318   88.32242   98.69927   100
                 # f_d.b(ll)   85.15563   87.74943   88.01660   88.23258   88.72280   89.89943   100
     # f_G.Grothendieck1(ll)   85.38729   87.77707   88.40864   88.45328   89.03604  100.78963   100
     # f_G.Grothendieck3(ll)   85.85933   87.85805   88.69445   88.68118   89.28618  104.93881   100
     # f_G.Grothendieck4(ll) 1150.27718 1200.80601 1205.76164 1206.48442 1211.72250 1310.64802   100
          # f_alistaire2(ll) 1178.14509 1202.61153 1207.05208 1205.89009 1211.49820 1325.72315   100
     # f_G.Grothendieck2(ll) 1177.02283 1204.55166 1210.40954 1208.95338 1213.82218 1278.82715   100