Here's some data.frame-centered options using the tidyverse for manipulations:
library(tidyverse)
set.seed(123)
mat <- matrix(rbinom(20, 100, 0.01), 4, 5,
dimnames = list(LETTERS[1:4], letters[1:5]))
rowKey <- c("A" = "N1", "B" = "N1", "C" = "N2", "D" = "N2")
output1 <- mat %>%
as.data.frame() %>%
rownames_to_column('N1') %>%
gather(N3, value, -N1) %>% # reshape to long form
crossing(N2 = .$N1) %>% # add combinations of rowname values
filter(N1 != N2, rowKey[N1] != rowKey[N2]) %>% # drop unwanted combinations
mutate(value = na_if(value, 0), # change 0 values to NA so sum will be 0
# sort rowname values to make group ID column for aggregation
id = map2_chr(N1, N2, ~toString(sort(c(.x, .y))))) %>%
group_by(id, N3) %>%
summarise(N1 = min(N1), # get alpabetically first rowname for N1
N2 = max(N2), # and last for N2
value = coalesce(sum(value), 0L)) %>% # sum and replace NAs with 0s again
# clean up
ungroup() %>%
select(N1, N2, N3, value) %>%
arrange(N2)
output1
#> # A tibble: 20 x 4
#> N1 N2 N3 value
#> <chr> <chr> <chr> <int>
#> 1 A C a 0
#> 2 A C b 4
#> 3 A C c 4
#> 4 A C d 0
#> 5 A C e 0
#> 6 B C a 3
#> 7 B C b 0
#> 8 B C c 4
#> 9 B C d 0
#> 10 B C e 0
#> 11 A D a 0
#> 12 A D b 5
#> 13 A D c 2
#> 14 A D d 3
#> 15 A D e 0
#> 16 B D a 4
#> 17 B D b 0
#> 18 B D c 2
#> 19 B D d 3
#> 20 B D e 0
Like expand.grid
, tidyr::crossing
expands more than necessary (e.g. A
/A
combinations), which may slow things down at scale. A combn
-based approach may be faster, if more annoying to write.
Splitting and using a self-join is a more direct way to create the combinations through adding columns instead of rows. It requires some light list gymnastics, either with split
:
output2 <- mat %>%
as.data.frame() %>%
rownames_to_column('N') %>%
gather(N3, value, -N) %>%
mutate(key = rowKey[N], # add column with key
value = na_if(value, 0)) %>%
split(.$key) %>% # split list by key
# join list elements to add N1/N2 and value combinations
reduce(full_join, by = 'N3', suffix = sub('N', '', names(.))) %>%
transmute(N1, N2, N3,
value = coalesce(value1 + value2, 0L)) %>%
arrange(N2, N1)
all_equal(output1, output2)
#> [1] TRUE
...or tidyr::nest
:
output3 <- mat %>%
as.data.frame() %>%
rownames_to_column('N') %>%
gather(N3, value, -N) %>%
mutate(key = rowKey[N],
value = na_if(value, 0)) %>%
nest(-key) %>% # store all but key column as nested data frame
# join nested data frames by N3 to get N1/N2 and value combinations
{ reduce(.$data, full_join, by = 'N3', suffix = sub('N', '', .$key)) } %>%
transmute(N1, N2, N3,
value = coalesce(value1 + value2, 0L)) %>%
arrange(N2, N1)
all_equal(output1, output3)
#> [1] TRUE
The reduce
calls could be replaced by purrr::invoke
/do.call
as reduce
only calls full_join
once, but reducing over a join is a common idiom and may make the approach easier to scale.