1
votes

I would like to mutate (in this case transmute) new columns that add all columns which contain a certain pattern. The new columns should also be called the pattern. I would like to avoid pivoting the data into long format and avoid referring to strings attached to the pattern.

In the example below, 'b\\d+' is the pattern:

 df <- tribble(
  ~a1_b1, ~a1_b2, ~a1_b3, ~a2_b1, ~a2_b2, ~a2_b3, ~a3_b1, ~a3_b2, ~a3_b3, ~a4_b1, ~a4_b2, ~a4_b3,
  1,      2,      3,      4,      5,      6,      7,      8,      9,      10,     11,     12
)

Expected output:

df %>%
  transmute(b1 = a1_b1 + a2_b1 + a3_b1 + a4_b1,
         b2 = a1_b2 + a2_b2 + a3_b2 + a4_b2,
         b3 = a1_b3 + a2_b3 + a3_b3 + a4_b3)
# A tibble: 1 x 3
     b1    b2    b3
  <dbl> <dbl> <dbl>
1    22    26    30
1

1 Answers

2
votes

Without using pivot_longer

library(stringr)
library(dplyr)
df %>% 
  transmute(across(starts_with('a1'),
      ~ . + get(str_replace(cur_column(), '.*_', 'a2_')))) %>%
  rename_all(~ str_remove(., '.*_'))

-output

# A tibble: 1 x 3
#     b1    b2    b3
#  <dbl> <dbl> <dbl>
#1     5     7     9

Or with split.default

library(purrr)
df %>% 
  split.default(str_remove(names(.), '_.*')) %>% 
  reduce(`+`) %>%
  rename_all(~ str_remove(., '.*_')) 
#   b1 b2 b3
#1  5  7  9

Or using the updated dataset

df %>% 
   split.default(str_remove(names(.), '_.*')) %>% 
   reduce(`+`) %>%
   rename_all(~ str_remove(., '.*_')) 
#  b1 b2 b3
#1 22 26 30

Or using map

library(purrr)
names(df) %>%
  str_remove('.*_') %>% 
  unique %>% 
  map_dfc(~ df %>% 
           select(ends_with(.x)) %>%
           transmute(!! .x := rowSums(.)))
# A tibble: 1 x 3
#     b1    b2    b3
#  <dbl> <dbl> <dbl>
#1    22    26    30

Here is an option with pivot_longer

library(tidyr)
library(tibble)
df %>% 
   pivot_longer(cols = everything(), names_to = c(".value", 'group'),
      names_sep = "_") %>% 
  transmute(group, new = a1 + a2) %>%
  deframe %>% 
  as_tibble_row

Or another way is

df %>% 
   pivot_longer(cols = everything(), 
    names_to = c( 'group', '.value'), names_sep = "_") %>%
   summarise(across(c(b1:b3), sum))
# A tibble: 1 x 3
#     b1    b2    b3
#  <dbl> <dbl> <dbl>
#1    22    26    30