13
votes

I understand how to use map to iterate over arguments in a df and create a new list column.

For example,

# Full factorial grid over the four parameters (3 * 3 * 2 * 2 = 36 rows).
params <- expand.grid(
  param_a = c(2, 4, 6),
  param_b = c(3, 6, 9),
  param_c = c(50, 100),
  param_d = c(1, 0)
)

# Build a list column with map(): each row gets a vector of `param_a` fives.
# as_tibble() is used because as.tbl() is deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::map(param_a, function(x) {
      rep(5, x)
    })
  )

However, how do I use the analogous syntax with pmap in the event that I want to specify more than 2 parameters?

# NOTE(review): this call fails as written. pmap() hands every element of the
# list (x, y, z, u) to the function by name, but function(x, y) accepts only
# two of them, which produces the "unused arguments" error quoted below.
df.preprocessed <- dplyr::as.tbl(params) %>%
  dplyr::mutate(test_var = purrr::pmap(list(x = param_a
                                     ,y = param_b
                                     ,z = param_c
                                     ,u = param_d), function(x, y){
                                        rep(5,x)*y
                                     }
  )
  )

Error output:

Error in mutate_impl(.data, dots) : Evaluation error: unused arguments (z = .l[[c(3, i)]], u = .l[[c(4, i)]]).

4

4 Answers

17
votes

With pmap, the first argument is a list, so you can pass it your data frame directly, and then name your arguments in your function with the same names as the columns in your data frame. You'll need unnest() to unpack the list elements returned by pmap():

# Pass the whole data frame to pmap(): every column is supplied as a named
# argument, so the function picks the columns it needs by name and `...`
# absorbs the remaining ones.
# as_tibble() is used because as.tbl() is deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::pmap(., function(param_a, param_b, ...) {
      rep(5, param_a) * param_b
    })
  ) %>%
  # Name the list column explicitly: a bare unnest() is deprecated in
  # tidyr >= 1.0.0, while unnest(test_var) works in old and new versions.
  tidyr::unnest(test_var)


> df.preprocessed
# A tibble: 144 x 5
   param_a param_b param_c param_d test_var
     <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
 1       2       3      50       1       15
 2       2       3      50       1       15
 3       4       3      50       1       15
 4       4       3      50       1       15
 5       4       3      50       1       15
 6       4       3      50       1       15
 7       6       3      50       1       15
 8       6       3      50       1       15
 9       6       3      50       1       15
10       6       3      50       1       15
# ... with 134 more rows
6
votes

How about using rowwise and mutate directly without map:

#' Repeat the value 5 `param_a` times and scale the result by `param_b`.
#'
#' @param param_a Number of repetitions.
#' @param param_b Multiplier applied to the repeated vector.
#' @return The numeric vector `rep(5, param_a) * param_b`.
my_fun <- function(param_a, param_b) {
  fives <- rep(5, times = param_a)
  fives * param_b
}
# Row-wise alternative to pmap(): call my_fun() once per row and wrap the
# result in list() so it is stored as a list column.
# as_tibble() is used because as.tbl() is deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::rowwise() %>%
  dplyr::mutate(test_var = list(my_fun(param_a, param_b))) %>%
  # Drop the row-wise grouping so later verbs work on the whole table again.
  dplyr::ungroup() %>%
  # Name the list column explicitly: a bare unnest() is deprecated in
  # tidyr >= 1.0.0, while unnest(test_var) works in old and new versions.
  tidyr::unnest(test_var)
2
votes

We could define the function with `...` so that it absorbs the arguments it does not use (here `z` and `u`):

#' Repeat the value 5 `x` times and multiply by `y`.
#'
#' @param x Number of repetitions.
#' @param y Multiplier applied to the repeated vector.
#' @param ... Ignored; lets callers pass extra arguments without an error.
#' @return The numeric vector `rep(5, x) * y`.
f1 <- function(x, y, ...) {
  rep(5, times = x) * y
}

# pmap() passes all four list elements to f1 by name; x and y are matched to
# f1's formals and the extra ones (z, u) fall into its `...`.
# as_tibble() is used because as.tbl() is deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::pmap(
      list(x = param_a, y = param_b, z = param_c, u = param_d),
      f1
    )
  )
df.preprocessed
# A tibble: 36 x 5
#   param_a param_b param_c param_d  test_var
#     <dbl>   <dbl>   <dbl>   <dbl>    <list>
# 1       2       3      50       1 <dbl [2]>
# 2       4       3      50       1 <dbl [4]>
# 3       6       3      50       1 <dbl [6]>
# 4       2       6      50       1 <dbl [2]>
# 5       4       6      50       1 <dbl [4]>
# 6       6       6      50       1 <dbl [6]>
# 7       2       9      50       1 <dbl [2]>
# 8       4       9      50       1 <dbl [4]>
# 9       6       9      50       1 <dbl [6]>
#10       2       3     100       1 <dbl [2]>
# ... with 26 more rows
2
votes

You can do this:

# Formula shorthand: in a purrr lambda `.x` and `.y` refer to the first and
# second elements of the list; the remaining elements (z, u) are ignored.
# as_tibble() is used because as.tbl() is deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::pmap(
      list(x = param_a, y = param_b, z = param_c, u = param_d),
      ~ rep(5, .x) * .y
    )
  )

or

# Formula shorthand with positional placeholders: `..1` and `..2` refer to the
# first and second elements of the list (x and y here).
# as_tibble() is used because as.tbl() is deprecated since dplyr 1.0.0.
df.preprocessed <- dplyr::as_tibble(params) %>%
  dplyr::mutate(
    test_var = purrr::pmap(
      list(x = param_a, y = param_b, z = param_c, u = param_d),
      ~ rep(5, ..1) * ..2
    )
  )

The second way is more general: `..1`, `..2`, `..3`, `..4`, etc. refer to the list elements by position, whereas `.x` and `.y` only reach the first two.