1
votes

Suppose we have the following dataframe:

# Three evenly spaced numeric columns of 21 values each, plus a random
# permutation of 1:3 that data.frame() recycles down all 21 rows.
df <- data.frame(
  Var1 = seq(1, 21, 1),
  Var2 = seq(-60, 0, 3),
  Var3 = seq(200, 300, 5),
  Sample = sample(1:3)
)
Var1 Var2 Var3 Sample
1 -60 200 3
2 -57 205 2
3 -54 210 1
... ... ... ...

I want to create a new variable whose value is selected from the column corresponding to the value in "Sample". That is, for the example above, the result should resemble

Var1 Var2 Var3 Sample Newvar
1 -60 200 3 200
2 -57 205 2 -57
3 -54 210 1 3
... ... ... ... ...

I'm working with dplyr, so I tried the following, but I'm not sure how to solve the fact that paste0 is not registering "Sample" as an object:

# Attempt from the question (does not work as intended): `!!` forces its
# operand to be evaluated before mutate() runs, so `Sample` is looked up
# outside the data frame instead of as a column of df.
df %>%
  mutate(Newvar = !!as.symbol(paste0("Var", Sample)))

Any help would be appreciated.

5

5 Answers

4
votes

You can use c_across().

# Work one row at a time: gather Var1..Var3 into a vector, pick the element
# at position Sample, then drop the row-wise grouping again.
rowwise(df) %>%
  mutate(newvar = c_across(Var1:Var3)[Sample]) %>%
  ungroup()

# # A tibble: 21 x 5
#     Var1  Var2  Var3 Sample newvar
#    <dbl> <dbl> <dbl>  <int>  <dbl>
#  1     1   -60   200      2    -60
#  2     2   -57   205      1      2
#  3     3   -54   210      3    210
# ...
0
votes
library(tidyverse)

# Build the example data: three numeric sequences of length 21 and a random
# permutation of 1:3 (recycled to 21 rows), with column names set in one step.
df <- setNames(
  data.frame(
    seq(1, 21, 1),
    seq(-60, 0, 3),
    seq(200, 300, 5),
    sample(1:3)
  ),
  c("Var1", "Var2", "Var3", "Sample")
)



# For each row, read the cell whose column position is given by Sample.
# ..1 is the Sample value (column index), ..2 is the row number.
# seq_along() replaces 1:length(.), which produces c(1, 0) on empty input.
df %>%
    mutate(Newbar = map2_dbl(Sample, seq_along(Sample),
                             ~ df[..2, ..1]))
#>    Var1 Var2 Var3 Sample Newbar
#> 1     1  -60  200      3    200
#> 2     2  -57  205      1      2
#> 3     3  -54  210      2    -54
#> 4     4  -51  215      3    215
#> 5     5  -48  220      1      5
#> 6     6  -45  225      2    -45
#> 7     7  -42  230      3    230
#> 8     8  -39  235      1      8
#> 9     9  -36  240      2    -36
#> 10   10  -33  245      3    245
#> 11   11  -30  250      1     11
#> 12   12  -27  255      2    -27
#> 13   13  -24  260      3    260
#> 14   14  -21  265      1     14
#> 15   15  -18  270      2    -18
#> 16   16  -15  275      3    275
#> 17   17  -12  280      1     17
#> 18   18   -9  285      2     -9
#> 19   19   -6  290      3    290
#> 20   20   -3  295      1     20
#> 21   21    0  300      2      0

Created on 2021-06-11 by the reprex package (v2.0.0)

0
votes

Using map2_dbl from purrr with column name as reference instead of index

library(dplyr)
library(purrr)
# Pair every row number with the column name derived from Sample
# ("Var1", "Var2" or "Var3") and read that single cell of df.
df$newvar <- map2_dbl(
  seq_len(nrow(df)),
  paste0("Var", df$Sample),
  ~ df[.x, .y]
)

df
#>    Var1 Var2 Var3 Sample newvar
#> 1     1  -60  200      1      1
#> 2     2  -57  205      3    205
#> 3     3  -54  210      2    -54
#> 4     4  -51  215      1      4
#> 5     5  -48  220      3    220
#> 6     6  -45  225      2    -45
#> 7     7  -42  230      1      7
#> 8     8  -39  235      3    235
#> 9     9  -36  240      2    -36
#> 10   10  -33  245      1     10
#> 11   11  -30  250      3    250
#> 12   12  -27  255      2    -27
#> 13   13  -24  260      1     13
#> 14   14  -21  265      3    265
#> 15   15  -18  270      2    -18
#> 16   16  -15  275      1     16
#> 17   17  -12  280      3    280
#> 18   18   -9  285      2     -9
#> 19   19   -6  290      1     19
#> 20   20   -3  295      3    295
#> 21   21    0  300      2      0

Created on 2021-06-12 by the reprex package (v2.0.0)

0
votes

Here's a base R solution. You restrict the selection to the first 3 columns with the first "[.]" and then "chain" the selection with a two-column (row, column) matrix inside the second "[.]":

# Keep only the three value columns, then index with a two-column
# (row, column) matrix so each row picks the column numbered by Sample.
# seq_len() is used instead of 1:nrow(df), which would generate the
# spurious indices c(1, 0) for a data frame with no rows.
df$Newvar <- df[1:3][cbind(seq_len(nrow(df)), df$Sample)]
#-------------
> df
   Var1 Var2 Var3 Sample Newvar
1     1  -60  200      1      1
2     2  -57  205      2    -57
3     3  -54  210      3    210
4     4  -51  215      1      4
5     5  -48  220      2    -48
6     6  -45  225      3    225
7     7  -42  230      1      7
8     8  -39  235      2    -39
9     9  -36  240      3    240
10   10  -33  245      1     10
11   11  -30  250      2    -30
12   12  -27  255      3    255
13   13  -24  260      1     13
14   14  -21  265      2    -21
15   15  -18  270      3    270
16   16  -15  275      1     16
17   17  -12  280      2    -12
18   18   -9  285      3    285
19   19   -6  290      1     19
20   20   -3  295      2     -3
21   21    0  300      3    300

I get different selections than akrun because there was no call to set.seed and because your call to sample was not sufficiently "rich". Maybe there is a different sample function in dplyr that has a different default replace argument? (No, that's not it. It's just that calling sample with only a single vector only gives you a permutation of that vector, so the pattern got repeated 7 times. You would have gotten an error if the number of rows in the dataframe was not an exact multiple of the length of the sampled vector.)

0
votes

you may use get along with rowwise()

# Reproducible example data: three columns of five values drawn without
# replacement from 1:100, plus a selector column drawn with replacement
# from 1:3. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
set.seed(1)
df <- data.frame(Var1 = sample(1:100, 5),
                 Var2 = sample(1:100, 5),
                 Var3 = sample(1:100, 5),
                 sample = sample(1:3, 5, replace = TRUE))

df
#>   Var1 Var2 Var3 sample
#> 1   68   43   97      3
#> 2   39   14   85      1
#> 3    1   82   21      3
#> 4   34   59   54      1
#> 5   87   51   74      1
library(dplyr)

# Row by row, build the column name ("Var" followed by the sample value)
# and fetch that column's value for the current row with get().
df %>%
  rowwise() %>%
  mutate(newcol = get(paste0("Var", sample)))

#> # A tibble: 5 x 5
#> # Rowwise: 
#>    Var1  Var2  Var3 sample newcol
#>   <int> <int> <int>  <int>  <int>
#> 1    68    43    97      3     97
#> 2    39    14    85      1     39
#> 3     1    82    21      3     21
#> 4    34    59    54      1     34
#> 5    87    51    74      1     87

Created on 2021-06-12 by the reprex package (v2.0.0)