1
votes

I have the following data…

have_df <- tibble(
    year = c(2015, 2016, 2017, 2018, 2019, 2015, 2016, 2017, 2018, 2019),
    da_assist = c(0, 0, 0, 1, 1, 0, 0, 0, 0, 0),
    priority = c(NA, NA, NA, "Priority4", "Priority4", NA, NA, NA, NA, NA),
    nces_dist = c(065441, 065441, 065441, 065441, 065441,074911, 074911, 074911, 074911, 074911))

I would like to use dplyr to say when ‘priority’ in 2018 = ‘Priority4’ then turn the NAs for ‘priority’ in 2015, 2016, and 2017 to 'Priority4'. I want to change the values in the priority variable only for those three years for the specific ids (nces_dist) where ‘priority’ in 2018 = ‘Priority4’, so the data would look like this:

need_df <- tibble(
    year = c(2015, 2016, 2017, 2018, 2019, 2015, 2016, 2017, 2018, 2019),
    da_assist = c(0, 0, 0, 1, 1, 0, 0, 0, 0, 0),
    priority = c("Priority4", "Priority4", "Priority4","Priority4", "Priority4", NA, NA, NA, NA, NA), 
    nces_dist = c(065441, 065441, 065441, 065441, 065441,074911, 074911, 074911, 074911, 074911)

I’ve tried searching through a dozen mutate posts but can’t find a way to mutate a subset of a variable using a subset from another variable. Thanks.

3

3 Answers

0
votes

For each nces_dist you could fill the priority values in "up" direction.

library(dplyr)
library(tidyr)

need_df <- have_df %>% group_by(nces_dist) %>% fill(priority, .direction = 'up')
need_df

#    year da_assist priority  nces_dist
#   <dbl>     <dbl> <chr>         <dbl>
# 1  2015         0 Priority4     65441
# 2  2016         0 Priority4     65441
# 3  2017         0 Priority4     65441
# 4  2018         1 Priority4     65441
# 5  2019         1 Priority4     65441
# 6  2015         0 NA            74911
# 7  2016         0 NA            74911
# 8  2017         0 NA            74911
# 9  2018         0 NA            74911
#10  2019         0 NA            74911
0
votes

We can use replace

library(dplyr)
have_df %>% 
    mutate(priority = replace(priority,
             seq_len(match(2018, year)), priority[year == 2018][1]))

-output

# A tibble: 10 x 4
#    year da_assist priority  nces_dist
#   <dbl>     <dbl> <chr>         <dbl>
# 1  2015         0 Priority4     65441
# 2  2016         0 Priority4     65441
# 3  2017         0 Priority4     65441
# 4  2018         1 Priority4     65441
# 5  2019         1 Priority4     65441
# 6  2015         0 <NA>          74911
# 7  2016         0 <NA>          74911
# 8  2017         0 <NA>          74911
# 9  2018         0 <NA>          74911
#10  2019         0 <NA>          74911
0
votes

The following code should also help, if I have correctly understood

have_df %>% 
  mutate(dummy = ifelse(year == 2018 & priority == "Priority4", 1, 0)) %>%
  group_by(nces_dist) %>%
  mutate(dummy = ifelse(is.na(dummy) == T, 0, dummy),
         dummy = cumsum(dummy),
         priority = ifelse(last(dummy) >0, "Priority4", NA)) %>%
  select(-dummy)

# A tibble: 10 x 4
# Groups:   nces_dist [2]
    year da_assist priority  nces_dist
   <dbl>     <dbl> <chr>         <dbl>
 1  2015         0 Priority4     65441
 2  2016         0 Priority4     65441
 3  2017         0 Priority4     65441
 4  2018         1 Priority4     65441
 5  2019         1 Priority4     65441
 6  2015         0 NA            74911
 7  2016         0 NA            74911
 8  2017         0 NA            74911
 9  2018         0 NA            74911
10  2019         0 NA            74911

Cumulative sum on dummy variable will change the last of each group (grouped on Ids) to 1 (or >0) if anywhere a condition is met; Thereafter Priority column can be mutated easily