0
votes

I need to match each case in df1 to a shift in df2, based on multiple conditions, to create df3.

library(lubridate)

# Cases: one row per case, giving the person, the service date, the start
# time as a plain number (e.g. 845, 2333), and the procedure code.
df1 <- data.frame(
  Name = rep(c("Adams", "Ball", "Cash", "David"), times = c(4, 2, 2, 2)),
  Date.of.Service = ymd(c(
    "2005-10-01", "2005-10-01", "2005-10-01", "2005-10-02",
    "2005-10-01", "2005-10-01",
    "2005-10-01", "2005-10-02",
    "2005-10-01", "2005-10-02"
  )),
  StartTime = c(845, 955, 2333, 300, 1045, 1322, 1145, 344, 858, 123),
  Code = c("101", "500", "103", "104", "501", "103", "102", "106", "102", "109")
)

# Shifts: one row per person, shift date, and shift, with a label saying
# whether the shift is a Day, Night, or Full24 shift.
df2 <- data.frame(
  Name = rep(c("Adams", "Ball", "Cash", "David"), times = c(2, 1, 2, 2)),
  Date.of.Shift = ymd(rep("2005-10-01", 7)),
  Shift = c("CVCALL", "ORD", "OB", "ORD2", "OB", "SUP", "OB"),
  Day.Night.Shift = c("Full24", "Full24", "Day", "Day", "Night", "Day", "Full24")
)

The conditions:

  1. if a person has 1 shift in a day, then cases matching the shift date should go to that shift

  2. if df1$Code is a "heart code" and the person has a "CVCALL" shift, then assign the case to that shift

  3. if a person has 2 shifts in a day, then cases on that day should be assigned to shifts based on StartTime (The day shifts happen between 629 and 1629, night shifts happen between 2059 and 2359)

  4. if a case's StartTime is between 000 and 700 and the person was on a "Night" shift or a "Full24" shift the day before, the case should go to that shift (if they were on both a Night AND a Full24 shift, give NA)

I have tried the code below. The first left_join and mutate works, but I get an error when I get to the second left_join and mutate. Error in mutate_impl(.data, dots) : Evaluation error: object 'Day.Night.Shift' not found.

# dplyr provides the pipe and the join/mutate/filter/select verbs used below.
library(dplyr)

# Codes treated as "heart codes" when deciding whether a case belongs to a
# CVCALL shift (condition 2 above).
Heart.Codes <- c("500", "501")

# Build df3 by joining the cases in df1 to the shifts in df2 in three passes:
#   pass 1 - days with a single shift, plus CVCALL shifts for heart codes;
#   pass 2 - days with two shifts, resolved by StartTime;
#   pass 3 - shifts dated the day before the case.
# NOTE(review): as posted, this pipeline stops with an error at the pass-2
# mutate; the notes further down mark the lines involved.
df3 = df1 %>%
  # Bring in matching records in availability points.  Filter df2 to records that are either
  # (1) the only record for that person, or (2) CV shifts.
  left_join(df2 %>%
              group_by(Name, Date.of.Shift) %>%
              mutate(num.shifts = n()) %>%
              filter(num.shifts == 1 | Shift %in% c("CVCALL")),
            by = c("Name", "Date.of.Service" = "Date.of.Shift")) %>%
  # We want to keep Shift and ShiftDate for records from availability that are either
  # (1) the only record for that person, or (2) CV shifts that join to a
  # "heart" type in df1.
  # Shift is overwritten first, so the `Shift == "CVCALL"` tests in the
  # Date.of.Shift and Day.Night.Shift expressions see the already-updated Shift.
  # NOTE(review): the fallback is NA_integer_ while Shift holds text; whether
  # case_when accepts that mix depends on the dplyr version and on whether
  # Shift is a factor or a character vector - confirm.
  mutate(Shift = case_when(num.shifts == 1 ~ Shift,
                           Code %in% Heart.Codes & Shift == "CVCALL" ~ Shift,
                           T ~ NA_integer_),
         Date.of.Shift = case_when(num.shifts == 1 ~ Date.of.Service, 
                                   Code %in% Heart.Codes & Shift == "CVCALL" ~ Date.of.Service),
         Day.Night.Shift = case_when(num.shifts == 1 ~ Day.Night.Shift, 
                                     Code %in% Heart.Codes & Shift == "CVCALL" ~ Day.Night.Shift)) %>%
  # Drop num.shifts so the next join can add it again without a name clash.
  select(Name, Date.of.Service, StartTime, Code, Date.of.Shift, Shift, Day.Night.Shift) %>% 
  # assign correct shift when there are two shifts. Filter df2 to records that have two shifts in a day.
  left_join(df2 %>%
              group_by(Name, Date.of.Shift) %>%
              mutate(num.shifts = n()) %>% 
              filter(num.shifts == 2),
            by = c("Name", "Date.of.Service" = "Date.of.Shift")) %>%
  # NOTE(review): Shift and Day.Night.Shift now exist on both sides of the join
  # and are not join keys, so the joined table carries them as Shift.x/Shift.y
  # and Day.Night.Shift.x/Day.Night.Shift.y. The unsuffixed names used in this
  # mutate no longer exist, which is the reported
  # "object 'Day.Night.Shift' not found" error.
  # The comparisons are strict, so StartTime values of exactly 629, 1629, 2059
  # and 2359 match neither branch.
  mutate(Shift = case_when(num.shifts == 2 & StartTime > 629 & StartTime < 1629 & Day.Night.Shift == "Day" ~ Shift,
                           num.shifts == 2 & StartTime > 2059 & StartTime < 2359 & Day.Night.Shift == "Night" ~ Shift,
                           T ~ NA_integer_),
         Date.of.Shift = case_when(num.shifts == 2 & StartTime > 629 & StartTime < 1629 & Day.Night.Shift == "Day" ~ Date.of.Shift,
                                   num.shifts == 2 & StartTime > 2059 & StartTime < 2359 & Day.Night.Shift == "Night" ~ Date.of.Shift)) %>%
  # NOTE(review): this select also names Day.Night.Shift, which is suffixed
  # after the join above.
  select(Name, Date.of.Service, StartTime, Code, Date.of.Shift, Shift, Day.Night.Shift) %>% 
  # Bring in records whose shift date is the day before the case date.
  # NOTE(review): no filter on Day.Night.Shift is applied here, so the
  # Night/Full24 restriction of condition 4 is not enforced by this join.
  left_join(df2 %>%
            group_by(Name, Date.of.Shift) %>%
            mutate(ShiftDateOneDayLater = Date.of.Shift + 1),
          by = c("Name", "Date.of.Service" = "ShiftDateOneDayLater")) %>%
  # Keep Shift and Date of Shift only if StartTime is between 0000 and 0659.
  # Values already assigned in earlier passes (the .x columns) take precedence.
  # NOTE(review): df1 names this column StartTime; no Start.Time column is
  # defined in df1 or df2. The strict comparisons also exclude exactly 0 and 659.
  mutate(Shift = case_when(!is.na(Shift.x) ~ Shift.x,
                         Start.Time > 0 & Start.Time < 659 ~ Shift.y),
       Date.of.Shift = case_when(!is.na(Date.of.Shift.x) ~ Date.of.Shift.x,
                                 Start.Time > 0 & Start.Time < 659 ~ Date.of.Shift.y)) %>%
  # NOTE(review): Day.Night.Shift is presumably suffixed (.x/.y) after the join
  # above as well, and the mutate does not recreate it - confirm before
  # selecting it here.
  select(Name, Date.of.Service, StartTime, Code, Date.of.Shift, Shift, Day.Night.Shift)

Based on these conditions, the code would produce this new df3 dataframe.

# Expected result: the df1 cases with the matched shift date, shift, and
# Day/Night/Full24 label appended; NA marks a case with no matching shift.
df3 <- data.frame(
  Name = rep(c("Adams", "Ball", "Cash", "David"), times = c(4, 2, 2, 2)),
  Date.of.Service = ymd(c(
    "2005-10-01", "2005-10-01", "2005-10-01", "2005-10-02",
    "2005-10-01", "2005-10-01",
    "2005-10-01", "2005-10-02",
    "2005-10-01", "2005-10-02"
  )),
  StartTime = c(845, 955, 2333, 300, 1045, 1322, 1145, 344, 858, 123),
  Code = c("101", "500", "103", "104", "501", "103", "102", "106", "102", "109"),
  Date.of.Shift = ymd(c(
    "2005-10-01", "2005-10-01", "2005-10-01", "2005-10-01",
    "2005-10-01", "2005-10-01",
    "2005-10-01", "2005-10-01",
    NA, "2005-10-01"
  )),
  Shift = c("ORD", "CVCALL", "ORD", "ORD", "OB", "OB", "ORD2", "OB", NA, "OB"),
  Day.Night.Shift = c(
    "Full24", "Full24", "Full24", "Full24",
    "Day", "Day",
    "Day", "Night",
    NA, "Full24"
  )
)
1
So just to be clear: df3 is your expected output? Or df3 is as far as you've gotten with your existing code? - Maurits Evers
df3 is my expected output - Cat

1 Answer

0
votes

It's giving this error message because, in the second join, both the left and right tables have a column called Day.Night.Shift. When the tables have a column with the same name (and that column isn't part of the join), dplyr automatically renames them to Day.Night.Shift.x and Day.Night.Shift.y. I find it helpful to run everything up to the join in order to see what's going on:

df3 <- df1 %>%
  # Pass 1 join: attach df2 shifts for the same person and day, keeping only
  # shifts that are the person's sole shift that day or that are CVCALL shifts.
  left_join(
    df2 %>%
      group_by(Name, Date.of.Shift) %>%
      mutate(num.shifts = n()) %>%
      filter(Shift %in% c("CVCALL") | num.shifts == 1),
    by = c("Name", "Date.of.Service" = "Date.of.Shift")
  ) %>%
  # Keep the joined shift details only for sole-shift days, or for a CVCALL
  # shift matched to a heart code. Shift is rewritten first, so the two
  # expressions after it test the already-updated Shift.
  mutate(
    Shift = case_when(
      num.shifts == 1 ~ Shift,
      Shift == "CVCALL" & Code %in% Heart.Codes ~ Shift,
      TRUE ~ NA_integer_
    ),
    Date.of.Shift = case_when(
      num.shifts == 1 ~ Date.of.Service,
      Shift == "CVCALL" & Code %in% Heart.Codes ~ Date.of.Service
    ),
    Day.Night.Shift = case_when(
      num.shifts == 1 ~ Day.Night.Shift,
      Shift == "CVCALL" & Code %in% Heart.Codes ~ Day.Night.Shift
    )
  ) %>%
  select(
    Name, Date.of.Service, StartTime, Code,
    Date.of.Shift, Shift, Day.Night.Shift
  ) %>%
  # Pass 2 join: attach df2 shifts from days on which the person has exactly
  # two shifts. The pipeline stops here so the joined column names can be
  # inspected.
  left_join(
    df2 %>%
      group_by(Name, Date.of.Shift) %>%
      mutate(num.shifts = n()) %>%
      filter(num.shifts == 2),
    by = c("Name", "Date.of.Service" = "Date.of.Shift")
  )

You can eliminate the error by referring to either Day.Night.Shift.x or Day.Night.Shift.y, as appropriate, in the mutate (and in the following select).