I'm trying to create a new column in my data frame that is based off another column having a certain unique value. I have successfully done this by using the case_when argument inside a mutate. However, I would like to use a str_detect because I have a great deal of data and I don't want to have to type every single unique value because most are the same just different by the last few numbers in the string.. I keep getting errors when I try to use str_detect inside of the case_when.. does any one know how I can achieve this?
dput of data:
dput(head(HUC_wq_summary,10))
structure(list(locid = c("21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380"), parameter = c("Alkalinity, Carbonate as CaCO3",
"Alkalinity, Carbonate as CaCO3", "Alkalinity, Carbonate as CaCO3",
"Alkalinity, Carbonate as CaCO3", "Alkalinity, Carbonate as CaCO3",
"Ammonia-nitrogen as N", "Ammonia-nitrogen as N", "Ammonia-nitrogen as N",
"Ammonia-nitrogen as N", "Barometric pressure"), ActivityStartDate = structure(c(13902,
13986, 14075, 14179, 14271, 13902, 13986, 14075, 14271, 13902
), class = "Date"), ActivityEndDate = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Samples = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), MinValue = c(26, 44, 40,
28, 25, 0.12, 0.01, 0.05, 0.13, 760), MaxValue = c(26, 44, 40,
28, 25, 0.12, 0.01, 0.05, 0.13, 760), Average = c(26, 44, 40,
28, 25, 0.12, 0.01, 0.05, 0.13, 760), HUC14 = c("HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070"
), Trib = c("HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070", "HUC02040201040070", "HUC02040201040070",
"HUC02040201040070")), .Names = c("locid", "parameter", "ActivityStartDate",
"ActivityEndDate", "Samples", "MinValue", "MaxValue", "Average",
"HUC14", "Trib"), row.names = c(NA, -10L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = c("locid", "parameter",
"ActivityStartDate"), drop = TRUE, indices = list(0L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L), group_sizes = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
locid = c("21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380", "21NJDEP1-01464380", "21NJDEP1-01464380",
"21NJDEP1-01464380"), parameter = c("Alkalinity, Carbonate as CaCO3",
"Alkalinity, Carbonate as CaCO3", "Alkalinity, Carbonate as CaCO3",
"Alkalinity, Carbonate as CaCO3", "Alkalinity, Carbonate as CaCO3",
"Ammonia-nitrogen as N", "Ammonia-nitrogen as N", "Ammonia-nitrogen as N",
"Ammonia-nitrogen as N", "Barometric pressure"), ActivityStartDate = structure(c(13902,
13986, 14075, 14179, 14271, 13902, 13986, 14075, 14271, 13902
), class = "Date")), row.names = c(NA, -10L), class = "data.frame", vars = c("locid",
"parameter", "ActivityStartDate"), drop = TRUE, .Names = c("locid",
"parameter", "ActivityStartDate")))
Code:
HUC_wq_summary<-HUC_wq_summary%>%
mutate(Trib=case_when(HUC14 == "HUC02040202100060" ~ "Pennsauken",
HUC14 == "HUC02040202100050" ~ "Pennsauken",
HUC14 == "HUC02040202100020" ~ "Pennsauken",
HUC14 == "HUC02040202100030" ~ "Pennsauken",
HUC14 == "HUC02040202100040" ~ "Pennsauken",
HUC14 == "HUC02040202100010" ~ "Pennsauken",
HUC14 == "HUC02040202150010" ~ "Raccoon",
HUC14 == "HUC02040202150060" ~ "Raccoon",
HUC14 == "HUC02040202150020" ~ "Raccoon",
HUC14 == "HUC02040202150040" ~ "Raccoon",
HUC14 == "HUC02040202150050" ~ "Raccoon",
HUC14 == "HUC02040202150030" ~ "Raccoon",
HUC14 == "HUC02040202080040"~ "Rancocas",
HUC14 == "HUC02040202080030"~ "Rancocas",
HUC14 == str_detect(HUC14,"HUC020402020600"~ "Rancocas"),TRUE ~ HUC14))
As you can see most of the HUC14 strings are the same for each just different by a few numbers.
str_detect
? It seems like this might actually be a regex question more than acase_when
question, since you already have acase_when
call setup – camillestr_detect(HUC14,"HUC020402020600") ~ "Rancocas"
– ChrisHUC14 == str_detect(HUC14,"HUC020402020600"~ "Rancocas")
doesn't make sense.str_detect
returns a true/false, so unlessHUC14
might contain a logical value, you wouldn't be comparing those two things. You also have the~
part of that line inside of youstr_detect
– camille