1
votes
# Attempt to flag outliers (values more than 1.5 * IQR beyond the quartiles,
# computed per factor/segment group) for every column of outlier_df.
# NOTE(review): `i` holds each column name as a character string, so IQR(i),
# quantile(i, ...) and the filter() comparison receive that string rather
# than the column's values. `outliers` is also reassigned on every pass, so
# only the last iteration's result would remain.
for (i in colnames(outlier_df)){
  outliers <- outlier_df %>%
    group_by(factor, segment) %>%
    mutate(hinge_spread = 1.5*IQR(i),
           lwr = quantile(i, .25) - hinge_spread,
           upr = quantile(i, .75) + hinge_spread) %>%
    filter(i > upr | i < lwr)}

I am trying to loop over all KPIs in the data frame and get the outliers for each one. The code works when I run it for a single KPI column, but inside the loop it fails with the following error:

Error in eval(substitute(expr), envir, enclos) : missing values and NaN's not allowed if 'na.rm' is FALSE In addition: Warning message: In quantile(as.numeric(x), c(0.25, 0.75), na.rm = na.rm, names = FALSE, : NAs introduced by coercion

1

1 Answers

2
votes

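Inside the loop, `i` is a string holding the column name, not the column itself. Either convert it to a symbol and evaluate it with `!!` (e.g. `!!rlang::sym(i)`), or look the column up through the `.data` pronoun (`.data[[i]]`). Also, instead of overwriting the same object in each iteration, assign each iteration's output to an element of a list: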

# Collect the outlier rows for every KPI column into a named list, one
# element per KPI, instead of overwriting a single object on each iteration.
# A value counts as an outlier when it lies more than 1.5 * IQR below the
# first quartile or above the third quartile of its (factor, segment) group.

# Screen only numeric columns other than the grouping keys: IQR() and
# quantile() fail on non-numeric input, and the keys are not KPIs.
group_cols <- c("factor", "segment")
kpi_cols <- setdiff(colnames(outlier_df), group_cols)
kpi_cols <- kpi_cols[vapply(outlier_df[kpi_cols], is.numeric, logical(1))]

outliers_list <- vector("list", length(kpi_cols))
names(outliers_list) <- kpi_cols

for (i in kpi_cols) {
  outliers_list[[i]] <- outlier_df %>%
    group_by(factor, segment) %>%
    # .data[[i]] looks the column up by name; a bare `i` would be the string
    # itself. na.rm = TRUE keeps missing values from aborting quantile().
    mutate(
      hinge_spread = 1.5 * IQR(.data[[i]], na.rm = TRUE),
      lwr = quantile(.data[[i]], 0.25, na.rm = TRUE) - hinge_spread,
      upr = quantile(.data[[i]], 0.75, na.rm = TRUE) + hinge_spread
    ) %>%
    # Drop the grouping so the returned tibbles are plain, ungrouped data.
    ungroup() %>%
    filter(.data[[i]] > upr | .data[[i]] < lwr)
}