1) We can do this within dplyr
itself. Grouped by 'Species', summarise
the column we need i.e 'Sepal.length' by concatenating the mean
of the column and the subset of the full data column by making use of cur_group_id()
(No other packages except dplyr
is used)
library(dplyr)
iris %>%
group_by(Species) %>%
summarise(InGroup = c('Yes', 'No'), MeanSepalLength = c(mean(Sepal.Length),
mean(.$Sepal.Length[as.numeric(.$Species) != cur_group_id()])),
.groups = 'drop')
# A tibble: 6 x 3
# Species InGroup MeanSepalLength
# <fct> <chr> <dbl>
#1 setosa Yes 5.01
#2 setosa No 6.26
#3 versicolor Yes 5.94
#4 versicolor No 5.80
#5 virginica Yes 6.59
#6 virginica No 5.47
2) If we want to do this in multiple columns, use across
iris %>%
group_by(Species) %>%
summarise(InGroup = c('Yes', 'No'),
across(where(is.numeric), ~ c(mean(.),
mean(iris[[cur_column()]][
as.numeric(iris$Species) != cur_group_id()])), .names = 'Mean{.col}'),
.groups = 'drop')
-output
# A tibble: 6 x 6
# Species InGroup MeanSepal.Length MeanSepal.Width MeanPetal.Length MeanPetal.Width
# <fct> <chr> <dbl> <dbl> <dbl> <dbl>
#1 setosa Yes 5.01 3.43 1.46 0.246
#2 setosa No 6.26 2.87 4.91 1.68
#3 versicolor Yes 5.94 2.77 4.26 1.33
#4 versicolor No 5.80 3.20 3.51 1.14
#5 virginica Yes 6.59 2.97 5.55 2.03
#6 virginica No 5.47 3.10 2.86 0.786
3) If we need a function, that can be created as well
f1 <- function(dat, grp) {
grp_str <- rlang::as_string(rlang::ensym(grp))
dat %>%
group_by({{grp}}) %>%
summarise(InGroup = c('Yes', 'No'),
across(where(is.numeric), ~ c(mean(.),
mean(dat[[cur_column()]][
as.numeric(dat[[grp_str]]) != cur_group_id()])),
.names = 'Mean{.col}'), .groups = 'drop')
}
-testing
f1(iris, Species)
# A tibble: 6 x 6
# Species InGroup MeanSepal.Length MeanSepal.Width MeanPetal.Length MeanPetal.Width
# <fct> <chr> <dbl> <dbl> <dbl> <dbl>
#1 setosa Yes 5.01 3.43 1.46 0.246
#2 setosa No 6.26 2.87 4.91 1.68
#3 versicolor Yes 5.94 2.77 4.26 1.33
#4 versicolor No 5.80 3.20 3.51 1.14
#5 virginica Yes 6.59 2.97 5.55 2.03
#6 virginica No 5.47 3.10 2.86 0.786
or with diamonds
f1(diamonds, cut)
# A tibble: 10 x 9
# cut InGroup Meancarat Meandepth Meantable Meanprice Meanx Meany Meanz
# <ord> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 Fair Yes 1.05 64.0 59.1 4359. 6.25 6.18 3.98
# 2 Fair No 0.790 61.7 57.4 3920. 5.72 5.72 3.53
# 3 Good Yes 0.849 62.4 58.7 3929. 5.84 5.85 3.64
# 4 Good No 0.793 61.7 57.3 3933. 5.72 5.72 3.53
# 5 Very Good Yes 0.806 61.8 58.0 3982. 5.74 5.77 3.56
# 6 Very Good No 0.796 61.7 57.3 3919. 5.73 5.72 3.53
# 7 Premium Yes 0.892 61.3 58.7 4584. 5.97 5.94 3.65
# 8 Premium No 0.766 61.9 57.0 3709. 5.65 5.66 3.50
# 9 Ideal Yes 0.703 61.7 56.0 3458. 5.51 5.52 3.40
#10 Ideal No 0.861 61.8 58.5 4249. 5.88 5.88 3.63
4) Or another option would be to take the difference in sum
and divide by the difference in number of rows
iris %>%
group_by(Species) %>%
summarise(InGroup = c('Yes', 'No'), across(where(is.numeric),
~ c(mean(.), (sum(iris[[cur_column()]]) -
sum(.))/(nrow(iris) - n())), .names = 'Mean{.col}'), .groups = 'drop')