0
votes

Say

data=structure(list(x1 = c(88L, 88L, 94L, 82L, 68L, 72L, 43L, 84L, 
65L, 91L, 65L, 80L, 82L, 63L, 67L, 58L, 100L, 32L, 75L, 66L, 
30L, 12L, 97L, 58L, 14L, 64L), group = structure(c(2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("female", "male"), class = "factor")), .Names = c("x1", 
"group"), class = "data.frame", row.names = c(NA, -26L))

In this data there is group variable (sex (male and female) I need get statistics mean and 25 percentile for ALL male which go before female. Male which after female, i don't touch. Also female i don't touch. So as output

x1  group   mean    25%
88  male    76,36   66,5
88  male    76,36   66,5
94  male    76,36   66,5
82  male    76,36   66,5
68  male    76,36   66,5
72  male    76,36   66,5
43  male    76,36   66,5
84  male    76,36   66,5
65  male    76,36   66,5
91  male    76,36   66,5
65  male    76,36   66,5
80  female      
82  female      
63  female      
67  female      
58  female      
100 female      
32  female      
75  male        
66  male        
30  male        
12  male        
97  male        
58  male        
14  male        
64  male        

How to do it?

edit

x1  group
88  male
88  male
94  male
82  male
68  male
72  male
43  male
84  male
65  male
91  male
65  male
80  female
82  female
63  female
67  female
58  female
100 female
32  female
**76,36 male
**76,36 male
30  male
12  male
**76,36 male
58  male
14  male
64  male

here result.

4
@SaurabhChauhan it is 25 percentilevarimax

4 Answers

4
votes

In data.table, you can edit the rows where rleid(group) == 1 i.e. the first group of rows, grouped by value of group.

library(data.table)
setDT(df)

df[rleid(group) == 1, `:=`(mean = mean(x1), Q25 = quantile(x1, 0.25))]

Result

#      x1  group     mean  Q25
#  1:  88   male 76.36364 66.5
#  2:  88   male 76.36364 66.5
#  3:  94   male 76.36364 66.5
#  4:  82   male 76.36364 66.5
#  5:  68   male 76.36364 66.5
#  6:  72   male 76.36364 66.5
#  7:  43   male 76.36364 66.5
#  8:  84   male 76.36364 66.5
#  9:  65   male 76.36364 66.5
# 10:  91   male 76.36364 66.5
# 11:  65   male 76.36364 66.5
# 12:  80 female       NA   NA
# 13:  82 female       NA   NA
# 14:  63 female       NA   NA
# 15:  67 female       NA   NA
# 16:  58 female       NA   NA
# 17: 100 female       NA   NA
# 18:  32 female       NA   NA
# 19:  75   male       NA   NA
# 20:  66   male       NA   NA
# 21:  30   male       NA   NA
# 22:  12   male       NA   NA
# 23:  97   male       NA   NA
# 24:  58   male       NA   NA
# 25:  14   male       NA   NA
# 26:  64   male       NA   NA
#      x1  group     mean  Q25
2
votes
library(dplyr)
library(data.table)

data %>%
  group_by(group, group2 = rleid(group)) %>%                       # group by gender and it's position
  mutate(MEAN = mean(x1[group=="male" & group2==1]),               # calculate metrics only for male in position 1
         Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
  ungroup() %>%                                                    # ungroup
  select(-group2) %>%                                              # remove column
  data.frame()                                                     # only for visualisation purposes

#     x1  group     MEAN  Q25
# 1   88   male 76.36364 66.5
# 2   88   male 76.36364 66.5
# 3   94   male 76.36364 66.5
# 4   82   male 76.36364 66.5
# 5   68   male 76.36364 66.5
# 6   72   male 76.36364 66.5
# 7   43   male 76.36364 66.5
# 8   84   male 76.36364 66.5
# 9   65   male 76.36364 66.5
# 10  91   male 76.36364 66.5
# 11  65   male 76.36364 66.5
# 12  80 female      NaN   NA
# 13  82 female      NaN   NA
# 14  63 female      NaN   NA
# 15  67 female      NaN   NA
# 16  58 female      NaN   NA
# 17 100 female      NaN   NA
# 18  32 female      NaN   NA
# 19  75   male      NaN   NA
# 20  66   male      NaN   NA
# 21  30   male      NaN   NA
# 22  12   male      NaN   NA
# 23  97   male      NaN   NA
# 24  58   male      NaN   NA
# 25  14   male      NaN   NA
# 26  64   male      NaN   NA

For updating x1 column according to the logic you mentioned you can use this:

data %>%
  group_by(group, group2 = rleid(group)) %>%                       
  mutate(MEAN = mean(x1[group=="male" & group2==1]),               
         Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
  ungroup() %>%
  mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1)) %>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()

#     x1  group     MEAN  Q25
# 1   88.00000   male 76.36364 66.5
# 2   88.00000   male 76.36364 66.5
# 3   94.00000   male 76.36364 66.5
# 4   82.00000   male 76.36364 66.5
# 5   68.00000   male 76.36364 66.5
# 6   72.00000   male 76.36364 66.5
# 7   43.00000   male 76.36364 66.5
# 8   84.00000   male 76.36364 66.5
# 9   65.00000   male 76.36364 66.5
# 10  91.00000   male 76.36364 66.5
# 11  65.00000   male 76.36364 66.5
# 12  80.00000 female      NaN   NA
# 13  82.00000 female      NaN   NA
# 14  63.00000 female      NaN   NA
# 15  67.00000 female      NaN   NA
# 16  58.00000 female      NaN   NA
# 17 100.00000 female      NaN   NA
# 18  32.00000 female      NaN   NA
# 19  76.36364   male      NaN   NA
# 20  66.00000   male      NaN   NA
# 21  30.00000   male      NaN   NA
# 22  12.00000   male      NaN   NA
# 23  76.36364   male      NaN   NA
# 24  58.00000   male      NaN   NA
# 25  14.00000   male      NaN   NA
# 26  64.00000   male      NaN   NA

The extra piece of code I added (mutate) updates x1 only for males after females (i.e. group2 = 3') and only ifx1` is bigger than the quantile value.

0
votes

Here is an alternative dplyr approach which summarises by rleid() group and uses a left_join() to append the result columns:

library(dplyr)
result <- data %>% 
  group_by(rleid = data.table::rleid(group)) %>% 
  left_join(., filter(., rleid == 1) %>% 
              summarise(mean = mean(x1), q25 = quantile(x1, 0.25))
  ) %>% 
  ungroup() %>%
  select(-rleid)
result %>% 
  print(n = Inf)   # make sure to print all rows
# A tibble: 26 x 4
      x1 group   mean   q25
   <int> <fct>  <dbl> <dbl>
 1    88 male    76.4  66.5
 2    88 male    76.4  66.5
 3    94 male    76.4  66.5
 4    82 male    76.4  66.5
 5    68 male    76.4  66.5
 6    72 male    76.4  66.5
 7    43 male    76.4  66.5
 8    84 male    76.4  66.5
 9    65 male    76.4  66.5
10    91 male    76.4  66.5
11    65 male    76.4  66.5
12    80 female  NA    NA  
13    82 female  NA    NA  
14    63 female  NA    NA  
15    67 female  NA    NA  
16    58 female  NA    NA  
17   100 female  NA    NA  
18    32 female  NA    NA  
19    75 male    NA    NA  
20    66 male    NA    NA  
21    30 male    NA    NA  
22    12 male    NA    NA  
23    97 male    NA    NA  
24    58 male    NA    NA  
25    14 male    NA    NA  
26    64 male    NA    NA

Note that data is not modified unless the result is assigned back to data.

0
votes

Here is also an alternative data.table approach which answers OP's original question as well as the additional question raised by the OP in comments here and here.

For both questions we need to compute the aggregates for the first group of males and then update data by reference through an update join for the first question and an update non-equi join for the second question.

Compute aggregates for first group of males

library(data.table)
# coerce to data.table, append rleid for later joins
setDT(data)[, rleid := rleid(group)][
  # ensure that x1 has the same type as mean(x1)
  , x1 := as.double(x1)]
agg <- data[rleid == 1, .(mean(x1), quantile(x1, .25)), by = rleid]
agg
   rleid       V1   V2
1:     1 76.36364 66.5

Original question: Append statistics for first male group

This is achieved by an update join

data[agg, on = "rleid", c("mean", "q25") := .(V1, V2)]
data[]
     x1  group rleid     mean  q25
 1:  88   male     1 76.36364 66.5
 2:  88   male     1 76.36364 66.5
 3:  94   male     1 76.36364 66.5
 4:  82   male     1 76.36364 66.5
 5:  68   male     1 76.36364 66.5
 6:  72   male     1 76.36364 66.5
 7:  43   male     1 76.36364 66.5
 8:  84   male     1 76.36364 66.5
 9:  65   male     1 76.36364 66.5
10:  91   male     1 76.36364 66.5
11:  65   male     1 76.36364 66.5
12:  80 female     2       NA   NA
13:  82 female     2       NA   NA
14:  63 female     2       NA   NA
15:  67 female     2       NA   NA
16:  58 female     2       NA   NA
17: 100 female     2       NA   NA
18:  32 female     2       NA   NA
19:  75   male     3       NA   NA
20:  66   male     3       NA   NA
21:  30   male     3       NA   NA
22:  12   male     3       NA   NA
23:  97   male     3       NA   NA
24:  58   male     3       NA   NA
25:  14   male     3       NA   NA
26:  64   male     3       NA   NA
     x1  group rleid     mean  q25

Note that data has been updated by reference, i.e., without copying.

Additional question: Modify selected values in second male group

The OP has requested to replace any x1 value in the second male group which exceeds the 25 percent quantile q25 computed for the first male group by the mean computed for the first male group. Note that the second male group is identified by rleid == 3L as the female group comes in between.

This can be achieved by an update non-equi join. The join condition selects only those rows which belong to rleid == 3L and where x1 is greater q25.

data[agg[, .(rleid = 3, V1, V2)], on = .(rleid, x1 > V2), x1 := V1][]
# remove helper column no longer needed
data[, rleid := NULL]
data[]
           x1  group     mean  q25
 1:  88.00000   male 76.36364 66.5
 2:  88.00000   male 76.36364 66.5
 3:  94.00000   male 76.36364 66.5
 4:  82.00000   male 76.36364 66.5
 5:  68.00000   male 76.36364 66.5
 6:  72.00000   male 76.36364 66.5
 7:  43.00000   male 76.36364 66.5
 8:  84.00000   male 76.36364 66.5
 9:  65.00000   male 76.36364 66.5
10:  91.00000   male 76.36364 66.5
11:  65.00000   male 76.36364 66.5
12:  80.00000 female       NA   NA
13:  82.00000 female       NA   NA
14:  63.00000 female       NA   NA
15:  67.00000 female       NA   NA
16:  58.00000 female       NA   NA
17: 100.00000 female       NA   NA
18:  32.00000 female       NA   NA
19:  76.36364   male       NA   NA
20:  66.00000   male       NA   NA
21:  30.00000   male       NA   NA
22:  12.00000   male       NA   NA
23:  76.36364   male       NA   NA
24:  58.00000   male       NA   NA
25:  14.00000   male       NA   NA
26:  64.00000   male       NA   NA
           x1  group     mean  q25

Please, note that rows 19 and 23 have been updated as requested. Again, data is updated by reference.