1
votes

I have a problem similar to the one described here (R - find first, second and third largest values by row). I would like to output the five highest values in each row, as described in the other thread. In addition, I need five more columns, each holding the name (column header) of the column that the corresponding value came from. As an example, I did this by hand for the `first` column below (names_first). I have been trying for hours, but without success.

# Example data: five numeric value columns (v1..v5); the last row is all NA.
df <- data.frame(
  v1 = c(0, 1, 2, 3, 4, NA),
  v2 = c(23, 6, 3, 21, 4, NA),
  v3 = c(22, 22, 24, 87, 6, NA),
  v4 = c(2, 32, 6, 58, 5, NA),
  v5 = c(5, 22, 65, 86, 4, NA)
)

# k-th largest value per row. Every ranking is restricted to the five value
# columns (1:5) so that the helper columns appended along the way never take
# part in a later ranking. sort() drops NAs, so an all-NA row yields NA.
df$first <- apply(df[1:5], 1, max)
df$second <- apply(df, 1, function(x) -sort(-x[1:5])[2])
df$third <- apply(df, 1, function(x) -sort(-x[1:5])[3])
df$fourth <- apply(df, 1, function(x) -sort(-x[1:5])[4])
df$fifth <- apply(df, 1, function(x) -sort(-x[1:5])[5])

# Hand-written example of the wanted "name" column: the header of the column
# that holds each row's largest value. It needs one entry per row, so the
# all-NA sixth row gets NA.
names_first <- c("v2", "v4", "v5", "v3", "v3", NA)
df <- cbind(df, names_first)
2
Could you post what you want your desired output to look like? – stlba

2 Answers

2
votes

You can sort in lapply after asplit and then simply cbind the results:

# Split df into its rows, order each row's entries from largest to smallest
# (NAs placed last) and keep the leading five. The names that survive on each
# vector are the headers of the columns the values came from.
tt <- lapply(
  asplit(df, 1),
  function(row) {
    ranked <- sort(row, decreasing = TRUE, na.last = TRUE)
    head(ranked, 5)
  }
)
#tt <- lapply(asplit(df, 1), sort, TRUE, TRUE) #With the given data also this will work
# Stack the per-row values and the per-row names into two matrices and append
# both to the original columns.
cbind(
  df,
  do.call(rbind, tt),
  do.call(rbind, lapply(tt, names))
)
#  v1 v2 v3 v4 v5 v2 v3 v5 v4 v1  1  2  3  4  5
#1  0 23 22  2  5 23 22  5  2  0 v2 v3 v5 v4 v1
#2  1  6 22 32 22 32 22 22  6  1 v4 v3 v5 v2 v1
#3  2  3 24  6 65 65 24  6  3  2 v5 v3 v4 v2 v1
#4  3 21 87 58 86 87 86 58 21  3 v3 v5 v4 v2 v1
#5  4  4  6  5  4  6  5  4  4  4 v3 v4 v1 v2 v5
#6 NA NA NA NA NA NA NA NA NA NA v1 v2 v3 v4 v5

With colnames:

# Five largest entries of every row, largest first (NAs last); each vector
# keeps the originating column headers as its names.
tt <- lapply(asplit(df, 1), function(row) {
  head(sort(row, decreasing = TRUE, na.last = TRUE), 5)
})
# Labels for the five rank positions; also used as the stem of the
# "names_*" columns.
n <- c("first", "second", "third", "fourth", "fifth")
# One new column per rank holding the value ...
df[n] <- do.call(rbind, tt)
# ... and one per rank holding the header of the column it came from.
df[sprintf("names_%s", n)] <- do.call(rbind, lapply(tt, names))
df
#  v1 v2 v3 v4 v5 first second third fourth fifth names_first names_second names_third names_fourth names_fifth
#1  0 23 22  2  5    23     22     5      2     0          v2           v3          v5           v4          v1
#2  1  6 22 32 22    32     22    22      6     1          v4           v3          v5           v2          v1
#3  2  3 24  6 65    65     24     6      3     2          v5           v3          v4           v2          v1
#4  3 21 87 58 86    87     86    58     21     3          v3           v5          v4           v2          v1
#5  4  4  6  5  4     6      5     4      4     4          v3           v4          v1           v2          v5
#6 NA NA NA NA NA    NA     NA    NA     NA    NA          v1           v2          v3           v4          v5
1
votes

You can do all of this in a single apply call. Using order we get the indices of the five largest values in each row. Using those indices we can add the top five values as well as the names of the columns they came from.

# Number of top values to extract from each row.
n <- 5
# Names of the new columns: col1..coln receive the values, name1..namen the
# headers of the columns those values were taken from.
cols <- paste0("col", seq_len(n))
name_cols <- paste0("name", seq_len(n))

df[c(cols, name_cols)] <- t(apply(df, 1, function(x) {
  # Positions of the n largest entries of this row (order() puts NAs last).
  inds <- order(x, decreasing = TRUE)[seq_len(n)]
  # Combining values and names in one vector coerces the values to
  # character; they are converted back below.
  c(x[inds], names(x)[inds])
}))
# Turn the value columns back into numbers. as.is = TRUE keeps the name
# columns as character and avoids the warning that R >= 4.0 raises when
# as.is is not specified.
df <- type.convert(df, as.is = TRUE)
df

#  v1 v2 v3 v4 v5 col1 col2 col3 col4 col5 name1 name2 name3 name4 name5
#1  0 23 22  2  5   23   22    5    2    0    v2    v3    v5    v4    v1
#2  1  6 22 32 22   32   22   22    6    1    v4    v3    v5    v2    v1
#3  2  3 24  6 65   65   24    6    3    2    v5    v3    v4    v2    v1
#4  3 21 87 58 86   87   86   58   21    3    v3    v5    v4    v2    v1
#5  4  4  6  5  4    6    5    4    4    4    v3    v4    v1    v2    v5