
I would like to create a custom Precision-Recall AUC measure in mlr3.

I am following the mlr3 book chapter on creating custom measures.

I feel I'm almost there, but R throws an annoying error that I don't know how to interpret.

Let's define the measure:

PRAUC = R6::R6Class("PRAUC",
  inherit = mlr3::MeasureClassif,
    public = list(
      initialize = function() {
        super$initialize(
          # custom id for the measure
          id = "classif.prauc",

          # additional packages required to calculate this measure
          packages = c('PRROC'),

          # properties, see below
          properties = character(),

          # required predict type of the learner
          predict_type = "prob",

          # feasible range of values
          range = c(0, 1),

          # minimize during tuning?
          minimize = FALSE
        )
      }
    ),

    private = list(
      # custom scoring function operating on the prediction object
      .score = function(prediction, ...) {

        # PRROC::pr.curve expects a numeric binary response: positive class = 1, negative class = 0
        truth1 <- ifelse(prediction$truth == levels(prediction$truth)[1], 1, 0)
        PRROC::pr.curve(scores.class0 = prediction$prob, weights.class0 = truth1)

      }
    )
)

mlr3::mlr_measures$add("classif.prauc", PRAUC)

Let's see if it works:

task_sonar <- tsk('sonar')
learner <- lrn('classif.rpart', predict_type = 'prob')
learner$train(task_sonar)
pred <- learner$predict(task_sonar)
pred$score(msr('classif.prauc'))

# Error in if (sum(weights < 0) != 0) { : 
#  missing value where TRUE/FALSE needed 

Here's the traceback:

11.
check(length(sorted.scores.class0), weights.class0) 
10.
compute.pr(scores.class0, scores.class1, weights.class0, weights.class1, 
    curve, minStepSize, max.compute, min.compute, rand.compute, 
    dg.compute) 
9.
PRROC::pr.curve(scores.class0 = prediction$prob, weights.class0 = truth1) 
8.
measure$.__enclos_env__$private$.score(prediction = prediction, 
    task = task, learner = learner, train_set = train_set) 
7.
measure_score(self, prediction, task, learner, train_set) 
6.
m$score(prediction = self, task = task, learner = learner, train_set = train_set) 
5.
FUN(X[[i]], ...) 
4.
vapply(.x, .f, FUN.VALUE = .value, USE.NAMES = FALSE, ...) 
3.
map_mold(.x, .f, NA_real_, ...) 
2.
map_dbl(measures, function(m) m$score(prediction = self, task = task, 
    learner = learner, train_set = train_set)) 
1.
pred$score(msr("classif.prauc")) 

It seems like the error originates in PRROC::pr.curve. However, calling this function directly on the actual prediction object pred works just fine:

PRROC::pr.curve(
  scores.class0 = pred$prob[, 1], 
  weights.class0 =  ifelse(pred$truth == levels(pred$truth)[1], 1, 0)
)

#  Precision-recall curve
#
#    Area under curve (Integral):
#     0.9081261
#
#    Area under curve (Davis & Goadrich):
#     0.9081837 
#
#    Curve not computed ( can be done by using curve=TRUE )

One likely scenario is that, inside PRAUC, the weights.class0 argument passed to PRROC::pr.curve is NA rather than numeric, which would make pr.curve fail. I haven't been able to confirm this, and even if that is what's happening, I don't know why.
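One way to narrow it down is to inspect, outside the measure, exactly what the scoring function would receive (a quick sketch, using the pred object from above):

# pred$prob is a numeric matrix with one column per class, not a vector
str(pred$prob)

# truth1 has one entry per observation...
truth1 <- ifelse(pred$truth == levels(pred$truth)[1], 1, 0)
length(truth1)     # 208, one per row of sonar

# ...whereas the full probability matrix has twice as many elements
length(pred$prob)  # 416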

There may be other scenarios that I haven't thought of. Any help will be much appreciated.

EDIT

missuse's answer helped me realize why my measure isn't working. First,

PRROC::pr.curve(scores.class0 = prediction$prob, weights.class0 = truth1)

should be

PRROC::pr.curve(scores.class0 = prediction$prob[, 1], weights.class0 = truth1).

Second, the function pr.curve returns an object of class PRROC, while the mlr3 measure I've defined expects a single numeric value. So it should be

PRROC::pr.curve(scores.class0 = prediction$prob[, 1], weights.class0 = truth1)[[2]]

or

PRROC::pr.curve(scores.class0 = prediction$prob[, 1], weights.class0 = truth1)[[3]],

depending on the method used to compute the AUC (see ?PRROC::pr.curve).
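If I read ?PRROC::pr.curve correctly, the two AUC variants can also be extracted by name rather than by position, which is a little less fragile (sketch, again using pred from above):

truth1 <- ifelse(pred$truth == levels(pred$truth)[1], 1, 0)
prc <- PRROC::pr.curve(scores.class0 = pred$prob[, 1], weights.class0 = truth1)

prc$auc.integral         # same value as prc[[2]]
prc$auc.davis.goadrich   # same value as prc[[3]]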

Note that although MLmetrics::PRAUC is far less confusing than PRROC::pr.curve, the former seems to be poorly implemented.

Here's an implementation of the measure with PRROC::pr.curve that actually works:

PRAUC = R6::R6Class("PRAUC",
  inherit = mlr3::MeasureClassif,
    public = list(
      initialize = function() {
        super$initialize(
          # custom id for the measure
          id = "classif.prauc",

          # additional packages required to calculate this measure
          packages = c('PRROC'),

          # properties, see below
          properties = character(),

          # required predict type of the learner
          predict_type = "prob",

          # feasible range of values
          range = c(0, 1),

          # minimize during tuning?
          minimize = FALSE
        )
      }
    ),

    private = list(
      # custom scoring function operating on the prediction object
      .score = function(prediction, ...) {

        # Looks like in mlr3 the positive class of a binary task is always the
        # first factor level and the first of the two probability columns
        truth1 <- ifelse(prediction$truth == levels(prediction$truth)[1], 1, 0)
        PRROC::pr.curve(
          scores.class0 = prediction$prob[, 1],
          weights.class0 = truth1
        )[[2]]

      }
    )
)

mlr3::mlr_measures$add("classif.prauc", PRAUC)

Example:

task_sonar <- tsk('sonar')
learner <- lrn('classif.rpart', predict_type = 'prob')
learner$train(task_sonar)
pred <- learner$predict(task_sonar)
pred$score(msr('classif.prauc'))

#classif.prauc 
#     0.923816 
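As an aside, since the columns of prediction$prob are named after the class levels, the scoring function could also select the probability column by the positive-class name instead of by position. A sketch of that variant of the private .score (a drop-in replacement for the one above):

.score = function(prediction, ...) {

  pos <- levels(prediction$truth)[1]          # positive class comes first in mlr3
  truth1 <- ifelse(prediction$truth == pos, 1, 0)
  PRROC::pr.curve(
    scores.class0 = prediction$prob[, pos],   # select the probability column by name
    weights.class0 = truth1
  )[[2]]

}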

However, the issue now is that changing the positive class results in a different score:

task_sonar <- tsk('sonar')
task_sonar$positive <- 'R' # Now R is the positive class
learner <- lrn('classif.rpart', predict_type = 'prob')
learner$train(task_sonar)
pred <- learner$predict(task_sonar)
pred$score(msr('classif.prauc'))

#classif.prauc 
#    0.9081261 
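To check whether this difference comes from my measure or from the metric itself, the two orientations can be compared directly with PRROC, bypassing the custom measure (sketch):

# PR AUC with 'M' treated as the positive class
auc_M <- PRROC::pr.curve(
  scores.class0  = pred$prob[, "M"],
  weights.class0 = ifelse(pred$truth == "M", 1, 0)
)$auc.integral

# PR AUC with 'R' treated as the positive class
auc_R <- PRROC::pr.curve(
  scores.class0  = pred$prob[, "R"],
  weights.class0 = ifelse(pred$truth == "R", 1, 0)
)$auc.integral

c(M = auc_M, R = auc_R)  # the two values differ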
Even though MLmetrics::PRAUC might be inferior to PRROC::pr.curve, the general idea of implementing the measure in mlr3 is the same. It's funny how I forgot this. You are correct that prediction$prob[, 1] should be used, since that corresponds to the positive class. As for the edited question, I am not sure where the problem is. It's like saying sensitivity changes when you change the positive class: this is inherent to the type of measure. – missuse
My bad. I thought PR AUC was symmetric like the standard ROC AUC. E.g. MLmetrics::AUC(pred$data$prob[, 1], as.integer(pred$truth == "M")) == MLmetrics::AUC(pred$data$prob[, 2], as.integer(pred$truth == "R")) is TRUE, while MLmetrics::PRAUC(pred$data$prob[, 1], as.integer(pred$truth == "M")) == MLmetrics::PRAUC(pred$data$prob[, 2], as.integer(pred$truth == "R")) is FALSE. – andreassot10
I edited my answer to point to your implementation using PRROC::pr.curve, which is more precise. All the best. – missuse

1 Answer


?PRROC::pr.curve is rather confusing, so I will use MLmetrics::PRAUC to calculate the PR AUC:

library(mlr3measures)
library(mlr3)

PRAUC = R6::R6Class("PRAUC",
                    inherit = mlr3::MeasureClassif,
                    public = list(
                      initialize = function() {
                        super$initialize(
                          # custom id for the measure
                          id = "classif.prauc",

                          # additional packages required to calculate this measure
                          packages = c('MLmetrics'),

                          # properties, see below
                          properties = character(),

                          # required predict type of the learner
                          predict_type = "prob",

                          # feasible range of values
                          range = c(0, 1),

                          # minimize during tuning?
                          minimize = FALSE
                        )
                      }
                    ),

                    private = list(
                      # custom scoring function operating on the prediction object
                      .score = function(prediction, ...) {

                        MLmetrics::PRAUC(prediction$prob[,1], # probabilities of the first class (the positive class is in the first column)
                                         as.integer(prediction$truth == levels(prediction$truth)[1])) # 0/1 truth indicator for the first class

                      }
                    )
)

To verify it works:

mlr3::mlr_measures$add("classif.prauc", PRAUC)
task_sonar <- tsk('sonar')
learner <- lrn('classif.rpart', predict_type = 'prob')
learner$train(task_sonar)
pred <- learner$predict(task_sonar)
pred$score(msr('classif.prauc'))
classif.prauc 
     0.8489383  

MLmetrics::PRAUC(pred$data$prob[,1],
                 as.integer(pred$truth == "M"))
0.8489383 
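For completeness, the registered measure also plugs into resampling in the usual way (sketch; standard mlr3 calls, output omitted):

set.seed(1)
resampling <- rsmp("cv", folds = 3)
rr <- resample(task_sonar, learner, resampling)
rr$aggregate(msr("classif.prauc"))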

EDIT: the measure implementation using PRROC::pr.curve is given as an edit to the question above. It is advisable to use that implementation, since PRROC::pr.curve is more precise than MLmetrics::PRAUC.