2
votes

I'm trying to use the nested dataframe (https://r4ds.had.co.nz/many-models.html) approach to fit multiple latent class growth curves using lcmm::lcmm() and purrr::pmap().

This process requires fitting a model with one class (k = 1) using lcmm() and then using this model as an input to lcmm::gridsearch(), which takes the starting values from this k = 1 model to feed into a k = 2+ class model. gridsearch() also requires the model call for the k = 2+ model (plus two other arguments), which passed as a call to lcmm() within the call to gridsearch(). My usual approach would be to use pmap() to pass a list of arguments to gridsearch(), but list() immediately evaluates the model call to lcmm() and tries to fit the model instead of passing the model call to gridsearch() (see confusing behavior of purrr::pmap with rlang; "to quote" or not to quote argument that is the Q).

NB Using RStudio's function viewer (F2), it seems that lcmm::gridsearch() uses match.call() to adjust the k = 2+ model call with a user-defined number of random starting values, and then iterate through these to find the preferred k = 2+ solution.

I've included a reprex below. When wrapping the call to gridsearch in pmap the command fails with "Error in mutate_impl(.data, dots) : Evaluation error: argument is of length zero." - I think this is because R is trying to evaluate the call to lcmm() for the k = 2+ model, but I could be wrong.

How can I delay the evaluation of lcmm() when passed as an argument to pmap()?

Reprex below:

library(lcmm)
#> Warning: package 'lcmm' was built under R version 3.5.2
#> Loading required package: survival
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(purrr)

# load lcmm example data

data("data_lcmm")

# take sample

set.seed(123)

data_lcmm <-
  data_lcmm %>%
  sample_frac(0.1)




# NB grouping variable is needed to reproduce desired data structure 

data_lcmm <-
  data_lcmm %>%
  mutate(group_var = sample(c(0, 1),
    size = nrow(data_lcmm),
    replace = TRUE
  ))



data_lcmm_nest <-
  data_lcmm %>%
  group_by(group_var) %>%
  nest() %>% 
  mutate(data= map(data, as.data.frame))


# lcmm call from ?lcmm

lcmm_k1 <- function(df) {
  lcmm(Ydep2 ~ Time + I(Time^2),
    random = ~Time, subject = "ID", ng = 1,
    data = data_lcmm_nest$data[[1]], link = "linear"
  )
}


# fit k = 1 models
data_lcmm_nest <-
  data_lcmm_nest %>%
  mutate(lcgm = map(data, lcmm_k1))
#> Be patient, lcmm is running ... 
#> The program took 0.18 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.19 seconds

# this works for a single row
desired_result <-
  gridsearch(
    m = lcmm(Ydep2 ~ Time + I(Time^2),
      mixture = ~Time,
      random = ~Time, subject = "ID", ng = 2,
      data = data_lcmm_nest$data[[1]], link = "linear"
    ),
    rep = 5,
    maxiter = 2,
    minit = data_lcmm_nest$lcgm[[1]]
  )
#> Be patient, lcmm is running ... 
#> The program took 0.45 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.45 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.45 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.45 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.47 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.61 seconds


# this fails with Error in mutate_impl(.data, dots) :
# Evaluation error: argument is of length zero.

data_lcmm_nest %>%
  mutate(lcgm_2 = pmap(
    list(
      m = lcmm(Ydep2 ~ Time + I(Time^2),
        mixture = ~Time,
        random = ~Time, subject = "ID", ng = 2,
        data = data, link = "linear"
      ),
      rep = 5,
      maxiter = 2,
      minit = lcgm
    ), gridsearch
  ))
#> Error in mutate_impl(.data, dots): Evaluation error: argument is of length zero.


# wrapping gridsearch in helper also fails

grid_search_helper <- function(g_rep, g_maxiter, g_minit, g_m) {
  gridsearch(
    m = lcmm(Ydep2 ~ Time + I(Time^2),
      mixture = ~Time,
      random = ~Time, subject = "ID", ng = 2,
      data = g_m, link = "linear"
    ),
    rep = g_rep,
    maxiter = g_maxiter,
    minit = g_minit
  )
}


data_lcmm_nest %>%
  mutate(lcgm_2 = pmap(
    list(
      5,
      2,
      lcgm,
      data
    ), grid_search_helper
  ))
#> Error in mutate_impl(.data, dots): Evaluation error: object 'g_m' not found.

Created on 2019-01-24 by the reprex package (v0.2.1)

1

1 Answers

0
votes

This is not exactly an answer my original question as it doesn't use purrr, but iterating using a for-loop does not have this delayed evaluation problem:

library(lcmm)
#> Loading required package: survival
#> Loading required package: parallel
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(purrr)


data("data_lcmm")

# take sample

set.seed(123)

data_lcmm <-
  data_lcmm %>%
  sample_frac(0.1)




# NB grouping variable is needed to reproduce desired data structure 

data_lcmm <-
  data_lcmm %>%
  mutate(group_var = sample(c(0, 1),
                            size = nrow(data_lcmm),
                            replace = TRUE
  ))



data_lcmm_nest <-
  data_lcmm %>%
  group_by(group_var) %>%
  nest() %>% 
  mutate(data= map(data, as.data.frame))



# lcmm call from ?lcmm

lcmm_k1 <- function(df) {
  lcmm(Ydep2 ~ Time + I(Time^2),
       random = ~Time, subject = "ID", ng = 1,
       data = data_lcmm_nest$data[[1]], link = "linear"
  )
}


# fit k = 1 models
data_lcmm_nest <-
  data_lcmm_nest %>%
  mutate(lcgm = map(data, lcmm_k1))
#> Be patient, lcmm is running ... 
#> The program took 0.19 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.22 seconds

# set-up output vector

results <- vector(mode = "list", length = nrow(data_lcmm_nest))

# fit models

for(i in 1:nrow(data_lcmm_nest)){
  
  results[[i]] <- gridsearch(
    m = lcmm(Ydep2 ~ Time + I(Time^2),
             mixture = ~Time,
             random = ~Time, subject = "ID", ng = 2,
             data = data_lcmm_nest$data[[i]], link = "linear"
    ),
    rep = 5,
    maxiter = 2,
    minit = data_lcmm_nest$lcgm[[i]]
  )
}
#> Be patient, lcmm is running ... 
#> The program took 0.56 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.42 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.47 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.48 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.52 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.5 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.33 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.32 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.39 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.38 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.37 seconds 
#> Be patient, lcmm is running ... 
#> The program took 0.47 seconds

data_lcmm_nest <- 
data_lcmm_nest %>% 
  ungroup() %>% 
  mutate(res = results)

Created on 2021-04-20 by the reprex package (v0.3.0)

devtools::session_info()
#> - Session info ---------------------------------------------------------------
#>  setting  value                       
#>  version  R version 4.0.3 (2020-10-10)
#>  os       Windows 10 x64              
#>  system   x86_64, mingw32             
#>  ui       RTerm                       
#>  language (EN)                        
#>  collate  English_United Kingdom.1252 
#>  ctype    English_United Kingdom.1252 
#>  tz       Europe/London               
#>  date     2021-04-20                  
#> 
#> - Packages -------------------------------------------------------------------
#>  package     * version date       lib source        
#>  assertthat    0.2.1   2019-03-21 [1] CRAN (R 4.0.3)
#>  callr         3.5.1   2020-10-13 [1] CRAN (R 4.0.3)
#>  cli           2.2.0   2020-11-20 [1] CRAN (R 4.0.3)
#>  crayon        1.3.4   2017-09-16 [1] CRAN (R 4.0.3)
#>  desc          1.2.0   2018-05-01 [1] CRAN (R 4.0.3)
#>  devtools      2.3.2   2020-09-18 [1] CRAN (R 4.0.3)
#>  digest        0.6.27  2020-10-24 [1] CRAN (R 4.0.3)
#>  dplyr       * 1.0.2   2020-08-18 [1] CRAN (R 4.0.3)
#>  ellipsis      0.3.1   2020-05-15 [1] CRAN (R 4.0.3)
#>  evaluate      0.14    2019-05-28 [1] CRAN (R 4.0.3)
#>  fansi         0.4.1   2020-01-08 [1] CRAN (R 4.0.3)
#>  fs            1.5.0   2020-07-31 [1] CRAN (R 4.0.3)
#>  generics      0.1.0   2020-10-31 [1] CRAN (R 4.0.3)
#>  glue          1.4.2   2020-08-27 [1] CRAN (R 4.0.3)
#>  highr         0.8     2019-03-20 [1] CRAN (R 4.0.3)
#>  htmltools     0.5.0   2020-06-16 [1] CRAN (R 4.0.3)
#>  knitr         1.30    2020-09-22 [1] CRAN (R 4.0.3)
#>  lattice       0.20-41 2020-04-02 [2] CRAN (R 4.0.3)
#>  lcmm        * 1.9.2   2020-07-07 [1] CRAN (R 4.0.3)
#>  lifecycle     0.2.0   2020-03-06 [1] CRAN (R 4.0.3)
#>  magrittr      2.0.1   2020-11-17 [1] CRAN (R 4.0.3)
#>  Matrix        1.2-18  2019-11-27 [2] CRAN (R 4.0.3)
#>  memoise       1.1.0   2017-04-21 [1] CRAN (R 4.0.3)
#>  pillar        1.4.7   2020-11-20 [1] CRAN (R 4.0.3)
#>  pkgbuild      1.2.0   2020-12-15 [1] CRAN (R 4.0.3)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.0.3)
#>  pkgload       1.1.0   2020-05-29 [1] CRAN (R 4.0.3)
#>  prettyunits   1.1.1   2020-01-24 [1] CRAN (R 4.0.3)
#>  processx      3.4.5   2020-11-30 [1] CRAN (R 4.0.3)
#>  ps            1.5.0   2020-12-05 [1] CRAN (R 4.0.3)
#>  purrr       * 0.3.4   2020-04-17 [1] CRAN (R 4.0.3)
#>  R6            2.5.0   2020-10-28 [1] CRAN (R 4.0.3)
#>  remotes       2.2.0   2020-07-21 [1] CRAN (R 4.0.3)
#>  rlang         0.4.10  2020-12-30 [1] CRAN (R 4.0.3)
#>  rmarkdown     2.6     2020-12-14 [1] CRAN (R 4.0.3)
#>  rprojroot     2.0.2   2020-11-15 [1] CRAN (R 4.0.3)
#>  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 4.0.3)
#>  stringi       1.5.3   2020-09-09 [1] CRAN (R 4.0.3)
#>  stringr       1.4.0   2019-02-10 [1] CRAN (R 4.0.3)
#>  survival    * 3.2-7   2020-09-28 [1] CRAN (R 4.0.3)
#>  testthat      3.0.1   2020-12-17 [1] CRAN (R 4.0.3)
#>  tibble        3.0.4   2020-10-12 [1] CRAN (R 4.0.3)
#>  tidyr       * 1.1.2   2020-08-27 [1] CRAN (R 4.0.3)
#>  tidyselect    1.1.0   2020-05-11 [1] CRAN (R 4.0.3)
#>  usethis       2.0.0   2020-12-10 [1] CRAN (R 4.0.3)
#>  vctrs         0.3.6   2020-12-17 [1] CRAN (R 4.0.3)
#>  withr         2.3.0   2020-09-22 [1] CRAN (R 4.0.3)
#>  xfun          0.20    2021-01-06 [1] CRAN (R 4.0.3)
#>  yaml          2.2.1   2020-02-01 [1] CRAN (R 4.0.3)
#> 
#> [1] M:/R/win-library/3.6
#> [2] C:/Program Files/R/R-4.0.3/library