Run model on each group in dataframe and output key metrics into new dataframe

Question

I have a the following model that I currently run for one state of data. It's code from this blog: https://blog.ephorie.de/epidemiology-how-contagious-is-novel-coronavirus-2019-ncov

It models future infections based on existing daily COVID-19 infections data. I have a dataset for all US counties and want to run the same analysis county by county and output key variables into a dataset for each county. Here is the code I ran for one state.

library(deSolve)
library(tidyverse)

date <- c('2020-03-24','2020-03-25','2020-03-26','2020-03-24','2020-03-25','2020-03-26')
fips <- c(1001,1001,1001,1002,1002,1002)
Infected <- c(1,2,4,4,7,9)
day <- c(1,2,3,1,2,3)
N <- c(55601,55601,55601,2231,2231,2231)

cp <- data.frame(date,fips,Infected,day,N)

And the model:

SIR <- function(time, state, parameters) {
    par <- as.list(c(state, parameters))
    with(par, {
        dS <- -beta/N * I * S
        dI <- beta/N * I * S - gamma * I
        dR <- gamma * I
        list(c(dS, dI, dR))
    })
}

init <- c(S = N-Infected[1], I = Infected[1], R = 0)
RSS <- function(parameters) {
    names(parameters) <- c("beta", "gamma")
    out <- ode(y = init, times = Day, func = SIR, parms = parameters)
    fit <- out[ , 3]
    sum((Infected - fit)^2)
}

Opt <- optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", lower = c(0, 0), upper = c(1, 1))
Opt_par <- setNames(Opt$par, c("beta", "gamma"))
t <- 1:365 # time in days
fit <- data.frame(ode(y = init, times = t, func = SIR, parms = Opt_par))
R0 <- setNames(Opt_par["beta"] / Opt_par["gamma"], "R0")
infections <- fit[fit$I == max(fit$I), "I", drop = FALSE]
deaths <- max(fit$I) * 0.02

Here is a small sample of dataset I have with county level data:

date <- '2020-03-24','2020-03-25','2020-03-26','2020-03-24','2020-03-25','2020-03-26'
fips <- 1001,1001,1001,1002,1002,1002
Infected <- 1,2,4,4,7,9
day <- 1,2,3,1,2,3
N <- 55601,55601,55601,2231,2231,2231

I'd like to run the above model for each fips (county code). As output, I want to have a dataframe that includes: time_stamp, fips, max(Infected), max(day), N, beta, gamma, R0, infections, deaths

I'd like a dplyr pipe solution and have tried using group_modify() but continually get an error. Can you help??

Here is what I have and the error I get:

SIR <- function(time, state, parameters) {
    par <- as.list(c(state, parameters))
    with(par, {
        dS <- -beta/N * I * S
        dI <- beta/N * I * S - gamma * I
        dR <- gamma * I
        list(c(dS, dI, dR))
    })
}

RSS <- function(parameters) {
    names(parameters) <- c("beta", "gamma")
    out <- ode(y = c(S, I, R)
               , times = Day, func = SIR, parms = parameters)
    fit <- out[ , 3]
    sum((Infected - fit)^2)
}

cp <- cp %>%
    group_by(fips) %>%
    mutate(S = N-Infected[1], I = Infected[1], R = 0) %>%
    group_modify(optim(c(0.5, 0.5), RSS, method = "L-BFGS-B", lower = c(0, 0), upper = c(1, 1)))

Error:

Error in ode(y = c(S, I, R), times = Day, func = SIR, parms = parameters) : object 'S' not found

thanks massisenergy. A added it. I think my approach is wrong, I'm just not sure how to run/call the functions within my piped commands. — mr_puddles
Your call for mutate should be like mutate(df) where df is a dataframe or datatable. So, mutate(init) would be an example of the right usage. Type ?mutate & hit enter in you RStudio console. — massisenergy
Hi, I tried your model and found several problems. Lets start with the first one: Your model SIR contains a variable "N" that is no state variable. What is this? Is it just S or is it S+I+R? See also github.com/tpetzoldt/covid for a similar model. — tpetzoldt

tpetzoldt tpetzoldt · Accepted Answer · 2020-03-29T16:15:58

I think the easiest way is to write a separate function, e.g. fit_model that is then called by group_modify. Note that I had to change some other parts and that there are already more complex models available for this problem. The github page mentioned above, contains some links.

library(deSolve)
library(tidyverse)

cp <- data.frame(
  date     = c('2020-03-24','2020-03-25','2020-03-26','2020-03-24','2020-03-25','2020-03-26'),
  fips     = c(1001,1001,1001,1002,1002,1002),
  Infected = c(1,2,4,4,7,9),
  day      = c(1,2,3,1,2,3),
  N        = c(55601,55601,55601,2231,2231,2231)
)

SIR <- function(time, state, parameters) {
  par <- as.list(c(state, parameters))
  with(par, {
    N  <- S + I + R
    dS <- -beta/N * I * S
    dI <- beta/N * I * S - gamma * I
    dR <- gamma * I
    list(c(dS, dI, dR))
  })
}

RSS <- function(parameters, init, data) {
  names(parameters) <- c("beta", "gamma")
  out <- ode(y = init,
             times = data$day, func = SIR, parms = parameters)
  fit <- out[ , 3]
  sum((data$Infected - fit)^2)
}

fit_model <- function(data) {
  init <- slice(data, 1) %>% select(S, I, R) %>% unlist()
  opt <- optim(c(0.5, 0.5), RSS,
               init = init,
               data = data,
               method = "L-BFGS-B",
               lower = c(0, 0),
               upper = c(1, 1))

  data.frame(alpha=opt$par[1], beta=opt$par[2], RSS=opt$value,
             conv= opt$convergence, ok = opt$convergence == 0)
}

cp %>%
  group_by(fips) %>%
  mutate(S = N[1]-Infected[1], I = Infected[1], R = 0) %>%
  group_modify( ~ fit_model(.))

Run model on each group in dataframe and output key metrics into new dataframe

1 Answers