Error fitting a simulation model to data with optim in R

Question

Good afternoon: I have 30 years of population data on guanacos, and a matrix simulation model; I want to estimate the optimal values of 12 of the model's parameters in R using optim, by minimizing the residual sums of squares (Sum(obs-pred)^2). I created a function with the model. The model is working fine, for if I invoke the function with fixed parameter values I get the exected results. When I call optim with a sewt of initial parameters and the function, I get the following error message: "the argument a13 is absent, with no value by omission"; also: Message of lost warning: In if (SSQ == 0) { : the condition has a length > 1 and only the first element will be used Called from: top level (freely translated from Spanish).

Please find below the R code in three sections: (1) first with the function "guafit" declaration, (2) with a standalone run of the model invoking the "guafit", and (3) the call of "optim" with its starting parameter values.

Thank you, Jorge Rabinovich

# Section (1):
# Clean all
    rm(list=ls())

#####################################################################
####################### Function "guafit"  ##########################
#####################################################################

    guafit <- function(SSQ,a13,a21,a32,a33,ad1,ad2,ad3,ad4,bd1,bd2,bd3,bd4) {

# Create the field population (a 30-years time series)
# ====================================================
    tfield <- c(12334,10670,19078,11219,11771,12323,13027,14094,14604,17775,20774,16410,17626,21445,21111,20777,28978,27809,28935,38841,38363,32273,43128,58597,52456,33125,61334,60488,44773,56973)

# Assign values to the vector of the initial population
# =====================================================
    G <- matrix(c(0,0,0),ncol=3, nrow=1)
    G[1]= 1542
    G[2]= 740
    G[3]= 3885

# Load the matrices with their initial values for all 30 time units (years)
# =========================================================================
    if (SSQ == 0) {
    a<-array(0,dim=c(3,3,30))
    for(i in 1:29) {
    a[1,3,i]= a13
    a[2,1,i]= a21
    a[3,2,i]= a32
    a[3,3,i]= a33
            }
        }
# Initialize some  variables
# ==========================
    tmod<-array(1,dim=c(1,30)); tmod <- as.numeric(tmod)
    densprop<-array(1,dim=c(1,30)); densprop <- as.numeric(densprop)
    FdeltaFe<-array(1,dim=c(1,30)); FdeltaFe <- as.numeric(FdeltaFe)
    FdeltaSc<-array(1,dim=c(1,30)); FdeltaSc <- as.numeric(FdeltaSc)
    FdeltaSj<-array(1,dim=c(1,30)); FdeltaSj <- as.numeric(FdeltaSj)
    FdeltaSa<-array(1,dim=c(1,30)); FdeltaSa <- as.numeric(FdeltaSa)

# N0 is the initial population vector
# It is multiplied by 2 to represewnt both sexes
# ===============================================
# Transfer guanacos (G) as a vector with the three age classes
    N0 <- G
    tmod[1] <- (N0[1]+N0[2]+N0[3]) * 2

# Declaration of the initial simulation conditions
# ================================================
# ng is the number of female individuals per age class (dim 3x30)
# tmod is the total (both sexes) population (sum of the three age classes * 2)
    ng <- matrix( 0, nrow= 3, ncol=30)
    ng[,1] <- N0

# We assume a constant carrying capacity (K= 60000 individuals)
    carcap= 60000

# Start simulation for 30 years
    for(i in 1:29) {
#Set up the density-dependent coefficients

    densprop[i] <- tmod[i] / carcap

# Calculate the density-dependent factors
    FdeltaFe[i]= 1/(1+exp((densprop[i]-ad1)*bd1))
    FdeltaSc[i]= 1/(1+exp((densprop[i]-ad2)*bd2))
    FdeltaSj[i]= 1/(1+exp((densprop[i]-ad3)*bd3))
    FdeltaSa[i]= 1/(1+exp((densprop[i]-ad4)*bd4))

# Apply the density-dependent factors to each coefficient in its own age class

    a[1,3,i]= a[1,3,i] * FdeltaFe[i]
    a[2,1,i]= a[2,1,i] * FdeltaSc[i]
    a[3,2,i]= a[3,2,i] * FdeltaSj[i]
    a[3,3,i]= a[3,3,i] * FdeltaSa[i]

# Project the total population with the matrix operation

    ng[,i+1] <- a[,,i]%*%ng[,i]
    tmod[i+1] <- (ng[1,i+1] + ng[2,i+1] + ng[3,i+1]) * 2

# End of the 30-years simulation loop
            }

# Calculate the residual sum of squares (SSQ)       
            SSQ = sum((tmod - tfield)^2)
               return(list(outm=tmod, outc=tfield, elSSQ=SSQ, matrices=a,     losgua=G, losguaxe=ng))

# End of function guafit
        }
#################################################################################

# Section (2):

# Initialize conditions and parameters before calling function guafit

    SSQ <- 0

# Initialize the 8 density-dependent coefficients (2 coefficients per age class)
# ==============================================================================
    ad1= 1.195680167
    ad2= 1.127219245
    ad3= 1.113739384
    ad4= 1.320456815
    bd1= 10.21559509
    bd2= 9.80201883
    bd3= 9.760834107
    bd4= 10.59390027

# Assign initial values to the transition matrix coefficients
# ============================================================
    a21= 0.6
    a32=0.8
    a33=0.9
    a13=0.37

# Initialization of conditions is finished, we can  call function guafit
# As a test, we call function guafit only once to check for results
    myresults <- guafit(SSQ,a13,a21,a32,a33,ad1, ad2, ad3, ad4, bd1, bd2, bd3, bd4)
# We save the results of interest of function guafit with new variables 
    restmod <- myresults$outm
    tfield <- myresults$outc
    SSQvalue <- myresults$elSSQ
    lasmatrices <- myresults$matrices
    reslosgua <- myresults$losgua
    reslosguaxe <- myresults$losguaxe
    SSQvalue
# Graph the results
    axisx <- c(1982:2011)
    plot(axisx,tfield)
    lines(axisx,restmod)

#################################################################################

# Section (3):

# I try the optim function 

# First creating the initial parameter values variable to pass as an argument

    startparam <- c(SSQ, a13,a21,a32,a33,ad1, ad2, ad3, ad4, bd1, bd2, bd3, bd4)
    optim(startparam, guafit)
# and I got error message mentioned above.

# I also tried calling as:
    optim(par=startparam, fn=guafit) 
# and I got the same error message

# I also tried calling optim but passing the values directly as a list of values:
    startparam <- c(SSQ=0, a13=0.37, a21=0.6, a32=0.8, a33=0.9, ad1=1.1, ad2=1.1, ad3=1.1, ad4=1.1, bd1=10, bd2=10, bd3=10, bd4=10)
    optim(startparam, guafit)
    optim(par=startparam, fn=guafit) 

# and I got the same error message

Could you initialize stuff outside of the function? I am just guessing. That might be the first thing I would try. Although, I have only looked at the code for a few moments. — Mark Miller
a13 appears to be a constant, but it looks like you are trying to estimate it. Maybe remove your constants from your optim statement. — Mark Miller

Mark Miller Mark Miller · Accepted Answer · 2015-04-26T13:53:30

I got the code running, but I am not sure whether it returns the correct parameter estimates. I tried two different approaches.

With the first approach I did the following:

placed all data and initializations outside the function.
removed constants from the optim statement
assumed ad1, ad2, ad3 and ad4 were constants and not parameters to be estimated. I guess you could say I assumed ad1, ad2, ad3 and ad4 were constant offsets.

This first approach returned estimates of bd1, bd2, bd3 and bd4 that closely matched your initial values.

With the second approach I did the following:

placed all data and initializations outside the function.
removed constants from the optim statement
assumed ad1, ad2, ad3 and ad4 were parameters to be estimated.

This second approach returned estimates of all eight parameters: ad1, ad2, ad3, ad4, bd1, bd2, bd3 and bd4 but I do not think the estimates are correct. Note that two columns in the Hessian are all 0. The SE could not be estimated with this second approach and the estimates of bd1, bd2, bd3 and bd4 were not close to their initial values.

The R code for both approaches is below. Check the code to see whether either approach is doing what you want. If treating ad1, ad2, ad3, and ad4 as parameters is critical then perhaps the model code will have to be revised.

Here is the R code and output for the first approach:

set.seed(2345)

guafit <- function(param) {

          bd1 <- param[1]
          bd2 <- param[2]
          bd3 <- param[3]
          bd4 <- param[4]

# Start simulation for 30 years
    for(i in 1:29) {

# Set up the density-dependent coefficients
    densprop[i] <- tmod[i] / carcap

# Calculate the density-dependent factors
    FdeltaFe[i]= 1/(1+exp((densprop[i]-ad1)*bd1))
    FdeltaSc[i]= 1/(1+exp((densprop[i]-ad2)*bd2))
    FdeltaSj[i]= 1/(1+exp((densprop[i]-ad3)*bd3))
    FdeltaSa[i]= 1/(1+exp((densprop[i]-ad4)*bd4))

# Apply density-dependent factors to each coefficient in its own age class
    a[1,3,i]= a[1,3,i] * FdeltaFe[i]
    a[2,1,i]= a[2,1,i] * FdeltaSc[i]
    a[3,2,i]= a[3,2,i] * FdeltaSj[i]
    a[3,3,i]= a[3,3,i] * FdeltaSa[i]

# Project the total population with the matrix operation
    ng[,i+1]  <- a[,,i]%*%ng[,i]
    tmod[i+1] <- (ng[1,i+1] + ng[2,i+1] + ng[3,i+1]) * 2

# End of the 30-years simulation loop
            }

# Calculate the residual sum of squares (SSQ)       
            SSQ = sum((tmod - tfield)^2)

# End of function guafit
        }

# Create the field population (a 30-years time series)
# ====================================================
    tfield <- c(12334,10670,19078,11219,11771,12323,13027,14094,14604,17775,
                20774,16410,17626,21445,21111,20777,28978,27809,28935,38841,
                38363,32273,43128,58597,52456,33125,61334,60488,44773,56973)

# Initialize conditions and parameters before calling function guafit
    SSQ <- 0

# Assign initial values to the transition matrix coefficients
# ============================================================
    a21 = 0.6
    a32 = 0.8
    a33 = 0.9
    a13 = 0.37

# Assign values to the vector of the initial population
# =====================================================
    G <- matrix(c(0,0,0),ncol=3, nrow=1)
    G[1] = 1542
    G[2] = 740
    G[3] = 3885

# Load the matrices with their initial values for all 30 time units (years)
# =========================================================================

    a <- array(0,dim=c(3,3,30))

    for(i in 1:29) {
    a[1,3,i]= a13
    a[2,1,i]= a21
    a[3,2,i]= a32
    a[3,3,i]= a33
            }

# Initialize some  variables
# ==========================
    tmod<-array(1,dim=c(1,30)); tmod <- as.numeric(tmod)

    densprop <- array(1,dim=c(1,30)) 
    densprop <- as.numeric(densprop)

    FdeltaFe <- array(1,dim=c(1,30)) 
    FdeltaFe <- as.numeric(FdeltaFe)

    FdeltaSc <- array(1,dim=c(1,30))
    FdeltaSc <- as.numeric(FdeltaSc)

    FdeltaSj <- array(1,dim=c(1,30))
    FdeltaSj <- as.numeric(FdeltaSj)

    FdeltaSa <- array(1,dim=c(1,30))
    FdeltaSa <- as.numeric(FdeltaSa)

    ng <- matrix( 0, nrow= 3, ncol=30)

# We assume a constant carrying capacity (K= 60000 individuals)
    carcap= 60000

# N0 is the initial population vector
# It is multiplied by 2 to represewnt both sexes
# ===============================================
# Transfer guanacos (G) as a vector with the three age classes

    N0 <- G

    tmod[1] <- (N0[1]+N0[2]+N0[3]) * 2

# Declaration of the initial simulation conditions
# ================================================
# ng is the number of female individuals per age class (dim 3x30)
# tmod is total (both sexes) population (sum of the three age classes * 2)

    ng[,1] <- N0


ad1 <- 1.195680167
ad2 <- 1.127219245
ad3 <- 1.113739384
ad4 <- 1.320456815

 fit <- optim ( c(10.21559509,
                   9.80201883,
                   9.760834107,
                  10.59390027), fn=guafit, method='BFGS', hessian=TRUE)
 fit

Here is the output for the first approach:

$par
[1] 11.315899 11.886347 11.912239  9.675885

$value
[1] 1177381086

$counts
function gradient 
     150       17 

$convergence
[1] 0

$message
NULL

$hessian
         [,1]      [,2]      [,3]      [,4]
[1,] 417100.8  306371.2  326483.2  941186.8
[2,] 306371.2  516636.4  370602.5 1061540.5
[3,] 326483.2  370602.5  577929.7 1135695.9
[4,] 941186.8 1061540.5 1135695.9 3720506.4

Here are the four parameter estimates and their standard errors:

           mle          se
[1,] 11.315899 0.002439888
[2,] 11.886347 0.002240669
[3,] 11.912239 0.002153876
[4,]  9.675885 0.001042035

Here is the R code for the second approaches, that estimates eight parameters. The SE could not be estimated with this second approach:

set.seed(2345)

guafit <- function(param) {

          ad1 <- param[1]
          ad2 <- param[2]
          ad3 <- param[3]
          ad4 <- param[4]
          bd1 <- param[5]
          bd2 <- param[6]
          bd3 <- param[7]
          bd4 <- param[8]

# Start simulation for 30 years
    for(i in 1:29) {

# Set up the density-dependent coefficients
    densprop[i] <- tmod[i] / carcap

# Calculate the density-dependent factors
    FdeltaFe[i]= 1/(1+exp((densprop[i]-ad1)*bd1))
    FdeltaSc[i]= 1/(1+exp((densprop[i]-ad2)*bd2))
    FdeltaSj[i]= 1/(1+exp((densprop[i]-ad3)*bd3))
    FdeltaSa[i]= 1/(1+exp((densprop[i]-ad4)*bd4))

# Apply the density-dependent factors to each coefficient in its own age class
    a[1,3,i]= a[1,3,i] * FdeltaFe[i]
    a[2,1,i]= a[2,1,i] * FdeltaSc[i]
    a[3,2,i]= a[3,2,i] * FdeltaSj[i]
    a[3,3,i]= a[3,3,i] * FdeltaSa[i]

# Project the total population with the matrix operation

    ng[,i+1]  <- a[,,i]%*%ng[,i]
    tmod[i+1] <- (ng[1,i+1] + ng[2,i+1] + ng[3,i+1]) * 2

# End of the 30-years simulation loop
            }

# Calculate the residual sum of squares (SSQ)       
            SSQ = sum((tmod - tfield)^2)

# End of function guafit
        }

# Create the field population (a 30-years time series)
# ====================================================
    tfield <- c(12334,10670,19078,11219,11771,12323,13027,14094,14604,17775,
                20774,16410,17626,21445,21111,20777,28978,27809,28935,38841,
                38363,32273,43128,58597,52456,33125,61334,60488,44773,56973)

# Initialize conditions and parameters before calling function guafit
    SSQ <- 0

# Assign initial values to the transition matrix coefficients
# ============================================================
    a21 = 0.6
    a32 = 0.8
    a33 = 0.9
    a13 = 0.37

# Assign values to the vector of the initial population
# =====================================================
    G <- matrix(c(0,0,0),ncol=3, nrow=1)
    G[1] = 1542
    G[2] = 740
    G[3] = 3885

# Load the matrices with their initial values for all 30 time units (years)
# =========================================================================

    a <- array(0,dim=c(3,3,30))

    for(i in 1:29) {
    a[1,3,i]= a13
    a[2,1,i]= a21
    a[3,2,i]= a32
    a[3,3,i]= a33
            }

# Initialize some  variables
# ==========================
    tmod<-array(1,dim=c(1,30)); tmod <- as.numeric(tmod)

    densprop <- array(1,dim=c(1,30)) 
    densprop <- as.numeric(densprop)

    FdeltaFe <- array(1,dim=c(1,30)) 
    FdeltaFe <- as.numeric(FdeltaFe)

    FdeltaSc <- array(1,dim=c(1,30))
    FdeltaSc <- as.numeric(FdeltaSc)

    FdeltaSj <- array(1,dim=c(1,30))
    FdeltaSj <- as.numeric(FdeltaSj)

    FdeltaSa <- array(1,dim=c(1,30))
    FdeltaSa <- as.numeric(FdeltaSa)

    ng <- matrix( 0, nrow= 3, ncol=30)

# We assume a constant carrying capacity (K= 60000 individuals)
    carcap= 60000

# N0 is the initial population vector
# It is multiplied by 2 to represewnt both sexes
# ===============================================
# Transfer guanacos (G) as a vector with the three age classes

    N0 <- G

    tmod[1] <- (N0[1]+N0[2]+N0[3]) * 2

# Declaration of the initial simulation conditions
# ================================================
# ng is the number of female individuals per age class (dim 3x30)
# tmod is the total (both sexes) population (sum of the three age classes * 2)

    ng[,1] <- N0

 fit <- optim ( c( 1.195680167,
                   1.127219245,
                   1.113739384,
                   1.320456815,
                  10.21559509,
                   9.80201883,
                   9.760834107,
                  10.59390027), fn=guafit, method='BFGS', hessian=TRUE)

 fit

Here is the output for the second approach:

$par
[1]   0.9191288   1.0266562   1.1754382   0.9973042 248.5229404  75.2993413 380.2445394 460.2091730

$value
[1] 1031918725

$counts
function gradient 
     705       76 

$convergence
[1] 0

$message
NULL

$hessian
             [,1]          [,2] [,3]        [,4]          [,5]          [,6] [,7]        [,8]
[1,] 7.688038e+09  4.293539e+07    0  2.89082527  6.248865e+04  4.860839e+04    0  0.02980232
[2,] 4.293539e+07  2.761336e+06    0  0.29802322 -3.247231e+03  1.245818e+04    0 -0.02980232
[3,] 0.000000e+00  0.000000e+00    0  0.00000000  0.000000e+00  0.000000e+00    0  0.00000000
[4,] 2.890825e+00  2.980232e-01    0 55.25350571 -2.980232e-02  0.000000e+00    0  0.02980232
[5,] 6.248865e+04 -3.247231e+03    0 -0.02980232  8.401275e+01 -3.635883e+00    0 -0.02980232
[6,] 4.860839e+04  1.245818e+04    0  0.00000000 -3.635883e+00  3.185868e+01    0 -0.02980232
[7,] 0.000000e+00  0.000000e+00    0  0.00000000  0.000000e+00  0.000000e+00    0  0.00000000
[8,] 2.980232e-02 -2.980232e-02    0  0.02980232 -2.980232e-02 -2.980232e-02    0  0.02980232

Error fitting a simulation model to data with optim in R

2 Answers