0
votes

Here is the data of my regression : enter image description here

y is the number of passengers at platform of the train station in each 2 minutes period while A1 to A17 are the number of passengers at 17 study areas on concourse. Time lag has already between considered by shifting the Xs. Since sometimes, there will be no one waiting in the study areas on concourse, so excess zero occurs. I am planing to use zero inflated model. I have tried the code as shown between, but it said "minimum count is not zero" What does that mean and how can i solve it? I have done poisson and it's alright but zero inflated doesn't work.

    > setwd('C:/Users/zuzymelody/Desktop')
> try<-read.csv('0inflated_2mins27peak.csv',header=TRUE)
> attach(try)
> names(try)
 [1] "y"   "A1"  "A2"  "A3"  "A4"  "A5"  "A6"  "A7"  "A8"  "A9"  "A10" "A11"
[13] "A12" "A13" "A14" "A15" "A16" "A17"

    > model1<-glm(y~A1+A2+A3+A4+A5+A6+A7+A8+A9+A10+A11+A12+A13+A14+A15+A16+A17,family="poisson")
    > summary(model1)

Call:
glm(formula = y ~ A1 + A2 + A3 + A4 + A5 + A6 + A7 + A8 + A9 + 
    A10 + A11 + A12 + A13 + A14 + A15 + A16 + A17, family = "poisson")

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-7.8598  -3.4571  -0.3663   2.1867  12.5183  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  6.102009   0.164497  37.095  < 2e-16 ***
A1          -0.017555   0.003665  -4.790 1.66e-06 ***
A2          -0.026101   0.017569  -1.486 0.137371    
A3          -0.179988   0.014976 -12.018  < 2e-16 ***
A4          -0.032584   0.007735  -4.213 2.52e-05 ***
A5          -0.019908   0.007014  -2.839 0.004532 ** 
A6          -0.044144   0.010266  -4.300 1.71e-05 ***
A7           0.049829   0.006518   7.645 2.09e-14 ***
A8          -0.080712   0.009819  -8.220  < 2e-16 ***
A9           0.007390   0.007105   1.040 0.298273    
A10          0.041116   0.004085  10.065  < 2e-16 ***
A11         -0.041420   0.008418  -4.921 8.62e-07 ***
A12         -0.008241   0.007304  -1.128 0.259171    
A13         -0.033161   0.008966  -3.699 0.000217 ***
A14          0.020818   0.005250   3.965 7.34e-05 ***
A15         -0.002995   0.006125  -0.489 0.624887    
A16         -0.061997   0.017122  -3.621 0.000294 ***
A17         -0.025025   0.008391  -2.982 0.002860 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 1137.71  on 29  degrees of freedom
Residual deviance:  599.74  on 12  degrees of freedom
AIC: 840.1

Number of Fisher Scoring iterations: 5







     >with(model1, cbind(res.deviance = deviance, df = df.residual,
      p = pchisq(deviance, df.residual, lower.tail=FALSE)))

 res.deviance df             p
[1,]     599.7445 12 1.202013e-120

> require( pscl )
> Zip<-zeroinfl(model1,link="logit",dist="poisson")
**Error in zeroinfl(model1, link = "logit", dist = "poisson") : 
  invalid dependent variable, minimum count is not zero**

dput(try) structure(list(y = c(156L, 74L, 221L, 207L, 168L, 36L, 128L, 208L, 99L, 117L, 228L, 211L, 341L, 173L, 196L, 310L, 112L, 203L, 104L, 183L, 325L, 143L, 218L, 166L, 218L, 127L, 136L, 38L, 102L, 34L), A1 = c(24L, 24L, 24L, 19L, 20L, 9L, 14L, 23L, 15L, 23L, 14L, 16L, 15L, 25L, 25L, 19L, 24L, 26L, 25L, 26L, 22L, 14L, 13L, 15L, 9L, 12L, 9L, 12L, 15L, 18L), A2 = c(2L, 4L, 0L, 3L, 0L, 1L, 1L, 2L, 1L, 2L, 0L, 2L, 2L, 0L, 1L, 1L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 5L, 4L, 3L, 4L, 1L, 2L, 1L), A3 = c(2L, 2L, 0L, 1L, 1L, 9L, 3L, 0L, 0L, 0L, 1L, 1L, 3L, 1L, 0L, 0L, 1L, 2L, 3L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 2L), A4 = c(15L, 11L, 6L, 7L, 10L, 10L, 5L, 4L, 5L, 7L, 9L, 9L, 4L, 6L, 6L, 13L, 9L, 13L, 9L, 10L, 6L, 6L, 7L, 6L, 10L, 9L, 10L, 7L, 9L, 2L), A5 = c(13L, 10L, 6L, 6L, 11L, 19L, 13L, 14L, 7L, 7L, 6L, 8L, 10L, 5L, 7L, 9L, 9L, 11L, 3L, 13L, 8L, 8L, 8L, 6L, 8L, 9L, 9L, 14L, 9L, 6L), A6 = c(9L, 10L, 9L, 9L, 4L, 7L, 7L, 12L, 11L, 11L, 12L, 8L, 6L, 7L, 8L, 5L, 9L, 6L, 5L, 6L, 9L, 11L, 6L, 6L, 8L, 9L, 4L, 11L, 10L, 7L ), A7 = c(21L, 16L, 13L, 13L, 4L, 9L, 12L, 13L, 12L, 12L, 12L, 6L, 7L, 6L, 6L, 4L, 5L, 9L, 8L, 7L, 9L, 12L, 10L, 7L, 8L, 12L, 14L, 2L, 6L, 6L), A8 = c(1L, 5L, 10L, 10L, 1L, 9L, 6L, 6L, 7L, 7L, 5L, 6L, 3L, 2L, 4L, 0L, 4L, 2L, 5L, 5L, 5L, 3L, 2L, 4L, 3L, 8L, 10L, 8L, 2L, 5L), A9 = c(8L, 9L, 10L, 10L, 12L, 19L, 10L, 6L, 6L, 6L, 0L, 6L, 8L, 10L, 2L, 3L, 6L, 2L, 2L, 6L, 5L, 2L, 4L, 1L, 3L, 7L, 7L, 4L, 4L, 2L), A10 = c(7L, 10L, 12L, 20L, 24L, 21L, 24L, 18L, 20L, 18L, 26L, 21L, 12L, 11L, 18L, 18L, 19L, 16L, 25L, 21L, 22L, 14L, 12L, 17L, 21L, 14L, 14L, 10L, 8L, 7L), A11 = c(0L, 2L, 1L, 4L, 2L, 1L, 1L, 1L, 13L, 10L, 12L, 5L, 2L, 0L, 5L, 1L, 4L, 4L, 3L, 3L, 1L, 1L, 3L, 3L, 5L, 5L, 2L, 10L, 3L, 4L), A12 = c(12L, 14L, 14L, 17L, 10L, 14L, 13L, 19L, 7L, 5L, 6L, 6L, 8L, 7L, 13L, 11L, 10L, 8L, 6L, 6L, 9L, 14L, 9L, 10L, 8L, 9L, 8L, 9L, 5L, 7L ), A13 = c(6L, 2L, 1L, 5L, 9L, 6L, 7L, 4L, 12L, 5L, 9L, 10L, 3L, 7L, 4L, 2L, 2L, 6L, 4L, 6L, 7L, 4L, 9L, 6L, 11L, 4L, 5L, 4L, 6L, 6L), A14 = c(14L, 13L, 16L, 11L, 8L, 6L, 9L, 13L, 14L, 14L, 9L, 8L, 12L, 11L, 13L, 11L, 18L, 15L, 20L, 21L, 17L, 18L, 18L, 18L, 25L, 20L, 12L, 9L, 8L, 8L), A15 = c(7L, 6L, 7L, 5L, 4L, 9L, 12L, 12L, 11L, 12L, 9L, 8L, 7L, 8L, 10L, 16L, 8L, 8L, 13L, 10L, 5L, 5L, 8L, 10L, 10L, 4L, 6L, 6L, 6L, 7L), A16 = c(2L, 1L, 3L, 3L, 1L, 2L, 3L, 2L, 3L, 2L, 2L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 3L, 4L, 2L, 5L, 4L, 8L, 5L, 2L, 1L, 2L, 2L, 2L), A17 = c(10L, 13L, 13L, 2L, 5L, 1L, 3L, 3L, 5L, 4L, 4L, 6L, 4L, 6L, 3L, 2L, 2L, 2L, 7L, 8L, 3L, 7L, 5L, 6L, 7L, 6L, 6L, 3L, 4L, 3L)), .Names = c("y", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "A11", "A12", "A13", "A14", "A15", "A16", "A17"), class = "data.frame", row.names = c(NA, -30L))

above is the reproducible example. Sorry its my first time to post here, dont know the rule well

1
@melo Looking at your data, negative binomial distribution may be a better distribution. You have a large overdispersion problem since the var(y) >> mean(y), which is the $\lambda$ parameter of the Poisson distribution. For a Poisson distribution to be effect var(y) = mean(y).akash87

1 Answers

1
votes

Your data frame does not contain a zero value in your dependent variable $y$:

min(mydata$y) [1] 34

You'll need to have at least one $y = 0$.