library(ggplot2)
# Example data: 90 paired integer observations of a response `y` and a
# predictor `x`, stored as a two-column data frame with automatic row names.
dat <- data.frame(
  y = c(
    52L, 63L, 59L, 58L, 57L, 54L,
    27L, 20L, 15L, 27L, 27L, 26L,
    70L, 70L, 70L, 70L, 70L, 70L,
    45L, 42L, 41L, 55L, 45L, 39L,
    51L, 64L, 57L, 39L, 59L, 37L,
    44L, 44L, 38L, 57L, 50L, 56L,
    66L, 66L, 64L, 64L, 60L, 55L,
    52L, 57L, 47L, 57L, 64L, 63L,
    49L, 49L, 56L, 55L, 57L, 42L,
    60L, 53L, 53L, 57L, 56L, 54L,
    42L, 45L, 34L, 52L, 57L, 50L,
    60L, 59L, 52L, 42L, 45L, 47L,
    45L, 51L, 39L, 38L, 42L, 33L,
    62L, 57L, 65L, 44L, 44L, 39L,
    46L, 49L, 52L, 44L, 43L, 38L
  ),
  x = c(
    122743L, 132300L, 146144L, 179886L, 195180L, 233605L,
    1400L, 1400L, 3600L, 5000L, 14900L, 16000L,
    71410L, 85450L, 106018L, 119686L, 189746L, 243171L,
    536545L, 719356L, 830031L, 564546L, 677540L, 761225L,
    551561L, 626799L, 68618L, 1211267L, 1276369L, 1440113L,
    1153720L, 1244575L, 1328641L, 610452L, 692624L, 791953L,
    4762522L, 5011232L, 5240402L, 521339L, 560098L, 608641L,
    4727833L, 4990042L, 5263899L, 1987296L, 2158704L, 2350927L,
    7931905L, 8628608L, 8983683L, 2947957L, 3176995L, 3263118L,
    55402L, 54854L, 55050L, 52500L, 72000L, 68862L,
    1158244L, 1099976L, 1019490L, 538146L, 471219L, 437954L,
    863592L, 661055L, 548097L, 484450L, 442643L, 404487L,
    1033728L, 925514L, 854793L, 371420L, 285257L, 260157L,
    2039241L, 2150710L, 1898614L, 1175287L, 1495433L, 1569586L,
    2646966L, 3330486L, 3282677L, 745784L, 858574L, 1119671L
  )
)
# Scatter plot of the raw data: y against x on their original scales.
ggplot(data = dat, mapping = aes(x = x, y = y)) +
  geom_point()
The relationship appears to be non-linear, so I fitted a model in which both y and x are log-transformed:
# Log-log linear model: ordinary least squares of log(y) on log(x).
mod.lm <- lm(log(y) ~ log(x), data = dat)

# The same data on the log-log scale, overlaid with a straight-line
# ("lm") smoother fitted by geom_smooth() to the plotted coordinates.
ggplot(data = dat, mapping = aes(x = log(x), y = log(y))) +
  geom_point() +
  geom_smooth(method = "lm")
However, the residuals show that the log transformation produces large discrepancies at the lower values. I therefore moved to a non-linear least squares method. I have not used this before, but I followed this post:
Why is nls() giving me "singular gradient matrix at initial parameter estimates" errors?
# Target model: y = a * exp(b * x) + c, fitted by non-linear least squares.
#
# Seed the offset c at half the smallest observed y, so that y - c.0 is
# strictly positive and its logarithm is defined for every row.
c.0 <- 0.5 * min(dat$y)

# Linearise the model as log(y - c) = log(a) + b * x and fit it by ordinary
# least squares; the intercept and slope provide starting values for a and b.
model.0 <- lm(log(y - c.0) ~ x, data = dat)
start <- list(
  a = exp(coef(model.0)[1]),
  b = coef(model.0)[2],
  c = c.0
)

# NOTE(review): x is on its raw scale here, so b is expressed per single unit
# of x; badly scaled parameters may contribute to nls() step-halving failures
# -- TODO confirm, e.g. by refitting with x rescaled.
model <- nls(y ~ a * exp(b * x) + c, data = dat, start = start)
Error in nls(y ~ a * exp(b * x) + c, data = dat, start = start) :
step factor 0.000488281 reduced below 'minFactor' of 0.000976562
Can anyone advise me on what this error means and how to fit an nls model to the above data?