Rolling window
This can be solved by aggregating in a non-equi join which aggregates a varying number of rows which cover a given time period.
library(data.table)
# define parameters
time_1 <- -10
time_2 <- 10
n_min <- 5L
# create helper columns
setDT(dat)[, `:=`(join = time, start = time + time_1, end = time + time_2)][
# non-equi join and aggregate
dat, on = .(join >= start, join <= end), by = .EACHI, {
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
.(time = i.time,
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}]
join join time N adj_r_sqr RMSE p_val_y p_val_intercept slope
1: 5252.00 5272.00 5262.00 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
2: 5253.20 5273.20 5263.20 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
3: 5254.40 5274.40 5264.40 10 -1.248329e-01 0.06411770 0.973340896 0.77035143 2.202740e-04
4: 5255.60 5275.60 5265.60 11 -5.914522e-02 0.06631161 0.523022100 0.72831553 -3.713582e-03
5: 5256.80 5276.80 5266.80 12 -8.934954e-02 0.06570860 0.760946792 0.96376799 -1.488205e-03
6: 5258.00 5278.00 5268.00 13 -8.696209e-02 0.06341811 0.845238030 0.82437256 -7.998616e-04
7: 5259.20 5279.20 5269.20 14 -8.098718e-02 0.06260179 0.874476744 0.52663574 5.619086e-04
8: 5260.46 5280.46 5270.46 15 -6.931305e-02 0.06060738 0.765814583 0.39825585 9.125789e-04
9: 5261.84 5281.84 5271.84 16 -5.793624e-02 0.05873142 0.679041724 0.29974544 1.102731e-03
10: 5263.22 5283.22 5273.22 15 -6.443192e-02 0.06113970 0.702430340 0.34981142 1.151509e-03
11: 5264.60 5284.60 5274.60 15 -7.684134e-02 0.06444578 0.975417917 0.56686522 9.651049e-05
12: 5265.98 5285.98 5275.98 15 1.462585e-01 0.04608930 0.088169168 0.30888780 -4.011513e-03
13: 5267.36 5287.36 5277.36 15 1.964299e-02 0.04086278 0.278246773 0.81455430 -2.166657e-03
14: 5268.74 5288.74 5278.74 15 6.215962e-02 0.04008533 0.188319558 0.64301698 -2.594692e-03
15: 5270.12 5290.12 5280.12 15 4.288832e-02 0.04025402 0.224394742 0.72203401 -2.388716e-03
16: 5271.50 5291.50 5281.50 15 1.512386e-01 0.04851035 0.084258193 0.28641439 -4.218427e-03
17: 5272.88 5292.88 5282.88 14 1.381176e-01 0.05002305 0.104568029 0.29409267 -4.558051e-03
18: 5274.26 5294.26 5284.26 13 1.327825e-01 0.05157048 0.120223456 0.28797520 -5.072464e-03
19: 5275.64 5295.64 5285.64 13 3.596767e-01 0.04138389 0.017834204 0.06667444 -6.361901e-03
20: 5277.02 5297.02 5287.02 12 2.873660e-01 0.04305408 0.041945918 0.12120605 -6.247512e-03
21: 5278.40 5298.40 5288.40 12 5.028991e-01 0.05961926 0.005895787 0.01376478 -1.191625e-02
22: 5279.78 5299.78 5289.78 11 4.293597e-01 0.06222915 0.017042382 0.03427994 -1.172318e-02
23: 5281.16 5301.16 5291.16 11 2.515343e-01 0.06779510 0.066374606 0.13205602 -8.296797e-03
24: 5284.72 5304.72 5294.72 9 -1.061158e-01 0.08743787 0.644376920 0.85394202 -2.841506e-03
25: 5287.30 5307.30 5297.30 8 -1.646716e-01 0.09175885 0.922552618 0.87761939 -6.640669e-04
26: 5289.88 5309.88 5299.88 7 1.448103e-02 0.08454602 0.344665870 0.25432610 7.335472e-03
27: 5292.46 5312.46 5302.46 7 -7.071101e-02 0.08530518 0.472291173 0.35934829 5.744740e-03
28: 5295.04 5315.04 5305.04 7 1.737202e-01 0.07307195 0.192955618 0.13615980 9.523810e-03
29: 5297.62 5317.62 5307.62 7 -8.646937e-02 0.03513349 0.502173104 0.25578453 2.200997e-03
30: 5300.20 5320.20 5310.20 7 -1.900736e-01 0.03206618 0.846235515 0.69966581 -5.675526e-04
31: 5302.78 5322.78 5312.78 7 5.536354e-05 0.05732965 0.363144502 0.54063521 -4.969546e-03
32: 5305.36 5325.36 5315.36 7 5.335182e-02 0.05578091 0.299620417 0.45814873 -5.592470e-03
33: 5307.94 5327.94 5317.94 7 -1.657721e-01 0.06294568 0.717357983 0.94666422 -2.090255e-03
34: 5310.52 5330.52 5320.52 7 -6.028327e-02 0.06414699 0.453853207 0.63535624 -4.512735e-03
35: 5313.10 5333.10 5323.10 7 8.740895e-02 0.06389452 0.264987111 0.38753689 -6.949059e-03
36: 5315.68 5335.68 5325.68 7 -1.197917e-01 0.05898249 0.575617471 0.80276014 -3.059247e-03
37: 5318.26 5338.26 5328.26 7 -1.150258e-01 0.05925244 0.564069556 0.78843841 -3.169989e-03
38: 5320.84 5340.84 5330.84 7 -5.959161e-02 0.05513800 0.452664410 0.66756091 -3.889812e-03
39: 5323.42 5343.42 5333.42 7 -1.914314e-01 0.05727449 0.857057704 0.88289558 -9.413068e-04
40: 5326.00 5346.00 5336.00 7 1.980666e-01 0.03842399 0.175981115 0.09097818 5.246401e-03
41: 5328.58 5348.58 5338.58 7 2.435057e-01 0.03768830 0.147555108 0.07564568 5.592470e-03
42: 5331.16 5351.16 5341.16 7 1.506886e-01 0.03812754 0.210258699 0.10821693 4.748062e-03
43: 5333.74 5353.74 5343.74 7 -8.457396e-02 0.04203885 0.498438355 0.28478388 2.657807e-03
44: 5336.32 5356.32 5346.32 7 -7.010584e-02 0.04407872 0.471193766 0.27563851 2.976190e-03
45: 5338.90 5358.90 5348.90 7 3.356662e-01 0.03913660 0.100918300 0.05396996 6.810631e-03
46: 5341.48 5361.48 5351.48 7 5.571886e-01 0.03769020 0.032860686 0.01839957 9.551495e-03
47: 5344.06 5364.06 5354.06 7 5.543731e-01 0.03777052 0.033425890 0.01873703 9.523810e-03
48: 5346.64 5366.64 5356.64 7 6.568462e-01 0.04090850 0.016679391 0.01024635 1.252769e-02
49: 5349.22 5369.22 5359.22 7 4.263635e-01 0.04784925 0.066663521 0.04074626 9.689922e-03
50: 5351.80 5371.80 5361.80 7 2.805982e-01 0.03460683 0.127153338 0.06281590 5.481728e-03
51: 5354.38 5374.38 5364.38 7 3.716517e-01 0.03324983 0.086098144 0.04219813 6.146179e-03
52: 5356.96 5376.96 5366.96 7 1.502772e-01 0.03293035 0.210579418 0.09951647 4.097453e-03
53: 5359.54 5379.54 5369.54 7 3.375742e-01 0.03655294 0.100088760 0.05199627 6.381506e-03
54: 5362.12 5382.12 5372.12 7 -1.007327e-01 0.03472663 0.531670980 0.27614749 2.021041e-03
55: 5364.70 5384.70 5374.70 7 5.143262e-01 0.04270061 0.042184448 0.02514076 1.003599e-02
56: 5367.28 5387.28 5377.28 7 1.180375e-01 0.05043403 0.237081066 0.14603312 5.869324e-03
57: 5369.86 5389.86 5379.86 7 3.526957e-01 0.05256444 0.093690478 0.05958580 9.413068e-03
58: 5372.44 5392.44 5382.44 7 -1.143248e-01 0.06697669 0.562404900 0.40786204 3.599114e-03
59: 5375.02 5395.02 5385.02 7 -1.380467e-01 0.06582006 0.624143540 0.45527492 2.976190e-03
60: 5377.60 5397.60 5387.60 7 -1.437437e-01 0.08277209 0.640981907 0.80007275 -3.557586e-03
61: 5380.18 5400.18 5390.18 7 6.865351e-02 0.07101868 0.283553003 0.39287969 -7.392027e-03
62: 5382.76 5402.76 5392.76 7 -1.557062e-01 0.06968790 0.679826634 0.87463729 -2.643965e-03
63: 5385.34 5405.34 5395.34 7 -5.672857e-02 0.06614976 0.447786680 0.61411355 -4.720377e-03
64: 5387.92 5407.92 5397.92 7 -1.978539e-01 0.05568786 0.928270615 0.68172236 4.568106e-04
65: 5390.50 5410.50 5400.50 7 -1.682576e-01 0.05373328 0.727529980 0.98771567 -1.716501e-03
66: 5393.08 5413.08 5403.08 7 3.659413e-01 0.03870980 0.088336383 0.04816083 7.087486e-03
67: 5395.66 5415.66 5405.66 7 -5.189744e-02 0.02893483 0.439709327 0.19602199 2.104097e-03
68: 5398.24 5418.24 5408.24 7 6.657153e-02 0.02820253 0.285688876 0.12137978 2.920819e-03
69: 5400.82 5420.82 5410.82 7 -3.417829e-02 0.02978965 0.411610972 0.18697576 2.311739e-03
70: 5403.40 5423.40 5413.40 6 -1.689106e-01 0.03205016 0.626213128 0.38442443 1.915836e-03
71: 5405.98 5425.98 5415.98 5 -3.324952e-01 0.03383253 0.968080223 0.75067625 2.325581e-04
72: 5408.56 5428.56 5418.56 4 4.196229e-01 0.01811905 0.217004528 0.29310481 -7.906977e-03
join join time N adj_r_sqr RMSE p_val_y p_val_intercept slope
EDIT: The OP has posted the link to another sample dataset which ran into an error. The reason is that some group sizes are too small consisting of only one data point so that the linear model has no slope.
The updated version of the code catches this situation and prevents an out-of-bounds error.
The first two columns show the range of years which is covered; they can be removed if no longer needed .
N is the number of rows included in the computation of lm(). The OP has requested to return NA if N < 5. This also can be done afterwards.
# define parameters
time_1 <- -10
time_2 <- 10
n_min <- 5L
# coerce to data.table
result <- setDT(dat)[
# create helper columns
, `:=`(join = time, start = time + time_1, end = time + time_2)][
# non-equi join and aggregate each interval
dat, on = .(join >= start, join <= end), by = .EACHI, {
# do computations within interval
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
# create list of results, finally
.(time = i.time,
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}]
# clean-up result
computed_cols <- setdiff(names(result), c(names(dat), "N"))
result[
# remove join columns
, -(1:2)][
# put NA if too few data points
N < n_min, (computed_cols) := NA][]
time N adj_r_sqr RMSE p_val_y p_val_intercept slope
1: 5262.00 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
2: 5263.20 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
3: 5264.40 10 -1.248329e-01 0.06411770 0.973340896 0.77035143 2.202740e-04
...
70: 5413.40 6 -1.689106e-01 0.03205016 0.626213128 0.38442443 1.915836e-03
71: 5415.98 5 -3.324952e-01 0.03383253 0.968080223 0.75067625 2.325581e-04
72: 5418.56 4 NA NA NA NA NA
time N adj_r_sqr RMSE p_val_y p_val_intercept slope
Splitting by fixed intervals
The OP has also asked
An additional question could be to subset the data, but not in a
rolling, rather in a spliced way again using the time variable.
# define parameters
n_min <- 5L
t_len <- 20
# create "pretty" breaks
breaks <- setDT(dat)[, seq(floor(min(time)/t_len)*t_len, max(time) + t_len, t_len)]
dat[, {
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
.(t_min = min(time),
t_max = max(time),
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}, by = .(cut(time, breaks))]
cut t_min t_max N adj_r_sqr RMSE p_val_y p_val_intercept slope
1: (5.26e+03,5.28e+03] 5262.00 5278.74 14 -0.08098718 0.06260179 0.87447674 0.52663574 0.0005619086
2: (5.28e+03,5.3e+03] 5280.12 5299.88 12 0.33144858 0.06512008 0.02934449 0.06866916 -0.0087040163
3: (5.3e+03,5.32e+03] 5302.46 5317.94 7 -0.19007362 0.03206618 0.84623551 0.69966581 -0.0005675526
4: (5.32e+03,5.34e+03] 5320.52 5338.58 8 -0.16348201 0.06360759 0.90221583 0.62280945 0.0005629384
5: (5.34e+03,5.36e+03] 5341.16 5359.22 8 0.54042068 0.03778046 0.02285248 0.01024298 0.0079272794
6: (5.36e+03,5.38e+03] 5361.80 5379.86 8 0.20369592 0.03803705 0.14585425 0.06090703 0.0043881506
7: (5.38e+03,5.4e+03] 5382.44 5397.92 7 0.06865351 0.07101868 0.28355300 0.39287969 -0.0073920266
8: (5.4e+03,5.42e+03] 5400.50 5418.56 8 -0.04065894 0.02837687 0.42672518 0.14267960 0.0016703581
window_vekt, please? - Uwedat <- data.table::fread("testadat.csv", col.names = c("time", "var1", "var2"))- UweError in lsumm$coefficients[2, 4] : subscript out of bounds- Istvan Gabor Hatvani