1
votes

I'm struggling to add a smoothed trend line to a time series plot that has many data points. For simplicity, I'll include the first 100 rows of the data here:

> dput(test_so)
structure(list(anon_screen_name = c("b62f7980d2e0fdb6b71f52d53f2fb84142d14f93", 
"b62f7980d2e0fdb6b71f52d53f2fb84142d14f93", "20730b3a6feb773e41e70a6949c083f03d0755ad", 
"4f41fc42f34efb6f5041a98a0a6ac65e8e92d147", "4f41fc42f34efb6f5041a98a0a6ac65e8e92d147", 
"4f41fc42f34efb6f5041a98a0a6ac65e8e92d147", "4f41fc42f34efb6f5041a98a0a6ac65e8e92d147", 
"41300a566beaa7ea51c4edf758736941a87d6b65", "41300a566beaa7ea51c4edf758736941a87d6b65", 
"41300a566beaa7ea51c4edf758736941a87d6b65", "4040abe3aebd3026d4edb129c067512c5e9ac113", 
"4040abe3aebd3026d4edb129c067512c5e9ac113", "4040abe3aebd3026d4edb129c067512c5e9ac113", 
"8da3013c6ba7d36cf05ca08dbc6a701b56eb0e85", "8da3013c6ba7d36cf05ca08dbc6a701b56eb0e85", 
"8da3013c6ba7d36cf05ca08dbc6a701b56eb0e85", "8da3013c6ba7d36cf05ca08dbc6a701b56eb0e85", 
"8da3013c6ba7d36cf05ca08dbc6a701b56eb0e85", "8b96a47926dd27337a8bf324904d1eeaa4a4a879", 
"8b96a47926dd27337a8bf324904d1eeaa4a4a879", "8b96a47926dd27337a8bf324904d1eeaa4a4a879", 
"8b96a47926dd27337a8bf324904d1eeaa4a4a879", "8b96a47926dd27337a8bf324904d1eeaa4a4a879", 
"1bce8af81427363a9f5a7a97a121f4243cb454c1", "b969fc16d2fc80db7b1f2bb2ff3c480959f3e748", 
"b969fc16d2fc80db7b1f2bb2ff3c480959f3e748", "3824f1c64bd7833cde58d050e529008420e7e26b", 
"3824f1c64bd7833cde58d050e529008420e7e26b", "3824f1c64bd7833cde58d050e529008420e7e26b", 
"3824f1c64bd7833cde58d050e529008420e7e26b", "3824f1c64bd7833cde58d050e529008420e7e26b", 
"3824f1c64bd7833cde58d050e529008420e7e26b", "3824f1c64bd7833cde58d050e529008420e7e26b", 
"3824f1c64bd7833cde58d050e529008420e7e26b", "20fd05d6da731d4c105cc7332254da8755af80bc", 
"20fd05d6da731d4c105cc7332254da8755af80bc", "20fd05d6da731d4c105cc7332254da8755af80bc", 
"20fd05d6da731d4c105cc7332254da8755af80bc", "293a85f3789417c1fd1408dfe606592a964ee315", 
"293a85f3789417c1fd1408dfe606592a964ee315", "dfbcbd784f2b424593a7d29f6c2dc7fdc09fdbda", 
"dfbcbd784f2b424593a7d29f6c2dc7fdc09fdbda", "f5fc8c5756f44a8e6d63f10bcd19fdb49ea79a34", 
"f5fc8c5756f44a8e6d63f10bcd19fdb49ea79a34", "f5fc8c5756f44a8e6d63f10bcd19fdb49ea79a34", 
"f5fc8c5756f44a8e6d63f10bcd19fdb49ea79a34", "f5fc8c5756f44a8e6d63f10bcd19fdb49ea79a34", 
"61aebd0b910d02e256667fe68d567587bd1e17d5", "c5f20f7a99b1700b6d5f22902c3b558e043d8a68", 
"c5f20f7a99b1700b6d5f22902c3b558e043d8a68", "c5f20f7a99b1700b6d5f22902c3b558e043d8a68", 
"c5f20f7a99b1700b6d5f22902c3b558e043d8a68", "13a76940ac319e0cd3c4b8887295e915946b2707", 
"13a76940ac319e0cd3c4b8887295e915946b2707", "38f81421e6d0d547f6d09020647f242e05698bad", 
"2234102d23222acc0619f924c2b3caba242a95cb", "2234102d23222acc0619f924c2b3caba242a95cb", 
"2234102d23222acc0619f924c2b3caba242a95cb", "2234102d23222acc0619f924c2b3caba242a95cb", 
"9dc75642013a81f9f039c373bdf8c498a6faccff", "9dc75642013a81f9f039c373bdf8c498a6faccff", 
"9dc75642013a81f9f039c373bdf8c498a6faccff", "9dc75642013a81f9f039c373bdf8c498a6faccff", 
"a1c6fb00c357d46ef7f01498629fef7a355ca726", "aceea4cc57d8e3603c278ef6d3e85fab97fb11f0", 
"21aa193c32394276a6ed57a6b71ad83b202763d3", "74906a9393823719fd131bc72a094c546537e206", 
"74906a9393823719fd131bc72a094c546537e206", "74906a9393823719fd131bc72a094c546537e206", 
"6c6ccce2590e011ba497179b002f8b67d4ed738f", "8d4b76eff43d041e0b11ad4d05957a714fd86558", 
"8d4b76eff43d041e0b11ad4d05957a714fd86558", "8d4b76eff43d041e0b11ad4d05957a714fd86558", 
"8d4b76eff43d041e0b11ad4d05957a714fd86558", "305ebd2609f469fd5db6304007ff8838b1219375", 
"305ebd2609f469fd5db6304007ff8838b1219375", "305ebd2609f469fd5db6304007ff8838b1219375", 
"305ebd2609f469fd5db6304007ff8838b1219375", "3858a2b7d11ce558730292e2450941c1c268677e", 
"48a3ae22cc09c00643198b934d2863f6c37725b0", "48a3ae22cc09c00643198b934d2863f6c37725b0", 
"48a3ae22cc09c00643198b934d2863f6c37725b0", "48a3ae22cc09c00643198b934d2863f6c37725b0", 
"aaabc0dac0dd30c380d5514a0d640e341bb4f78f", "aaabc0dac0dd30c380d5514a0d640e341bb4f78f", 
"aaabc0dac0dd30c380d5514a0d640e341bb4f78f", "aaabc0dac0dd30c380d5514a0d640e341bb4f78f", 
"8e13f89a9e8394eeafcbd59200ecb48e4a97b10f", "8e13f89a9e8394eeafcbd59200ecb48e4a97b10f", 
"8e13f89a9e8394eeafcbd59200ecb48e4a97b10f", "8e13f89a9e8394eeafcbd59200ecb48e4a97b10f", 
"8e13f89a9e8394eeafcbd59200ecb48e4a97b10f", "8e13f89a9e8394eeafcbd59200ecb48e4a97b10f", 
"8e13f89a9e8394eeafcbd59200ecb48e4a97b10f", "bcc7144d5f4e3638b584f9fd8d2df6bdc1259a5a", 
"bcc7144d5f4e3638b584f9fd8d2df6bdc1259a5a", "dd182fc037fe298a4664f2fa4cc4eb1ddf3b65d2", 
"6c321b15f35e628cbb8078996e9457c32d6d2168", "c7dc7889a5d8533a8767e280f7f0dd3da44acbb7", 
"c7dc7889a5d8533a8767e280f7f0dd3da44acbb7"), week = c(1L, 3L, 
2L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 3L, 4L, 5L, 6L, 
1L, 2L, 4L, 6L, 9L, 3L, 1L, 2L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 
1L, 2L, 3L, 4L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 5L, 6L, 1L, 1L, 2L, 
3L, 4L, 1L, 2L, 2L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 3L, 4L, 1L, 
1L, 2L, 3L, 2L, 1L, 2L, 3L, 4L, 1L, 2L, 4L, 5L, 1L, 1L, 3L, 4L, 
8L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 6L, 7L, 8L, 9L, 3L, 9L, 1L, 1L, 
3L, 6L), effort_sec = c(1331L, 526L, 2184L, 1893L, 16067L, 12375L, 
8197L, 1436L, 1715L, 1018L, 6659L, 3703L, 3243L, 11379L, 10478L, 
4009L, 549L, 500L, 3325L, 5694L, 6648L, 6928L, 1334L, 3010L, 
5518L, 6901L, 5188L, 19093L, 5289L, 11311L, 500L, 4368L, 2125L, 
2770L, 1000L, 10141L, 500L, 2221L, 21489L, 1074L, 27424L, 2963L, 
10087L, 5475L, 3225L, 4432L, 1315L, 4131L, 9887L, 43181L, 22282L, 
17063L, 1947L, 4231L, 2296L, 13334L, 8277L, 1809L, 5227L, 22461L, 
13903L, 11717L, 2498L, 1530L, 4102L, 946L, 5276L, 6174L, 14545L, 
624L, 6165L, 1775L, 13825L, 7208L, 13741L, 5055L, 5750L, 6872L, 
4379L, 1077L, 10120L, 2023L, 500L, 15742L, 15453L, 16448L, 3149L, 
36360L, 22387L, 11944L, 519L, 27396L, 31021L, 11909L, 800L, 1730L, 
22833L, 4214L, 547L, 2042L)), row.names = c(NA, -100L), class = c("tbl_df", 
"tbl", "data.frame"))

Essentially, when I run this code on the original dataset,

# One semi-transparent line per user. Note that `group` is set in the
# plot-level aes(), so any layer added afterwards inherits the same
# per-user grouping.
df %>% 
  ggplot(aes(x = week, y = effort_sec, group = anon_screen_name)) +
  geom_line(alpha = 0.2) 

I get this line graph here:

Weeks on the x-axis and Effort on the y-axis

When I add + geom_smooth(),

I get this error message:

geom_smooth() using method = 'loess' and formula 'y ~ x' span too small. fewer data values than degrees of freedom. pseudoinverse used at 0.985 neighborhood radius 2.015 reciprocal condition number 0 There are other near singularities as well. 4.0602 span too small. fewer data values than degrees of freedom. pseudoinverse used at 0.985 neighborhood radius 2.015 reciprocal condition number 0 There are other near singularities as well. 4.0602 span too small. fewer data values than degrees of freedom. at 5.995 radius 2.5e-05 all data on boundary of neighborhood. make span bigger pseudoinverse used at 5.995 neighborhood radius 0.005 reciprocal condition number 1 at 7.005 radius 2.5e-05 all data on boundary of neighborhood. make span bigger There are other near singularities as well. 2.5e-05 zero-width neighborhood. make span bigger zero-width neighborhood. make span bigger Computation failed in stat_smooth(): NA/NaN/Inf in foreign function call (arg 5)

Is there a way to add a smoothed trend line in ggplot2?

1
Your problem is that by grouping in the general aes() (the one in the ggplot() command), you are instructing the following geom_smooth() to calculate a regression on each group rather than on the whole dataset. Your particular error message just means that many groups have fewer than 3 observations, so you can't regress on them. - Nicolás Velásquez

1 Answer

2
votes

Try moving the grouping instruction from ggplot() to geom_line(), so that only the lines are drawn per user and geom_smooth() is fitted to the whole dataset:

# The plot-level aes() maps only x and y; the per-user grouping is local to
# geom_line(), so geom_smooth() is not split by anon_screen_name.
ggplot(df, aes(x = week, y = effort_sec)) +
  geom_line(aes(group = anon_screen_name), alpha = 0.2) +
  geom_smooth()

enter image description here