2
votes

I was trying to calculate the annual growth rate for grouped data; grouped by 1) group, 2) product category and 3) year.

I have tried grouping by the three parameters and then compute growth rate as: ((x/dplyr::lag(x,1))-1)*100. However, this operation results in NAs entirely for the new column.

group_exports_g.rate <- baci_exports %>% ungroup() %>% 
  group_by(group,sna,t) %>% summarise(exports = sum(exports),
                                      n= sum(n)) %>% ungroup() %>% 
  group_by(group,sna,t) %>% arrange(group,sna,t) %>% 
  mutate(gr.exports = 100*((exports/lag(exports,1))-1))
dput(baci_exports)

structure(list(t = c(1995, 1995, 1995, 1995, 1995, 1995, 1995, 
1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 
1995, 1995), i = c(4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 
8, 12, 12, 12, 12, 12), sna = c("Capital goods", "Consumer goods", 
"Fuels and lubricants", "Intermediate goods", "Parts and accessories of capital goods", 
"Passenger motor cars", "Primary goods", "Capital goods", "Consumer goods", 
"Fuels and lubricants", "Goods not elsewhere specified", "Intermediate goods", 
"Parts and accessories of capital goods", "Passenger motor cars", 
"Primary goods", "Capital goods", "Consumer goods", "Fuels and lubricants", 
"Intermediate goods", "Parts and accessories of capital goods"
), exports = c(1195.2624224154, 22997.0533036558, 5.3693833059, 
9720.564817782, 183.9809856813, 111.2556313572, 47976.2051093033, 
5711.1685900189, 120659.569187797, 2264.5410811663, 75.806, 60184.2530566294, 
2249.821, 280.439, 36458.4499193458, 30316.017597217, 150361.471507771, 
8686043.01945958, 448718.59940394, 6633.9115459232), n = c(36L, 
207L, 1L, 137L, 30L, 5L, 58L, 143L, 654L, 5L, 3L, 322L, 57L, 
12L, 143L, 270L, 364L, 106L, 563L, 150L), name_baci = c("Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Albania", "Albania", "Albania", "Albania", "Albania", 
"Albania", "Albania", "Albania", "Algeria", "Algeria", "Algeria", 
"Algeria", "Algeria"), cntry = c("Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Albania", "Albania", "Albania", "Albania", "Albania", "Albania", 
"Albania", "Albania", "Algeria", "Algeria", "Algeria", "Algeria", 
"Algeria"), group = c("Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "SE", 
"SE", "SE", "SE", "SE", "SE", "SE", "SE", "Algeria", "Algeria", 
"Algeria", "Algeria", "Algeria")), row.names = c(NA, -20L), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), groups = structure(list(t = c(1995, 
1995, 1995), i = c(4, 8, 12), .rows = list(1:7, 8:15, 16:20)), row.names = c(NA, 
-3L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
> 
structure(list(t = c(1995, 1996, 1997, 1998, 1999, 2000, 2001, 
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 
2013, 2014, 2015, 2016, 2017), i = c(4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), sna = c("Capital goods", 
"Capital goods", "Capital goods", "Capital goods", "Capital goods", 
"Capital goods", "Capital goods", "Capital goods", "Capital goods", 
"Capital goods", "Capital goods", "Capital goods", "Capital goods", 
"Capital goods", "Capital goods", "Capital goods", "Capital goods", 
"Capital goods", "Capital goods", "Capital goods", "Capital goods", 
"Capital goods", "Capital goods"), exports = c(1195.2624224154, 
1487.4614064276, 1525.1489543903, 1598.3509917338, 2687.9284436967, 
1754.2923408387, 5913.8963941332, 2619.5146133123, 2915.904116471, 
6412.63136988, 8158.0444400432, 4312.0519543819, 31170.4665315818, 
39146.7973036179, 184970.163402516, 20611.8847778549, 28046.1267778067, 
12125.1990587805, 15435.0095479273, 15942.6566817083, 10549.8178035657, 
9092.5422325593, 7967.3875079918), n = c(36L, 49L, 71L, 50L, 
64L, 88L, 107L, 76L, 105L, 138L, 191L, 178L, 175L, 264L, 448L, 
306L, 210L, 232L, 306L, 280L, 207L, 180L, 198L), name_baci = c("Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan"), cntry = c("Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan"), group = c("Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan"
)), row.names = c(NA, -23L), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), groups = structure(list(t = c(1995, 1996, 
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 
2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017), 
    i = c(4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
    4, 4, 4, 4, 4, 4), .rows = list(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
        8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
        19L, 20L, 21L, 22L, 23L)), row.names = c(NA, -23L), class = c("tbl_df", 
"tbl", "data.frame")))

I would like to have an annual growth rate (percentage change) per group, product category and year.

2

2 Answers

2
votes

As @Croote noted, your sample data only has one year, so computing a lag on that will (correctly) produce an NA. I think that is what you want, since growth is undefined for 1995.

To test your data on two years of data, here's some fake data where each value is approx. 50% higher the next year:

baci_exports2 <- bind_rows(
  baci_exports %>% ungroup(),
  baci_exports %>% 
    ungroup() %>%
    mutate(t = 1996, 
           exports = exports * rnorm(n(), mean = 1.5, sd = 0.01))
)

This code seems to produce the right output: (Note how each gr.exports is close to 50, corresponding to 50% growth and the ~1.5x applied to the 2nd year of fake data.)

cgroup_exports_g.rate <- baci_exports2 %>% 
  group_by(group,sna,t) %>% 
  summarise(exports = sum(exports),
            n= sum(n)) %>% 
  mutate(gr.exports = 100*((exports/lag(exports,1))-1)) %>%
  ungroup()


> cgroup_exports_g.rate
# A tibble: 40 x 6
   group       sna                                        t  exports     n gr.exports
   <chr>       <chr>                                  <dbl>    <dbl> <int>      <dbl>
 1 Afghanistan Capital goods                           1995  1195.      36       NA  
 2 Afghanistan Capital goods                           1996  1784.      36       49.3
 3 Afghanistan Consumer goods                          1995 22997.     207       NA  
 4 Afghanistan Consumer goods                          1996 34932.     207       51.9
 5 Afghanistan Fuels and lubricants                    1995     5.37     1       NA  
 6 Afghanistan Fuels and lubricants                    1996     8.00     1       49.0
 7 Afghanistan Intermediate goods                      1995  9721.     137       NA  
 8 Afghanistan Intermediate goods                      1996 14647.     137       50.7
 9 Afghanistan Parts and accessories of capital goods  1995   184.      30       NA  
10 Afghanistan Parts and accessories of capital goods  1996   272.      30       47.9
# ... with 30 more rows

Edit, using new Afghanistan data in the OP:

afghanistan %>% 
  ungroup() %>%
  group_by(group,sna,t) %>% 
  summarise(exports = sum(exports),
            n= sum(n)) %>% 
  mutate(gr.exports = 100*((exports/lag(exports,1))-1)) %>%
  ungroup()

# A tibble: 23 x 6
   group       sna               t exports     n gr.exports
   <chr>       <chr>         <dbl>   <dbl> <int>      <dbl>
 1 Afghanistan Capital goods  1995   1195.    36      NA   
 2 Afghanistan Capital goods  1996   1487.    49      24.4 
 3 Afghanistan Capital goods  1997   1525.    71       2.53
 4 Afghanistan Capital goods  1998   1598.    50       4.80
 5 Afghanistan Capital goods  1999   2688.    64      68.2 
 6 Afghanistan Capital goods  2000   1754.    88     -34.7 
 7 Afghanistan Capital goods  2001   5914.   107     237.  
 8 Afghanistan Capital goods  2002   2620.    76     -55.7 
 9 Afghanistan Capital goods  2003   2916.   105      11.3 
10 Afghanistan Capital goods  2004   6413.   138     120.
1
votes

Here you need to set a default for your lag expression since, lag of your first value defaults to NA. which then results in an NA infecting the rest of your calculations.

Hence, setting lag(exports, 1, 1) (You should decide on your default which suits your purposes, as an example I have set it to 1)

group_exports_g.rate <- baci_exports %>% ungroup() %>% 
  group_by(group,sna,t) %>% summarise(exports = sum(exports),
                                      n= sum(n)) %>% ungroup() %>% 
  group_by(group,sna,t) %>% arrange(group,sna,t) %>% 
  mutate(gr.exports = 100*((exports/lag(exports,1, 1))-1))
group_exports_g.rate
# A tibble: 20 x 6
# Groups:   group, sna, t [20]
   group       sna                                        t    exports     n gr.exports
   <chr>       <chr>                                  <dbl>      <dbl> <int>      <dbl>
 1 Afghanistan Capital goods                           1995    1195.      36    119426.
 2 Afghanistan Consumer goods                          1995   22997.     207   2299605.
 3 Afghanistan Fuels and lubricants                    1995       5.37     1       437.
 4 Afghanistan Intermediate goods                      1995    9721.     137    971956.
 5 Afghanistan Parts and accessories of capital goods  1995     184.      30     18298.
 6 Afghanistan Passenger motor cars                    1995     111.       5     11026.
 7 Afghanistan Primary goods                           1995   47976.      58   4797521.
 8 Algeria     Capital goods                           1995   30316.     270   3031502.
 9 Algeria     Consumer goods                          1995  150361.     364  15036047.
10 Algeria     Fuels and lubricants                    1995 8686043.     106 868604202.
11 Algeria     Intermediate goods                      1995  448719.     563  44871760.
12 Algeria     Parts and accessories of capital goods  1995    6634.     150    663291.
13 SE          Capital goods                           1995    5711.     143    571017.
14 SE          Consumer goods                          1995  120660.     654  12065857.
15 SE          Fuels and lubricants                    1995    2265.       5    226354.
16 SE          Goods not elsewhere specified           1995      75.8      3      7481.
17 SE          Intermediate goods                      1995   60184.     322   6018325.
18 SE          Parts and accessories of capital goods  1995    2250.      57    224882.
19 SE          Passenger motor cars                    1995     280.      12     27944.
20 SE          Primary goods                           1995   36458.     143   3645745.