3
votes

I have a very beginners question. I would like to work with the time series package xts. Therefore, to convert my data to xts format.

My current dataset "data" is a "data.table" "data.frame" format. The Year column is an "integer". The filtered data$Year column comprises data from years 1999-2018. On the bottom of my code, I have provided a sample of my data.

I am facing the following issue:

as.Date doesn't recognize my yearly data as dates. It automatically converts them to daily data, starting from the beginning of 1975...

I tried the following commands:

data$Year <- as.Date(data$Year)
data_xts <- as.xts(data, data[, -1], order.by = data$Year)

# now xts format
class(data_xts)

# here is the problem: > 0 seconds periodicity
# from 1975-06-23 to 1975-07-12
periodicity(data_xts$year) 

Here is the head of the wrong output

> head(data_xts)
           Year         ReporterName PartnerName      TradeValue in 1000 USD year_group  total_average_period_in_1000USD
1975-06-23 "1975-06-23" "Comoros"    "France"         "    1360.758"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "United States"  "    1392.263"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Germany"        "     633.666"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "United Kingdom" "     152.029"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Singapore"      "     450.452"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Indonesia"      "     194.580"         "1999-2002" "    524.8275"                 
           total_average_period_byPartner_in_1000USD percentage_of_group
1975-06-23 "3.638645e+03"                            "6.933030e+00"     
1975-06-23 "1.449703e+03"                            "2.762247e+00"     
1975-06-23 "6.692080e+02"                            "1.275101e+00"     
1975-06-23 "4.821123e+02"                            "9.186109e-01"     
1975-06-23 "4.325665e+02"                            "8.242070e-01"     
1975-06-23 "1.945800e+02"                            "3.707504e-01" 

Here is a sample of my initial data

dput(head(data, n = 100))
structure(list(Year = structure(c(2015, 2016, 2017, 2018, 2011, 
2012, 2013, 2014, 2007, 2009, 2010, 2015, 2016, 2017, 2018, 2007, 
2009, 2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 
2016, 2017, 2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 
2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 
2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 2007, 2009, 
2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2007, 2009, 
2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2015, 2016, 
2017, 2018, 2007, 2009, 2010, 2012, 2013, 2014, 2015, 2016, 2017, 
2018, 2007, 2009, 2010, 2015, 2016, 2017), class = "Date"), ReporterName = c("Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola"), PartnerName = c("China", "China", "China", "China", 
"China", "China", "China", "China", "China", "China", "China", 
"India", "India", "India", "India", "United States", "United States", 
"United States", "Spain", "Spain", "Spain", "Spain", "United States", 
"United States", "United States", "United States", "South Africa", 
"South Africa", "South Africa", "South Africa", "Other Asia, nes", 
"Other Asia, nes", "Other Asia, nes", "Other Asia, nes", "United States", 
"United States", "United States", "United States", "Canada", 
"Canada", "Canada", "Canada", "France", "France", "France", "France", 
"Portugal", "Portugal", "Portugal", "Portugal", "United Arab Emirates", 
"United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
"India", "India", "India", "India", "India", "India", "India", 
"Italy", "Italy", "Italy", "Italy", "United Kingdom", "United Kingdom", 
"United Kingdom", "United Kingdom", "Canada", "Canada", "Canada", 
"Other Asia, nes", "Other Asia, nes", "Other Asia, nes", "Other Asia, nes", 
"Indonesia", "Indonesia", "Indonesia", "Indonesia", "Netherlands", 
"Netherlands", "Netherlands", "Netherlands", "France", "France", 
"France", "Canada", "Canada", "Canada", "Malaysia", "Malaysia", 
"Malaysia", "Malaysia", "Other Asia, nes", "Other Asia, nes", 
"Other Asia, nes", "Singapore", "Singapore", "Singapore"), `TradeValue in 1000 USD` = c(14320565.527, 
13923091.96, 19487066.539, 24517058.342, 24360792.847, 33710030.023, 
31947235.081, 27527110.851, 13459326.563, 15954060.922, 20963245.476, 
2676339.583, 1948845.077, 2890061.159, 3768940.47, 10875646.624, 
7708378.359, 9965785.888, 2245976.426, 882089.095, 1025777.275, 
1250554.873, 1265801.316, 1525650.265, 1079503.617, 1470132.736, 
1376041.349, 1309031.634, 1342549.642, 1161852.097, 1410793.303, 
1136068.366, 1388765.375, 145025.028, 16475024.144, 6594525.851, 
5018390.939, 2548807.59, 1035618.609, 873616.866, 1079684.282, 
647164.297, 1599581.068, 910864.068, 330799.771, 734551.345, 
1199355.049, 851431.606, 334787.698, 1074137.369, 665253.613, 
801908.541, 1016519.507, 884725.078, 6842018.3, 6932060.8, 6764232.765, 
4507416.181, 2376843.352, 3659557.185, 5117824.926, 1105765.643, 
488551.728, 460504.642, 468914.918, 1011126.417, 411203.618, 
373206.578, 425616.975, 2913186.035, 2324861.006, 4039116.578, 
5386493.281, 4699797.618, 4007020.057, 2329013.301, 566597.802, 
376715.236, 415879.351, 575477.283, 1107123.4, 507950.826, 93143.377, 
162760.789, 2592972.627, 3030206.205, 2213064.563, 3519981.595, 
3305027.169, 2719654.992, 94484.681, 480779.397, 571648.578, 
242727.975, 1913906.941, 1154653.223, 2699439.575, 456600.595, 
114849.93, 273956.82), year_group = structure(c(5L, 5L, 5L, 5L, 
4L, 4L, 4L, 4L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 5L, 5L
), .Label = c("1999-2002", "2003-2007", "2008-2010", "2011-2014", 
"2015-2018"), class = "factor"), total_average_period_in_1000USD = c(251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 1938789.42919853, 
1938789.42919853, 1938789.42919853, 1938789.42919853, 1416797.00579381, 
1416797.00579381, 1416797.00579381, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 1416797.00579381, 1416797.00579381, 
1416797.00579381, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 1938789.42919853, 1938789.42919853, 1938789.42919853, 
1938789.42919853, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 1938789.42919853, 1938789.42919853, 1938789.42919853, 
1938789.42919853, 1416797.00579381, 1416797.00579381, 1416797.00579381, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
1416797.00579381, 1416797.00579381, 1416797.00579381, 1938789.42919853, 
1938789.42919853, 1938789.42919853, 1938789.42919853, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 1416797.00579381, 
1416797.00579381, 1416797.00579381, 1938789.42919853, 1938789.42919853, 
1938789.42919853, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 1416797.00579381, 1416797.00579381, 1416797.00579381, 
251327.404028933, 251327.404028933, 251327.404028933), total_average_period_byPartner_in_1000USD = c(18061945.592, 
18061945.592, 18061945.592, 18061945.592, 29386292.2005, 29386292.2005, 
29386292.2005, 29386292.2005, 16792210.987, 16792210.987, 16792210.987, 
2821046.57225, 2821046.57225, 2821046.57225, 2821046.57225, 9516603.62366667, 
9516603.62366667, 9516603.62366667, 1351099.41725, 1351099.41725, 
1351099.41725, 1351099.41725, 1335271.9835, 1335271.9835, 1335271.9835, 
1335271.9835, 1297368.6805, 1297368.6805, 1297368.6805, 1297368.6805, 
1020163.018, 1020163.018, 1020163.018, 1020163.018, 7659187.131, 
7659187.131, 7659187.131, 7659187.131, 909021.0135, 909021.0135, 
909021.0135, 909021.0135, 893949.063, 893949.063, 893949.063, 
893949.063, 864927.9305, 864927.9305, 864927.9305, 864927.9305, 
842101.68475, 842101.68475, 842101.68475, 842101.68475, 6261432.0115, 
6261432.0115, 6261432.0115, 6261432.0115, 3718075.15433333, 3718075.15433333, 
3718075.15433333, 630934.23275, 630934.23275, 630934.23275, 630934.23275, 
555288.397, 555288.397, 555288.397, 555288.397, 3092387.873, 
3092387.873, 3092387.873, 4105581.06425, 4105581.06425, 4105581.06425, 
4105581.06425, 483667.418, 483667.418, 483667.418, 483667.418, 
467744.598, 467744.598, 467744.598, 467744.598, 2612081.13166667, 
2612081.13166667, 2612081.13166667, 3181554.58533333, 3181554.58533333, 
3181554.58533333, 347410.15775, 347410.15775, 347410.15775, 347410.15775, 
1922666.57966667, 1922666.57966667, 1922666.57966667, 339847.48225, 
339847.48225, 339847.48225), percentage_of_group = c(71.8662004320097, 
71.8662004320097, 71.8662004320097, 71.8662004320097, 15.1570313711933, 
15.1570313711933, 15.1570313711933, 15.1570313711933, 11.8522349485003, 
11.8522349485003, 11.8522349485003, 11.2245880354744, 11.2245880354744, 
11.2245880354744, 11.2245880354744, 6.71698456783132, 6.71698456783132, 
6.71698456783132, 5.3758539482406, 5.3758539482406, 5.3758539482406, 
5.3758539482406, 5.31287858822702, 5.31287858822702, 5.31287858822702, 
5.31287858822702, 5.16206613247255, 5.16206613247255, 5.16206613247255, 
5.16206613247255, 4.05909981023223, 4.05909981023223, 4.05909981023223, 
4.05909981023223, 3.95049973743988, 3.95049973743988, 3.95049973743988, 
3.95049973743988, 3.61687981066861, 3.61687981066861, 3.61687981066861, 
3.61687981066861, 3.55691042309532, 3.55691042309532, 3.55691042309532, 
3.55691042309532, 3.4414390020136, 3.4414390020136, 3.4414390020136, 
3.4414390020136, 3.35061625294572, 3.35061625294572, 3.35061625294572, 
3.35061625294572, 3.22955753585287, 3.22955753585287, 3.22955753585287, 
3.22955753585287, 2.62428219365846, 2.62428219365846, 2.62428219365846, 
2.51040763018969, 2.51040763018969, 2.51040763018969, 2.51040763018969, 
2.2094224032015, 2.2094224032015, 2.2094224032015, 2.2094224032015, 
2.18266121424175, 2.18266121424175, 2.18266121424175, 2.11760029347137, 
2.11760029347137, 2.11760029347137, 2.11760029347137, 1.92445157291451, 
1.92445157291451, 1.92445157291451, 1.92445157291451, 1.8610966830587, 
1.8610966830587, 1.8610966830587, 1.8610966830587, 1.84365235173768, 
1.84365235173768, 1.84365235173768, 1.64100058387906, 1.64100058387906, 
1.64100058387906, 1.38230114257658, 1.38230114257658, 1.38230114257658, 
1.38230114257658, 1.35705155488342, 1.35705155488342, 1.35705155488342, 
1.35221021186721, 1.35221021186721, 1.35221021186721)), row.names = c(NA, 
-100L), groups = structure(list(ReporterName = c("Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola"), PartnerName = c("Canada", 
"Canada", "Canada", "China", "China", "China", "France", "France", 
"India", "India", "India", "Indonesia", "Italy", "Malaysia", 
"Netherlands", "Other Asia, nes", "Other Asia, nes", "Other Asia, nes", 
"Portugal", "Singapore", "South Africa", "Spain", "United Arab Emirates", 
"United Kingdom", "United States", "United States", "United States"
), year_group = structure(c(3L, 4L, 5L, 3L, 4L, 5L, 3L, 5L, 3L, 
4L, 5L, 5L, 5L, 5L, 5L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 
4L, 5L), .Label = c("1999-2002", "2003-2007", "2008-2010", "2011-2014", 
"2015-2018"), class = "factor"), .rows = structure(list(70:72, 
    88:90, 39:42, 9:11, 5:8, 1:4, 85:87, 43:46, 59:61, 55:58, 
    12:15, 77:80, 62:65, 91:94, 81:84, 95:97, 73:76, 31:34, 47:50, 
    98:100, 27:30, 19:22, 51:54, 66:69, 16:18, 35:38, 23:26), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 27L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))
3

3 Answers

3
votes

At first, in the main dataset, it seems you should change class = "Date" to class = "Integer" for the Year variable in your dput data to prevent it to create year "1975"

structure(list(Year = structure(c(2015, 2016, 2017, 2018, 2011, 
2012, 2013, 2014, 2007, 2009, 2010, 2015, 2016, 2017, 2018, 2007, 
2009, 2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 
2016, 2017, 2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 
2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 
2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 2007, 2009, 
2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2007, 2009, 
2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2015, 2016, 
2017, 2018, 2007, 2009, 2010, 2012, 2013, 2014, 2015, 2016, 2017, 
2018, 2007, 2009, 2010, 2015, 2016, 2017), class = "Integer"), ...

R does not recognize just year as a Date format, so you should convert year to y/m/d format.
You can do it by converting for example "2018" to "2018-12-31" (considering end of the year as year (or whatever you want).
So the codes below will do it:

library(xts)

data_new <- data #creating a new dataset to preserve original  
class(data_new$Year)  #now class of year is integer

## [1] "Integer"


#formatting integer year to a Date format
data_new$Year <- as.Date(paste(data_new$Year, 12, 31, sep = "-")) 

class(data_new$Year) #check changed format of year

## [1] "Date"

#creating xts object
data_xts <- as.xts(data_new, data_new[,-c("Year")], order.by = data_new$Year)

head(data_new)

##         Year ReporterName PartnerName TradeValue in 1000 USD year_group
## 1 2015-12-31       Angola       China               14320566  2015-2018
## 2 2016-12-31       Angola       China               13923092  2015-2018
## 3 2017-12-31       Angola       China               19487067  2015-2018
## 4 2018-12-31       Angola       China               24517058  2015-2018
## 5 2011-12-31       Angola       China               24360793  2011-2014
## 6 2012-12-31       Angola       China               33710030  2011-2014
##   total_average_period_in_1000USD total_average_period_byPartner_in_1000USD
## 1                        251327.4                                  18061946
## 2                        251327.4                                  18061946
## 3                        251327.4                                  18061946
## 4                        251327.4                                  18061946
## 5                       1938789.4                                  29386292
## 6                       1938789.4                                  29386292
##   percentage_of_group
## 1            71.86620
## 2            71.86620
## 3            71.86620
## 4            71.86620
## 5            15.15703
## 6            15.15703


# check periodicity
periodicity(data_xts$Year)

## 0 seconds periodicity from 2007-12-31 to 2018-12-31
1
votes

Keep only numeric columns in your dataframe and use xts :

library(xts)
library(dplyr)

new_data <- data %>% ungroup() %>% select(where(is.numeric))
data_xts <- xts(new_data, order.by = data$Year)
class(data_xts)
#[1] "xts" "zoo"

head(data_xts)

#           TradeValue in 1000 USD total_average_period_in_1000USD
#1975-07-01               13459327                         1416797
#1975-07-01               10875647                         1416797
#1975-07-01                2376843                         1416797
#1975-07-01                2913186                         1416797
#1975-07-01                2592973                         1416797
#1975-07-01                1913907                         1416797

#           total_average_period_byPartner_in_1000USD percentage_of_group
#1975-07-01                                  16792211           11.852235
#1975-07-01                                   9516604            6.716985
#1975-07-01                                   3718075            2.624282
#1975-07-01                                   3092388            2.182661
#1975-07-01                                   2612081            1.843652
#1975-07-01                                   1922667            1.357052
0
votes

We can use base R with xts

xts::xts(Filter(is.numeric, data), order.by = data$Year)

-output

           TradeValue in 1000 USD total_average_period_in_1000USD total_average_period_byPartner_in_1000USD percentage_of_group
1975-07-01            13459326.56                       1416797.0                                16792211.0           11.852235
1975-07-01            10875646.62                       1416797.0                                 9516603.6            6.716985
1975-07-01             2376843.35                       1416797.0                                 3718075.2            2.624282
1975-07-01             2913186.04                       1416797.0                                 3092387.9            2.182661
1975-07-01             2592972.63                       1416797.0                                 2612081.1            1.843652
1975-07-01             1913906.94                       1416797.0                                 1922666.6            1.357052
1975-07-03            15954060.92                       1416797.0                                16792211.0           11.852235