1
votes

I'm trying to use xgboost in R to train on x and predict y. I've read that I need to create a DMatrix, which requires labels. However, this is not a classification problem/dataset. I was wondering what the simplest way is to get the data into a format appropriate for xgboost. Does anyone know a quick fix?

In case it is relevant, I have included the data below as dput() output.

Thanks

# Reproduction script: loads one CSV, slices it into a feature matrix, a
# target column and one held-out row, then tries to fit an xgboost model.
rm(list = ls()) # clears the workspace (removes every object in the current environment)


# Packages attached by the script; in this snippet only xgboost() is
# actually called from them.
library(caret)
library(ggplot2)
library(doParallel)
library(tidyverse)
library(xgboost)
library(readr)
library(stringr)
library(car)

# read data
# NOTE(review): proj_path is assigned here but not referenced again in this
# snippet; the CSV below is read from its own hard-coded path.
proj_path = "P:/R"
Macro <- read.csv("P:/Earnest/Old/R/InputFull.csv")
# Features: rows 1-31, columns 3-21, converted to a numeric matrix.
x <- Macro[1:31,3:21]
x <- data.matrix(x)
x
# Target: rows 1-31 of column 2, converted to a one-column matrix.
y <- Macro[1:31,2:2]
y <- as.matrix(y)
y
# Row 32 with the same feature columns. It is not used again in this snippet;
# presumably it is the observation to predict for (confirm with the author).
# NOTE(review): the name `t` is also the name of base R's transpose function.
t <- Macro[32:32,3:21]
t <- as.matrix(t)
t


# Fit the boosted model on x / y.
# NOTE(review): objective = "reg:linear" asks for regression, while
# eval_metric = "merror" and num_class = 12 are multiclass-classification
# settings. The error reported for this call says 372 predictions vs. 31
# labels, and 31 rows * 12 classes = 372.
xgb <- xgboost(data = x,
               label = y, 
               eta = 0.1,
               max_depth = 15, 
               nround=25, # max number of boosting iterations; NOTE(review): xgboost() documents this argument as `nrounds`, so `nround` relies on partial matching
               subsample = 0.5,
               colsample_bytree = 0.5,
               seed = 1,
               eval_metric = "merror", # multiclass error rate (classification metric)
               objective ="reg:linear", # regression objective
               num_class = 12, # number of classes (multiclass setting)
               nthread = 3
)

Error:

Error in xgb.iter.update(bst$handle, dtrain, iteration - 1, obj) : 
  [18:32:05] amalgamation/../src/objective/regression_obj.cc:44: Check failed: preds->Size() == info.labels_.size() (372 vs. 31) labels are not correctly providedpreds.size=372, label.size=31

.

dput(x)
structure(c(1.401199848, 1.598505241, 1.899487362, 1.950431503, 
2.728728119, 1.869149245, 2.08378686, 2.118478071, 2.958476801, 
2.111698187, 2.713647324, 2.841381208, 4.021384046, 3.44797061, 
4.051464427, 4.069710171, 5.242768815, 3.869747888, 4.652228652, 
4.452166281, 5.553326349, 4.188964308, 5.012729352, 4.538928371, 
5.638139338, 3.925012902, 4.682906379, 4.660168251, 5.952803094, 
4.721206697, 5.758055685, 0.505057793, 0.902142653, 1.030042836, 
0.94493422, 1.412524824, 0.898767152, 1.102147692, 1.175001349, 
1.51808015, 1.133066252, 1.497577481, 1.445753497, 1.728968761, 
1.294600657, 1.575102079, 1.401923214, 1.994269036, 1.44058083, 
1.903978779, 1.837148025, 2.027326755, 1.686266994, 2.017370721, 
1.773818155, 1.937293325, 1.647630534, 1.749950342, 1.730985306, 
2.263447785, 1.98126732, 2.434642854, 1.591318715, 0.970218047, 
1.355681306, 1.092661393, 1.740056868, 1.286781553, 1.649011557, 
1.716957013, 2.333949215, 1.685729263, 2.009246026, 1.952436999, 
2.438041585, 1.767846259, 2.08817292, 2.116640755, 2.779002777, 
2.026336561, 2.481570287, 2.33258972, 2.912199168, 2.110299378, 
2.595402032, 2.322950029, 2.842142129, 1.979896372, 2.319502793, 
2.359240313, 2.937856446, 2.399225856, 2.827140745, 2.873380916, 
3.343696488, 4.155669862, 3.681448489, 4.599708108, 3.317898543, 
4.205817383, 3.914123106, 4.755850094, 3.604543087, 4.477151098, 
4.282528572, 4.934619327, 3.829466764, 4.628469239, 4.292460153, 
5.447687324, 3.951418388, 5.082707128, 4.712472353, 5.516298089, 
4.168246822, 5.267110329, 4.833249636, 5.828918236, 4.140403816, 
5.28575776, 4.955121909, 6.404571778, 4.880203713, 6.640952257, 
1.10974415, 3.246743054, 3.078431062, 3.094671309, 2.810030477, 
1.986317052, 1.877208512, 2.112517635, 1.852527129, 1.80189335, 
2.15024097, 2.150726317, 2.469052688, 1.825418035, 2.211184806, 
2.252660217, 2.729888904, 2.072823114, 2.320933796, 2.120317491, 
2.593030466, 1.969162713, 2.195998477, 2.198837636, 2.6051228, 
1.87366517, 2.1863434, 2.112724392, 2.625023126, 2.069334825, 
2.328814677, 0.794053644, 2.373534101, 2.525818245, 2.793249383, 
2.637249523, 2.065657523, 2.34613727, 2.548710073, 2.819121607, 
1.646862654, 2.362708534, 2.426512586, 2.647719197, 2.075211547, 
2.386040587, 2.476754917, 2.860952759, 2.231988862, 2.468942531, 
2.46345831, 2.99338684, 2.349608577, 2.568479669, 2.600346713, 
3.056925547, 2.129869136, 2.449644735, 2.352858179, 2.924043472, 
2.26104673, 2.411660085, 1.022750639, 1.339780076, 1.513034557, 
1.416148235, 1.53849107, 1.282660173, 1.483180217, 1.464739076, 
1.845663423, 1.411929468, 1.601485605, 1.57230113, 1.93109256, 
1.447007329, 1.643935412, 1.545581999, 1.995227504, 1.530803771, 
1.692902733, 1.541408483, 1.947110878, 1.520008357, 1.671217322, 
1.660831673, 1.992684923, 1.40046815, 1.644093122, 1.581270255, 
1.937395811, 1.519664787, 1.77809053, 0.634051674, 1.491041765, 
1.687893987, 1.500625828, 1.772809593, 1.381943419, 1.593021538, 
1.502959572, 1.845580558, 1.401185216, 1.685389702, 1.632520307, 
1.943709478, 1.500879275, 1.836383763, 1.797702533, 2.251015458, 
1.825559911, 2.130588433, 1.928360314, 2.501628649, 1.930166962, 
2.19632807, 2.024905776, 2.667980643, 1.837998822, 2.346063091, 
2.137170693, 2.745812681, 2.111553082, 2.352218846, 0.014316765, 
0.014976622, 0.018718418, 0.019685819, 0.02936856, 0.018674042, 
0.020919535, 0.021602221, 0.032336153, 0.021601321, 0.027824404, 
0.027699804, 0.041995276, 0.03115149, 0.037909942, 0.035864057, 
0.050010111, 0.032899321, 0.040721902, 0.039608433, 0.055438722, 
0.037340065, 0.044442281, 0.041467602, 0.056821845, 0.036648017, 
0.044310579, 0.042240444, 0.060200942, 0.03989574, 0.05027253, 
0.008960054, 0.009677327, 0.012042657, 0.011848273, 0.01704815, 
0.01025834, 0.013200639, 0.013957269, 0.020315483, 0.013237455, 
0.016540384, 0.01545766, 0.021502152, 0.014583975, 0.016489787, 
0.015011493, 0.022079413, 0.014652139, 0.019096759, 0.01877026, 
0.024778923, 0.016531124, 0.019846859, 0.017973319, 0.024291258, 
0.017512498, 0.019927366, 0.018047551, 0.027240891, 0.019572431, 
0.024967503, 0.015571196, 0.008793921, 0.012213982, 0.010098792, 
0.019141165, 0.012191416, 0.015685138, 0.016141294, 0.025104952, 
0.015530046, 0.018885584, 0.017573026, 0.024755454, 0.015537127, 
0.018573178, 0.017592893, 0.02624355, 0.016540749, 0.021136035, 
0.019737199, 0.029282295, 0.018555507, 0.022862438, 0.020947225, 
0.029544409, 0.018154018, 0.021944567, 0.021016969, 0.030287821, 
0.020090802, 0.024800349, 0.027344333, 0.028790708, 0.036305175, 
0.03279094, 0.04468925, 0.030123487, 0.03788963, 0.036046439, 
0.048920416, 0.033241727, 0.041905434, 0.03894684, 0.048799303, 
0.033783826, 0.042038253, 0.037765709, 0.051272308, 0.033649304, 
0.044050512, 0.041113277, 0.054492472, 0.038055331, 0.047872178, 
0.044639163, 0.059491776, 0.039308651, 0.04955884, 0.047517015, 
0.064765233, 0.043519587, 0.060235515, 0.015865017, 0.046393208, 
0.045022457, 0.046137529, 0.043409945, 0.028791928, 0.027440233, 
0.030888428, 0.029154138, 0.026850127, 0.031928178, 0.030985288, 
0.037250141, 0.025239685, 0.030213125, 0.029014243, 0.037448246, 
0.026483297, 0.029467808, 0.027428719, 0.036827997, 0.02638519, 
0.029086009, 0.0290847, 0.037315356, 0.025529541, 0.029876725, 
0.028196018, 0.037560361, 0.026557948, 0.030220349, 0.012738675, 
0.037842159, 0.039434707, 0.045916097, 0.047657071, 0.035361537, 
0.037887314, 0.042082156, 0.051177967, 0.028699666, 0.04014397, 
0.041131971, 0.046971141, 0.03450433, 0.039210963, 0.037889999, 
0.046804333, 0.03379912, 0.03750512, 0.03653175, 0.048462325, 
0.036646121, 0.038238758, 0.038760921, 0.049783835, 0.033023068, 
0.037417803, 0.035506876, 0.046763041, 0.033261873, 0.034905163, 
0.015078101, 0.018675716, 0.020982281, 0.020163208, 0.024112597, 
0.01844289, 0.021872319, 0.021488391, 0.029756399, 0.0208895, 
0.024292297, 0.023164544, 0.030436155, 0.020734056, 0.023939554, 
0.021517168, 0.02954451, 0.020454479, 0.022926791, 0.021173154, 
0.029388903, 0.021020651, 0.023448143, 0.023490222, 0.030767307, 
0.020584604, 0.024119676, 0.022463317, 0.029776277, 0.020834843, 
0.024352043, 0.0086, 0.018163633, 0.020572651, 0.018873258, 0.02404777, 
0.017476562, 0.020376855, 0.019459232, 0.025686536, 0.018781604, 
0.022039175, 0.021486706, 0.026922468, 0.019110607, 0.023306799, 
0.02195693, 0.02921719, 0.021150515, 0.025042587, 0.023646145, 
0.032299653, 0.024133764, 0.026572026, 0.024691929, 0.034760273, 
0.023298189, 0.02887728, 0.026780364, 0.035738399, 0.024864652, 
0.027768462, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 
0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 
1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0), .Dim = c(31L, 19L), .Dimnames = list(
    c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", 
    "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", 
    "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"
    ), c("c1372", "c5244", "c5640", "c6164", "b1372", "b5244", 
    "b5640", "b6164", "v1372", "v5244", "v5640", "v6164", "bv1372", 
    "bv5244", "bv5640", "bv6164", "s1", "s2", "s3")))

dput(y)
structure(c(668.39, 524.019, 609.181, 609.953, 730.648, 568.93, 
676.269, 692.894, 856.832, 648.177, 758.524, 774.049, 905.858, 
686.31, 811.253, 814.47, 1011.044, 739.01, 867.46, 825.258, 1013.406, 
762.577, 890.568, 862.491, 1030.2, 761.2, 872.93, 892.77, 1089.12, 
855.69, 992.454), .Dim = c(31L, 1L))
1

1 Answer

0
votes

There isn't a problem with your data, but with your xgboost parameters.

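eval_metric = "merror" and num_class = 12 are settings for multiclass classification and are not compatible with a regression objective. With num_class = 12, xgboost produces 12 predictions per row (31 × 12 = 372), which is why the error reports 372 predictions against 31 labels. Remove both parameters by replacing your xgboost call with the following and it should work fine.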

# Fit a gradient-boosted regression model: `x` is the numeric feature matrix
# and `y` holds one numeric target value per row of `x`.
# No `num_class` or classification `eval_metric` is passed, so xgboost
# produces a single prediction per row, which matches the length of `y`.
xgb <- xgboost(
  data = x,
  label = y,
  eta = 0.1,
  max_depth = 15,
  # Spelled out in full: `nround` only worked through partial argument
  # matching against `nrounds`.
  nrounds = 25, # max number of boosting iterations
  subsample = 0.5,
  colsample_bytree = 0.5,
  seed = 1,
  # Squared-error regression. "reg:linear" is the older, deprecated name for
  # the same objective; xgboost releases before 0.90 only accept "reg:linear".
  objective = "reg:squarederror",
  nthread = 3
)