I'm trying to use xgboost in R to train on x and predict y. I've read some notes on how I need to create a DMatrix, requiring labels. However this is not a classification problem/dataset. I was wondering what the simplest solution is to get the data in a format appropriate for xgboost. Does anyone know a quick fix?
In case relevant, I included the data below in the format of dput()
Thanks
rm(list = ls()) #clears the workspace
library(caret)
library(ggplot2)
library(doParallel)
library(tidyverse)
library(xgboost)
library(readr)
library(stringr)
library(car)
# read data
proj_path = "P:/R"
Macro <- read.csv("P:/Earnest/Old/R/InputFull.csv")
x <- Macro[1:31,3:21]
x <- data.matrix(x)
x
y <- Macro[1:31,2:2]
y <- as.matrix(y)
y
t <- Macro[32:32,3:21]
t <- as.matrix(t)
t
xgb <- xgboost(data = x,
label = y,
eta = 0.1,
max_depth = 15,
nround=25, # max number of boosting iterations
subsample = 0.5,
colsample_bytree = 0.5,
seed = 1,
eval_metric = "merror",
objective ="reg:linear",
num_class = 12,
nthread = 3
)
Error:
Error in xgb.iter.update(bst$handle, dtrain, iteration - 1, obj) :
[18:32:05] amalgamation/../src/objective/regression_obj.cc:44: Check failed: preds->Size() == info.labels_.size() (372 vs. 31) labels are not correctly providedpreds.size=372, label.size=31
.
dput(x)
structure(c(1.401199848, 1.598505241, 1.899487362, 1.950431503,
2.728728119, 1.869149245, 2.08378686, 2.118478071, 2.958476801,
2.111698187, 2.713647324, 2.841381208, 4.021384046, 3.44797061,
4.051464427, 4.069710171, 5.242768815, 3.869747888, 4.652228652,
4.452166281, 5.553326349, 4.188964308, 5.012729352, 4.538928371,
5.638139338, 3.925012902, 4.682906379, 4.660168251, 5.952803094,
4.721206697, 5.758055685, 0.505057793, 0.902142653, 1.030042836,
0.94493422, 1.412524824, 0.898767152, 1.102147692, 1.175001349,
1.51808015, 1.133066252, 1.497577481, 1.445753497, 1.728968761,
1.294600657, 1.575102079, 1.401923214, 1.994269036, 1.44058083,
1.903978779, 1.837148025, 2.027326755, 1.686266994, 2.017370721,
1.773818155, 1.937293325, 1.647630534, 1.749950342, 1.730985306,
2.263447785, 1.98126732, 2.434642854, 1.591318715, 0.970218047,
1.355681306, 1.092661393, 1.740056868, 1.286781553, 1.649011557,
1.716957013, 2.333949215, 1.685729263, 2.009246026, 1.952436999,
2.438041585, 1.767846259, 2.08817292, 2.116640755, 2.779002777,
2.026336561, 2.481570287, 2.33258972, 2.912199168, 2.110299378,
2.595402032, 2.322950029, 2.842142129, 1.979896372, 2.319502793,
2.359240313, 2.937856446, 2.399225856, 2.827140745, 2.873380916,
3.343696488, 4.155669862, 3.681448489, 4.599708108, 3.317898543,
4.205817383, 3.914123106, 4.755850094, 3.604543087, 4.477151098,
4.282528572, 4.934619327, 3.829466764, 4.628469239, 4.292460153,
5.447687324, 3.951418388, 5.082707128, 4.712472353, 5.516298089,
4.168246822, 5.267110329, 4.833249636, 5.828918236, 4.140403816,
5.28575776, 4.955121909, 6.404571778, 4.880203713, 6.640952257,
1.10974415, 3.246743054, 3.078431062, 3.094671309, 2.810030477,
1.986317052, 1.877208512, 2.112517635, 1.852527129, 1.80189335,
2.15024097, 2.150726317, 2.469052688, 1.825418035, 2.211184806,
2.252660217, 2.729888904, 2.072823114, 2.320933796, 2.120317491,
2.593030466, 1.969162713, 2.195998477, 2.198837636, 2.6051228,
1.87366517, 2.1863434, 2.112724392, 2.625023126, 2.069334825,
2.328814677, 0.794053644, 2.373534101, 2.525818245, 2.793249383,
2.637249523, 2.065657523, 2.34613727, 2.548710073, 2.819121607,
1.646862654, 2.362708534, 2.426512586, 2.647719197, 2.075211547,
2.386040587, 2.476754917, 2.860952759, 2.231988862, 2.468942531,
2.46345831, 2.99338684, 2.349608577, 2.568479669, 2.600346713,
3.056925547, 2.129869136, 2.449644735, 2.352858179, 2.924043472,
2.26104673, 2.411660085, 1.022750639, 1.339780076, 1.513034557,
1.416148235, 1.53849107, 1.282660173, 1.483180217, 1.464739076,
1.845663423, 1.411929468, 1.601485605, 1.57230113, 1.93109256,
1.447007329, 1.643935412, 1.545581999, 1.995227504, 1.530803771,
1.692902733, 1.541408483, 1.947110878, 1.520008357, 1.671217322,
1.660831673, 1.992684923, 1.40046815, 1.644093122, 1.581270255,
1.937395811, 1.519664787, 1.77809053, 0.634051674, 1.491041765,
1.687893987, 1.500625828, 1.772809593, 1.381943419, 1.593021538,
1.502959572, 1.845580558, 1.401185216, 1.685389702, 1.632520307,
1.943709478, 1.500879275, 1.836383763, 1.797702533, 2.251015458,
1.825559911, 2.130588433, 1.928360314, 2.501628649, 1.930166962,
2.19632807, 2.024905776, 2.667980643, 1.837998822, 2.346063091,
2.137170693, 2.745812681, 2.111553082, 2.352218846, 0.014316765,
0.014976622, 0.018718418, 0.019685819, 0.02936856, 0.018674042,
0.020919535, 0.021602221, 0.032336153, 0.021601321, 0.027824404,
0.027699804, 0.041995276, 0.03115149, 0.037909942, 0.035864057,
0.050010111, 0.032899321, 0.040721902, 0.039608433, 0.055438722,
0.037340065, 0.044442281, 0.041467602, 0.056821845, 0.036648017,
0.044310579, 0.042240444, 0.060200942, 0.03989574, 0.05027253,
0.008960054, 0.009677327, 0.012042657, 0.011848273, 0.01704815,
0.01025834, 0.013200639, 0.013957269, 0.020315483, 0.013237455,
0.016540384, 0.01545766, 0.021502152, 0.014583975, 0.016489787,
0.015011493, 0.022079413, 0.014652139, 0.019096759, 0.01877026,
0.024778923, 0.016531124, 0.019846859, 0.017973319, 0.024291258,
0.017512498, 0.019927366, 0.018047551, 0.027240891, 0.019572431,
0.024967503, 0.015571196, 0.008793921, 0.012213982, 0.010098792,
0.019141165, 0.012191416, 0.015685138, 0.016141294, 0.025104952,
0.015530046, 0.018885584, 0.017573026, 0.024755454, 0.015537127,
0.018573178, 0.017592893, 0.02624355, 0.016540749, 0.021136035,
0.019737199, 0.029282295, 0.018555507, 0.022862438, 0.020947225,
0.029544409, 0.018154018, 0.021944567, 0.021016969, 0.030287821,
0.020090802, 0.024800349, 0.027344333, 0.028790708, 0.036305175,
0.03279094, 0.04468925, 0.030123487, 0.03788963, 0.036046439,
0.048920416, 0.033241727, 0.041905434, 0.03894684, 0.048799303,
0.033783826, 0.042038253, 0.037765709, 0.051272308, 0.033649304,
0.044050512, 0.041113277, 0.054492472, 0.038055331, 0.047872178,
0.044639163, 0.059491776, 0.039308651, 0.04955884, 0.047517015,
0.064765233, 0.043519587, 0.060235515, 0.015865017, 0.046393208,
0.045022457, 0.046137529, 0.043409945, 0.028791928, 0.027440233,
0.030888428, 0.029154138, 0.026850127, 0.031928178, 0.030985288,
0.037250141, 0.025239685, 0.030213125, 0.029014243, 0.037448246,
0.026483297, 0.029467808, 0.027428719, 0.036827997, 0.02638519,
0.029086009, 0.0290847, 0.037315356, 0.025529541, 0.029876725,
0.028196018, 0.037560361, 0.026557948, 0.030220349, 0.012738675,
0.037842159, 0.039434707, 0.045916097, 0.047657071, 0.035361537,
0.037887314, 0.042082156, 0.051177967, 0.028699666, 0.04014397,
0.041131971, 0.046971141, 0.03450433, 0.039210963, 0.037889999,
0.046804333, 0.03379912, 0.03750512, 0.03653175, 0.048462325,
0.036646121, 0.038238758, 0.038760921, 0.049783835, 0.033023068,
0.037417803, 0.035506876, 0.046763041, 0.033261873, 0.034905163,
0.015078101, 0.018675716, 0.020982281, 0.020163208, 0.024112597,
0.01844289, 0.021872319, 0.021488391, 0.029756399, 0.0208895,
0.024292297, 0.023164544, 0.030436155, 0.020734056, 0.023939554,
0.021517168, 0.02954451, 0.020454479, 0.022926791, 0.021173154,
0.029388903, 0.021020651, 0.023448143, 0.023490222, 0.030767307,
0.020584604, 0.024119676, 0.022463317, 0.029776277, 0.020834843,
0.024352043, 0.0086, 0.018163633, 0.020572651, 0.018873258, 0.02404777,
0.017476562, 0.020376855, 0.019459232, 0.025686536, 0.018781604,
0.022039175, 0.021486706, 0.026922468, 0.019110607, 0.023306799,
0.02195693, 0.02921719, 0.021150515, 0.025042587, 0.023646145,
0.032299653, 0.024133764, 0.026572026, 0.024691929, 0.034760273,
0.023298189, 0.02887728, 0.026780364, 0.035738399, 0.024864652,
0.027768462, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0), .Dim = c(31L, 19L), .Dimnames = list(
c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
"22", "23", "24", "25", "26", "27", "28", "29", "30", "31"
), c("c1372", "c5244", "c5640", "c6164", "b1372", "b5244",
"b5640", "b6164", "v1372", "v5244", "v5640", "v6164", "bv1372",
"bv5244", "bv5640", "bv6164", "s1", "s2", "s3")))
dput(y)
structure(c(668.39, 524.019, 609.181, 609.953, 730.648, 568.93,
676.269, 692.894, 856.832, 648.177, 758.524, 774.049, 905.858,
686.31, 811.253, 814.47, 1011.044, 739.01, 867.46, 825.258, 1013.406,
762.577, 890.568, 862.491, 1030.2, 761.2, 872.93, 892.77, 1089.12,
855.69, 992.454), .Dim = c(31L, 1L))