0
votes

Need your help in solving the below error.

##Loading Libraries.
library(caret)
library(kernlab)

##Loading the data
rm(list=ls())
set.seed(3421)

Extrapolation_Data <- read.table("./Data/Data5/EP_CUST_COMBINED_07042017.txt",
                                sep="|", header=TRUE , 
                                colClasses = c("X6MNTH_FTD" = "NULL" , 
                                              "X6MNTH_LTD"="NULL" , 
                                              "India3MLtd_TRANS" = "NULL", 
                                              "Cust_Considered" = "NULL",
                                              "Customer_No"="character",
                                              "Segment"="factor",
                                              "Store"="factor",
                                              "DISTINCT_VISITS_BAND"="factor",
                                              "DISTINCT_MONTH_VISITS"="factor",
                                              "CUST_SALES_BAND"="factor",
                                              "ITEMS_PER_MONTH_BAND"="factor",
                                              "VISITS_PER_MONTH_BAND"="factor",
                                              "STAPLES_TRANS"="factor",
                                              "BDF_TRANS"="factor",
                                              "HPC_TRANS"="factor",
                                              "PF_TRANS"="factor",
                                              "FV_TRANS"="factor",
                                              "PROCESS_FOOD_TRANS"="factor",
                                              "BREAD_EGGS_TRANS"="factor",
                                              "FROZEN_TRANS"="factor",
                                              "MILK_TRANS"="factor",
                                              "LAUNDRY_TRANS"="factor",
                                              "PC_TRANS"="factor",
                                              "DISTINCT_CLASSES_BAND"="factor",
                                              "LAUNDRY_TRANS_1"="factor",
                                              "Cookies_TRANS"="factor",
                                              "ExoticFruitandVegetables_TRANS"="factor",
                                              "Healthbiscuit_TRANS"="factor",
                                              "Kellogs_TRANS"="factor",
                                              "BasmatiRice_TRANS"="factor",
                                              "Pastry_TRANS"="factor",
                                              "Dessert_TRANS"="factor",
                                              "Organics_TRANS"="factor",
                                              "PaperandTissue_TRANS"="factor",
                                              "Almonds_TRANS"="factor",
                                              "Pears_TRANS"="factor",
                                              "GingellyOil_TRANS"="factor",
                                              "Yoghurt_TRANS"="factor",
                                              "Dove_TRANS"="factor",
                                              "Mayonnaise_TRANS"="factor",
                                              "PeanutButter_TRANS"="factor",
                                              "HealthDietFood_TRANS"="factor",
                                              "OliveOil_TRANS"="factor",
                                              "ShowerGel_TRANS"="factor",
                                              "ChocolateSpread_TRANS"="factor",
                                              "Continental_TRANS"="factor",
                                              "GarbageBag_TRANS"="factor",
                                              "ReadytoEat_TRANS"="factor",
                                              "ToiletPaper_TRANS"="factor",
                                              "MOP_TRANS"="factor",
                                              "IceTea_TRANS"="factor",
                                              "ShowerandBath_TRANS"="factor",
                                              "CarCare_TRANS"="factor",
                                              "PetFood_TRANS"="factor",
                                              "Muesli_TRANS"="factor",
                                              "CottonBall_TRANS"="factor",
                                              "CannedFood_TRANS"="factor",
                                              "PremiumVegetables_TRANS"="factor",
                                              "Maybelline_TRANS"="factor",
                                              "PremixCoffee_TRANS"="factor",
                                              "ImportedCigarettes_TRANS"="factor",
                                              "MicrowaveItems_TRANS"="factor",
                                              "Housekeeping.Plugin_TRANS"="factor",
                                              "YogaMat_TRANS"="factor",
                                              "Moti_TRANS"="factor",
                                              "Toys_TRANS"="factor",
                                              "Loreal_TRANS"="factor",
                                              "AdultsBooks_TRANS"="factor",
                                              "Gala_TRANS"="factor",
                                              "Revlon_TRANS"="factor"))

## Dividing the data in Train Test.    
indexes = sample(nrow(Extrapolation_Data), 
                 size=0.2*nrow(Extrapolation_Data), replace= FALSE)
TrainData <- Extrapolation_Data[-indexes,]
TestData <- Extrapolation_Data[indexes,]    

##Creating new column Segment_C from Segment
TrainData$Segment_C <- as.factor(ifelse(TrainData$Segment=="C", "Y" , "N"))
TestData$Segment_C <- as.factor(ifelse(TestData$Segment=="C", "Y" , "N"))

## No Null Values
sum(is.na(TrainData))
# [1] 0

fitControl <- trainControl(method = "cv", number = 1,repeats = 2,
                           summaryFunction = twoClassSummary)    
set.seed(10001)

## Executing the below query is giving me error    
SVMFit <- train(Segment_C ~ TENURE + CUST_SALES + VISITS_PER_MONTH + FROZEN_TRANS + 
                          MILK_TRANS + PC_TRANS + Cookies_TRANS,  
                data=TrainData, method="lssvmPoly", 
                trControl = fitControl , metric = "Kappa")

Error:

Error in [.data.frame(data, , lvls[1]) : undefined columns selected

Am I missing anything? Is any of my variables incorrect?

Any help is much appreciated.

1
Update code formatting, title, error message, and grammar for readabilityParfait
looks like you are missing "_BAND" from "CUST_SALES" and "VISITS_PER_MONTH", which both should have the "_BAND" suffix, based on your read-in code.Nate
SVMFit<-train(Segment_C~MILK_TRANS, data=TrainData, method="lssvmPoly" , trControl = fitControl , metric = "Kappa") Even this is giving me same errorBhavinNagda

1 Answers

0
votes

The variable names are incorrectly defined in the below statement-- SVMFit<-train(Segment_C~TENURE+CUST_SALES+VISITS_PER_MONTH+FROZEN_TRANS+MILK_TRANS+PC_TRANS+Cookies_TRANS, data=TrainData, method="lssvmPoly" , trControl = fitControl , metric = "Kappa")

The original data does not contains the var. named as "Tenure". As well as CUST_SALES,VISITS_PER_MONTH: Original variable names are CUST_SALES_BAND and VISITS_PER_MONTH_BAND