I am trying to figure out how i could use any of the parallel processing packages like foreach or doParallel in this random forest loop i have created:
ModelInfo <- data.frame ( model=as.numeric()
,Nodesize=as.numeric()
,Mrty=as.numeric()
,Maxdepth=as.numeric()
,Cp=as.numeric()
,Accuracy_Training=as.numeric()
,AUC_Training=as.numeric())
w=1
set.seed(1809)
NumberOfSamples=1
# Number of iterations
rfPred=list()
pred=list()
roundpred=list()
cTab=list()
Acc=list()
pred.to.roc=list()
pred.rocr=list()
perf.rocr=list()
AUC=list()
Var_imp=list()
rf_model_tr = list()
length(rf_model_tr) <- NumberOfSamples
for (i in 1:NumberOfSamples)
{
rf_model_tr[[i]] = list()
rfPred[[i]]=list()
pred[[i]]=list()
roundpred[[i]]=list()
cTab[[i]]=list()
Acc[[i]]=list()
pred.to.roc[[i]]=list()
pred.rocr[[i]]=list()
perf.rocr[[i]]=list()
AUC[[i]]=list()
Var_imp[[i]]=list()
## Tune nodesize
nodesize =c(10,20,50,80,100,200)
n=length(nodesize)
length(rf_model_tr[[i]]) <- n
for ( j in 1: length (nodesize))
{
rf_model_tr[[i]][[j]] = list()
rfPred[[i]][[j]]=list()
pred[[i]][[j]]=list()
roundpred[[i]][[j]]=list()
cTab[[i]][[j]]=list()
Acc[[i]][[j]]=list()
pred.to.roc[[i]][[j]]=list()
pred.rocr[[i]][[j]]=list()
perf.rocr[[i]][[j]]=list()
AUC[[i]][[j]]=list()
Var_imp[[i]][[j]]=list()
## Tune mrty
mrtysize =c(2,3,4)
m=length(mrtysize)
length(rf_model_tr[[i]][[j]]) <- m
for ( k in 1: length (mrtysize))
{
rf_model_tr[[i]][[j]][[k]] = list()
rfPred[[i]][[j]][[k]]=list()
pred[[i]][[j]][[k]]=list()
roundpred[[i]][[j]][[k]]=list()
cTab[[i]][[j]][[k]]=list()
Acc[[i]][[j]][[k]]=list()
pred.to.roc[[i]][[j]][[k]]=list()
pred.rocr[[i]][[j]][[k]]=list()
perf.rocr[[i]][[j]][[k]]=list()
AUC[[i]][[j]][[k]]=list()
Var_imp[[i]][[j]][[k]]=list()
## Tune maxdepth
maxdep =c(10,20,30)
z=length(maxdep)
length(rf_model_tr[[i]][[j]][[k]]) <- z
for (l in 1:length (maxdep))
{
rf_model_tr[[i]][[j]][[k]][[l]] = list()
rfPred[[i]][[j]][[k]][[l]]=list()
pred[[i]][[j]][[k]][[l]]=list()
roundpred[[i]][[j]][[k]][[l]]=list()
cTab[[i]][[j]][[k]][[l]]=list()
Acc[[i]][[j]][[k]][[l]]=list()
pred.to.roc[[i]][[j]][[k]][[l]]=list()
pred.rocr[[i]][[j]][[k]][[l]]=list()
perf.rocr[[i]][[j]][[k]][[l]]=list()
AUC[[i]][[j]][[k]][[l]]=list()
Var_imp[[i]][[j]][[k]][[l]]=list()
## Tune cp
cp =c(0,0.01,0.001)
p=length(cp)
length(rf_model_tr[[i]][[j]][[k]][[l]]) <- p
for (m in 1:length (cp))
{
rf_model_tr[[i]][[j]][[k]][[l]][[m]]= randomForest (as.factor(class) ~.
, data=train,mtry=mrtysize[[k]],maxDepth = maxdep[[l]], replace=F, importance=T, do.trace=10, ntree=200,nodesize=nodesize[j],cp=cp[[m]])
#Accuracy
rfPred[[i]][[j]][[k]][[l]][[m]] <- predict(rf_model_tr[[i]][[j]][[k]][[l]][[m]], train, type = "prob")
pred[[i]][[j]][[k]][[l]][[m]] <- colnames(rfPred[[i]][[j]][[k]][[l]][[m]] )[apply(rfPred[[i]][[j]][[k]][[l]][[m]] ,1,which.max)]
cTab[[i]][[j]][[k]][[l]][[m]] = table(pred[[i]][[j]][[k]][[l]][[m]],train$class)
Acc[[i]][[j]][[k]][[l]][[m]]<- sum(diag(cTab[[i]][[j]][[k]][[l]][[m]])) / sum(cTab[[i]][[j]][[k]][[l]][[m]])
#AUC
pred.to.roc[[i]][[j]][[k]][[l]][[m]]<-rfPred[[i]][[j]][[k]][[l]][[m]][,2]
pred.rocr[[i]][[j]][[k]][[l]][[m]]<-prediction(pred.to.roc[[i]][[j]][[k]][[l]][[m]],as.factor(train$class))
perf.rocr[[i]][[j]][[k]][[l]][[m]]<-performance(pred.rocr[[i]][[j]][[k]][[l]][[m]],measure="auc",x.measure="cutoff")
AUC[[i]][[j]][[k]][[l]][[m]]<-as.numeric(perf.rocr[[i]][[j]][[k]][[l]][[m]]@y.values)
#Variable Importance
Var_imp[[i]][[j]][[k]][[l]][[m]]<-(importance(rf_model_tr[[i]][[j]][[k]][[l]][[m]],type=2))
ModelInfo[w,1]<-w
ModelInfo[w,2]<-nodesize[[j]]
ModelInfo[w,3]<-mrtysize[[k]]
ModelInfo[w,4]<-maxdep[[l]]
ModelInfo[w,5]<-cp[[m]]
ModelInfo[w,6]<-Acc[[i]][[j]][[k]][[l]][[m]]
ModelInfo[w,7]<-AUC[[i]][[j]][[k]][[l]][[m]]
w=w+1
}
}
}
}
}
Basically ,what i am doing is that i am creating all possible model variations with one dataset based on the available tuning parameters for a random forest (nodesize,cp ect) and storing that information to the table model info as every iteration goes by. In addition i add measures like accuracy and AUC, so as to compare the different models created in the end and make a pick.
The reason i am looking for an alternative, is that the caret package offers me only to tune the mtry allthough there i do have the chance to run parRF which could solve my problem, but i prefer to incorporate something here, how would that be possible?
I have read about the foreach and doParallel packages but i dont quite get how this could be syntaxed here.
If the initial data is needed please let me know, i just thought at this point to show the part that neeeds to be parallel computed.
Thank you in advance
svd
command run 7 times faster on a quad i7 and used all cores (real and virtual) – Panagiotis Kanavos