I have a very computationally intensive program that I need to run for 16 nested for loops to accomplish a iterative check of all possible permutations of 16 numerical vectors each of size 26. My first attempt was in R (my preferred language), but was quickly redirected to C++ via the Rcpp package. I can run the code locally on my PC (4-core, Intel i7-6600U CPU @ 2.60GHz, 16GB RAM), but also have access to Azure cloud computing and can spin up a cluster of any size.
My current code looks like this:
#include <Rcpp.h>
#include <math.h>
#include <iostream>
using namespace Rcpp;
// [[Rcpp::export]]
NumericMatrix optimalIndex(NumericVector a, NumericVector b, NumericVector c, NumericVector d, NumericVector e, NumericVector f,
NumericVector g, NumericVector h, NumericVector i, NumericVector j, NumericVector k, NumericVector l,
NumericVector m, NumericVector n, NumericVector o, NumericVector p){
NumericMatrix outp(1000000, 16);
int index = 0;
int minsum = 0;
for(int c1 = 0; c1 < a.size(); c1++){
for(int c2 = 0; c2 < b.size(); c2++){
for(int c3 = 0; c3 < c.size(); c3++){
for(int c4 = 0; c4 < d.size(); c4++){
for(int c5 = 0; c5 < e.size(); c5++){
for(int c6 = 0; c6 < f.size(); c6++){
for(int c7 = 0; c7 < g.size(); c7++){
for(int c8 = 0; c8 < h.size(); c8++){
for(int c9 = 0; c9 < i.size(); c9++){
for(int c10 = 0; c10 < j.size(); c10++){
for(int c11 = 0; c11 < k.size(); c11++){
for(int c12 = 0; c12 < l.size(); c12++){
for(int c13 = 0; c13 < m.size(); c13++){
for(int c14 = 0; c14 < n.size(); c14++){
for(int c15 = 0; c15 < o.size(); c15++){
for(int c16 = 0; c16 < p.size(); c16++){
minsum = a(c1) + b(c2) + c(c3) + d(c4) + e(c5) + f(c6)
+ g(c7) + h(c8) + i(c9) + j(c10) + k(c11) + l(c12)
+ m(c13) + n(c14) + o(c15) + p(c16);
if(minsum == 0){
outp(index, 0) = c1;
outp(index, 1) = c2;
outp(index, 2) = c3;
outp(index, 3) = c4;
outp(index, 4) = c5;
outp(index, 5) = c6;
outp(index, 6) = c7;
outp(index, 7) = c8;
outp(index, 8) = c9;
outp(index, 9) = c10;
outp(index, 10) = c11;
outp(index, 11) = c12;
outp(index, 12) = c13;
outp(index, 13) = c14;
outp(index, 14) = c15;
outp(index, 15) = c16;
outp(index, 16) = c17;
outp(index, 17) = c18;
outp(index, 18) = c19;
outp(index, 19) = c20;
outp(index, 20) = c21;
outp(index, 21) = c22;
outp(index, 22) = c23;
outp(index, 23) = c24;
outp(index, 24) = c25;
outp(index, 25) = c26;
outp(index, 26) = c27;
outp(index, 27) = c28;
outp(index, 28) = c29;
outp(index, 29) = c30;
outp(index, 30) = c31;
index++;
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
return(outp);
}
The dimension of the output of this function, outp, is unknown at this point, so I arbitrarily chose 1 million rows. I want to return the index of each column where the row sum matches a specified condition, ie. = 0.
Obviously, this is taking what feels like YEARS to run. I am not sure if parallelization is an option for this loop, or what other methods I can use to increase speed. Like I said, I can run in Azure with more cores and/or more memory if that will do it.
Is there a better/faster way to do this?
std::next_permutation? That might be appropriate here. - cigiena(c1),b(c2).... correspond to? Functions? Simple ones? If not, calculateac1 = a(c1)just after setting ofc1...etc. - Damienstd::next_permutationas it has been some time since I last coded inC++. Can you share how you would implement such a function? - Kyle Power