To add to cdeterman's answer:
You can use eigen's build in parallelization for dense matrix products. In order to do so, you need to compile with open mp activated.
// [[Rcpp::depends(RcppArmadillo, RcppEigen)]]
// [[Rcpp::plugins(openmp)]]
#include <omp.h>
#include <RcppArmadillo.h>
#include <RcppEigen.h>
// [[Rcpp::export]]
SEXP armaMatMult(arma::mat A, arma::mat B){
arma::mat C = A * B;
return Rcpp::wrap(C);
}
// [[Rcpp::export]]
SEXP eigenMatMult(Eigen::MatrixXd A,
Eigen::MatrixXd B,
int n_cores){
Eigen::setNbThreads(n_cores);
//qDebug() << Eigen::nbThreads( );
Eigen::MatrixXd C = A * B;
return Rcpp::wrap(C);
}
// [[Rcpp::export]]
SEXP eigenMapMatMult2(const Eigen::Map<Eigen::MatrixXd> A,
Eigen::Map<Eigen::MatrixXd> B,
int n_cores){
Eigen::setNbThreads(n_cores);
Eigen::MatrixXd C = A * B;
return Rcpp::wrap(C);
}
Here are some benchmarks:
Note that if N = k = 100
, parallelization does not necessarily improve performance.
If the matrix dimensions get larger, parallelization starts to have an impact (N = k = 1000)
:
library(microbenchmark)
# Benchmark 1: N = k = 100
N <- 100
k <- 100
A <- matrix(rnorm(N*k), N, k)
B <- matrix(rnorm(N*k), k, N)
microbenchmark(A%*%B,
armaMatMult2(A, B),
eigenMatMult2(A, B, n_cores = 1),
eigenMatMult2(A, B, n_cores = 2),
eigenMatMult2(A, B, n_cores = 4),
eigenMapMatMult2(A, B, n_cores = 1),
eigenMapMatMult2(A, B, n_cores = 2),
eigenMapMatMult2(A, B, n_cores = 4),
times = 100
# Unit: microseconds
# expr min lq mean median uq max neval
# A %*% B 535.6 540.75 552.594 551.25 554.50 650.2 100
# armaMatMult2(A, B) 542.0 549.10 560.975 556.35 560.25 738.1 100
# eigenMatMult2(A, B, n_cores = 1) 147.1 152.65 159.165 159.65 162.90 180.5 100
# eigenMatMult2(A, B, n_cores = 2) 97.1 109.90 124.496 119.60 127.50 391.8 100
# eigenMatMult2(A, B, n_cores = 4) 71.7 88.15 155.220 115.55 216.95 507.3 100
# eigenMapMatMult2(A, B, n_cores = 1) 139.1 150.10 154.889 154.20 158.35 244.3 100
# eigenMapMatMult2(A, B, n_cores = 2) 93.4 105.70 116.808 113.55 120.40 323.7 100
# eigenMapMatMult2(A, B, n_cores = 4) 66.8 82.60 161.516 196.25 210.40 598.9 100
)
# Benchmark 2: N = k = 1000
N <- 1000
k <- 1000
A <- matrix(rnorm(N*k), N, k)
B <- matrix(rnorm(N*k), k, N)
microbenchmark(A%*%B,
armaMatMult2(A, B),
eigenMatMult2(A, B, n_cores = 1),
eigenMatMult2(A, B, n_cores = 2),
eigenMatMult2(A, B, n_cores = 4),
eigenMapMatMult2(A, B, n_cores = 1),
eigenMapMatMult2(A, B, n_cores = 2),
eigenMapMatMult2(A, B, n_cores = 4),
times = 100
)
Unit: milliseconds
expr min lq mean median uq
A %*% B 597.1293 605.56840 814.52389 665.86650 1025.5896
armaMatMult2(A, B) 603.3894 620.25675 830.98947 693.22355 1078.4853
eigenMatMult2(A, B, n_cores = 1) 131.4696 135.22475 186.69826 193.37870 219.8727
eigenMatMult2(A, B, n_cores = 2) 67.8948 71.71355 114.52759 74.17380 173.3060
eigenMatMult2(A, B, n_cores = 4) 41.8564 48.87075 79.55535 72.00705 106.8572
eigenMapMatMult2(A, B, n_cores = 1) 125.3890 129.26125 175.09933 177.23655 213.0536
eigenMapMatMult2(A, B, n_cores = 2) 62.2866 65.78785 115.74248 79.92470 167.0217
eigenMapMatMult2(A, B, n_cores = 4) 35.2977 40.42480 68.21669 63.13655 97.2571
max neval
1217.6475 100
1446.5127 100
419.2043 100
217.9513 100
139.9629 100
298.2859 100
230.6307 100
118.2553 100