I'm not sure whether this is the most efficient way to perform batched matrix multiplication on Eigen tensors, but one solution is to map each tensor page (the 2-D slice at a fixed last index) as a matrix and perform an ordinary matrix multiplication on it:
#include <vector>

#include <Eigen/Dense>
#include <unsupported/Eigen/CXX11/Tensor>
// Rank-3 tensor of doubles; the code below treats it as a stack of 2-D pages.
using Tensor3d = Eigen::Tensor<double, 3>;
inline void batchedTensorMultiplication(const Tensor3d& A, const Tensor3d& B, const std::vector<int>& batchIndices, Tensor3d& C)
{
Eigen::DenseIndex memStepA = A.dimension(0) * A.dimension(1);
Eigen::DenseIndex memStepB = B.dimension(0) * B.dimension(1);
Eigen::DenseIndex memStepC = C.dimension(0) * C.dimension(1);
int outputBatchIndex = 0;
for (int batchIndex : batchIndices)
{
Eigen::Map<const Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>> pageA(A.data() + batchIndex * memStepA, A.dimension(0), A.dimension(1));
Eigen::Map<const Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>> pageB(B.data() + batchIndex * memStepB, B.dimension(0), B.dimension(1));
Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>> pageC(C.data() + outputBatchIndex * memStepC, C.dimension(0), C.dimension(1));
outputBatchIndex++;
pageC.noalias() = pageA * pageB;
}
}
int main()
{
constexpr int N = 50;
std::vector<int> batchIndices = { 0,1,2,3,4,9,10,11,12,13 };
Tensor3d A(1, 4, N), B(4, 4, N), C(1, 4, (int)batchIndices.size());
batchedTensorMultiplication(A, B, batchIndices, C);
return 0;
}