I am currently implementing Strassen's Matrix Multiplication algorithm on C and got it to work correctly. However, the way I am passing around the sub matrices on each recursion is far from optimal as I make a new array and copy the elements from the original matrix. Here is my implementation
// this algorithm assumes that both matrices have the same size and are square matrices.
int **matrix_multiply_divide_conquer(const int **A, const int **B, int length, int threshold)
{
int **result;
int i, j, k;
// allocate result space
result = (int **)malloc(sizeof(int *) * length);
for (i = 0; i < length; i++)
{
result[i] = (int *)malloc(sizeof(int) * length);
}
if (length == threshold)
{
// compute the dot product
matrix_multiply((const int **)A, (const int **)B, result, length, length, length, false);
return result;
}
else
{
// stored in an int given that the matrix length is guaranteed to be a power of 2
int nLength = length / 2;
int i;
int **a11;
int **a12;
int **a21;
int **a22;
int **b11;
int **b12;
int **b21;
int **b22;
// allocate new sub matrices
a11 = (int **)malloc(sizeof(int *) * nLength);
a12 = (int **)malloc(sizeof(int *) * nLength);
a21 = (int **)malloc(sizeof(int *) * nLength);
a22 = (int **)malloc(sizeof(int *) * nLength);
b11 = (int **)malloc(sizeof(int *) * nLength);
b12 = (int **)malloc(sizeof(int *) * nLength);
b21 = (int **)malloc(sizeof(int *) * nLength);
b22 = (int **)malloc(sizeof(int *) * nLength);
for (i = 0; i < nLength; i++)
{
a11[i] = (int *)malloc(sizeof(int) * nLength);
a12[i] = (int *)malloc(sizeof(int) * nLength);
a21[i] = (int *)malloc(sizeof(int) * nLength);
a22[i] = (int *)malloc(sizeof(int) * nLength);
b11[i] = (int *)malloc(sizeof(int) * nLength);
b12[i] = (int *)malloc(sizeof(int) * nLength);
b21[i] = (int *)malloc(sizeof(int) * nLength);
b22[i] = (int *)malloc(sizeof(int) * nLength);
}
a11 = get_sub_matrix((const int **)A, 0, 0, nLength);
a12 = get_sub_matrix((const int **)A, 0, nLength, nLength);
a21 = get_sub_matrix((const int **)A, nLength, 0, nLength);
a22 = get_sub_matrix((const int **)A, nLength, nLength, nLength);
b11 = get_sub_matrix((const int **)B, 0, 0, nLength);
b12 = get_sub_matrix((const int **)B, 0, nLength, nLength);
b21 = get_sub_matrix((const int **)B, nLength, 0, nLength);
b22 = get_sub_matrix((const int **)B, nLength, nLength, nLength);
// combine the results
int **c11 = matrix_addition((const int **)matrix_multiply_divide_conquer((const int **)a11, (const int **)b11, nLength, 4), (const int **)matrix_multiply_divide_conquer((const int **)a12, (const int **)b21, nLength, 4), nLength, nLength);
int **c12 = matrix_addition((const int **)matrix_multiply_divide_conquer((const int **)a11, (const int **)b12, nLength, 4), (const int **)matrix_multiply_divide_conquer((const int **)a12, (const int **)b22, nLength, 4), nLength, nLength);
int **c21 = matrix_addition((const int **)matrix_multiply_divide_conquer((const int **)a21, (const int **)b11, nLength, 4), (const int **)matrix_multiply_divide_conquer((const int **)a22, (const int **)b21, nLength, 4), nLength, nLength);
int **c22 = matrix_addition((const int **)matrix_multiply_divide_conquer((const int **)a21, (const int **)b12, nLength, 4), (const int **)matrix_multiply_divide_conquer((const int **)a22, (const int **)b22, nLength, 4), nLength, nLength);
// combine result quarters
combine_sub_matrix((const int **)c11, result, nLength, 0, 0);
combine_sub_matrix((const int **)c12, result, nLength, 0, nLength);
combine_sub_matrix((const int **)c21, result, nLength, nLength, 0);
combine_sub_matrix((const int **)c22, result, nLength, nLength, nLength);
return result;
}
}
this is the get_sub_matrix method:
int **get_sub_matrix(const int **A, int verticalOffset, int horizontalOffset, int size)
{
int **sub_matrix;
sub_matrix = (int **)malloc(sizeof(int *) * size);
int i, j;
for (i = 0; i < size; i++)
{
//each row will contain a number of columns size
sub_matrix[i] = (int *)malloc(sizeof(int) * size);
}
for (i = 0; i < size; i++)
{
for (j = 0; j < size; j++)
{
sub_matrix[i][j] = A[i + verticalOffset][j + horizontalOffset];
}
}
return sub_matrix;
}
I am sure this is not the optimal way of passing the submatrices around as they are only read and never modified, so I would like to ask you if using pointers for this problem is possible and how to do it (as I am still wrapping my head around them).
I have tried the following but it did not work:
int **a11 = &A[0][0];
int **a12 = &A[0][nLength];
int **a21 = &A[nLength][0] ;
int **a22 = &A[nLength][nLength];
int **b11 = &B[0][0];
int **b12 = &B[0][nLength];
int **b21 = &B[nLength][0] ;
int **b22 = &B[nLength][nLength];
Thanks for helping me out.