Problems using FFT on multiplication of large numbers

Question

I am recently trying to multiply a bunch (up to 10^5) of very small numbers (order of 10^-4) with each other, so I would end in the order of 10^-(4*10^5) which does not fit into any variable.

My approach is the following:

Multiply each number by 10^8 and store it in an array split by the powers of 10, i.e. I make a polynomial given by the powers of 10 out of it. An example would be: p = 0.1234 -> p*10^8 = 12340000 -> A={0, 0 ,0, 0, 4, 3, 2, 1}.
Multiply those Arrays using FFT
iFFT the result

This is done multiple times for a small number of different cases. What I want to know in the end is the fraction of one such product over the sum of all the products up to a precision of 10^-6. To do this, between step 2 and 3 the result is added to a sum array which is in the end also iFFTed. As the required precision is pretty low, I am not dividing polynomials but only take the first few numbers into an integer.

My issue is the following: the FFT and/or iFFT is not properly working! I am new to this stuff and have only implemented a FFT once. My code is as follows (C++14):

#include <cstdio>
#include <vector>
#include <cmath>
#include <complex>
#include <iostream>
using namespace std;

const double PI = 4*atan(1.0);

vector<complex<double>> FFT (vector<complex<double>> A, long N)
{
    vector<complex<double>> Ans(0);
    if(N==1) 
    {
        Ans.push_back(A[0]);
        return Ans;
    }
    vector<complex<double>> even(N/2);
    vector<complex<double>> odd(N/2);
    for(long i=0; i<(N/2); i++)
    {
        even[i] = A[2*i];
        odd[i] = A[2*i+1];
    }
    vector<complex<double>> L1 = FFT(even, N/2);
    vector<complex<double>> L2 = FFT(odd, N/2);
    for(long i=0; i<N; i++)
    {
        complex<double> z(cos(2*PI*i/N),sin(2*PI*i/N));
        long k = i%(N/2);
        Ans.push_back(L1[k] + z*L2[k]);
    }
    return Ans;
    }

    vector<complex<double>> iFFT (vector<complex<double>> A, long N)
    {
    vector<complex<double>> Ans(0);
    if(N==1) 
    {
        Ans.push_back(A[0]);
        return Ans;
    }
    vector<complex<double>> even(N/2);
    vector<complex<double>> odd(N/2);
    for(long i=0; i<(N/2); i++)
    {
        even[i] = A[2*i];
        odd[i] = A[2*i+1];
    }
    vector<complex<double>> L1 = FFT(even, N/2);
    vector<complex<double>> L2 = FFT(odd, N/2);
    for(long i=0; i<N; i++)
    {
        complex<double> z(cos(-2*PI*i/N),sin(-2*PI*i/N));
        complex<double> inv(double(1.0/N), 0);
        long k = i%(N/2);
        Ans.push_back(inv*(L1[k]+z*L2[k]));
    }
    return Ans;
}

vector<complex<double>> PMult (vector<complex<double>> A, vector<complex<double>> B, long L) 
{
vector<complex<double>> Ans(L);
for(int i=0; i<L; i++)
{
    Ans[i] = A[i]*B[i];
}
return Ans;
}

vector<complex<double>> DtoA (double x)
{
    vector<complex<double>> ans(8);
    long X = long(x*10000000);
    ans[0] = complex<double>(double(X%10), 0.0); X/=10;
    ans[1] = complex<double>(double(X%10), 0.0); X/=10;
    ans[2] = complex<double>(double(X%10), 0.0); X/=10;
    ans[3] = complex<double>(double(X%10), 0.0); X/=10;
    ans[4] = complex<double>(double(X%10), 0.0); X/=10;
    ans[5] = complex<double>(double(X%10), 0.0); X/=10;
    ans[6] = complex<double>(double(X%10), 0.0); X/=10;
    ans[7] = complex<double>(double(X%10), 0.0);
    return ans;
}

int main()
{
    vector<vector<complex<double>>> W;
    int T, N, M;
    double p;
    scanf("%d", &T);
    while( T-- )
    {
        scanf("%d %d", &N, &M);
        W.resize(N); 
        for(int i=0; i<N; i++)
        {
            cin >> p;
            W[i] = FFT(DtoA(p),8);
            for(int j=1; j<M; j++)
            {
                cin >> p;
                W[i] = PMult(W[i], FFT(DtoA(p),8), 8);
            }
        }
        vector<complex<double>> Sum(8);
        for(int j=0; j<8; j++) Sum[j]=W[0][j];
        for(int i=1; i<N; i++)
        {
            for(int j=0; j<8; j++)
            {
                Sum[j]+=W[i][j];
            }
        }

        W[0]=iFFT(W[0],8);
        Sum=iFFT(Sum, 8);

        long X=0;
        long Y=0;
        int a;
        for(a=0; Sum[a].real()!=0; a++);
        for(int i=a; i<8; i++)
        {
            Y*=10;
            Y=Y+long(Sum[i].real());
        }
        for(int i=a; i<8; i++)
        {
            X*=10;
            X=X+long(W[0][i].real()); 
        }
        double ans = 0.0;
        if(Y) ans=double(X)/double(Y);
        printf("%.7f\n", ans);
    }
}

What I have observed is, that for an Array that constists of only zeroes except one entry, the FFT returns an Array with more than one non-empty entry. Also, after the iFFT is done, the result still contains entries with non-zero imaginary part.

Can someone find the error or provide me a tip where I could make the solution easier? As I want it to be fast, I would not like to do a naive multiplication. Would Karatsuba's algorithm be better as I don't need complex numbers?

Some years ago I wrote a library for large integer values. If I recall correctly, then FFT had only with very very large numbers a performance gain over karatsuba. — Daniel Jour
Thank you, I will try to do it using Karatsuba now, seems more easy to implement and if the performance is not worse there is no point in using FFT. — el_paco

Arrigo Arrigo · Accepted Answer · 2015-07-13T23:35:11

I checked with an old Java implementation of mine for the FFT (sorry for the nasty code). I found the following things in your FFT and DtoA functions:

a 0 was missing in your DtoA function
the order in which you set the coefficients in the vector was reversed (maybe this was intentional for some reason)
the "combine" phase of the Cooley Tukey algorithm was not correct: the first half of the terms is of the form a + b * c and the second half is of the form a - b * c.

The code below is not efficient but should be clear.

#include <iostream>
#include <vector>
#include <complex>
#include <exception>
#include <cmath>

using namespace std;

vector<complex<double>> fft(const vector<complex<double>> &x) {
    int N = x.size();

    // base case
    if (N == 1) return vector<complex<double>> { x[0] };

    // radix 2 Cooley-Tukey FFT
    if (N % 2 != 0) { throw new std::exception(); }

    // fft of even terms
    vector<complex<double>> even, odd;
    for (int k = 0; k < N/2; k++) {
        even.push_back(x[2 * k]);
        odd.push_back(x[2 * k + 1]);
    }
    vector<complex<double>> q = fft(even);
    vector<complex<double>> r = fft(odd);

    // combine
    vector<complex<double>> y;
    for (int k = 0; k < N/2; k++) {
        double kth = -2 * k * M_PI / N;
        complex<double> wk = complex<double>(cos(kth), sin(kth));
        y.push_back(q[k] + (wk * r[k]));
    }
    for (int k = 0; k < N/2; k++) {
        double kth = -2 * k * M_PI / N;
        complex<double> wk = complex<double>(cos(kth), sin(kth));
        y.push_back(q[k] - (wk * r[k])); // you didn't do this
    }
    return y;
}

vector<complex<double>> DtoA (double x)
{
    vector<complex<double>> ans(8);
    long X = long(x*100000000); // a 0 was missing  here
    ans[7] = complex<double>(double(X%10), 0.0); X/=10;
    ans[6] = complex<double>(double(X%10), 0.0); X/=10;
    ans[5] = complex<double>(double(X%10), 0.0); X/=10;
    ans[4] = complex<double>(double(X%10), 0.0); X/=10;
    ans[3] = complex<double>(double(X%10), 0.0); X/=10;
    ans[2] = complex<double>(double(X%10), 0.0); X/=10;
    ans[1] = complex<double>(double(X%10), 0.0); X/=10;
    ans[0] = complex<double>(double(X%10), 0.0);
    return ans;
}

int main ()
{
    double n = 0.1234;
    auto nComplex = DtoA(n);
    for (const auto &e : nComplex) {
        std::cout << e << " ";
    }
    std::cout << std::endl;

    try {
        auto nFFT = fft(nComplex);

        for (const auto &e : nFFT) {
            std::cout << e << " ";
        }
    }
    catch (const std::exception &e) {
        std::cout << "exception" << std::endl;
    }
  return 0;
}

Output of the program (I checked it with Octave, it's the same):

(1,0) (2,0) (3,0) (4,0) (0,0) (0,0) (0,0) (0,0) 
(10,0) (-0.414214,-7.24264) (-2,2) (2.41421,-1.24264) (-2,0) (2.41421,1.24264) (-2,-2) (-0.414214,7.24264)

Hope this helps.

EDIT:

Regarding the inverse FFT, you can demonstrate that

iFFT(x) = (1 / N) conjugate( FFT( conjugate(x) ) )

where N is the number of elements in the array x. So you can use the fft function to compute the ifft:

vector<complex<double>> ifft(const vector<complex<double>> &vec) {
    std::vector<complex<double>> conj;
    for (const auto &e : vec) {
        conj.push_back(std::conj(e));
    }

    std::vector<complex<double>> vecFFT = fft(conj);

    std::vector<complex<double>> result;
    for (const auto &e : vecFFT) {
        result.push_back(std::conj(e) / static_cast<double>(vec.size()));
    }

    return result;
}

Here is the modified main:

int main ()
{
    double n = 0.1234;
    auto nComplex = DtoA(n);
    for (const auto &e : nComplex) {
        std::cout << e << " ";
    }
    std::cout << std::endl;

    auto nFFT = fft(nComplex);
    for (const auto &e : nFFT)
        std::cout << e << " ";
    std::cout << std::endl;

    auto iFFT = ifft(nFFT);
    for (const auto &e : iFFT)
        std::cout << e << " ";
    return 0;
}

and the output:

(1,0) (2,0) (3,0) (4,0) (0,0) (0,0) (0,0) (0,0) 
(10,0) (-0.414214,-7.24264) (-2,2) (2.41421,-1.24264) (-2,0) (2.41421,1.24264) (-2,-2) (-0.414214,7.24264) 
(1,-0) (2,-1.08163e-16) (3,7.4688e-17) (4,2.19185e-16) (0,-0) (0,1.13882e-16) (0,-7.4688e-17) (0,-2.24904e-16)

Note that numbers like 1e-16 is pretty much 0 (doubles are not perfect in hardware).

Problems using FFT on multiplication of large numbers

1 Answers