2
votes

I am recently trying to multiply a bunch (up to 10^5) of very small numbers (order of 10^-4) with each other, so I would end in the order of 10^-(4*10^5) which does not fit into any variable.

My approach is the following:

  1. Multiply each number by 10^8 and store it in an array split by the powers of 10, i.e. I make a polynomial given by the powers of 10 out of it. An example would be: p = 0.1234 -> p*10^8 = 12340000 -> A={0, 0 ,0, 0, 4, 3, 2, 1}.
  2. Multiply those Arrays using FFT
  3. iFFT the result

This is done multiple times for a small number of different cases. What I want to know in the end is the fraction of one such product over the sum of all the products up to a precision of 10^-6. To do this, between step 2 and 3 the result is added to a sum array which is in the end also iFFTed. As the required precision is pretty low, I am not dividing polynomials but only take the first few numbers into an integer.

My issue is the following: the FFT and/or iFFT is not properly working! I am new to this stuff and have only implemented a FFT once. My code is as follows (C++14):

#include <cstdio>
#include <vector>
#include <cmath>
#include <complex>
#include <iostream>
using namespace std;

const double PI = 4*atan(1.0);

vector<complex<double>> FFT (vector<complex<double>> A, long N)
{
    vector<complex<double>> Ans(0);
    if(N==1) 
    {
        Ans.push_back(A[0]);
        return Ans;
    }
    vector<complex<double>> even(N/2);
    vector<complex<double>> odd(N/2);
    for(long i=0; i<(N/2); i++)
    {
        even[i] = A[2*i];
        odd[i] = A[2*i+1];
    }
    vector<complex<double>> L1 = FFT(even, N/2);
    vector<complex<double>> L2 = FFT(odd, N/2);
    for(long i=0; i<N; i++)
    {
        complex<double> z(cos(2*PI*i/N),sin(2*PI*i/N));
        long k = i%(N/2);
        Ans.push_back(L1[k] + z*L2[k]);
    }
    return Ans;
    }

    vector<complex<double>> iFFT (vector<complex<double>> A, long N)
    {
    vector<complex<double>> Ans(0);
    if(N==1) 
    {
        Ans.push_back(A[0]);
        return Ans;
    }
    vector<complex<double>> even(N/2);
    vector<complex<double>> odd(N/2);
    for(long i=0; i<(N/2); i++)
    {
        even[i] = A[2*i];
        odd[i] = A[2*i+1];
    }
    vector<complex<double>> L1 = FFT(even, N/2);
    vector<complex<double>> L2 = FFT(odd, N/2);
    for(long i=0; i<N; i++)
    {
        complex<double> z(cos(-2*PI*i/N),sin(-2*PI*i/N));
        complex<double> inv(double(1.0/N), 0);
        long k = i%(N/2);
        Ans.push_back(inv*(L1[k]+z*L2[k]));
    }
    return Ans;
}

vector<complex<double>> PMult (vector<complex<double>> A, vector<complex<double>> B, long L) 
{
vector<complex<double>> Ans(L);
for(int i=0; i<L; i++)
{
    Ans[i] = A[i]*B[i];
}
return Ans;
}

vector<complex<double>> DtoA (double x)
{
    vector<complex<double>> ans(8);
    long X = long(x*10000000);
    ans[0] = complex<double>(double(X%10), 0.0); X/=10;
    ans[1] = complex<double>(double(X%10), 0.0); X/=10;
    ans[2] = complex<double>(double(X%10), 0.0); X/=10;
    ans[3] = complex<double>(double(X%10), 0.0); X/=10;
    ans[4] = complex<double>(double(X%10), 0.0); X/=10;
    ans[5] = complex<double>(double(X%10), 0.0); X/=10;
    ans[6] = complex<double>(double(X%10), 0.0); X/=10;
    ans[7] = complex<double>(double(X%10), 0.0);
    return ans;
}

int main()
{
    vector<vector<complex<double>>> W;
    int T, N, M;
    double p;
    scanf("%d", &T);
    while( T-- )
    {
        scanf("%d %d", &N, &M);
        W.resize(N); 
        for(int i=0; i<N; i++)
        {
            cin >> p;
            W[i] = FFT(DtoA(p),8);
            for(int j=1; j<M; j++)
            {
                cin >> p;
                W[i] = PMult(W[i], FFT(DtoA(p),8), 8);
            }
        }
        vector<complex<double>> Sum(8);
        for(int j=0; j<8; j++) Sum[j]=W[0][j];
        for(int i=1; i<N; i++)
        {
            for(int j=0; j<8; j++)
            {
                Sum[j]+=W[i][j];
            }
        }

        W[0]=iFFT(W[0],8);
        Sum=iFFT(Sum, 8);

        long X=0;
        long Y=0;
        int a;
        for(a=0; Sum[a].real()!=0; a++);
        for(int i=a; i<8; i++)
        {
            Y*=10;
            Y=Y+long(Sum[i].real());
        }
        for(int i=a; i<8; i++)
        {
            X*=10;
            X=X+long(W[0][i].real()); 
        }
        double ans = 0.0;
        if(Y) ans=double(X)/double(Y);
        printf("%.7f\n", ans);
    }
}

What I have observed is, that for an Array that constists of only zeroes except one entry, the FFT returns an Array with more than one non-empty entry. Also, after the iFFT is done, the result still contains entries with non-zero imaginary part.

Can someone find the error or provide me a tip where I could make the solution easier? As I want it to be fast, I would not like to do a naive multiplication. Would Karatsuba's algorithm be better as I don't need complex numbers?

1
Some years ago I wrote a library for large integer values. If I recall correctly, then FFT had only with very very large numbers a performance gain over karatsuba.Daniel Jour
Thank you, I will try to do it using Karatsuba now, seems more easy to implement and if the performance is not worse there is no point in using FFT.el_paco

1 Answers

1
votes

I checked with an old Java implementation of mine for the FFT (sorry for the nasty code). I found the following things in your FFT and DtoA functions:

  • a 0 was missing in your DtoA function
  • the order in which you set the coefficients in the vector was reversed (maybe this was intentional for some reason)
  • the "combine" phase of the Cooley Tukey algorithm was not correct: the first half of the terms is of the form a + b * c and the second half is of the form a - b * c.

The code below is not efficient but should be clear.

#include <iostream>
#include <vector>
#include <complex>
#include <exception>
#include <cmath>

using namespace std;

vector<complex<double>> fft(const vector<complex<double>> &x) {
    int N = x.size();

    // base case
    if (N == 1) return vector<complex<double>> { x[0] };

    // radix 2 Cooley-Tukey FFT
    if (N % 2 != 0) { throw new std::exception(); }

    // fft of even terms
    vector<complex<double>> even, odd;
    for (int k = 0; k < N/2; k++) {
        even.push_back(x[2 * k]);
        odd.push_back(x[2 * k + 1]);
    }
    vector<complex<double>> q = fft(even);
    vector<complex<double>> r = fft(odd);

    // combine
    vector<complex<double>> y;
    for (int k = 0; k < N/2; k++) {
        double kth = -2 * k * M_PI / N;
        complex<double> wk = complex<double>(cos(kth), sin(kth));
        y.push_back(q[k] + (wk * r[k]));
    }
    for (int k = 0; k < N/2; k++) {
        double kth = -2 * k * M_PI / N;
        complex<double> wk = complex<double>(cos(kth), sin(kth));
        y.push_back(q[k] - (wk * r[k])); // you didn't do this
    }
    return y;
}

vector<complex<double>> DtoA (double x)
{
    vector<complex<double>> ans(8);
    long X = long(x*100000000); // a 0 was missing  here
    ans[7] = complex<double>(double(X%10), 0.0); X/=10;
    ans[6] = complex<double>(double(X%10), 0.0); X/=10;
    ans[5] = complex<double>(double(X%10), 0.0); X/=10;
    ans[4] = complex<double>(double(X%10), 0.0); X/=10;
    ans[3] = complex<double>(double(X%10), 0.0); X/=10;
    ans[2] = complex<double>(double(X%10), 0.0); X/=10;
    ans[1] = complex<double>(double(X%10), 0.0); X/=10;
    ans[0] = complex<double>(double(X%10), 0.0);
    return ans;
}

int main ()
{
    double n = 0.1234;
    auto nComplex = DtoA(n);
    for (const auto &e : nComplex) {
        std::cout << e << " ";
    }
    std::cout << std::endl;

    try {
        auto nFFT = fft(nComplex);

        for (const auto &e : nFFT) {
            std::cout << e << " ";
        }
    }
    catch (const std::exception &e) {
        std::cout << "exception" << std::endl;
    }
  return 0;
}

Output of the program (I checked it with Octave, it's the same):

(1,0) (2,0) (3,0) (4,0) (0,0) (0,0) (0,0) (0,0) 
(10,0) (-0.414214,-7.24264) (-2,2) (2.41421,-1.24264) (-2,0) (2.41421,1.24264) (-2,-2) (-0.414214,7.24264)

Hope this helps.

EDIT:

Regarding the inverse FFT, you can demonstrate that

iFFT(x) = (1 / N) conjugate( FFT( conjugate(x) ) )

where N is the number of elements in the array x. So you can use the fft function to compute the ifft:

vector<complex<double>> ifft(const vector<complex<double>> &vec) {
    std::vector<complex<double>> conj;
    for (const auto &e : vec) {
        conj.push_back(std::conj(e));
    }

    std::vector<complex<double>> vecFFT = fft(conj);

    std::vector<complex<double>> result;
    for (const auto &e : vecFFT) {
        result.push_back(std::conj(e) / static_cast<double>(vec.size()));
    }

    return result;
}

Here is the modified main:

int main ()
{
    double n = 0.1234;
    auto nComplex = DtoA(n);
    for (const auto &e : nComplex) {
        std::cout << e << " ";
    }
    std::cout << std::endl;

    auto nFFT = fft(nComplex);
    for (const auto &e : nFFT)
        std::cout << e << " ";
    std::cout << std::endl;

    auto iFFT = ifft(nFFT);
    for (const auto &e : iFFT)
        std::cout << e << " ";
    return 0;
}

and the output:

(1,0) (2,0) (3,0) (4,0) (0,0) (0,0) (0,0) (0,0) 
(10,0) (-0.414214,-7.24264) (-2,2) (2.41421,-1.24264) (-2,0) (2.41421,1.24264) (-2,-2) (-0.414214,7.24264) 
(1,-0) (2,-1.08163e-16) (3,7.4688e-17) (4,2.19185e-16) (0,-0) (0,1.13882e-16) (0,-7.4688e-17) (0,-2.24904e-16)

Note that numbers like 1e-16 is pretty much 0 (doubles are not perfect in hardware).