2
votes

I am trying to carry out the convolution of two arrays via FFT, using the FFTW library in C++ and implementing the overlap-add algorithm. The vector data specifically consists of doubles loaded from WAV files using sndfile — one of them is a WAV file for processing, the other is a WAV file of an impuse response (IR), so essentially the goal is Convolution Reverb.

I have previously implemented this successfully with Python, but that involved using FFT convolution via scipy.signal, so I didn’t have to implement overlap-add myself. But from that experience, I can confirm that I am working with an impulse response that should neatly convolve with the input signal via FFT, and yield a recognizable echo effect comparable to the Python output. I can also confirm that the 2 WAV files have matching bitrates (16 bit) and sample rates (44.1 kHz).

A few false starts only yielded flat noise as output, a familiar stumbling block. Now I am yielding a WAV output that almost resembles the changing taps of the impulse response, but with only pulses of noise at each perceived “tap.”

I am following two guides for my implementation of overlap-add; https://www.dspguide.com/ch18/1.htm

http://blog.robertelder.org/overlap-add-overlap-save/

I’m also following some online examples and other StackOverflow threads, and reading the FFTW official docs (http://www.fftw.org/fftw2_doc/fftw_2.html).

Here is my code, with the process explained below:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sndfile.h>
#include <iostream>
#include <cstring>
#include <vector>
#include <complex.h>
#include <fftw3.h>

using namespace std;

#define ARRAY_LEN(x)    ((int) (sizeof (x) / sizeof (x [0])))
#define MAX(x,y)        ((x) > (y) ? (x) : (y))
#define MIN(x,y)        ((x) < (y) ? (x) : (y))

fftw_complex* fftw_complex_transform(vector<double >);
fftw_complex* ifft_from_complex_vector(vector<vector<double> >);
vector<double> convolved_block(vector<double>, vector<complex<double> >, int);
vector<complex<double> > filter_kernel_cpx_vec(vector<double>);
vector<double> padded(vector<double>, int);
void testDummyData();

int main (int argc, char ** argv) {
    SNDFILE *infile, *outfile, *irfile ;
    SF_INFO sfinfo ;
    double buffer[1024];
    sf_count_t count;

    // 1 - import both WAV files 
    vector<double> inputWavArr;
    vector<double> irWavArr;

    if (argc != 4) {
        printf("\nUsage :\n\n    <executable name>  <input signal file> <impulse response file> <output file>\n") ;
        exit(0);
    }

    memset (&sfinfo, 0, sizeof (sfinfo)) ;
    if ((infile = sf_open (argv [1], SFM_READ, &sfinfo)) == NULL)     {     
        printf ("Error : Not able to open input file '%s'\n", argv [1]);
        sf_close (infile);
        exit (1) ;
    } 

    if ((irfile = sf_open (argv [2], SFM_READ, &sfinfo)) == NULL)     {     
        printf ("Error : Not able to open input file '%s'\n", argv [2]);
        sf_close (irfile);
        exit (1) ;
    }   

    if ((outfile = sf_open (argv [3], SFM_WRITE, &sfinfo)) == NULL) { 
        printf ("Error : Not able to open output file '%s'\n", argv [3]);
        sf_close (outfile);
        exit (1);
    }

    while ((count = sf_read_double (infile, buffer, ARRAY_LEN (buffer))) > 0) {
        for (int i = 0; i < 1024; i++)
            inputWavArr.push_back(buffer[i]);
    }
    while ((count = sf_read_double (irfile, buffer, ARRAY_LEN (buffer))) > 0) {
        for (int i = 0; i < 1024; i++)
            irWavArr.push_back(buffer[i]); // max value 0.0408325
    }
    // 2 - Settle on a padding scheme
    int irLen = irWavArr.size();
    int windowSize = 262144 - irLen+1; //  262144 is a pwr of 2
    const int outputLength = irLen + windowSize - 1;

    sf_close(infile);
    sf_close(irfile);

    irWavArr = padded(irWavArr, outputLength);
    int newIrLength = irWavArr.size();
    // 3 and 4 - use FFTW to process IR kernel into vector of complex values
    vector<complex<double> > ir_vec;
    ir_vec = filter_kernel_cpx_vec(irWavArr);
    // 5 - divide dry input signal into sections of length windowSize
    int numSections = floor(inputWavArr.size()/windowSize);

    // OVERLAP-ADD PROCEDURE
    vector<vector<double> > totals;
    cout << "numSections is " << numSections << "\n";

    for (int j=0; j<numSections; j++) { // may be OBOB use numSections+1? or pad inputWavArr? 
        vector<double> total;
        cout << "convolving section " << j << "\n";
        vector<double> section_arr;
        for (int i=j*windowSize; i<(j*windowSize + windowSize); i++) {
            section_arr.push_back(inputWavArr[i]);
        }

        // Hanning window
        for (int i = 0; i < windowSize; i++) {
            double multiplier = 0.5 * (1 - cos(2*M_PI*i/(windowSize-1)));
            section_arr[i] = multiplier * section_arr[i];
        }
        int old_section_arr_size = section_arr.size();
        section_arr = padded(section_arr, outputLength);
        // 6 - yield convolved block for each section
        vector<double> output = convolved_block(section_arr, ir_vec, old_section_arr_size+irLen-1);

        for (int i=0; i<output.size(); i++) {
            total.push_back(output[i]);
        }
        // 7 - append convolved block to vector of resultant block vectors
        totals.push_back(total);
    }
    vector<double> results(inputWavArr.size()+newIrLength-1, 0);
    // 8 - loop though vector of convolved segments, and carry out addition of overlapping sub-segments to yield final "results" vector
    for (int j=0; j<numSections; j++) {
        vector<double> totals_arr = totals[j];
        cout << "overlap summing section " << j << "\n";
        for (int i=0; i<totals_arr.size(); i++) {
            int newIdx = j*windowSize+i;
            results[newIdx] += totals_arr[i];
        }
    }
    // RESULTS MARK THE END OF OVERLAP-ADD PROCEDURE

    // load result vector into buffer for writing via libsndfile
    double* buff3 = (double*)malloc(results.size()*sizeof(double));
    for (int idx = 0; idx < results.size(); idx++) {
        buff3[idx] = results[idx]/400; // normalizing factor for these samples... output without this has amplitude going almost up to 120. input file had max around 0.15. max should be 1 about
    }

    long writtenFrames = sf_writef_double (outfile, buff3, results.size());
    sf_close (outfile);

    return 0 ;
}

fftw_complex* fftw_complex_transform(vector<double> signal_wav) {
    int N = signal_wav.size();
    fftw_complex *in, *out;
    fftw_plan irPlan;
    in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
    out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);

    for (int i = 0; i < signal_wav.size(); i++)
    {
        in[i][0] = signal_wav[i];
        in[i][1] = (float)0; // complex component .. 0 for input of wav file
    }
    irPlan = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
    fftw_execute(irPlan);
    fftw_destroy_plan(irPlan);
    fftw_free(in);

    return out;
}

vector<complex<double> > filter_kernel_cpx_vec(vector<double> input) {
    fftw_complex* irFFT = fftw_complex_transform(input);

    vector<complex<double> > kernel_vec;
    for (int i=0; i<input.size(); i++) {
        complex<double> m1 (irFFT[i][0], irFFT[i][1]);
        kernel_vec.push_back(m1);
    }

    fftw_free(irFFT); 
    return kernel_vec;
}

fftw_complex* ifft_from_complex_vector(vector<vector<double> > signal_vec) {
    int N = signal_vec.size();
    fftw_complex *in, *out;
    fftw_plan irPlan;
    in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
    out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);

    for (int i = 0; i < signal_vec.size(); i++)
    {
        in[i][0] = signal_vec[i][0]; // real
        in[i][1] = signal_vec[i][1]; // imag
    }
    irPlan = fftw_plan_dft_1d(N, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
    fftw_execute(irPlan);
    fftw_destroy_plan(irPlan);
    fftw_free(in);

    return out;
}

vector<double> convolved_block(vector<double> in_vector, vector<complex<double> > ir_cpx_vector, int size) {
    fftw_complex* outputFFT = fftw_complex_transform(in_vector);

    vector<vector<double> > products;
    vector<complex<double> > sig_fft_cpx;
    for (int i=0; i<size; i++) {
        complex<double> m1 (outputFFT[i][0], outputFFT[i][1]);
        sig_fft_cpx.push_back(m1);
    }        
    fftw_free(outputFFT);
    for (int j=0; j<size; j++) {
        std::complex<double> complexProduct = sig_fft_cpx[j]*ir_cpx_vector[j];
        double re = real(complexProduct);
        double im = imag(complexProduct);
        vector<double> elemVec(2);
        elemVec[0] = re;
        elemVec[1] = im;

        products.push_back(elemVec);
    }
    fftw_complex* revFFT = ifft_from_complex_vector(products);
    vector<double> out_vec_dbl;

    for (int i=0; i<size; i++) {
        out_vec_dbl.push_back(outputFFT[i][0]);
    }
    fftw_free(revFFT);
    return out_vec_dbl;
}

vector<double> padded(vector<double> input, int total) {
    vector<double> padded_vec;
    for (int i = 0; i<input.size(); i++) {
    padded_vec.push_back(input[i]);
    }
    int numZeroes = total - input.size();
    for (int i = 0; i< numZeroes; i++) {
    padded_vec.push_back((double)0);
    }
    return padded_vec;
}

void testDummyData() {
    vector<double> dummyFilter;
    dummyFilter.push_back(1);
    dummyFilter.push_back(-1);
    dummyFilter.push_back(1);

    vector<double> dummySignal;
    dummySignal.push_back(3);
    dummySignal.push_back(-1);
    dummySignal.push_back(0);
    dummySignal.push_back(3);
    dummySignal.push_back(2);
    dummySignal.push_back(0);
    dummySignal.push_back(1);
    dummySignal.push_back(2);
    dummySignal.push_back(1);

    const int nearWindowSize=3;
    const int nearIrLength=3;
    const int totalLength = 5;

    dummyFilter = padded(dummyFilter, totalLength);
    vector<complex<double> > dummy_ir_vec = filter_kernel_cpx_vec(dummyFilter);

    int localNumSections = 3;
    vector<vector<double> > outputs;
    for (int j=0; j<localNumSections; j++) {
        vector<double> local_section;
        for (int i; i<nearWindowSize; i++) {
            int idx = j*nearWindowSize + i;
            local_section.push_back(dummySignal[idx]);
        }
        local_section = padded(local_section, totalLength);
        vector<double> local_output = convolved_block(local_section, dummy_ir_vec, totalLength);
        outputs.push_back(local_output);
    }
    vector<double> local_results(11,0); // example has 11 in output

    for (int j=0; j<localNumSections; j++) {
        vector<double> local_totals_arr = outputs[j];
        cout << "overlap summing section " << j << "\n";
        for (int i=0; i<local_totals_arr.size(); i++) {
            int newIdx = j*nearWindowSize+i;
            local_results[newIdx] += local_totals_arr[i];
        }
    }    
    for (int i=0; i<11; i++) {
        cout << "result " << i << "\n";
        cout << local_results[i] << "\n";
    }
}

My process:

  1. import both WAV files (“dry” input signal, and IR) via libsndfile, yielding vector representations, which are apparently well-decoded by libsndfile from WAV via PCM.
  2. settle on a padding scheme. I aimed for the IR and the segments of “dry” signal input to each be padded to a total length of 262144 samples, a power of two (2^18) as recommended in various guides. So as I am importing an impulse response with the length of 189440 samples, that would yield a segment-window size of 72705, so that both segment vectors of dry signal and the IR vector can be padded to a total length of 2^18 samples.
  3. use FFTW (specifically, the function fftw_plan_dft_1d, in mode FFTW_FORWARD) to process the vector with the IR time domain representation of N samples (including padding, so 2^18 samples), with FFTW yielding a pointer to a (presumed) N-length list of complex phasors to describe the frequency domain of the IR.
  4. process this pointer into a vector list of complex phasor values, to represent the IR filter kernel in the format I’ll need most immediately for processing input signal segments. close the FFTW plan and free the FFTW output pointer once this vector is loaded.
  5. divide the dry signal wav into segments of 72705 samples, with zero-padding to a total length of 262144 samples for each segment including the overlap region. Use a Hanning window procedure (multiplication by cosine values with period depending on window size) prior to the padding, to eliminate likely aliasing from high-frequency artifacts at the edges of windows.
  6. for each padded segment of the dry signal, yield a “convolved block” by calling convolved_block with that segment as well as the IR complex vector as arguments.
    1. “convolved_block” works as follows — use FFTW (again, fftw_plan_dft_1d in mode FFTW_FORWARD) to run the FFT on the dry segment vector (“in_vector“) and yield a vector of complex values, and multiply the elements of that complex vector, element-by-element, with those of the complex-vector representation of the IR kernel (ir_cpx_vector). This is complex multiplaction, handled by the std::complex class.
    2. Run the inverse FFT on the resulting array of complex phasors (again this is fftw_plan_dft_1d, but now in mode FFTW_:BACKWARD).
    3. Use the real components of the resulting N-length array to yield the N sample-amplitude values for this convolved, overlapping block
  7. Append each resulting “convolved block” to our vector of convolved, overlapping blocks (vector > totals).
  8. Loop through this result vector (totals"), and perform overlap-add to yield a new vector slightly longer than the original dry signal (vector results). Once summed cumulatively, this should represent the convolved signal, which can then be placed in a buffer and exported to WAV via libsndfile.

Since this didn’t yield the expected results, I tried running my same higher-level FFT functions (those encapsulating the FFTW calls) on the demo data from the guide I referenced above (http://blog.robertelder.org/overlap-add-overlap-save/).

This test is implemented with the void function testDummyData().

In this test, the intermediate FFT complex values were not the same as those in the guide, nor were the ultimate results of the “convolved” vectors (confirmed as correct in the guide via input side algorithm for the small dataset). This kind of leaves me scratching my head, since I have followed the FFTW tutorial (http://www.fftw.org/fftw2_doc/fftw_2.html) with care in order to yield my FFT results. I’ve double-checked my implementation of overlap-add, and I’ve made sure to avoid common implementation pitfalls such as lack of a Hanning window or improper padding scheme (it seems padding is right at this point…). Clearly there is something I’m missing here.

One note on the output: the “convolved” output samples come at much higher amplitude than the maximum sample amplitudes for either the dry input signal or the IR signal. I have used division by a factor of 400 to get an approximately normalized volume range, though of course this doesn’t fix the fundamental problem. Still, I hope this unexpected amplitude variance gives some insight to the problem.

Environment details: the C++ script, if saved as convolution.cpp, can be compiled with the following command: g++ pkg-config --cflags --libs fftw3 sndfile convolution.cpp -o outprog Only the fftw and libsndfile libraries are required to run, and I installed both with homebrew on Mac. And it can be run the following way, which is how I've loaded the relevant files (same as in the working Python version): ./outprog ../F.wav ../spaceEchoIR.wav wetSignal.wav

1
Make sure you normalize by the sum of the impulse response you convolve with. - Cris Luengo
I just tried this but it didn't really change the output. Summed the impulse response amplitudes with abs() of each amplitude (also tried abs of sum of amplitudes, + and -). I used this factor, to normalize: results[newIdx] += totals_arr[i]/(sumIrImpulses); still got the same noise at each tap. I also tried normalizing by the same sum for the values of the input (dry signal) subsections during the building of section_arr: section_arr.push_back(inputWavArr[i]/sumIrImpulses); this still didn't have an effect. In any of these (4) cases, am I applying the normalization in the correct place? - William Clark

1 Answers

1
votes

It looks like I've resolved the basic problem. revFFT, the pointer returned from the inverse-fft wrapper function (ifft_from_complex_vector), was not being referenced after being loaded with values. Instead the code was looking for the expected values in outputFFT, which had already been freed via fftw_free.

Another thing that helped clear up the confusion was switching to the FFTW functions fftw_plan_dft_r2c_1d and fftw_plan_dft_c2r_1d, to replace the calls to fftw_plan_dft_1d using the flags FFTW_BACKWARD and FFTW_FORWARD. The types are much more easy to track now, especially for vectors of real numbers that need to work with FFTW.

The convolution reverb is now rendering almost as expected. There are some windows and isolated parts that experience aliasing and distortion. This is likely due to my continuing need for a robust normalization scheme, as suggested by Cris. That's what I'm working on next, but thankfully the basic problem is resolved.

Working code below:

//Give the input and output file names on the command line
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sndfile.h>
#include <iostream>
#include <cstring>
#include <vector>
#include <complex.h>
#include <fftw3.h>

using namespace std;

#define ARRAY_LEN(x)    ((int) (sizeof (x) / sizeof (x [0])))
#define MAX(x,y)        ((x) > (y) ? (x) : (y))
#define MIN(x,y)        ((x) < (y) ? (x) : (y))

fftw_complex* fftw_complex_transform(vector<double >);
//fftw_complex* ifft_from_complex_vector(vector<vector<double> >);
double* ifft_from_complex_vector(vector<vector<double> >);
vector<double> convolved_block(vector<double>, vector<complex<double> >, int);
vector<complex<double> > filter_kernel_cpx_vec(vector<double>);
vector<double> padded(vector<double>, int);
void testDummyData();

int main (int argc, char ** argv) {
    SNDFILE *infile, *outfile, *irfile ;
    SF_INFO sfinfo ;
    double buffer[1024];
    sf_count_t count;

    //for fftw3
    vector<double> inputWavArr;
    vector<double> irWavArr;

    if (argc != 4) {
        printf("\nUsage :\n\n    <executable name>  <input signal file> <impulse response file> <output file>\n") ;
        exit(0);
    }

    memset (&sfinfo, 0, sizeof (sfinfo)) ;
    if ((infile = sf_open (argv [1], SFM_READ, &sfinfo)) == NULL)     {     
        printf ("Error : Not able to open input file '%s'\n", argv [1]);
        sf_close (infile);
        exit (1) ;
    } 

    if ((irfile = sf_open (argv [2], SFM_READ, &sfinfo)) == NULL)     {     
        printf ("Error : Not able to open input file '%s'\n", argv [2]);
        sf_close (irfile);
        exit (1) ;
    }   

    if ((outfile = sf_open (argv [3], SFM_WRITE, &sfinfo)) == NULL) { 
        printf ("Error : Not able to open output file '%s'\n", argv [3]);
        sf_close (outfile);
        exit (1);
    }

    while ((count = sf_read_double (infile, buffer, ARRAY_LEN (buffer))) > 0) {
        for (int i = 0; i < 1024; i++)
            inputWavArr.push_back(buffer[i]);
    }
    double sumIrImpulses = 0;
    while ((count = sf_read_double (irfile, buffer, ARRAY_LEN (buffer))) > 0) {
        for (int i = 0; i < 1024; i++) {
            double el = buffer[i];
            irWavArr.push_back(el); // max value 0.0408325
            sumIrImpulses += (el);
        }
    }
    sumIrImpulses = abs(sumIrImpulses);
    //const int irLen = 189440; // filter(ir) block len
    int irLen = irWavArr.size();
    //const int windowSize = 72705; // s.t. irLen+windowSize-1 is a pwr of 2
    int windowSize = 262144 - irLen+1; //  262144 is a pwr of 2
    const int outputLength = irLen + windowSize - 1;

    sf_close(infile);
    sf_close(irfile);

    irWavArr = padded(irWavArr, outputLength);
    int newIrLength = irWavArr.size();

    vector<complex<double> > ir_vec;
    ir_vec = filter_kernel_cpx_vec(irWavArr);

    int numSections = floor(inputWavArr.size()/windowSize);
    if (numSections*windowSize != inputWavArr.size()) {
        inputWavArr = padded(inputWavArr, (numSections+1)*windowSize);
        numSections++;
    }



    // OVERLAP-ADD PROCEDURE
    vector<vector<double> > totals;
    cout << "numSections is " << numSections << "\n";

    for (int j=0; j<numSections; j++) { // may be OBOB use numSections+1? or pad inputWavArr? 
        vector<double> total;
        cout << "convolving section " << j << "\n";
        vector<double> section_arr;
        for (int i=j*windowSize; i<(j*windowSize + windowSize); i++) {
            section_arr.push_back(inputWavArr[i]/200.21);
        }

        // hanning window
        for (int i = 0; i < windowSize; i++) {
            double multiplier = 0.5 * (1 - cos(2*M_PI*i/(windowSize-1)));
            section_arr[i] = multiplier * section_arr[i];
        }
        int old_section_arr_size = section_arr.size();
        section_arr = padded(section_arr, outputLength);
        vector<double> output = convolved_block(section_arr, ir_vec, old_section_arr_size+irLen-1);

        for (int i=0; i<output.size(); i++) {
            total.push_back(output[i]); // normalize
        }
        totals.push_back(total);
    }
    vector<double> results(inputWavArr.size()+newIrLength-1, 0);

    for (int j=0; j<numSections; j++) {
        vector<double> totals_arr = totals[j];
        cout << "overlap summing section " << j << "\n";
        for (int i=0; i<totals_arr.size(); i++) {
            int newIdx = j*windowSize+i;
            results[newIdx] += totals_arr[i]/550;
        }
    }
    double maxVal = 0;
    for (int i=0; i<results.size(); i++) {
        if (results[i] > maxVal) {
            maxVal = results[i];
        }
    }
    cout << "maxval" << maxVal << "\n";
    cout << "sumIrImpulses" << sumIrImpulses << "\n";
    // RESULTS MARK THE END OF OVERLAP-ADD PROCEDURE
    double* buff3 = (double*)malloc(results.size()*sizeof(double));
    for (int idx = 0; idx < results.size(); idx++) {
        buff3[idx] = results[idx]; // NO LONGER normalizing factor for these samples... output without this has amplitude going almost up to 120. input file had max around 0.15. max should be 1 about
    }

    long writtenFrames = sf_writef_double (outfile, buff3, results.size());
    sf_close (outfile);

    return 0 ;
}

fftw_complex* fftw_complex_transform(vector<double> signal_wav) {
    int N = signal_wav.size();
    double *in;
    fftw_complex *out;
    fftw_plan irPlan;
    //in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
    in = (double*) malloc(sizeof(double)*N);
    out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);

    for (int i = 0; i < signal_wav.size(); i++)
    {
        //in[i][0] = signal_wav[i];
        in[i] = signal_wav[i];
        //in[i][1] = (float)0; // complex component .. 0 for input of wav file
    }
    //irPlan = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
    irPlan = fftw_plan_dft_r2c_1d(N, in, out, FFTW_ESTIMATE);
    fftw_execute(irPlan);
    fftw_destroy_plan(irPlan);
    fftw_free(in);

    return out;
}

vector<complex<double> > filter_kernel_cpx_vec(vector<double> input) {
    fftw_complex* irFFT = fftw_complex_transform(input);

    vector<complex<double> > kernel_vec;
    for (int i=0; i<input.size(); i++) {
        complex<double> m1 (irFFT[i][0], irFFT[i][1]);
        kernel_vec.push_back(m1);
    }

    fftw_free(irFFT); 
    return kernel_vec;
}

//fftw_complex* ifft_from_complex_vector(vector<vector<double> > signal_vec) {
double* ifft_from_complex_vector(vector<vector<double> > signal_vec) {
    int N = signal_vec.size();
    //fftw_complex *in, *out;
    fftw_complex *in;
    double *out;
    fftw_plan irPlan;
    in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
    //out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
    out = (double*) malloc(sizeof(double)*N);

    for (int i = 0; i < signal_vec.size(); i++)
    {
        in[i][0] = signal_vec[i][0]; // real
        in[i][1] = signal_vec[i][1]; // imag
    }
    //irPlan = fftw_plan_dft_1d(N, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
    irPlan = fftw_plan_dft_c2r_1d(N, in, out, FFTW_ESTIMATE);
    fftw_execute(irPlan);
    fftw_destroy_plan(irPlan);
    fftw_free(in);

    return out;
}

vector<double> convolved_block(vector<double> in_vector, vector<complex<double> > ir_cpx_vector, int size) {
    fftw_complex* outputFFT = fftw_complex_transform(in_vector);

    vector<vector<double> > products;
    vector<complex<double> > sig_fft_cpx;
    for (int i=0; i<size; i++) {
        complex<double> m1 (outputFFT[i][0], outputFFT[i][1]);
        sig_fft_cpx.push_back(m1);
    }        
    fftw_free(outputFFT);
    for (int j=0; j<size; j++) {
        std::complex<double> complexProduct = sig_fft_cpx[j]*ir_cpx_vector[j];
        double re = real(complexProduct);
        double im = imag(complexProduct);
        vector<double> elemVec(2);
        elemVec[0] = re;
        elemVec[1] = im;

        products.push_back(elemVec);
    }
    //fftw_complex* revFFT = ifft_from_complex_vector(products);
    double* revFFT = ifft_from_complex_vector(products);
    vector<double> out_vec_dbl;

    for (int i=0; i<size; i++) {
        //out_vec_dbl.push_back(outputFFT[i][0]);
        //out_vec_dbl.push_back(revFFT[i][0]);
        out_vec_dbl.push_back(revFFT[i]);
        //out_vec_dbl.push_back(outputFFT[i]);
    }
    //fftw_free(revFFT);
    free(revFFT);
    return out_vec_dbl;
}

vector<double> padded(vector<double> input, int total) {
    vector<double> padded_vec;
    for (int i = 0; i<input.size(); i++) {
    padded_vec.push_back(input[i]);
    }
    int numZeroes = total - input.size();
    for (int i = 0; i< numZeroes; i++) {
    padded_vec.push_back((double)0);
    }
    return padded_vec;
}

void testDummyData() {
    vector<double> dummyFilter;
    dummyFilter.push_back(1);
    dummyFilter.push_back(-1);
    dummyFilter.push_back(1);

    vector<double> dummySignal;
    dummySignal.push_back(3);
    dummySignal.push_back(-1);
    dummySignal.push_back(0);
    dummySignal.push_back(3);
    dummySignal.push_back(2);
    dummySignal.push_back(0);
    dummySignal.push_back(1);
    dummySignal.push_back(2);
    dummySignal.push_back(1);

    const int nearWindowSize=3;
    const int nearIrLength=3;
    const int totalLength = 5;

    dummyFilter = padded(dummyFilter, totalLength);
    vector<complex<double> > dummy_ir_vec = filter_kernel_cpx_vec(dummyFilter);

    int localNumSections = 3;
    vector<vector<double> > outputs;
    for (int j=0; j<localNumSections; j++) {
        vector<double> local_section;
        for (int i; i<nearWindowSize; i++) {
            int idx = j*nearWindowSize + i;
            local_section.push_back(dummySignal[idx]);
        }
        local_section = padded(local_section, totalLength);
        vector<double> local_output = convolved_block(local_section, dummy_ir_vec, totalLength);
        outputs.push_back(local_output);
    }
    vector<double> local_results(11,0); // example has 11 in output

    for (int j=0; j<localNumSections; j++) {
        vector<double> local_totals_arr = outputs[j];
        cout << "overlap summing section " << j << "\n";
        for (int i=0; i<local_totals_arr.size(); i++) {
            int newIdx = j*nearWindowSize+i;
            local_results[newIdx] += local_totals_arr[i];
        }
    }    
    for (int i=0; i<11; i++) {
        cout << "result " << i << "\n";
        cout << local_results[i] << "\n";
    }
}