I am trying to carry out the convolution of two arrays via FFT, using the FFTW library in C++ and implementing the overlap-add algorithm. The vector data specifically consists of doubles loaded from WAV files using sndfile — one of them is a WAV file for processing, the other is a WAV file of an impuse response (IR), so essentially the goal is Convolution Reverb.
I have previously implemented this successfully with Python, but that involved using FFT convolution via scipy.signal, so I didn’t have to implement overlap-add myself. But from that experience, I can confirm that I am working with an impulse response that should neatly convolve with the input signal via FFT, and yield a recognizable echo effect comparable to the Python output. I can also confirm that the 2 WAV files have matching bitrates (16 bit) and sample rates (44.1 kHz).
A few false starts only yielded flat noise as output, a familiar stumbling block. Now I am yielding a WAV output that almost resembles the changing taps of the impulse response, but with only pulses of noise at each perceived “tap.”
I am following two guides for my implementation of overlap-add; https://www.dspguide.com/ch18/1.htm
http://blog.robertelder.org/overlap-add-overlap-save/
I’m also following some online examples and other StackOverflow threads, and reading the FFTW official docs (http://www.fftw.org/fftw2_doc/fftw_2.html).
Here is my code, with the process explained below:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sndfile.h>
#include <iostream>
#include <cstring>
#include <vector>
#include <complex.h>
#include <fftw3.h>
using namespace std;
#define ARRAY_LEN(x) ((int) (sizeof (x) / sizeof (x [0])))
#define MAX(x,y) ((x) > (y) ? (x) : (y))
#define MIN(x,y) ((x) < (y) ? (x) : (y))
fftw_complex* fftw_complex_transform(vector<double >);
fftw_complex* ifft_from_complex_vector(vector<vector<double> >);
vector<double> convolved_block(vector<double>, vector<complex<double> >, int);
vector<complex<double> > filter_kernel_cpx_vec(vector<double>);
vector<double> padded(vector<double>, int);
void testDummyData();
int main (int argc, char ** argv) {
SNDFILE *infile, *outfile, *irfile ;
SF_INFO sfinfo ;
double buffer[1024];
sf_count_t count;
// 1 - import both WAV files
vector<double> inputWavArr;
vector<double> irWavArr;
if (argc != 4) {
printf("\nUsage :\n\n <executable name> <input signal file> <impulse response file> <output file>\n") ;
exit(0);
}
memset (&sfinfo, 0, sizeof (sfinfo)) ;
if ((infile = sf_open (argv [1], SFM_READ, &sfinfo)) == NULL) {
printf ("Error : Not able to open input file '%s'\n", argv [1]);
sf_close (infile);
exit (1) ;
}
if ((irfile = sf_open (argv [2], SFM_READ, &sfinfo)) == NULL) {
printf ("Error : Not able to open input file '%s'\n", argv [2]);
sf_close (irfile);
exit (1) ;
}
if ((outfile = sf_open (argv [3], SFM_WRITE, &sfinfo)) == NULL) {
printf ("Error : Not able to open output file '%s'\n", argv [3]);
sf_close (outfile);
exit (1);
}
while ((count = sf_read_double (infile, buffer, ARRAY_LEN (buffer))) > 0) {
for (int i = 0; i < 1024; i++)
inputWavArr.push_back(buffer[i]);
}
while ((count = sf_read_double (irfile, buffer, ARRAY_LEN (buffer))) > 0) {
for (int i = 0; i < 1024; i++)
irWavArr.push_back(buffer[i]); // max value 0.0408325
}
// 2 - Settle on a padding scheme
int irLen = irWavArr.size();
int windowSize = 262144 - irLen+1; // 262144 is a pwr of 2
const int outputLength = irLen + windowSize - 1;
sf_close(infile);
sf_close(irfile);
irWavArr = padded(irWavArr, outputLength);
int newIrLength = irWavArr.size();
// 3 and 4 - use FFTW to process IR kernel into vector of complex values
vector<complex<double> > ir_vec;
ir_vec = filter_kernel_cpx_vec(irWavArr);
// 5 - divide dry input signal into sections of length windowSize
int numSections = floor(inputWavArr.size()/windowSize);
// OVERLAP-ADD PROCEDURE
vector<vector<double> > totals;
cout << "numSections is " << numSections << "\n";
for (int j=0; j<numSections; j++) { // may be OBOB use numSections+1? or pad inputWavArr?
vector<double> total;
cout << "convolving section " << j << "\n";
vector<double> section_arr;
for (int i=j*windowSize; i<(j*windowSize + windowSize); i++) {
section_arr.push_back(inputWavArr[i]);
}
// Hanning window
for (int i = 0; i < windowSize; i++) {
double multiplier = 0.5 * (1 - cos(2*M_PI*i/(windowSize-1)));
section_arr[i] = multiplier * section_arr[i];
}
int old_section_arr_size = section_arr.size();
section_arr = padded(section_arr, outputLength);
// 6 - yield convolved block for each section
vector<double> output = convolved_block(section_arr, ir_vec, old_section_arr_size+irLen-1);
for (int i=0; i<output.size(); i++) {
total.push_back(output[i]);
}
// 7 - append convolved block to vector of resultant block vectors
totals.push_back(total);
}
vector<double> results(inputWavArr.size()+newIrLength-1, 0);
// 8 - loop though vector of convolved segments, and carry out addition of overlapping sub-segments to yield final "results" vector
for (int j=0; j<numSections; j++) {
vector<double> totals_arr = totals[j];
cout << "overlap summing section " << j << "\n";
for (int i=0; i<totals_arr.size(); i++) {
int newIdx = j*windowSize+i;
results[newIdx] += totals_arr[i];
}
}
// RESULTS MARK THE END OF OVERLAP-ADD PROCEDURE
// load result vector into buffer for writing via libsndfile
double* buff3 = (double*)malloc(results.size()*sizeof(double));
for (int idx = 0; idx < results.size(); idx++) {
buff3[idx] = results[idx]/400; // normalizing factor for these samples... output without this has amplitude going almost up to 120. input file had max around 0.15. max should be 1 about
}
long writtenFrames = sf_writef_double (outfile, buff3, results.size());
sf_close (outfile);
return 0 ;
}
fftw_complex* fftw_complex_transform(vector<double> signal_wav) {
int N = signal_wav.size();
fftw_complex *in, *out;
fftw_plan irPlan;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
for (int i = 0; i < signal_wav.size(); i++)
{
in[i][0] = signal_wav[i];
in[i][1] = (float)0; // complex component .. 0 for input of wav file
}
irPlan = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
fftw_execute(irPlan);
fftw_destroy_plan(irPlan);
fftw_free(in);
return out;
}
vector<complex<double> > filter_kernel_cpx_vec(vector<double> input) {
fftw_complex* irFFT = fftw_complex_transform(input);
vector<complex<double> > kernel_vec;
for (int i=0; i<input.size(); i++) {
complex<double> m1 (irFFT[i][0], irFFT[i][1]);
kernel_vec.push_back(m1);
}
fftw_free(irFFT);
return kernel_vec;
}
fftw_complex* ifft_from_complex_vector(vector<vector<double> > signal_vec) {
int N = signal_vec.size();
fftw_complex *in, *out;
fftw_plan irPlan;
in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*N);
for (int i = 0; i < signal_vec.size(); i++)
{
in[i][0] = signal_vec[i][0]; // real
in[i][1] = signal_vec[i][1]; // imag
}
irPlan = fftw_plan_dft_1d(N, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
fftw_execute(irPlan);
fftw_destroy_plan(irPlan);
fftw_free(in);
return out;
}
vector<double> convolved_block(vector<double> in_vector, vector<complex<double> > ir_cpx_vector, int size) {
fftw_complex* outputFFT = fftw_complex_transform(in_vector);
vector<vector<double> > products;
vector<complex<double> > sig_fft_cpx;
for (int i=0; i<size; i++) {
complex<double> m1 (outputFFT[i][0], outputFFT[i][1]);
sig_fft_cpx.push_back(m1);
}
fftw_free(outputFFT);
for (int j=0; j<size; j++) {
std::complex<double> complexProduct = sig_fft_cpx[j]*ir_cpx_vector[j];
double re = real(complexProduct);
double im = imag(complexProduct);
vector<double> elemVec(2);
elemVec[0] = re;
elemVec[1] = im;
products.push_back(elemVec);
}
fftw_complex* revFFT = ifft_from_complex_vector(products);
vector<double> out_vec_dbl;
for (int i=0; i<size; i++) {
out_vec_dbl.push_back(outputFFT[i][0]);
}
fftw_free(revFFT);
return out_vec_dbl;
}
vector<double> padded(vector<double> input, int total) {
vector<double> padded_vec;
for (int i = 0; i<input.size(); i++) {
padded_vec.push_back(input[i]);
}
int numZeroes = total - input.size();
for (int i = 0; i< numZeroes; i++) {
padded_vec.push_back((double)0);
}
return padded_vec;
}
void testDummyData() {
vector<double> dummyFilter;
dummyFilter.push_back(1);
dummyFilter.push_back(-1);
dummyFilter.push_back(1);
vector<double> dummySignal;
dummySignal.push_back(3);
dummySignal.push_back(-1);
dummySignal.push_back(0);
dummySignal.push_back(3);
dummySignal.push_back(2);
dummySignal.push_back(0);
dummySignal.push_back(1);
dummySignal.push_back(2);
dummySignal.push_back(1);
const int nearWindowSize=3;
const int nearIrLength=3;
const int totalLength = 5;
dummyFilter = padded(dummyFilter, totalLength);
vector<complex<double> > dummy_ir_vec = filter_kernel_cpx_vec(dummyFilter);
int localNumSections = 3;
vector<vector<double> > outputs;
for (int j=0; j<localNumSections; j++) {
vector<double> local_section;
for (int i; i<nearWindowSize; i++) {
int idx = j*nearWindowSize + i;
local_section.push_back(dummySignal[idx]);
}
local_section = padded(local_section, totalLength);
vector<double> local_output = convolved_block(local_section, dummy_ir_vec, totalLength);
outputs.push_back(local_output);
}
vector<double> local_results(11,0); // example has 11 in output
for (int j=0; j<localNumSections; j++) {
vector<double> local_totals_arr = outputs[j];
cout << "overlap summing section " << j << "\n";
for (int i=0; i<local_totals_arr.size(); i++) {
int newIdx = j*nearWindowSize+i;
local_results[newIdx] += local_totals_arr[i];
}
}
for (int i=0; i<11; i++) {
cout << "result " << i << "\n";
cout << local_results[i] << "\n";
}
}
My process:
- import both WAV files (“dry” input signal, and IR) via libsndfile, yielding vector representations, which are apparently well-decoded by libsndfile from WAV via PCM.
- settle on a padding scheme. I aimed for the IR and the segments of “dry” signal input to each be padded to a total length of 262144 samples, a power of two (2^18) as recommended in various guides. So as I am importing an impulse response with the length of 189440 samples, that would yield a segment-window size of 72705, so that both segment vectors of dry signal and the IR vector can be padded to a total length of 2^18 samples.
- use FFTW (specifically, the function fftw_plan_dft_1d, in mode FFTW_FORWARD) to process the vector with the IR time domain representation of N samples (including padding, so 2^18 samples), with FFTW yielding a pointer to a (presumed) N-length list of complex phasors to describe the frequency domain of the IR.
- process this pointer into a vector list of complex phasor values, to represent the IR filter kernel in the format I’ll need most immediately for processing input signal segments. close the FFTW plan and free the FFTW output pointer once this vector is loaded.
- divide the dry signal wav into segments of 72705 samples, with zero-padding to a total length of 262144 samples for each segment including the overlap region. Use a Hanning window procedure (multiplication by cosine values with period depending on window size) prior to the padding, to eliminate likely aliasing from high-frequency artifacts at the edges of windows.
- for each padded segment of the dry signal, yield a “convolved block” by calling convolved_block with that segment as well as the IR complex vector as arguments.
- “convolved_block” works as follows — use FFTW (again, fftw_plan_dft_1d in mode FFTW_FORWARD) to run the FFT on the dry segment vector (“in_vector“) and yield a vector of complex values, and multiply the elements of that complex vector, element-by-element, with those of the complex-vector representation of the IR kernel (ir_cpx_vector). This is complex multiplaction, handled by the std::complex class.
- Run the inverse FFT on the resulting array of complex phasors (again this is fftw_plan_dft_1d, but now in mode FFTW_:BACKWARD).
- Use the real components of the resulting N-length array to yield the N sample-amplitude values for this convolved, overlapping block
- Append each resulting “convolved block” to our vector of convolved, overlapping blocks (vector > totals).
- Loop through this result vector (totals"), and perform overlap-add to yield a new vector slightly longer than the original dry signal (vector results). Once summed cumulatively, this should represent the convolved signal, which can then be placed in a buffer and exported to WAV via libsndfile.
Since this didn’t yield the expected results, I tried running my same higher-level FFT functions (those encapsulating the FFTW calls) on the demo data from the guide I referenced above (http://blog.robertelder.org/overlap-add-overlap-save/).
This test is implemented with the void function testDummyData().
In this test, the intermediate FFT complex values were not the same as those in the guide, nor were the ultimate results of the “convolved” vectors (confirmed as correct in the guide via input side algorithm for the small dataset). This kind of leaves me scratching my head, since I have followed the FFTW tutorial (http://www.fftw.org/fftw2_doc/fftw_2.html) with care in order to yield my FFT results. I’ve double-checked my implementation of overlap-add, and I’ve made sure to avoid common implementation pitfalls such as lack of a Hanning window or improper padding scheme (it seems padding is right at this point…). Clearly there is something I’m missing here.
One note on the output: the “convolved” output samples come at much higher amplitude than the maximum sample amplitudes for either the dry input signal or the IR signal. I have used division by a factor of 400 to get an approximately normalized volume range, though of course this doesn’t fix the fundamental problem. Still, I hope this unexpected amplitude variance gives some insight to the problem.
Environment details: the C++ script, if saved as convolution.cpp, can be compiled with the following command: g++ pkg-config --cflags --libs fftw3 sndfile
convolution.cpp -o outprog
Only the fftw and libsndfile libraries are required to run, and I installed both with homebrew on Mac.
And it can be run the following way, which is how I've loaded the relevant files (same as in the working Python version): ./outprog ../F.wav ../spaceEchoIR.wav wetSignal.wav