
I want to record the microphone input and play it back with only a small delay. This is done continuously using a queue of buffers.

I got the code running to the point that it plays the microphone input almost continuously, but there are very short, yet still noticeable, repeated pauses throughout the audio output from waveOut. What is causing these annoying pauses, and how can I remove them?

Another question: I'm not using anything like a mutex. I'm relying on the fact that waveIn and waveOut use the same sampling rate and the same amount of data, so hopefully waveOut always follows waveIn and waveIn never writes to a buffer that is still being played. Could this be a problem?

Here's the code; it should compile and run. I only got it to the point of running, and it's far from well written, so any comments on improving it are highly welcome.

    #include "stdafx.h"
    #include <Windows.h>
    #pragma comment(lib, "winmm.lib")
    #include <iostream>
    #include <fstream>
    #include <sstream>

    using namespace std;
    HANDLE hEvent_BufferReady;
    HANDLE hEvent_FinishedPlaying;
    #define Samplerate 44100
    #define nSec  1

    int _iBuf;
    int _iplaying;
    unsigned long result;


    HWAVEIN hWaveIn;
    HWAVEOUT hWaveOut;
    WAVEFORMATEX pFormat;

    enum { NUM_BUF = 3 };
    WAVEHDR _header [NUM_BUF];

    DWORD WINAPI RecordingWaitingThread(LPVOID ivalue)
    {
        while(1)
        {
            WaitForSingleObject(hEvent_BufferReady, INFINITE);

            // hand the buffer that just finished recording to waveOut,
            // then queue the next buffer in the ring for waveIn
            result = waveInUnprepareHeader(hWaveIn, &_header[_iBuf], sizeof(WAVEHDR));
            _iplaying = _iBuf;
            result = waveOutPrepareHeader(hWaveOut, &_header[_iBuf], sizeof(WAVEHDR));
            result = waveOutWrite(hWaveOut, &_header[_iBuf], sizeof(WAVEHDR));   // play audio
            ++_iBuf;
            if (_iBuf == NUM_BUF)   _iBuf = 0;
            result = waveInPrepareHeader(hWaveIn, &_header[_iBuf], sizeof(WAVEHDR));
            result = waveInAddBuffer(hWaveIn, &_header[_iBuf], sizeof(WAVEHDR));
        }
        return 0;
    }

    DWORD WINAPI PlayingWaitingThread(LPVOID ivalue)
    {
        while(1){
            WaitForSingleObject(hEvent_FinishedPlaying,INFINITE);
            waveOutUnprepareHeader(hWaveOut, &_header[_iplaying], sizeof(WAVEHDR));
        }
    }

    static void CALLBACK waveOutProc(HWAVEOUT hWaveOut, UINT uMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2)
    {
        if (uMsg != WOM_DONE)
            return;
        SetEvent(hEvent_FinishedPlaying);   // the buffer at _iplaying has finished playing
    }


    void CALLBACK myWaveInProc(HWAVEIN hwi, UINT uMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2)
    {
        if (uMsg != WIM_DATA)
            return;
        SetEvent(hEvent_BufferReady);   // the buffer at _iBuf has been filled by waveIn
    }

    int main(int argc, _TCHAR* argv[])
    {

        hEvent_BufferReady=CreateEvent(NULL,FALSE, FALSE, NULL);
        hEvent_FinishedPlaying = CreateEvent(NULL,FALSE, FALSE, NULL);


        pFormat.wFormatTag = WAVE_FORMAT_PCM; // simple, uncompressed format
        pFormat.nChannels = 1; // 1=mono, 2=stereo
        pFormat.nSamplesPerSec = Samplerate; 
        pFormat.wBitsPerSample = 16; // 16 for high quality, 8 for telephone-grade
        pFormat.nBlockAlign = pFormat.nChannels*pFormat.wBitsPerSample/8; 
        pFormat.nAvgBytesPerSec = (pFormat.nSamplesPerSec)*(pFormat.nChannels)*(pFormat.wBitsPerSample)/8; 
        pFormat.cbSize=0;


        short int  *_pBuf;
        size_t bpbuff =4000;//= (pFormat.nSamplesPerSec) * (pFormat.nChannels) * (pFormat.wBitsPerSample)/8;
        _pBuf = new short int [bpbuff * NUM_BUF];

        result = waveInOpen(&hWaveIn, WAVE_MAPPER, &pFormat, (DWORD_PTR)myWaveInProc, 0L, CALLBACK_FUNCTION);
        result = waveOutOpen(&hWaveOut, WAVE_MAPPER, &pFormat, (DWORD_PTR)waveOutProc, 0, CALLBACK_FUNCTION);
        // initialize all headers in the queue
        for ( int i = 0; i < NUM_BUF; i++ )
        {
            _header[i].lpData = (LPSTR)&_pBuf [i * bpbuff];
            _header[i].dwBufferLength = bpbuff*sizeof(*_pBuf);
            _header[i].dwFlags = 0L;
            _header[i].dwLoops = 0L;
        }

        DWORD myThreadID;
        DWORD myThreadIDPlay;
        HANDLE hThread;
        HANDLE hThreadPlay;
        hThread = CreateThread(NULL, 0, RecordingWaitingThread,NULL,0,&myThreadID);
        hThreadPlay = CreateThread(NULL, 0, PlayingWaitingThread,NULL,0,&myThreadIDPlay);

        _iBuf = 0;

        waveInPrepareHeader(hWaveIn, & _header[_iBuf], sizeof(WAVEHDR));
        waveInAddBuffer (hWaveIn, & _header[_iBuf], sizeof (WAVEHDR));

        waveInStart(hWaveIn);

        getchar();
        waveInClose(hWaveIn);
        waveOutClose(hWaveOut);
        CloseHandle(hThread);
        CloseHandle(hThreadPlay);

        CloseHandle(hEvent_BufferReady);
        CloseHandle(hEvent_FinishedPlaying);

        return 0;
    }
I've only used the waveIn/waveOut functions with a callback thread - i.e. I give it a thread and handle the WOM_DONE messages the MM system sends me. From a quick look, one difference I notice is that I'm using at least 4 buffers, while you seem to be using only 3. I fill all the buffers and then blast them all out with waveOutWrite. Then, as each buffer finishes, the next one is started and I get a message telling me that a buffer has been played. I then synthesize the next part of the song (about 1/8th of a second, or 5292 samples) and again blast it out with waveOutWrite, where it languishes until the next 3 buffers have finished playing. It's just a matter (from memory) of making sure that the MM subsystem has enough data and buffers to deal with. This approach seems to work miles better than having a single buffer large enough for the whole several-minutes performance. It also lets you pause playback with little latency (certainly quicker than I can detect a lag). I did have crappy playback when the buffer length didn't suit the synthesis I was doing, but that should be unrelated to this question. – enhzflep
I tried increasing the number of buffers, but it didn't make any difference. The next thing I'd like to try is to delay waveOut by at least two buffers while waveIn is filling the buffer queue, blast that batch of buffers to the waveOut device together, and then, each time a WOM_DONE message is received, write the next buffer to the waveOut device if it is already filled, otherwise wait for it to be filled. – Andy
That sounds like a good strategy. It could well be that between the time recording finishes and the time playback starts you need to have a couple of buffers queued. Actually, I've just found an older version of the code I was looking at when I last replied - the code I'm looking at now uses just two buffers. I can't see how it would make a difference, but for what it's worth, the buffers I'm playing back hold 32-bit float samples. I'm also not using WaitForSingleObject or SetEvent. I'd try your code, but I uninstalled the mic drivers ages ago. :oops: – enhzflep
I would start with just the playback code, using pre-generated data (like a sine wave), to see whether the glitches are happening on the output side or on the recording side. – Adrian McCarthy
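A minimal playback-only sketch along the lines of the last two comments (this is illustrative code, not from the question; the 440 Hz sine tone, 100 ms buffer size, CALLBACK_EVENT notification, and ten-second run length are all arbitrary choices made here to keep the example short and self-contained):

    // Playback-only test: pre-generated sine tone, several buffers queued up front,
    // each buffer refilled and re-queued as soon as it completes.
    #include <Windows.h>
    #include <cmath>
    #pragma comment(lib, "winmm.lib")

    #define SAMPLE_RATE 44100
    #define SAMPLES_PER_BUF 4410        // 100 ms per buffer
    #define NUM_OUT_BUF 4

    int main()
    {
        WAVEFORMATEX fmt = {};
        fmt.wFormatTag = WAVE_FORMAT_PCM;
        fmt.nChannels = 1;
        fmt.nSamplesPerSec = SAMPLE_RATE;
        fmt.wBitsPerSample = 16;
        fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8;
        fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign;

        // CALLBACK_EVENT: the event is signalled whenever a buffer finishes playing.
        HANDLE hDone = CreateEvent(NULL, FALSE, FALSE, NULL);
        HWAVEOUT hOut;
        waveOutOpen(&hOut, WAVE_MAPPER, &fmt, (DWORD_PTR)hDone, 0, CALLBACK_EVENT);

        short *buf = new short[SAMPLES_PER_BUF * NUM_OUT_BUF];
        WAVEHDR hdr[NUM_OUT_BUF] = {};
        long phase = 0;

        // Fill and queue every buffer before playback has a chance to starve.
        for (int i = 0; i < NUM_OUT_BUF; i++)
        {
            for (int s = 0; s < SAMPLES_PER_BUF; s++, phase++)
                buf[i * SAMPLES_PER_BUF + s] =
                    (short)(10000.0 * sin(2.0 * 3.14159265 * 440.0 * phase / SAMPLE_RATE));
            hdr[i].lpData = (LPSTR)&buf[i * SAMPLES_PER_BUF];
            hdr[i].dwBufferLength = SAMPLES_PER_BUF * sizeof(short);
            waveOutPrepareHeader(hOut, &hdr[i], sizeof(WAVEHDR));
            waveOutWrite(hOut, &hdr[i], sizeof(WAVEHDR));
        }

        // Refill each buffer as it completes; stop after roughly ten seconds.
        for (int played = 0; played < 100; )
        {
            WaitForSingleObject(hDone, INFINITE);
            for (int i = 0; i < NUM_OUT_BUF; i++)
            {
                if (!(hdr[i].dwFlags & WHDR_DONE)) continue;   // still queued or playing
                for (int s = 0; s < SAMPLES_PER_BUF; s++, phase++)
                    ((short *)hdr[i].lpData)[s] =
                        (short)(10000.0 * sin(2.0 * 3.14159265 * 440.0 * phase / SAMPLE_RATE));
                waveOutWrite(hOut, &hdr[i], sizeof(WAVEHDR));  // re-queue immediately
                played++;
            }
        }

        waveOutReset(hOut);                                    // return any pending buffers
        for (int i = 0; i < NUM_OUT_BUF; i++)
            waveOutUnprepareHeader(hOut, &hdr[i], sizeof(WAVEHDR));
        waveOutClose(hOut);
        CloseHandle(hDone);
        delete [] buf;
        return 0;
    }

If this plays a clean tone, the output path is keeping up and the glitches are on the recording side; if it stutters, the problem is in how the output buffers are sized or queued.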

1 Answer


The problem with your program is that you are declaring bpbuff too small. Just declare it as size_t bpbuff = 4410; and you will get rid of all of those repeated breaks in your audio stream. At 44100 samples per second, 4410 samples is exactly 100 ms of audio per buffer (8820 bytes for mono 16-bit), whereas 4000 samples is only about 90.7 ms.

By the way, I think you could get rid of the threading approach entirely, making your code much simpler, like the one below:

#include "stdafx.h"
#include <Windows.h>
#pragma comment(lib, "winmm.lib")
#include <iostream>
#include <fstream>
#include <sstream>

using namespace std;
#define Samplerate 44100

static HWAVEIN hWaveIn;
static HWAVEOUT hWaveOut;

enum { NUM_BUF = 3 };
WAVEHDR _header [NUM_BUF];

void CALLBACK myWaveInProc(HWAVEIN hwi, UINT uMsg, DWORD_PTR dwInstance, DWORD_PTR dwParam1, DWORD_PTR dwParam2)
{
    static int _iBuf;
    if (uMsg != WIM_DATA)       // ignore WIM_OPEN and WIM_CLOSE
        return;
    waveOutWrite(hWaveOut, &_header[_iBuf], sizeof(WAVEHDR));   // play the buffer that just finished recording
    ++_iBuf;
    if (_iBuf == NUM_BUF)   _iBuf = 0;
    waveInAddBuffer(hWaveIn, &_header[_iBuf], sizeof(WAVEHDR)); // queue the next buffer for recording
}

int main(int argc, _TCHAR* argv[])
{
    WAVEFORMATEX pFormat;
    pFormat.wFormatTag = WAVE_FORMAT_PCM; // simple, uncompressed format
    pFormat.nChannels = 1; // 1=mono, 2=stereo
    pFormat.nSamplesPerSec = Samplerate; 
    pFormat.wBitsPerSample = 16; // 16 for high quality, 8 for telephone-grade
    pFormat.nBlockAlign = pFormat.nChannels*pFormat.wBitsPerSample/8; 
    pFormat.nAvgBytesPerSec = (pFormat.nSamplesPerSec)*(pFormat.nChannels)*(pFormat.wBitsPerSample)/8; 
    pFormat.cbSize=0;

    short int  *_pBuf;
    size_t bpbuff = 4410;//= (pFormat.nSamplesPerSec) * (pFormat.nChannels) * (pFormat.wBitsPerSample)/8;
    _pBuf = new short int [bpbuff * NUM_BUF];

    waveInOpen(&hWaveIn, WAVE_MAPPER, &pFormat, (DWORD_PTR)myWaveInProc, 0L, CALLBACK_FUNCTION);
    waveOutOpen(&hWaveOut, WAVE_MAPPER, &pFormat, 0, 0, CALLBACK_NULL);   // no callback needed for output
    // initialize all headers in the queue
    for ( int i = 0; i < NUM_BUF; i++ )
    {
        _header[i].lpData = (LPSTR)&_pBuf [i * bpbuff];
        _header[i].dwBufferLength = bpbuff*sizeof(*_pBuf);
        _header[i].dwFlags = 0L;
        _header[i].dwLoops = 0L;
        waveInPrepareHeader(hWaveIn, & _header[i], sizeof(WAVEHDR));
    }
    waveInAddBuffer (hWaveIn, & _header[0], sizeof (WAVEHDR));

    waveInStart(hWaveIn);

    getchar();
    waveInClose(hWaveIn);
    waveOutClose(hWaveOut);
    delete[] _pBuf;

    return 0;
}

This piece of code performs exactly the same task with less code. Cheers, mate!
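One small addition you may want on shutdown (the original code does not do this; the sketch below just reuses the same globals as the code above): stop and reset the input device and unprepare the headers before the two Close calls, so the devices are not closed while buffers are still queued.

// Possible shutdown sequence, run right before waveInClose/waveOutClose:
waveInStop(hWaveIn);       // stop capturing new data
waveInReset(hWaveIn);      // mark any pending input buffers as done and return them
waveOutReset(hWaveOut);    // likewise for any output buffers still playing
for (int i = 0; i < NUM_BUF; i++)
    waveInUnprepareHeader(hWaveIn, &_header[i], sizeof(WAVEHDR));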