Filling audio endpoint buffer provided by WASAPI not playing sound

Question

I am trying to play noise through the default audio endpoint renderer using the WASAPI interface. I am trying to use the code provided by Microsoft on this page: https://docs.microsoft.com/en-us/windows/win32/coreaudio/rendering-a-stream. I want to write a class that can generate noise for this code sample.

I have tried writing signed and unsigned integer values to the buffer of the default audio endpoint renderer, and see that values are being written to the buffer, but there is no sound playing.

To start, I made a header with the needed methods, and a random number generator.

#pragma once

// RNG
#include <random>

// Uniform random integer source over the closed range [low, high].
//
// T is expected to be an integer type of at most 64 bits. The bounds may be
// given in either order; they are stored in ascending order.
template <typename T>
class Random {
public:
    // Seeds the engine from std::random_device (declared in <random>, which
    // this header already includes) and records the normalised bounds.
    Random(T low, T high)
        : mLow(low < high ? low : high),
          mHigh(low < high ? high : low),
          function(std::random_device{}()) {}

    // Returns the next value, uniformly distributed over [mLow, mHigh].
    T operator()() {
        typedef unsigned long long Wide;

        // Width of the range (high - low). Unsigned 64-bit arithmetic wraps
        // modulo 2^64, so the subtraction is exact for signed and unsigned T
        // alike and cannot overflow.
        const Wide span = static_cast<Wide>(mHigh) - static_cast<Wide>(mLow);

        // Let the library map the engine output onto [0, span]; a raw '%'
        // would be biased and, on a signed value, could go negative.
        std::uniform_int_distribution<Wide> offset(0, span);

        // Add the offset in the same modular arithmetic, then narrow to T.
        return static_cast<T>(static_cast<Wide>(mLow) + offset(function));
    }

private:
    T mLow;                    // inclusive lower bound
    T mHigh;                   // inclusive upper bound
    std::mt19937_64 function;  // underlying 64-bit Mersenne Twister engine
};

class Noise_Gen {

public:

    Noise_Gen() : nChannels(NULL), nSamplesPerSec(NULL), nAvgBytesPerSec(NULL), nByteAlign(NULL), wBitsPerSample(NULL), 
        wValidBitsPerSample(NULL), wSamplesPerBlock(NULL), dwChannelMask(NULL), rd(NULL) {};

    ~Noise_Gen() {
        if(rd != NULL) {
            delete rd;
        }
    };

    HRESULT SetFormat(WAVEFORMATEX*);

    HRESULT LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags);

private:
    void* rd;

    // WAVEFORMATEX
    WORD nChannels;
    DWORD nSamplesPerSec;
    DWORD nAvgBytesPerSec;
    WORD nByteAlign;
    WORD wBitsPerSample;

    // WAVEFORMATEXTENSIBLE
    WORD wValidBitsPerSample;
    WORD wSamplesPerBlock;
    DWORD dwChannelMask;
};

Then I added the definitions:

// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>

#include <time.h>

#include "Noise_Gen.h"

HRESULT Noise_Gen::SetFormat(WAVEFORMATEX* format) {
    nChannels = format->nChannels;
    nSamplesPerSec = format->nSamplesPerSec;
    nAvgBytesPerSec = format->nAvgBytesPerSec;
    nByteAlign = format->nBlockAlign;
    wBitsPerSample = format->wBitsPerSample;
    WORD  wFormatTag = format->wFormatTag;
    if(wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
        WAVEFORMATEXTENSIBLE* pWFE = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(format);
        wValidBitsPerSample = pWFE->Samples.wValidBitsPerSample;
        wSamplesPerBlock = pWFE->Samples.wSamplesPerBlock;
        dwChannelMask = pWFE->dwChannelMask;
    } else {
        wValidBitsPerSample = wBitsPerSample;
    }
    double amplitude = std::pow(2.0, wValidBitsPerSample) - 1;
    switch(wBitsPerSample / 8) {
    case(1):
        rd = new Random<unsigned __int8>(0.0, amplitude);
        break;
    case(2): 
        rd = new Random<unsigned __int16>(0.0, amplitude);
        break;
    case(3):
        rd = new Random<unsigned __int32>(0.0, amplitude);
        break;
    case(4): 
        rd = new Random<signed __int32>(-amplitude, amplitude);
        break;
    case(5): 
        rd = new Random<unsigned __int64>(0.0, amplitude);
        break;
    case(6):
        rd = new Random<unsigned __int64>(0.0, amplitude);
        break;
    case(7): 
        rd = new Random<unsigned __int64>(0.0, amplitude);
        break;
    case(8):
        rd = new Random<unsigned __int64>(0.0, amplitude);
        break;
    default:
        return E_NOTIMPL;
    }
    return S_OK;
}

// (The size of an audio frame = nChannels * wBitsPerSample)
HRESULT Noise_Gen::LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {
    for(UINT32 i = 0; i < nChannels *bufferFrameCount; i++) {
        switch(wBitsPerSample / 8) {
        case(1):
            pData[i] = (((Random<unsigned __int8>*)rd)->operator()());
            break;
        case(2):{
            unsigned __int16* pData2 = (unsigned __int16*) pData;
            pData2[i] = (((Random<unsigned __int16>*)rd)->operator()());
            break;
        }
        case(3): {
            __int32 data = ((Random<unsigned __int32>*)rd)->operator()();
            unsigned char* cp = (unsigned char*) (&data);
            pData[(3 * i)] = cp[0];
            pData[1 + (3 * i)] = cp[1];
            pData[2 + (3 * i)] = cp[2];
            break;
        }
        case(4):{
            signed __int32* pData2 = (signed __int32*) pData;
            pData2[i] = (((Random<signed __int32>*)rd)->operator()());
            break;
        }
        case(5): {
            __int64 data = ((Random<unsigned __int64>*)rd)->operator()();
            unsigned char* cp = (unsigned char*) &data;
            pData[(5 * i)] = cp[0];
            pData[1 + (5 * i)] = cp[1];
            pData[2 + (5 * i)] = cp[2];
            pData[3 + (5 * i)] = cp[3];
            pData[4 + (5 * i)] = cp[4];
            break;
        }
        case(6): {
            __int64 data = ((Random<unsigned __int64>*)rd)->operator()();
            unsigned char* cp = (unsigned char*) &data;
            pData[(6 * i)] = cp[0];
            pData[1 + (6 * i)] = cp[1];
            pData[2 + (6 * i)] = cp[2];
            pData[3 + (6 * i)] = cp[3];
            pData[4 + (6 * i)] = cp[4];
            pData[5 + (6 * i)] = cp[5];
            break;
        }
        case(7): {
            __int64 data = ((Random<unsigned __int64>*)rd)->operator()();
            unsigned char* cp = (unsigned char*) &data;
            pData[(7 * i)] = cp[0];
            pData[1 + (7 * i)] = cp[1];
            pData[2 + (7 * i)] = cp[2];
            pData[3 + (7 * i)] = cp[3];
            pData[4 + (7 * i)] = cp[4];
            pData[5 + (7 * i)] = cp[5];
            pData[6 + (7 * i)] = cp[6];
            break;
        }
        case(8): {
            unsigned __int64* pData2 = (unsigned __int64*) pData;
            pData2[i] = (((Random<unsigned __int64>*)rd)->operator()());
            break;
        }
        default:
            // For stopping playback
            (*flags) = AUDCLNT_BUFFERFLAGS_SILENT;
            return E_NOTIMPL;
        }
    }
    return S_OK;
}

Then I added my class to the template provided by Microsoft and printed the default audio endpoint renderer to the console.

#include <InitGuid.h>
#include <iostream>
#include <Windows.h>
#include <dshow.h>

// Windows multimedia device
#include <Mmdeviceapi.h>
#include <Functiondiscoverykeys_devpkey.h>

// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>

#include "Noise_Gen.h"

//-----------------------------------------------------------
// Play an audio stream on the default audio rendering
// device. The PlayAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data to the
// rendering device. The inner loop runs every 1/2 second.
//-----------------------------------------------------------

// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC  10000000
#define REFTIMES_PER_MILLISEC  10000

// Jumps to the enclosing function's `Exit:` label when hres is a failure code.
// Wrapped in do/while(0) so the macro expands to exactly one statement and
// stays correct inside an unbraced if/else.
#define EXIT_ON_ERROR(hres)  \
              do { if (FAILED(hres)) { goto Exit; } } while (0)

// Releases a COM interface pointer if it is set, then nulls it so that a
// second SAFE_RELEASE on the same pointer does nothing.
#define SAFE_RELEASE(punk)  \
              do { if ((punk) != NULL)  \
                { (punk)->Release(); (punk) = NULL; } } while (0)

// Class and interface identifiers passed to CoCreateInstance, Activate and
// GetService in PlayAudioStream, taken from the SDK declarations via __uuidof.
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioRenderClient = __uuidof(IAudioRenderClient);

// Plays audio produced by pMySource on the default render endpoint, in shared
// mode and using the endpoint's mix format, until the source sets
// AUDCLNT_BUFFERFLAGS_SILENT or a call fails. The endpoint's friendly name is
// printed to stdout first.
//
// Returns the HRESULT of the first failing call, E_POINTER when pMySource is
// null, or the result of IAudioClient::Stop on normal completion.
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
    // Every local is declared up front: the error path uses `goto Exit`, which
    // must not jump over an initialisation.
    HRESULT hr = S_OK;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration = 0;
    DWORD halfBufferMs = 0;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioRenderClient* pRenderClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 bufferFrameCount = 0;
    UINT32 numFramesAvailable = 0;
    UINT32 numFramesPadding = 0;
    BYTE* pData = NULL;
    DWORD flags = 0;
    IPropertyStore* pPropertyStore = NULL;
    PROPVARIANT name;

    // Initialise before the first possible jump to Exit so the
    // PropVariantClear() there always sees a valid variant.
    PropVariantInit(&name);

    if(pMySource == NULL) {
        hr = E_POINTER;
        goto Exit;
    }

    hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL,
                          CLSCTX_ALL, IID_IMMDeviceEnumerator,
                          (void**) &pEnumerator);
    EXIT_ON_ERROR(hr);
    hr = pEnumerator->GetDefaultAudioEndpoint(
        eRender, eConsole, &pDevice);
    // Without this check a missing endpoint leaves pDevice null and the next
    // call dereferences it.
    EXIT_ON_ERROR(hr);

    hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
    EXIT_ON_ERROR(hr);
    hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
    EXIT_ON_ERROR(hr);
    // Only print when the property really holds a wide string.
    if(name.vt == VT_LPWSTR && name.pwszVal != NULL) {
        printf("%S", name.pwszVal);
    }
    printf("\n");

    hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL,
                           NULL, (void**) &pAudioClient);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                  0, hnsRequestedDuration,
                                  0, pwfx, NULL);
    EXIT_ON_ERROR(hr);
    // Tell the audio source which format to use.
    hr = pMySource->SetFormat(pwfx);
    EXIT_ON_ERROR(hr);
    // Get the actual size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetService(IID_IAudioRenderClient,
                                  (void**) &pRenderClient);
    EXIT_ON_ERROR(hr);
    // Grab the entire buffer for the initial fill operation.
    hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
    EXIT_ON_ERROR(hr);
    // Load the initial data into the shared buffer.
    hr = pMySource->LoadData(bufferFrameCount, pData, &flags);
    EXIT_ON_ERROR(hr);
    hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr);
    // Calculate the actual duration of the allocated buffer.
    hnsActualDuration = static_cast<REFERENCE_TIME>(
        static_cast<double>(REFTIMES_PER_SEC) * bufferFrameCount / pwfx->nSamplesPerSec);
    // Half the buffer duration in milliseconds; constant for the whole run.
    halfBufferMs = (DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
    hr = pAudioClient->Start();  // Start playing.
    EXIT_ON_ERROR(hr);
    // Each loop fills about half of the shared buffer.
    while(flags != AUDCLNT_BUFFERFLAGS_SILENT) {
        // Sleep for half the buffer duration.
        Sleep(halfBufferMs);
        // See how much buffer space is available.
        hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
        EXIT_ON_ERROR(hr);
        numFramesAvailable = bufferFrameCount - numFramesPadding;
        // Grab all the available space in the shared buffer.
        hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
        EXIT_ON_ERROR(hr);
        // Get the next chunk of data from the audio source.
        hr = pMySource->LoadData(numFramesAvailable, pData, &flags);
        EXIT_ON_ERROR(hr);
        hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
        EXIT_ON_ERROR(hr);
    }
    // Wait for last data in buffer to play before stopping.
    Sleep(halfBufferMs);
    hr = pAudioClient->Stop();  // Stop playing.
    EXIT_ON_ERROR(hr);
Exit:
    // Frees the string GetValue() may have allocated; harmless on an empty variant.
    PropVariantClear(&name);
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pRenderClient);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pPropertyStore);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pEnumerator);
    return hr;
}

// Initialises COM, plays noise on the default render endpoint, and reports
// failure through the process exit code (0 on success, the failing HRESULT
// otherwise).
int main() {
    HRESULT hr = CoInitialize(nullptr);
    if(FAILED(hr)) { return hr; }

    {
        // Scoped so the generator is destroyed before COM is torn down;
        // automatic storage replaces the manual new/delete pair.
        Noise_Gen ng;
        hr = PlayAudioStream(&ng);
    }

    CoUninitialize();
    return FAILED(hr) ? static_cast<int>(hr) : 0;
}

The default audio endpoint renderer on my system uses 32 bit values, so the code started by writing unsigned 32 bit values to the buffer. I then tried to use signed values, which can be seen in the code above. No sound was played in both these cases. I checked the contents of the buffer while debugging and they do change. I printed the default audio endpoint renderer to the console, and it is my system's speaker. Windows even shows my app in the Volume mixer, but there is no sound showing even with the volume all the way up. I then checked the sleep time to be sure it was sleeping so the system had access to the buffer, and it does sleep for 500ms between writes to the buffer.

Update: I found out I am using the KSDATAFORMAT_SUBTYPE_IEEE_FLOAT subformat and have tried feeding the buffer floats in the -amplitude to amplitude range, the 0 to amplitude range, the -1 to 1 range, and the 0 to 1 range.

What am I missing?

The Om The Om · Accepted Answer · 2020-12-18T10:59:13

Your random number distribution code does not work correctly for floating point formats (which is basically always going to be the mix format in shared mode as far as I know).

It's wrong even for integers. I assume you meant to write

((f  % ((signed __int64) mHigh - (signed __int64) mLow)) + (signed __int64) mLow);

(note the minus), but you should not use raw modulus anyway because it's slightly biased.

For floating point formats you always use the -1 to 1 range.

I have adapted your code to use std::uniform_real_distribution and I get noise playing on my speakers.

#include <cstdio>
#include <Windows.h>

// Windows multimedia device
#include <Mmdeviceapi.h>
#include <Functiondiscoverykeys_devpkey.h>

// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>

#include <random>


class Noise_Gen {
public:
    Noise_Gen() : format(), engine(__rdtsc()), float_dist(-1.f, 1.f) {};

    void SetFormat(WAVEFORMATEX* wfex) {
        if(wfex->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
            format = *reinterpret_cast<WAVEFORMATEXTENSIBLE*>(wfex);
        } else {
            format.Format = *wfex;
            format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
            INIT_WAVEFORMATEX_GUID(&format.SubFormat, wfex->wFormatTag);
            format.Samples.wValidBitsPerSample = format.Format.wBitsPerSample;
            format.dwChannelMask = 0;
        }
    }

    // (The size of an audio frame = nChannels * wBitsPerSample)
    void FillBuffer(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {
        const UINT16 formatTag = EXTRACT_WAVEFORMATEX_ID(&format.SubFormat);
        if(formatTag == WAVE_FORMAT_IEEE_FLOAT) {
            float* fData = (float*)pData;
            for(UINT32 i = 0; i < format.Format.nChannels * bufferFrameCount; i++) {
                fData[i] = float_dist(engine);
            }
        } else if(formatTag == WAVE_FORMAT_PCM) {
            using rndT = decltype(engine)::result_type;
            UINT32 iterations = format.Format.nBlockAlign * bufferFrameCount / sizeof(rndT);
            UINT32 leftoverBytes = format.Format.nBlockAlign * bufferFrameCount % sizeof(rndT);
            rndT* iData = (rndT*)pData;
            UINT32 i = 0;
            for(; i < iterations; i++) {
                iData[i] = engine();
            }
            if(leftoverBytes != 0) {
                rndT lastRnd = engine();
                BYTE* pLastBytes = pData + i * sizeof(rndT);
                for(UINT32 j = 0; j < leftoverBytes; ++j) {
                    pLastBytes[j] = lastRnd >> (j * 8) & 0xFF;
                }
            }
        } else {
            //memset(pData, 0, wfex.Format.nBlockAlign * bufferFrameCount);
            *flags = AUDCLNT_BUFFERFLAGS_SILENT;
        }
    }

private:
    WAVEFORMATEXTENSIBLE format;

    std::mt19937_64 engine;
    std::uniform_real_distribution<float> float_dist;
};

// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC  10000000ll
#define REFTIMES_PER_MILLISEC  10000

// Jumps to the enclosing function's `Exit:` label when hres is a failure code.
// Wrapped in do/while(0) so the macro expands to exactly one statement and
// stays correct inside an unbraced if/else.
#define EXIT_ON_ERROR(hres)  \
              do { if (FAILED(hres)) { goto Exit; } } while (0)

// Releases a COM interface pointer if it is set, then nulls it so that a
// second SAFE_RELEASE on the same pointer does nothing.
#define SAFE_RELEASE(punk)  \
              do { if ((punk) != NULL)  \
                { (punk)->Release(); (punk) = NULL; } } while (0)

// Plays audio produced by pMySource on the default render endpoint, in shared
// mode and using the endpoint's mix format, until the source sets
// AUDCLNT_BUFFERFLAGS_SILENT or a call fails. The endpoint's friendly name is
// printed to stdout first.
//
// Returns the HRESULT of the first failing call, E_POINTER when pMySource is
// null, or the result of IAudioClient::Stop on normal completion.
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
    // Every local that is live at `Exit:` is declared up front, because the
    // error path jumps there with goto.
    HRESULT hr = S_OK;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration = 0;
    DWORD sleepTime = 0;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IPropertyStore* pPropertyStore = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioRenderClient* pRenderClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 bufferFrameCount = 0;
    BYTE* pData = NULL;
    DWORD flags = 0;
    PROPVARIANT name;

    // Initialise before the first possible jump to Exit so the
    // PropVariantClear() there always sees a valid variant.
    PropVariantInit(&name);

    if(pMySource == NULL) {
        hr = E_POINTER;
        goto Exit;
    }

    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL,
        CLSCTX_ALL, IID_PPV_ARGS(&pEnumerator));
    EXIT_ON_ERROR(hr);
    hr = pEnumerator->GetDefaultAudioEndpoint(
        eRender, eConsole, &pDevice);
    EXIT_ON_ERROR(hr);

    hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
    EXIT_ON_ERROR(hr);
    hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
    EXIT_ON_ERROR(hr);
    // Only print when the property really holds a wide string.
    if(name.vt == VT_LPWSTR && name.pwszVal != NULL) {
        printf("%S", name.pwszVal);
    }
    printf("\n");
    hr = pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL,
        NULL, reinterpret_cast<void**>(&pAudioClient));
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr);

    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
        0, hnsRequestedDuration,
        0, pwfx, NULL);
    EXIT_ON_ERROR(hr);
    // Tell the audio source which format to use.
    pMySource->SetFormat(pwfx);
    // Get the actual size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetService(IID_PPV_ARGS(&pRenderClient));
    EXIT_ON_ERROR(hr);
    // Grab the entire buffer for the initial fill operation.
    hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
    EXIT_ON_ERROR(hr);

    // Load the initial data into the shared buffer.
    pMySource->FillBuffer(bufferFrameCount, pData, &flags);

    hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr);
    // Calculate the actual duration of the allocated buffer.
    hnsActualDuration = REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;
    // Half the buffer duration in milliseconds; it does not change while
    // playing, so compute it once.
    sleepTime = (DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
    hr = pAudioClient->Start();  // Start playing.
    EXIT_ON_ERROR(hr);
    // Each loop fills about half of the shared buffer.
    while(flags != AUDCLNT_BUFFERFLAGS_SILENT) {
        // Sleep for half the buffer duration.
        if(sleepTime != 0)
            Sleep(sleepTime);
        // See how much buffer space is available.
        UINT32 numFramesPadding;
        hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
        EXIT_ON_ERROR(hr);

        UINT32 numFramesAvailable = bufferFrameCount - numFramesPadding;
        // Grab all the available space in the shared buffer.
        hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
        EXIT_ON_ERROR(hr);

        // Get the next chunk of data from the audio source.
        pMySource->FillBuffer(numFramesAvailable, pData, &flags);

        hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
        EXIT_ON_ERROR(hr);
    }
    // Wait for last data in buffer to play before stopping.
    if(sleepTime != 0)
        Sleep(sleepTime);
    hr = pAudioClient->Stop();  // Stop playing.
    EXIT_ON_ERROR(hr);

Exit:
    // Frees the string GetValue() may have allocated; harmless on an empty variant.
    PropVariantClear(&name);
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pRenderClient);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pPropertyStore);
    SAFE_RELEASE(pEnumerator);
    return hr;
}

// Initialises COM, plays noise on the default render endpoint, and reports
// failure through the process exit code (0 on success, the failing HRESULT
// otherwise).
int main() {
    HRESULT hr = CoInitialize(nullptr);
    if(FAILED(hr)) { return hr; }

    Noise_Gen ng;
    // Keep the result instead of discarding it, so a failed playback is
    // visible to whoever launched the program.
    hr = PlayAudioStream(&ng);

    CoUninitialize();
    return FAILED(hr) ? static_cast<int>(hr) : 0;
}

Filling audio endpoint buffer provided by WASAPI not playing sound

1 Answers