I created a SinkWriter that is able to encode video and audio using Microsoft's Media Foundation Platform.
Video is working fine so far but I have some troubles with audio only.
My PCM source has a sample rate of 48828hz, 32 bits per sample and is mono.
Everything is working well so far except for FLAC.
For instance the MP3 output is working more or less but has a wrong format. Regarding to MSDN (MP3 Audio Encoder) the MP3 encoder only supports 16 bits per sample as input. My PCM source as descriped above has 32 bits per sample.
However the export with MP3 is working cause the MF Platform seems like to have some kind of fallback and is using the MPEG Audio layer 1/2 (mpga) with 2 Channels, 32khz and a bitrate of 320kb/s.
Things start to get weird when I set the MF_MT_SUBTYPE to MFAudioFormat_FLAC. The export is working too but the quality of the audio is aweful. There's a lot of noise but I am able to recognize the audio. Regarding to VLC the FLAC file has a sample rate of 44,1khz, 8 bits per sample and is mono.
Does this mean the FLAC codec isn't able to work with the PCM I provide?
Has anyone had the same problem and was able to fix it?
Update
After doing some more research about this problem it seems like that my PCM Audio with a resolution of 32 Bit is too high. So currently I am trying to convert the 32 Bit PCM to 24 Bit for FLAC and 16 Bit for MP3 but with no luck so far. I keep you updated if I make some progress.
--------
Update 2
I've created a minimal example app that shows the problem I am facing. It reads the 48khz32bit wave file and tries to encode it to flac.
When executing the hr = pSinkWriter->BeginWriting();
command I get the error 0xc00d36b4
whice means The data specified for the media type is invalid, inconsistent, or not supported by this object
.
What am I doing wrong here?
#include "stdafx.h"
#include <windows.h>
#include <windowsx.h>
#include <comdef.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>
#include <Mferror.h>
#pragma comment(lib, "ole32")
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mfreadwrite")
#pragma comment(lib, "mfuuid")
using namespace System;
int main(array<System::String ^> ^args)
{
HRESULT hr = CoInitializeEx(0, COINIT_MULTITHREADED);
hr = MFStartup(MF_VERSION);
IMFMediaType *pMediaType;
IMFMediaType *pMediaTypeOut;
IMFSourceReader *pSourceReader;
IMFAttributes *pAttributes;
IMFSinkWriter *pSinkWriter;
hr = MFCreateSourceReaderFromURL(
L"C:\\Temp\\48khz32bit.wav",
NULL,
&pSourceReader
);
hr = MFCreateAttributes(&pAttributes, 1);
hr = pAttributes->SetGUID(
MF_TRANSCODE_CONTAINERTYPE,
MFTranscodeContainerType_WAVE
);
hr = MFCreateSinkWriterFromURL(
L"C:\\Temp\\foo.flac",
NULL,
pAttributes,
&pSinkWriter
);
hr = pSourceReader->GetCurrentMediaType(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
&pMediaType);
hr = MFCreateMediaType(&pMediaTypeOut);
hr = pMediaTypeOut->SetGUID(
MF_MT_MAJOR_TYPE,
MFMediaType_Audio
);
hr = pMediaTypeOut->SetGUID(
MF_MT_SUBTYPE,
MFAudioFormat_FLAC
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_SAMPLES_PER_SECOND,
48000
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_NUM_CHANNELS,
1
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_BITS_PER_SAMPLE,
32
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_AVG_BYTES_PER_SECOND,
(((32 + 7) / 8) * 1) * 48000
);
hr = pMediaTypeOut->SetUINT32(
MF_MT_AUDIO_BLOCK_ALIGNMENT,
((32 + 7) / 8) * 1
);
DWORD nWriterStreamIndex = -1;
hr = pSinkWriter->AddStream(pMediaTypeOut, &nWriterStreamIndex);
hr = pSinkWriter->BeginWriting();
_com_error err(hr);
LPCTSTR errMsg = err.ErrorMessage();
for (;;)
{
DWORD nStreamIndex, nStreamFlags;
LONGLONG nTime;
IMFSample *pSample;
hr = pSourceReader->ReadSample(
MF_SOURCE_READER_FIRST_AUDIO_STREAM,
0,
&nStreamIndex,
&nStreamFlags,
&nTime,
&pSample);
if (pSample)
{
OutputDebugString(L"Write sample...\n");
hr = pSinkWriter->WriteSample(
nWriterStreamIndex,
pSample
);
}
if (nStreamFlags & MF_SOURCE_READERF_ENDOFSTREAM)
{
break;
}
}
hr = pSinkWriter->Finalize();
return 0;
}
--------
Update 3
I added the solution as answer.
--------
Initialize SinkWriter
HRESULT SinkWriter::InitializeSinkWriter(IMFSinkWriter **ppWriter, DWORD *pStreamIndex, DWORD *pAudioStreamIndex, LPCWSTR filename)
{
*ppWriter = NULL;
*pStreamIndex = NULL;
*pAudioStreamIndex = NULL;
IMFSinkWriter *pSinkWriter = NULL;
// Attributes
IMFAttributes *pAttributes;
HRESULT hr = S_OK;
DX::ThrowIfFailed(
MFCreateAttributes(
&pAttributes,
3
)
);
#if defined(ENABLE_HW_ACCELERATION)
CComPtr<ID3D11Device> device;
D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };
#if defined(ENABLE_HW_DRIVER)
DX::ThrowIfFailed(
D3D11CreateDevice(
nullptr,
D3D_DRIVER_TYPE_HARDWARE,
nullptr,
(0 * D3D11_CREATE_DEVICE_SINGLETHREADED) | D3D11_CREATE_DEVICE_VIDEO_SUPPORT,
levels,
ARRAYSIZE(levels),
D3D11_SDK_VERSION,
&device,
nullptr,
nullptr
)
);
const CComQIPtr<ID3D10Multithread> pMultithread = device;
pMultithread->SetMultithreadProtected(TRUE);
#else
DX::ThrowIfFailed(
D3D11CreateDevice(
nullptr,
D3D_DRIVER_TYPE_NULL,
nullptr,
D3D11_CREATE_DEVICE_SINGLETHREADED,
levels,
ARRAYSIZE(levels),
D3D11_SDK_VERSION,
&device,
nullptr,
nullptr)
);
#endif
UINT token;
CComPtr<IMFDXGIDeviceManager> pManager;
DX::ThrowIfFailed(
MFCreateDXGIDeviceManager(
&token,
&pManager
)
);
DX::ThrowIfFailed(
pManager->ResetDevice(
device,
token
)
);
DX::ThrowIfFailed(
pAttributes->SetUnknown(
MF_SOURCE_READER_D3D_MANAGER,
pManager
)
);
DX::ThrowIfFailed(
pAttributes->SetUINT32(
MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS,
TRUE
)
);
#if (WINVER >= 0x0602)
DX::ThrowIfFailed(
pAttributes->SetUINT32(
MF_SOURCE_READER_ENABLE_ADVANCED_VIDEO_PROCESSING,
TRUE
)
);
#endif
#else
DX::ThrowIfFailed(
pAttributes->SetUINT32(
MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS,
TRUE
)
);
DX::ThrowIfFailed(
pAttributes->SetUINT32(
MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING,
TRUE
)
);
#endif
DX::ThrowIfFailed(
MFCreateSinkWriterFromURL(
filename,
NULL,
pAttributes,
&pSinkWriter
)
);
if (m_vFormat != VideoFormat::SWFV_NONE)
{
DX::ThrowIfFailed(
InitializeVideoCodec(
pSinkWriter,
pStreamIndex
)
);
}
if (m_audFormat != AudioFormat::SWAF_NONE)
{
DX::ThrowIfFailed(
InitializeAudioCodec(
pSinkWriter,
pAudioStreamIndex
)
);
}
// Tell the sink writer to start accepting data.
DX::ThrowIfFailed(
pSinkWriter->BeginWriting()
);
// Return the pointer to the caller.
*ppWriter = pSinkWriter;
(*ppWriter)->AddRef();
SAFE_RELEASE(pSinkWriter);
return hr;
}
Initialize Audio Codec
HRESULT SinkWriter::InitializeAudioCodec(IMFSinkWriter *pSinkWriter, DWORD *pStreamIndex)
{
// Audio media types
IMFMediaType *pAudioTypeOut = NULL;
IMFMediaType *pAudioTypeIn = NULL;
DWORD audioStreamIndex;
HRESULT hr = S_OK;
// Set the output audio type.
DX::ThrowIfFailed(
MFCreateMediaType(
&pAudioTypeOut
)
);
DX::ThrowIfFailed(
pAudioTypeOut->SetGUID(
MF_MT_MAJOR_TYPE,
MFMediaType_Audio
)
);
DX::ThrowIfFailed(
pAudioTypeOut->SetGUID(
MF_MT_SUBTYPE,
AUDIO_SUBTYPE
)
);
DX::ThrowIfFailed(
pSinkWriter->AddStream(
pAudioTypeOut,
&audioStreamIndex
)
);
// Set the input audio type
DX::ThrowIfFailed(
MFCreateMediaType(
&pAudioTypeIn
)
);
DX::ThrowIfFailed(
pAudioTypeIn->SetGUID(
MF_MT_MAJOR_TYPE,
AUDIO_MAJOR_TYPE
)
);
DX::ThrowIfFailed(
pAudioTypeIn->SetGUID(
MF_MT_SUBTYPE,
MFAudioFormat_PCM
)
);
DX::ThrowIfFailed(
pAudioTypeIn->SetUINT32(
MF_MT_AUDIO_NUM_CHANNELS,
AUDIO_NUM_CHANNELS
)
);
DX::ThrowIfFailed(
pAudioTypeIn->SetUINT32(
MF_MT_AUDIO_BITS_PER_SAMPLE,
AUDIO_BITS_PER_SAMPLE
)
);
DX::ThrowIfFailed(
pAudioTypeIn->SetUINT32(
MF_MT_AUDIO_BLOCK_ALIGNMENT,
AUDIO_BLOCK_ALIGNMENT
)
);
DX::ThrowIfFailed(
pAudioTypeIn->SetUINT32(
MF_MT_AUDIO_SAMPLES_PER_SECOND,
AUDIO_SAMPLES_PER_SECOND
)
);
DX::ThrowIfFailed(
pAudioTypeIn->SetUINT32(
MF_MT_AUDIO_AVG_BYTES_PER_SECOND,
AUDIO_AVG_BYTES_PER_SECOND
)
);
DX::ThrowIfFailed(
pSinkWriter->SetInputMediaType(
audioStreamIndex,
pAudioTypeIn,
NULL
)
);
*pStreamIndex = audioStreamIndex;
SAFE_RELEASE(pAudioTypeOut);
SAFE_RELEASE(pAudioTypeIn);
return hr;
}
Push audio data
HRESULT SinkWriter::PushAudio(UINT32* data)
{
HRESULT hr = S_FALSE;
if (m_isInitializing)
{
return hr;
}
IMFSample *pSample = NULL;
IMFMediaBuffer *pBuffer = NULL;
BYTE *pMem = NULL;
size_t cbBuffer = m_bufferLength * sizeof(short);
// Create a new memory buffer.
hr = MFCreateMemoryBuffer(cbBuffer, &pBuffer);
// Lock the buffer and copy the audio frame to the buffer.
if (SUCCEEDED(hr))
{
hr = pBuffer->Lock(&pMem, NULL, NULL);
}
if (SUCCEEDED(hr))
{
CopyMemory(pMem, data, cbBuffer);
}
if (pBuffer)
{
pBuffer->Unlock();
}
if (m_vFormat == VideoFormat::SWFV_NONE && m_audFormat == AudioFormat::SWAF_WAV)
{
DWORD cbWritten = 0;
if (SUCCEEDED(hr))
{
hr = m_pByteStream->Write(pMem, cbBuffer, &cbWritten);
}
if (SUCCEEDED(hr))
{
m_cbWrittenByteStream += cbWritten;
}
}
else
{
// Set the data length of the buffer.
if (SUCCEEDED(hr))
{
hr = pBuffer->SetCurrentLength(cbBuffer);
}
// Create media sample and add the buffer to the sample.
if (SUCCEEDED(hr))
{
hr = MFCreateSample(&pSample);
}
if (SUCCEEDED(hr))
{
hr = pSample->AddBuffer(pBuffer);
}
// Set the timestamp and the duration.
if (SUCCEEDED(hr))
{
hr = pSample->SetSampleTime(m_cbRtStartVideo);
}
if (SUCCEEDED(hr))
{
hr = pSample->SetSampleDuration(m_cbRtDurationVideo);
}
// Send the sample to the Sink Writer
if (SUCCEEDED(hr))
{
hr = m_pSinkWriter->WriteSample(m_audioStreamIndex, pSample);
}
/*if (SUCCEEDED(hr))
{
m_cbRtStartAudio += m_cbRtDurationAudio;
}*/
SAFE_RELEASE(pSample);
SAFE_RELEASE(pBuffer);
}
return hr;
}