I've discovered that Microsoft Media Foundation generates MPEG4 file in which the MDAT atom comes before the MOOV atom. MOOV before MDAT is required for streaming. I assumed the solution to my problem would be to use the MF_MPEG4SINK_MOOV_BEFORE_MDAT attribute when creating the sink, but I can't seem to get it to have an effect. My code is largely the same as that http://blogs.msdn.com/b/eternalcoding/archive/2013/03/06/developing-a-winrt-component-to-create-a-video-file-using-media-foundation.aspx . I'm setting the attribute as a UINT32 TRUE just before MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS is set, in that example.
3 Answers
If you can't get the encoder to output a file with the moov atom at the beginning, you could always just correct the file afterward. Both of these utilities should run on Windows, and do the trick. (They both handle.mp4 files without issue, in spite of the 'qt' in the names)
Although its been a long time since this post, I too had issues with the MOOV before MDAT with MediaFoundation encoding. Very little documentation on the subject from Microsoft. The setting needs to be applied to the MFT Video Sink or in the SinkWriter upon creation of the object.
I managed to enable the functionality I wantnt but the video in the recording was empty frames even though the file size was the same and mp4box info stated progressive download supported, video length etc. Audio was fine. GOP also was not present in the info so there is still a misconfiguration.
I then did some more reading on H264 and MP4 structures,
The key for me was the MPEG container needs to be a fragmented MP4 container so simply setting the container type to FMPEG4 instead of MPEG4 did the trick. Below is the initialisation of the SinkWriter that works well for this feature to work.
Here is my full initialisation of the SinkWriter.
ComPtr<ID3D11Device> device;
ComPtr<ID3D11Device> dx3Device; // multithread configuration.
ComPtr<ID3D10Multithread> dx3MultiThread;
ComPtr<IMFDXGIDeviceManager> manager;
unsigned videoQuality = 50;
unsigned videoBitrate = FPS * width * height; // DEFAULT_BITRATE;
videoBitrate = DEFAULT_BITRATE;
// Audio Input
const UINT SamplesPerSecond = BaseSampleRate;
const UINT AverageBytesPerSecond = SamplesPerSecond / sizeof(FLOAT);
const UINT ChannelCount = 2; // Converted
const UINT BitsPerSample = 16; // Converted
MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET);
_Clock = new Clock();
// Create a random access stream in memory
// CHK(MFCreateMFByteStreamOnStreamEx((IUnknown*)videoStream, &m_SpByteStream));
// Create a temporary working MP4.
IMFByteStreamEx::CreateInstance((IUnknown*)videoStream, IID_IMFByteStream, &m_SpByteStream);
// Create the Sink Writer
ComPtr<IMFAttributes> spAttr;
ComPtr<IMFMediaType> audioOutputType;
ComPtr<IMFMediaType> spVideoTypeIn;
ComPtr<IMFMediaType> spVideoTypeOut;
CHK(MFCreateAttributes(&spAttr, 10));
CHK(spAttr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, TRUE));
CHK(spAttr->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, FALSE));
CHK(spAttr->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, TRUE));
CHK(spAttr->SetUINT32(MF_LOW_LATENCY, TRUE));
CHK(spAttr->SetGUID(MF_TRANSCODE_CONTAINERTYPE, MFTranscodeContainerType_FMPEG4));
CHK(spAttr->SetUINT32(MF_MPEG4SINK_MOOV_BEFORE_MDAT, TRUE))
// Setup the output video media type
HRESULT hr = 0;
D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };
CHK(D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_VIDEO_SUPPORT | D3D11_CREATE_DEVICE_BGRA_SUPPORT,
levels, ARRAYSIZE(levels), D3D11_SDK_VERSION, &device, nullptr, nullptr));
UINT token;
CHK(MFCreateDXGIDeviceManager(&token, &manager));
HANDLE deviceHandle;
CHK(manager->ResetDevice(reinterpret_cast<IUnknown*>(device.Get()), token));
if (SUCCEEDED(manager->OpenDeviceHandle(&deviceHandle))) {
// https://docs.microsoft.com/en-au/windows/desktop/medfound/supporting-direct3d-11-video-decoding-in-media-foundation
// make sure we are using the same device
hr = manager->GetVideoService(deviceHandle, IID_PPV_ARGS(&dx3Device));
hr = dx3Device->QueryInterface(IID_PPV_ARGS(&dx3MultiThread));
dx3MultiThread->SetMultithreadProtected(TRUE);
}
CHK(spAttr->SetUnknown(MF_SINK_WRITER_D3D_MANAGER, manager.Get()));
CHK(MFCreateSinkWriterFromURL(L".mp4v", m_SpByteStream.Get(), spAttr.Get(), &m_SpSinkWriter));
//// Video In Format
CHK(MFCreateMediaType(&spVideoTypeIn));
CHK(spVideoTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
CHK(spVideoTypeIn->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32));
CHK(spVideoTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive));
CHK(MFSetAttributeSize(spVideoTypeIn.Get(), MF_MT_FRAME_SIZE, m_Width, m_Height));
CHK(MFSetAttributeRatio(spVideoTypeIn.Get(), MF_MT_FRAME_RATE, m_FramesPerSecond, 1));
CHK(MFSetAttributeRatio(spVideoTypeIn.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1));
CHK(spVideoTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, FALSE));
CHK(spVideoTypeIn->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE));
// Video Out format
CHK(MFCreateMediaType(&spVideoTypeOut));
CHK(spVideoTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
CHK(spVideoTypeOut->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264));
CHK(spVideoTypeOut->SetUINT32(MF_MT_COMPRESSED, FALSE));
CHK(spVideoTypeOut->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, FALSE));
CHK(spVideoTypeOut->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, FALSE));
CHK(spVideoTypeOut->SetUINT32(MF_MT_AVG_BITRATE, videoBitrate ));
CHK(spVideoTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive));
CHK(spVideoTypeOut->SetUINT32(MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_High));
CHK(MFSetAttributeSize(spVideoTypeOut.Get(), MF_MT_FRAME_SIZE, m_Width, m_Height));
CHK(MFSetAttributeRatio(spVideoTypeOut.Get(), MF_MT_FRAME_RATE, m_FramesPerSecond , 1));
CHK(MFSetAttributeRatio(spVideoTypeOut.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1));
spVideoTypeOut->SetUINT32(MF_MT_SAMPLE_SIZE, 1);
MFSetAttributeSize(spVideoTypeOut.Get(), MF_MT_FRAME_RATE_RANGE_MAX, m_FramesPerSecond, 1);
MFSetAttributeSize(spVideoTypeOut.Get(), MF_MT_FRAME_RATE_RANGE_MIN, m_FramesPerSecond / 2, 1);
// Audio In Format
ComPtr<IMFMediaType> spAudioTypeIn;
CHK(MFCreateMediaType(&spAudioTypeIn));
CHK(spAudioTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));
CHK(spAudioTypeIn->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM));
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, BitsPerSample));
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, BaseSampleRate));
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, ChannelCount));
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, AverageBytesPerSec)); // 32bit converted to 16
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, 4));
CHK(spAudioTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, FALSE));
CHK(spAudioTypeIn->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE));
CHK(MFCreateMediaType(&audioOutputType));
CHK(audioOutputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));
CHK(audioOutputType->SetUINT32(MF_MT_AVG_BITRATE, 16000));
CHK(audioOutputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, SamplesPerSecond));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, BitsPerSample / ((BitsPerSample > 16) ? 2 : 1)));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, ChannelCount));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 12000)); // AverageBytesPerSecond));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, 1));
CHK(audioOutputType->SetUINT32(MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, 0x29));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_PREFER_WAVEFORMATEX, 1));
CHK(audioOutputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, FALSE));
CHK(audioOutputType->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE));
// Add Video out stream
ComPtr<IMFAttributes> encoderAttributes;
if (TRUE) { // Experimental
CHK(MFCreateAttributes(&encoderAttributes, 12));
if (TRUE) {
unsigned force_keyframe_every_nframes = 11;
unsigned force_bframe_every_nframes = 2;
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncMPVGOPSize, force_keyframe_every_nframes));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncMPVDefaultBPictureCount, force_bframe_every_nframes));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncNumWorkerThreads, 6));
}
if (TRUE) {
// constant quality for screen captures
CHK(encoderAttributes->SetUINT32(CODECAPI_AVLowLatencyMode, TRUE));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonRealTime, 1));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncAdaptiveMode, eAVEncAdaptiveMode_Resolution));
CHK(encoderAttributes->SetGUID(CODECAPI_AVEncCodecType, CODECAPI_GUID_AVEncH264Video));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonMultipassMode, 2));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonRateControlMode, eAVEncCommonRateControlMode_PeakConstrainedVBR));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonMeanBitRate, DEFAULT_BITRATE));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonStreamEndHandling, eAVEncCommonStreamEndHandling_EnsureComplete));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncVideoContentType, eAVEncVideoContentType_FixedCameraAngle));
}
}
CHK(m_SpSinkWriter->AddStream(spVideoTypeOut.Get(), &m_VideoStreamIndex));
CHK(m_SpSinkWriter->SetInputMediaType(m_VideoStreamIndex, spVideoTypeIn.Get(), encoderAttributes.Get()));
CHK(m_SpSinkWriter->AddStream(audioOutputType.Get(), &m_AudioStreamIndex));
CHK(m_SpSinkWriter->SetInputMediaType(m_AudioStreamIndex, spAudioTypeIn.Get(), nullptr));
_Clock->Start();
m_ClockStart = clock();
CHK(m_SpSinkWriter->BeginWriting());
Did you read the remarks in http://msdn.microsoft.com/en-us/library/windows/desktop/hh870256%28v=vs.85%29.aspx ?
"In order for the mpeg4 sink to use this attribute, the byte stream passed in must not be slow seek or remote"
Please check the capabilities of your IMFByteStream?
MFBYTESTREAM_IS_REMOTE and MFBYTESTREAM_HAS_SLOW_SEEK should be cleared.
If your IMFByteStream doesn't qualify - then first create a file MDAT->MOOV and then remux to a new file MOOV->MDAT.