I am trying to mux H264-encoded data and G711 PCM data into a MOV
multimedia container. I am creating an AVPacket
from the encoded data, and initially the PTS and DTS values of the video/audio frames are equal to AV_NOPTS_VALUE.
So I calculated the DTS using the current time. My code:
// Wraps one already-encoded video frame in an AVPacket and passes it to
// av_interleaved_write_frame().
//   pData / iDataSize : the encoded frame bytes (not copied; pkt.data points at them).
//   bIFrame           : when true the packet is flagged AV_PKT_FLAG_KEY.
// Returns false if the write call reports an error, true otherwise.
bool AudioVideoRecorder::WriteVideo(const unsigned char *pData, size_t iDataSize, bool const bIFrame) {
.....................................
.....................................
.....................................
AVPacket pkt = {0};
av_init_packet(&pkt);
// Current time in microseconds, rescaled to the video stream's time base.
int64_t dts = av_gettime();
dts = av_rescale_q(dts, (AVRational){1, 1000000}, m_pVideoStream->time_base);
// Fallback duration for the first packet.
// NOTE(review): the 90000 literal presumes a 1/90000 time base; confirm it matches
// m_pVideoStream->time_base at write time.
int duration = 90000 / VIDEO_FRAME_RATE;
// For later packets the duration is the wall-clock gap since the previous call.
// NOTE(review): the int64_t difference is narrowed to int here.
if(m_prevVideoDts > 0LL) {
duration = dts - m_prevVideoDts;
}
m_prevVideoDts = dts;
pkt.pts = AV_NOPTS_VALUE;
// The packet's dts is the running sum of the durations written so far,
// not the wall-clock value computed above.
pkt.dts = m_currVideoDts;
m_currVideoDts += duration;
pkt.duration = duration;
if(bIFrame) {
pkt.flags |= AV_PKT_FLAG_KEY;
}
pkt.stream_index = m_pVideoStream->index;
pkt.data = (uint8_t*) pData;
pkt.size = iDataSize;
int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt);
if(ret < 0) {
// Note: this path returns without calling av_free_packet().
LogErr("Writing video frame failed.");
return false;
}
Log("Writing video frame done.");
av_free_packet(&pkt);
return true;
}
// Wraps one chunk of already-encoded audio in an AVPacket and passes it to
// av_interleaved_write_frame().
//   pEncodedData / iDataSize : the encoded audio bytes (not copied; pkt.data points at them).
// Every audio packet is flagged AV_PKT_FLAG_KEY.
// Returns false if the write call reports an error, true otherwise.
bool AudioVideoRecorder::WriteAudio(const unsigned char *pEncodedData, size_t iDataSize) {
.................................
.................................
.................................
AVPacket pkt = {0};
av_init_packet(&pkt);
// Current time in microseconds, rescaled to a fixed 1/90000 time base.
// NOTE(review): the video path rescales to m_pVideoStream->time_base, whereas this uses a
// hard-coded {1, 90000}; confirm that matches m_pAudioStream->time_base.
int64_t dts = av_gettime();
dts = av_rescale_q(dts, (AVRational){1, 1000000}, (AVRational){1, 90000});
// Fallback duration for the first packet.
// NOTE(review): the unit of AUDIO_STREAM_DURATION is not visible here; verify it is in
// the same ticks as dts.
int duration = AUDIO_STREAM_DURATION; // 20
// For later packets the duration is the wall-clock gap since the previous call
// (the int64_t difference is narrowed to int).
if(m_prevAudioDts > 0LL) {
duration = dts - m_prevAudioDts;
}
m_prevAudioDts = dts;
pkt.pts = AV_NOPTS_VALUE;
// The packet's dts is the running sum of the durations written so far.
pkt.dts = m_currAudioDts;
m_currAudioDts += duration;
pkt.duration = duration;
pkt.stream_index = m_pAudioStream->index;
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.data = (uint8_t*) pEncodedData;
pkt.size = iDataSize;
int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt);
if(ret < 0) {
// Note: this path returns without calling av_free_packet().
LogErr("Writing audio frame failed: %d", ret);
return false;
}
Log("Writing audio frame done.");
av_free_packet(&pkt);
return true;
}
And I added stream like this -
// Creates a new stream on m_pFormatCtx for the given codec ID and fills in the
// codec-context parameters for video or audio.
//   codecID : codec to assign to the stream's codec context.
// Stores the new stream in m_pVideoStream or m_pAudioStream depending on the codec type.
// Returns the new stream, or NULL if avformat_new_stream() fails.
AVStream* AudioVideoRecorder::AddMediaStream(enum AVCodecID codecID) {
................................
.................................
pStream = avformat_new_stream(m_pFormatCtx, codec);
if (!pStream) {
LogErr("Could not allocate stream.");
return NULL;
}
// Stream id is its zero-based position among the context's streams.
pStream->id = m_pFormatCtx->nb_streams - 1;
pCodecCtx = pStream->codec;
pCodecCtx->codec_id = codecID;
switch(codec->type) {
case AVMEDIA_TYPE_VIDEO:
pCodecCtx->bit_rate = VIDEO_BIT_RATE;
pCodecCtx->width = PICTURE_WIDTH;
pCodecCtx->height = PICTURE_HEIGHT;
pStream->time_base = (AVRational){1, 90000};
pStream->avg_frame_rate = (AVRational){90000, 1};
pStream->r_frame_rate = (AVRational){90000, 1}; // though the frame rate is variable and around 15 fps
pCodecCtx->pix_fmt = STREAM_PIX_FMT;
m_pVideoStream = pStream;
break;
case AVMEDIA_TYPE_AUDIO:
pCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
pCodecCtx->bit_rate = AUDIO_BIT_RATE;
pCodecCtx->sample_rate = AUDIO_SAMPLE_RATE;
pCodecCtx->channels = 1;
m_pAudioStream = pStream;
break;
default:
break;
}
/* Some formats want stream headers to be separate. */
// CODEC_FLAG_GLOBAL_HEADER is a codec-context flag, so it is set on pCodecCtx;
// OR-ing it into the format context's flags does not request global headers.
if (m_pOutputFmt->flags & AVFMT_GLOBALHEADER)
pCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;
return pStream;
}
There are several problems with this calculation:
The video is laggy and falls increasingly behind the audio over time.
Suppose an audio frame is received (
WriteAudio(..)
) a little late, say by 3 seconds; then the late frame should start playing after a 3-second delay, but it doesn't. The delayed frame is played immediately after the previous frame. Sometimes I record for ~40 seconds but the file duration is much longer, like 2 minutes; audio/video plays for only about 40 seconds, the rest of the file contains nothing, and the seek bar jumps to the end immediately after 40 seconds (tested in VLC).
EDIT:
Following Ronald S. Bultje's suggestion, here is what I've understood:
// Put the audio and video streams on the same 1/9000 time base.
// NOTE(review): AddMediaStream earlier in this post uses {1, 90000} for video; confirm
// that 9000 (not 90000) is intended here.
m_pAudioStream->time_base = (AVRational){1, 9000}; // actually no need to set as 9000 is already default value for audio as you said
m_pVideoStream->time_base = (AVRational){1, 9000};
should be set, so that both the audio and video streams use the same time base units.
And for video:
...................
...................
int64_t dts = av_gettime(); // get current time in microseconds
dts *= 9000;
dts /= 1000000; // 1 second = 10^6 microseconds
pkt.pts = AV_NOPTS_VALUE; // is it okay?
pkt.dts = dts;
// and no need to set pkt.duration, right?
And for audio: (exactly same as video, right?)
...................
...................
int64_t dts = av_gettime(); // get current time in microseconds
dts *= 9000;
dts /= 1000000; // 1 second = 10^6 microseconds
pkt.pts = AV_NOPTS_VALUE; // is it okay?
pkt.dts = dts;
// and no need to set pkt.duration, right?
And I think they are now like sharing same currDts
, right? Please correct me if I am wrong anywhere or missing anything.
Also, if I want to use video stream time base as (AVRational){1, frameRate}
and audio stream time base as (AVRational){1, sampleRate}
, how the correct code should look like?
EDIT 2.0:
// Both streams are given the same time base of 1/VIDEO_FRAME_RATE.
m_pAudioStream->time_base = (AVRational){1, VIDEO_FRAME_RATE};
m_pVideoStream->time_base = (AVRational){1, VIDEO_FRAME_RATE};
And
// Wraps one chunk of already-encoded audio in an AVPacket, stamps it with a
// wall-clock-derived dts relative to the first packet seen, and passes it to
// av_interleaved_write_frame().
//   pEncodedData / iDataSize : the encoded audio bytes (not copied; pkt.data points at them).
// Returns false if the write call reports an error, true otherwise.
bool AudioVideoRecorder::WriteAudio(const unsigned char *pEncodedData, size_t iDataSize) {
...........................
......................
AVPacket pkt = {0};
av_init_packet(&pkt);
int64_t dts = av_gettime() / 1000; // convert into millisecond
// NOTE(review): milliseconds * VIDEO_FRAME_RATE advances 1000 * VIDEO_FRAME_RATE ticks per
// second, whereas a 1/VIDEO_FRAME_RATE time base has VIDEO_FRAME_RATE ticks per second;
// confirm which time base the muxer actually applies to this stream.
dts = dts * VIDEO_FRAME_RATE;
// The first timestamp observed becomes the origin. WriteVideo uses the same m_dtsOffset
// member, so both streams are measured from one origin.
if(m_dtsOffset < 0) {
m_dtsOffset = dts;
}
pkt.pts = AV_NOPTS_VALUE;
pkt.dts = (dts - m_dtsOffset);
pkt.stream_index = m_pAudioStream->index;
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.data = (uint8_t*) pEncodedData;
pkt.size = iDataSize;
int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt);
if(ret < 0) {
// Note: this path returns without calling av_free_packet().
LogErr("Writing audio frame failed: %d", ret);
return false;
}
Log("Writing audio frame done.");
av_free_packet(&pkt);
return true;
}
// Wraps one already-encoded video frame in an AVPacket, stamps it with a
// wall-clock-derived dts relative to the first packet seen, and passes it to
// av_interleaved_write_frame().
//   pData / iDataSize : the encoded frame bytes (not copied; pkt.data points at them).
//   bIFrame           : when true the packet is flagged AV_PKT_FLAG_KEY.
// Returns false if the write call reports an error, true otherwise.
bool AudioVideoRecorder::WriteVideo(const unsigned char *pData, size_t iDataSize, bool const bIFrame) {
........................................
.................................
AVPacket pkt = {0};
av_init_packet(&pkt);
// Current time in milliseconds.
int64_t dts = av_gettime() / 1000;
// NOTE(review): milliseconds * VIDEO_FRAME_RATE advances 1000 * VIDEO_FRAME_RATE ticks per
// second, whereas a 1/VIDEO_FRAME_RATE time base has VIDEO_FRAME_RATE ticks per second;
// confirm which time base the muxer actually applies to this stream.
dts = dts * VIDEO_FRAME_RATE;
// The first timestamp observed becomes the origin. WriteAudio uses the same m_dtsOffset
// member, so both streams are measured from one origin.
if(m_dtsOffset < 0) {
m_dtsOffset = dts;
}
pkt.pts = AV_NOPTS_VALUE;
pkt.dts = (dts - m_dtsOffset);
if(bIFrame) {
pkt.flags |= AV_PKT_FLAG_KEY;
}
pkt.stream_index = m_pVideoStream->index;
pkt.data = (uint8_t*) pData;
pkt.size = iDataSize;
int ret = av_interleaved_write_frame(m_pFormatCtx, &pkt);
if(ret < 0) {
// Note: this path returns without calling av_free_packet().
LogErr("Writing video frame failed.");
return false;
}
Log("Writing video frame done.");
av_free_packet(&pkt);
return true;
}
Is the last change okay? The video and audio seem synced. The only problem is that the audio is played without any delay even when a packet arrives late. Like -
packet arrival: 1 2 3 4... (then next frame arrived after 3 sec) .. 5
audio played: 1 2 3 4 (no delay) 5
EDIT 3.0:
zeroed audio sample data:
// Attempt to build a packet of silent (zero-filled) audio data.
AVFrame* pSilentData;
pSilentData = av_frame_alloc();
// NOTE(review): &pSilentData->data[0] is the address of the frame's data-pointer slot, not of
// a sample buffer, so this memset writes iDataSize bytes into the AVFrame struct itself.
// Confirm whether a separately allocated iDataSize-byte buffer was intended.
memset(&pSilentData->data[0], 0, iDataSize);
// NOTE(review): pkt.data is pointed at the AVFrame object rather than at sample bytes; verify.
pkt.data = (uint8_t*) pSilentData;
pkt.size = iDataSize;
// NOTE(review): the frame is released below while pkt.data still refers to it. The write call
// is not shown in this snippet; confirm the packet is written before these frees run.
av_freep(&pSilentData->data[0]);
av_frame_free(&pSilentData);
Is this okay? After writing this into the container file, there is a "dot dot" (clicking) noise while playing the media. What's the problem?
EDIT 4.0:
Well, for µ-law audio the zero value is represented as 0xff
. So -
// Fill with 0xff, which the text above gives as the µ-law representation of zero.
memset(&pSilentData->data[0], 0xff, iDataSize);
solved my problem.