I am learning how to create an MP4 video from this example. The problem is that the example demonstrates audio encoding from dummy source data generated on the fly, while I need to encode audio from a file. I have checked many examples, and most of them show either the same thing or standalone audio encoding only.

In my trial-and-error process I am using the same AVFormatContext for both the audio and the video frames. I am not sure whether that is the right thing to do, or whether I should rather have 2 separate contexts. So far the video encoding works fine, but the audio stream fails because AVPacket cannot locate the correct audio stream index.

Here is how I set up the audio stream:
void open_audio(AVFormatContext *oc, AVCodec **codec, AVStream **st, enum AVCodecID codec_id)
{
    int ret;

    *codec = avcodec_find_encoder(codec_id);
    if (!(*codec)) {
        fprintf(stderr, "Could not find encoder for '%s'\n", avcodec_get_name(codec_id));
    }

    /* open the input audio file (note: this reuses the output context) */
    if (avformat_open_input(&oc, _audioInName.c_str(), NULL, NULL) != 0) {
        Msg::PrintErrorMsg("Error opening audio file");
    }

    AVStream *audioStream = NULL;

    /* find the audio stream (some container files can have multiple streams in them) */
    for (uint32_t i = 0; i < oc->nb_streams; ++i) {
        if (oc->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            audioStream = oc->streams[i];
            break;
        }
    }
    if (audioStream == NULL) {
        Msg::PrintErrorMsg("Could not find any audio stream in the file");
    }

    *st = audioStream;

    AVCodecContext *c = audioStream->codec;
    c->codec       = *codec;
    audioStream->id = 1;
    c->sample_fmt  = AV_SAMPLE_FMT_S16;
    c->bit_rate    = 64000;
    c->sample_rate = 44100;
    c->channels    = 1;

    if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    if (c->codec == NULL) {
        Msg::PrintErrorMsg("Couldn't find a proper decoder");
    }

    ret = avcodec_open2(c, *codec, NULL);
    if (ret < 0) {
        Msg::PrintErrorMsg("Could not open audio codec\n");
    }
}
Here "oc" is the same context used to initialize video stream as well.
Then I am trying to write the audio frame like this:
void write_audio_frame(AVFormatContext *oc, AVStream *st)
{
    AVCodecContext *c;
    AVPacket pkt = { 0 }; // data and size must be 0
    AVFrame *frame = avcodec_alloc_frame();
    int got_packet, ret;

    av_init_packet(&pkt);
    c = st->codec;

    /* read packets and re-encode the audio ones */
    while (av_read_frame(oc, &pkt) == 0) {
        if (pkt.stream_index == st->index) {
            /* try to decode the packet into a frame; some frames rely on
             * multiple packets, so we have to make sure the frame is
             * finished before we can use it */
            int frameFinished = 0;
            avcodec_decode_audio4(c, frame, &frameFinished, &pkt);

            if (frameFinished) {
                ret = avcodec_encode_audio2(c, &pkt, frame, &got_packet);
                if (ret < 0) {
                    Msg::PrintErrorMsg("Error encoding audio frame\n");
                }
                if (!got_packet) {
                    printf("failed to acquire packet");
                }
                pkt.stream_index = st->index;

                /* write the compressed frame to the media file */
                ret = av_interleaved_write_frame(oc, &pkt);
                if (ret != 0) {
                    Msg::PrintErrorMsg("Error while writing audio frame.");
                }
            }
        }
    }

    av_free_packet(&pkt);
    avcodec_free_frame(&frame);
}
The thing is, I never get past the statement "if (pkt.stream_index == st->index)": the packet stream index is never equal to the audio stream index. Can anyone point out where I am wrong?
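My current suspicion is that the stream index in a packet returned by av_read_frame() refers to the streams of whatever context the packet was read from, so it only makes sense to compare it against a stream found in that same context. Continuing the sketch from above (again untested):

    /* find the audio stream index in the same context av_read_frame() reads from */
    int audioIdx = -1;
    for (unsigned int i = 0; i < inCtx->nb_streams; ++i) {
        if (inCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            audioIdx = i;
            break;
        }
    }

    AVPacket pkt;
    av_init_packet(&pkt);
    while (av_read_frame(inCtx, &pkt) == 0) {
        if (pkt.stream_index == audioIdx) {
            /* decode / re-encode / write the audio packet here */
        }
        av_free_packet(&pkt);
    }

Is that reasoning right?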
UPDATE:
I did manage to open the input audio stream for encoding, but I can't mux the audio and video streams into a single output. From what I can see, PTS and DTS are probably the source of the problem. Currently I calculate the PTS based on the muxing.c example, but it doesn't work for audio at all.

Here is how I use it:
while (frame_count < _streamDurationNBFrames - 1) {
    uint8_t *frameToWrite = _frames.front();

    /* compute current audio and video time */
    if (audio_st) {
        audio_pts = (double)audioIn_st->pts.val * audioIn_st->time_base.num / audioIn_st->time_base.den;
    } else {
        audio_pts = 0.0;
    }
    if (video_st) {
        video_pts = (double)video_st->pts.val * video_st->time_base.num / video_st->time_base.den;
    } else {
        video_pts = 0.0;
    }

    if ((!audio_st || audio_pts >= _streamDuration) &&
        (!video_st || video_pts >= _streamDuration)) {
        break;
    }

    if (audio_st && audio_pts < video_pts) {
        /* ================== AUDIO ENCODE HERE ================== */
        av_read_frame(informat, &pkt); // read audio from the input stream
        Msg::PrintMsg("Encode audio here...");

        outpkt.data         = pkt.data;
        outpkt.size         = pkt.size;
        outpkt.stream_index = pkt.stream_index;
        outpkt.flags       |= AV_PKT_FLAG_KEY;
        outpkt.pts          = pkt.pts;
        outpkt.dts          = pkt.dts;

        if (av_interleaved_write_frame(oc, &outpkt) < 0) {
            Msg::PrintErrorMsg("Fail Audio Write ");
        } else {
            audio_st->codec->frame_number++;
        }
        av_free_packet(&outpkt);
        av_free_packet(&pkt);
    } else {
        /* ================== VIDEO ENCODE HERE ================== */
        write_video_frame(oc, video_st, frameToWrite);
        frame->pts += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
    }

    /* at last, release the written frame */
    _frames.pop();
    delete frameToWrite; // deallocate the written frame!
}
Somehow, once I am in the audio encoding loop, audio_pts never reaches video_pts and is always zero:

audio_pts = (double)audio_st->pts.val * audio_st->time_base.num / audio_st->time_base.den;

is always zero, because (double)audio_st->pts.val returns zero.
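What I plan to try next is rescaling the copied packet's timestamps from the input stream's time base to the output stream's time base before writing, roughly like this (a sketch, untested; audioIn_st is the input audio stream and audio_st the output one, as above):

    /* assuming pkt.pts / pkt.dts are valid (not AV_NOPTS_VALUE) */
    outpkt.pts          = av_rescale_q(pkt.pts, audioIn_st->time_base, audio_st->time_base);
    outpkt.dts          = av_rescale_q(pkt.dts, audioIn_st->time_base, audio_st->time_base);
    outpkt.duration     = (int)av_rescale_q(pkt.duration, audioIn_st->time_base, audio_st->time_base);
    outpkt.stream_index = audio_st->index; /* index in the output context, not the input one */

Would that be the right way to translate the timestamps?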
So basically I am asking the same question again: how do I do the muxing when the audio comes from an external file?

Btw, the answer below doesn't help, as it presumes that both the audio and video streams come from the same file, whereas in my case only the audio comes from an external source.
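One more idea I am considering: replacing the floating-point audio_pts/video_pts comparison with av_compare_ts() to decide which stream to write next, along these lines (a sketch, untested; video_next_pts and audio_next_pts would be counters I maintain myself):

    if (av_compare_ts(video_next_pts, video_st->codec->time_base,
                      audio_next_pts, audio_st->codec->time_base) <= 0) {
        /* video is behind or level: encode and write the next video frame */
        write_video_frame(oc, video_st, frameToWrite);
    } else {
        /* audio is behind: copy/encode the next audio packet */
    }

Would that sidestep the zero audio_pts problem?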