I'm trying to resample a decoded audio frame from 48 kHz to 44.1 kHz using the libswresample API. This is the code I have:
// 'frame' is the original decoded audio frame
AVFrame *output_frame = av_frame_alloc();
// Without this, there is no sound at all at the output (PTS stuff I guess)
av_frame_copy_props(output_frame, frame);
output_frame->channel_layout = audioStream->codec->channel_layout;
output_frame->sample_rate = audioStream->codec->sample_rate;
output_frame->format = audioStream->codec->sample_fmt;
SwrContext *swr;
// Configure resampling context
swr = swr_alloc_set_opts(NULL, // we're allocating a new context
AV_CH_LAYOUT_STEREO, // out_ch_layout
AV_SAMPLE_FMT_FLTP, // out_sample_fmt
44100, // out_sample_rate
AV_CH_LAYOUT_STEREO, // in_ch_layout
AV_SAMPLE_FMT_FLTP, // in_sample_fmt
48000, // in_sample_rate
0, // log_offset
NULL); // log_ctx
// Initialize resampling context
swr_init(swr);
// Perform conversion
swr_convert_frame(swr, output_frame, frame);
// Close resampling context
swr_close(swr);
swr_free(&swr);
// Free the original frame and replace it with the new one
av_frame_unref(frame);
return output_frame;
With this code I can hear the audio at the output, but it is noisy. From what I have read, this code should be sufficient even without the av_frame_copy_props() call, but for some reason it does not work. Any ideas?
EDIT: The input stream encodes the audio using AAC and the number of samples is 1024. But, after conversion, the number of samples is 925.
EDIT: I tried doing it in reverse. Since my app receives streams from arbitrary sources, some audio streams are 48 kHz and others are 44.1 kHz, so I tried resampling from 44.1 kHz to 48 kHz to avoid resampling loss. But now each frame has more than 1024 samples and the encoding fails.
EDIT: I tried using libavfilter instead with the following filter chain:
/**
 * Build and configure the audio filter graph
 *
 *     abuffer -> aformat -> asetnsamples -> abuffersink
 *
 * The source (abuffer) is described by the stream's time base and its codec
 * context's sample rate, sample format and channel layout. aformat requests
 * planar float, 44100 Hz, stereo; asetnsamples is configured with
 * "n=1024:p=0".
 *
 * The graph and the four filter contexts are stored in variables declared
 * outside this function (filter_graph, abuffer_ctx, aformat_ctx,
 * asetnsamples_ctx, abuffersink_ctx); strbuf is an externally declared
 * scratch buffer used to format the filter arguments.
 *
 * @param audio_st  audio stream whose parameters describe the graph input
 * @return 0 on success; -1 if the graph cannot be allocated; otherwise the
 *         negative error code of the step that failed. On failure after
 *         allocation the graph is freed and the stored pointers are reset to
 *         NULL so no dangling handles remain.
 */
int init_filter_graph(AVStream *audio_st) {
    int err;

    // create new graph
    filter_graph = avfilter_graph_alloc();
    if (!filter_graph) {
        av_log(NULL, AV_LOG_ERROR, "unable to create filter graph: out of memory\n");
        return -1;
    }

    // look up the filter definitions; any of these is NULL when the filter
    // is not registered / not compiled into the library
    const AVFilter *abuffer = avfilter_get_by_name("abuffer");
    const AVFilter *aformat = avfilter_get_by_name("aformat");
    const AVFilter *asetnsamples = avfilter_get_by_name("asetnsamples");
    const AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
    if (!abuffer || !aformat || !asetnsamples || !abuffersink) {
        av_log(NULL, AV_LOG_ERROR, "required audio filter not found\n");
        err = AVERROR_FILTER_NOT_FOUND;
        goto fail;
    }

    // create abuffer filter
    AVCodecContext *avctx = audio_st->codec;
    AVRational time_base = audio_st->time_base;
    snprintf(strbuf, sizeof(strbuf),
             "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
             time_base.num, time_base.den, avctx->sample_rate,
             av_get_sample_fmt_name(avctx->sample_fmt),
             avctx->channel_layout);
    fprintf(stderr, "abuffer: %s\n", strbuf);
    err = avfilter_graph_create_filter(&abuffer_ctx, abuffer,
                                       NULL, strbuf, NULL, filter_graph);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "error initializing abuffer filter\n");
        goto fail;
    }

    // create aformat filter
    // The cast is required: the layout macro is not guaranteed to have a
    // 64-bit type, and PRIx64 consumes exactly a uint64_t from the varargs.
    snprintf(strbuf, sizeof(strbuf),
             "sample_fmts=%s:sample_rates=%d:channel_layouts=0x%" PRIx64,
             av_get_sample_fmt_name(AV_SAMPLE_FMT_FLTP), 44100,
             (uint64_t)AV_CH_LAYOUT_STEREO);
    fprintf(stderr, "aformat: %s\n", strbuf);
    err = avfilter_graph_create_filter(&aformat_ctx, aformat,
                                       NULL, strbuf, NULL, filter_graph);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "unable to create aformat filter\n");
        goto fail;
    }

    // create asetnsamples filter
    snprintf(strbuf, sizeof(strbuf),
             "n=1024:p=0");
    fprintf(stderr, "asetnsamples: %s\n", strbuf);
    err = avfilter_graph_create_filter(&asetnsamples_ctx, asetnsamples,
                                       NULL, strbuf, NULL, filter_graph);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "unable to create asetnsamples filter\n");
        goto fail;
    }

    // create abuffersink filter
    err = avfilter_graph_create_filter(&abuffersink_ctx, abuffersink,
                                       NULL, NULL, NULL, filter_graph);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "unable to create abuffersink filter\n");
        goto fail;
    }

    // connect inputs and outputs; stop at the first link that fails
    err = avfilter_link(abuffer_ctx, 0, aformat_ctx, 0);
    if (err >= 0) err = avfilter_link(aformat_ctx, 0, asetnsamples_ctx, 0);
    if (err >= 0) err = avfilter_link(asetnsamples_ctx, 0, abuffersink_ctx, 0);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "error connecting filters\n");
        goto fail;
    }

    err = avfilter_graph_config(filter_graph, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "error configuring the filter graph\n");
        goto fail;
    }
    return 0;

fail:
    // Freeing the graph also frees every filter created in it, so the stored
    // context pointers must not be used afterwards.
    avfilter_graph_free(&filter_graph);
    abuffer_ctx = NULL;
    aformat_ctx = NULL;
    asetnsamples_ctx = NULL;
    abuffersink_ctx = NULL;
    return err;
}
Now the resulting frame has 1024 samples but the audio is still choppy.