3
votes

I'm trying to convert a .m4a file to raw PCM file so that I can play it back in Audacity.

According to the AVCodecContext, it is a 44100 Hz track using the sample format AV_SAMPLE_FMT_FLTP, which, to my understanding, means that when it is decoded using avcodec_decode_audio4 I should get two arrays of floating-point values (one for each channel).

I'm unsure of the significance of the AVCodecContext's bits_per_coded_sample = 16

Unfortunately, Audacity plays the result back as if the original track were mixed in with some white noise.

Here is some sample code showing what I've done. Note that I've also added a case for a track that uses signed 16-bit non-interleaved data (sample_format = AV_SAMPLE_FMT_S16P), which Audacity plays back fine.

int AudioDecoder::decode(std::string path)
{
  const char* input_filename=path.c_str();

  av_register_all();

  AVFormatContext* container=avformat_alloc_context();
  if(avformat_open_input(&container,input_filename,NULL,NULL)<0){
    printf("Could not open file");
  }

  if(avformat_find_stream_info(container, NULL)<0){
      printf("Could not find file info");
  }
  av_dump_format(container,0,input_filename,false);

  int stream_id=-1;
  int i;
  for(i=0;i<container->nb_streams;i++){
    if(container->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO){
        stream_id=i;
        break;
    }
  }
  if(stream_id==-1){
    printf("Could not find Audio Stream");
  }

  AVDictionary *metadata=container->metadata;
  AVCodecContext *ctx=container->streams[stream_id]->codec;
  AVCodec *codec=avcodec_find_decoder(ctx->codec_id);

  if(codec==NULL){
    printf("cannot find codec!");
  }

  if(avcodec_open2(ctx,codec,NULL)<0){
     printf("Codec cannot be found");
  }

  AVSampleFormat sfmt = ctx->sample_fmt;

  AVPacket packet;
  av_init_packet(&packet);
  AVFrame *frame = avcodec_alloc_frame();

  int buffer_size = AVCODEC_MAX_AUDIO_FRAME_SIZE+ FF_INPUT_BUFFER_PADDING_SIZE;;
  uint8_t buffer[buffer_size];
  packet.data=buffer;
  packet.size =buffer_size;

  FILE *outfile = fopen("test.raw", "wb");

  int len;
  int frameFinished=0;

  while(av_read_frame(container,&packet) >= 0)
  {
      if(packet.stream_index==stream_id)
      {
        //printf("Audio Frame read \n");
        int len=avcodec_decode_audio4(ctx, frame, &frameFinished, &packet);

        if(frameFinished)
        {       
          if (sfmt==AV_SAMPLE_FMT_S16P)
          { // Audacity: 16bit PCM little endian stereo
            int16_t* ptr_l = (int16_t*)frame->extended_data[0];
            int16_t* ptr_r = (int16_t*)frame->extended_data[1];
            for (int i=0; i<frame->nb_samples; i++)
            {
              fwrite(ptr_l++, sizeof(int16_t), 1, outfile);
              fwrite(ptr_r++, sizeof(int16_t), 1, outfile);
            }
          }
          else if (sfmt==AV_SAMPLE_FMT_FLTP)
          { //Audacity: big endian 32bit stereo start offset 7 (but has noise)
            float* ptr_l = (float*)frame->extended_data[0];
            float* ptr_r = (float*)frame->extended_data[1];
            for (int i=0; i<frame->nb_samples; i++)
            {
                fwrite(ptr_l++, sizeof(float), 1, outfile);
                fwrite(ptr_r++, sizeof(float), 1, outfile);
             }
           }            
        }
    }
}
fclose(outfile);
av_close_input_file(container);
return 0;   

}

I'm hoping I've just done a naive conversion (most/least significant bit issues), but so far I've been unable to figure it out. Note that Audacity can only import raw float data if it is 32-bit or 64-bit float (big or little endian).

Thanks for any insight.

2

2 Answers

0
votes

I think the problem is in "nb_samples". It's not exactly what you need. It's better to try "linesize[0]" instead.

Example:

char* ptr_l = (char*)frame->extended_data[0];
char* ptr_r = (char*)frame->extended_data[1];
size_t size = sizeof(float);
for (int i=0; i<frame->linesize[0]; i+=size)
{
   fwrite(ptr_l, size, 1, outfile);
   fwrite(ptr_r, size, 1, outfile);
   ptr_l += size;
   ptr_r += size;    
}

That's for "float"; repeat the same for "int16_t", but with "size" set to "sizeof(int16_t)".

0
votes

You must use a converter from AV_SAMPLE_FMT_FLTP to AV_SAMPLE_FMT_S16P.

How to convert sample rate from AV_SAMPLE_FMT_FLTP to AV_SAMPLE_FMT_S16?

Here is a working example (pAudioBuffer ends up holding the PCM data without white noise):

SwrContext *swr;
swr=swr_alloc();
av_opt_set_int(swr,"in_channel_layout",2,0);
av_opt_set_int(swr, "out_channel_layout", 2,  0);
av_opt_set_int(swr, "in_sample_rate",     codecContext->sample_rate, 0);
av_opt_set_int(swr, "out_sample_rate",    codecContext->sample_rate, 0);
av_opt_set_sample_fmt(swr, "in_sample_fmt",  AV_SAMPLE_FMT_FLTP, 0);
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16P,  0);
swr_init(swr);
int16_t * pAudioBuffer = (int16_t *) av_malloc (AUDIO_INBUF_SIZE * 2);
while(av_read_frame(fmt_cntx,&readingPacket)==0){
   if(readingPacket.stream_index==audioSteam->index){
    AVPacket decodingPacket=readingPacket;
        while(decodingPacket.size>0){
     int gotFrame=0;
         int result=avcodec_decode_audio4(codecContext,frame,&gotFrame,&decodingPacket);
     if(result<0){
           av_frame_free(&frame);
       avformat_close_input(&fmt_cntx);
       return null;
        }
        if(result>=0 && gotFrame){
          int data_size=frame->nb_samples*frame->channels;
          swr_convert(swr,&pAudioBuffer,frame->nb_samples,frame->extended_data,frame->nb_samples);
          jshort *outShortArray=(*pEnv)->NewShortArray(pEnv,data_size);
                                (*pEnv)->SetShortArrayRegion(pEnv,outShortArray,0,data_size,pAudioBuffer);
          (*pEnv)->CallVoidMethod(pEnv,pObj,callBackShortBuffer,outShortArray,data_size);
          (*pEnv)->DeleteLocalRef(pEnv,outShortArray);
          decodingPacket.size -= result;
          decodingPacket.data += result;
        }else{
          decodingPacket.size=0;
          decodingPacket.data=NULL;
        }}
    av_free_packet(&decodingPacket);
    }