1
votes

I am trying to write a pcm to mp3 conversion program using Lame. While the pcm data does get converted to an mp3 file, the output is very squeaky. Before I post questions, the following is the code I have:

/*
Sample program to encode raw PCM audio read from a file into mp3 using LAME.
(The sinusoid generation step is currently commented out.)
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <include/lame/lame.h>
#include <assert.h>
#include <string.h>

int main(int argc, char *argv[]) {

  unsigned int sampleRate = 16000;  /*assumed.*/
  unsigned int nSecondsAudio = 4;
  float *arr;
  lame_global_flags *gfp;
  unsigned char mp3buffer[2304]; /*some odd buffer sizes hard-coded.*/
  int pcm_samples_1d[2*1152];
  int pcm_samples_2d[2][1152];
  int read = 0, write = 0;
  int return_code = 1;
  int mp3buf_size;
  FILE *mp3;
  FILE *pcm;
  int framesize = 0;
  int i = 0, j = 0, num_samples_encoded = 0;

  /*Step 1. Generate sinusoid.*/
  /*arr = (float *) malloc(sizeof(float) * nSecondsAudio * sampleRate);
  arr = generateSinusoid(sampleRate, nSecondsAudio);*/

  /*Step 2. See if encoder exists.*/
  char *s = (char *) malloc(sizeof(char)*200);
  s = get_lame_version();
  printf("Lame version = %s\n", s);


  /* Init lame flags.*/
  gfp = lame_init();
  if(!gfp) {
    printf("Unable to initialize gfp object.");
  } else {
    printf("Able to initialize gfp object.\n");
  }

  /* set other parameters.*/
  lame_set_num_channels(gfp, 1);
  /*lame_set_num_samples(gfp, (nSecondsAudio * sampleRate));*/
  lame_set_in_samplerate(gfp, sampleRate);
  lame_set_quality(gfp, 5);  /* set for high speed and good quality. */
  lame_set_mode(gfp, 3);  /* the input audio is mono */

  lame_set_out_samplerate(gfp, sampleRate);
  printf("Able to set a number of parameters too.");
  framesize = lame_get_framesize(gfp);
  printf("Framesize = %d\n", framesize);
  assert(framesize <= 1152);

  /* set more internal variables. check for failure.*/
  if(lame_init_params(gfp) == -1) {
    printf("Something failed in setting internal parameters.");
  }

  /* encode the pcm array as mp3.*
   * Read the file. Encode whatever is read.
   * As soon as end of file is reached, flush the buffers.
   * Write everything to a file.
   * Write headers too.
  */

  /* Open PCM file for reading from.*/
  pcm = fopen("out.pcm", "rb");   /*hard-coded to the only available pcm file.*/
  if(!pcm) {
    printf("Cannot open pcm file for reading.");
    return 1;
  }

  mp3 = fopen("out.mp3", "wb+");
  if(!mp3) {
    printf("Cannot open file for writing.");
    return 1;
 }

  do {
   read = fread(pcm_samples_1d, sizeof(short), 2304, pcm); /*reads framesize shorts from pcm file.*/
   printf("Read %d shorts from file.\n", read);

   /* check for number of samples read. if 0, start flushing, else encode.*/
   if(read > 0) {
     /* got data in 1D array. convert it to 2D */
     /* snippet below taken from lame source code. needs better understanding. pcm_samples_2d[0] = contents of buffer. pcm_samples_2d[1] = 0 since number of channels is always one.*/
     memset(pcm_samples_2d[1], 0, 1152 * sizeof(int));  /*set all other samples with 0.*/
     memset(pcm_samples_2d[0], 0, 1152 * sizeof(int));
     i = 0, j = 0;
     for(i = 0; i < 1152; i++) {
       pcm_samples_2d[0][i] = pcm_samples_1d[i];
     }

     /* encode samples. */
     num_samples_encoded = lame_encode_buffer_int(gfp, pcm_samples_2d[0], pcm_samples_2d[1], read, mp3buffer, sizeof(mp3buffer));

     printf("number of samples encoded = %d\n", num_samples_encoded);

     /* check for value returned.*/
     if(num_samples_encoded > 1) {
       printf("It seems the conversion was successful.\n");
     } else if(num_samples_encoded == -1) {
       printf("mp3buf was too small");
       return 1;
     } else if(num_samples_encoded == -2) {
       printf("There was a malloc problem.");
       return 1;
     } else if(num_samples_encoded == -3) {
       printf("lame_init_params() not called.");
       return 1;
     } else if(num_samples_encoded == -4) {
       printf("Psycho acoustic problems.");
       return 1;
     } else {
       printf("The conversion was not successful.");
       return 1;
     }

     printf("Contents of mp3buffer = \n");
     for(i = 0; i < 2304; i++) {
       printf("mp3buffer[%d] = %d\n", i, mp3buffer[i]);
     }


     write = (int) fwrite(mp3buffer, sizeof(char), num_samples_encoded, mp3);
     if(write != num_samples_encoded) {
       printf("There seems to have been an error writing to mp3 within the loop.\n");
       return 1;
     } else {
       printf("Writing of %d samples a success.\n", write);
     }
   }
 } while(read > 0);

 /* in case where the number of samples read is 0, or negative, start flushing.*/
 read = lame_encode_flush(gfp, mp3buffer, sizeof(mp3buffer)); /*this may yield one more mp3 buffer.*/
 if(read < 0) {
   if(read == -1) {
     printf("mp3buffer is probably not big enough.\n");
   } else {
     printf("MP3 internal error.\n");
   }
   return 1;
 } else {
   printf("Flushing stage yielded %d frames.\n", read);
 }

 write = (int) fwrite(mp3buffer, 1, read, mp3);
 if(write != read) {
   printf("There seems to have been an error writing to mp3.\n");
   return 1;
 }

  /*samples have been written. write ID3 tag.*/
  read = lame_get_id3v1_tag(gfp, mp3buffer, sizeof(mp3buffer));
  if(sizeof(read) > sizeof(mp3buffer)) {
    printf("Buffer too small to write ID3v1 tag.\n");
  } else {
    if(read > 0) {
      write = (int) fwrite(mp3buffer, 1, read, mp3);
      if(read != write) {
        printf("more errors in writing id tag to mp3 file.\n");
      }
    }
  }

  lame_close(gfp);
  fclose(pcm);
  fclose(mp3);

  return 0;
}

My questions:
1. My input pcm data is sampled at 16kHz, mono and encoded at 16 bits. Given that there is only one channel, what are the left and right channels in input to lame_encode_buffer_int?
2. I am not sure I understand the "conversion" process of going from a 1d array to a 2d array (pcm_samples_1d to pcm_samples_2d in the code) and as given in this question.
3. Why am I getting a squeaky voice? In the code, the libraries used were compiled from source using the --enable-debug flag. Yet, I am not able to step in to the functions on using gdb. What else should I have done?

What I have tried so far:
1. Read the documentation (or what is available on the net) of the LAME project.
2. Read through questions posted here on SO and other forums.
3. Gone through the source code: lame.h, frontend/main.c, frontend/get_audio.c, etc.

Any help on this is most welcome.

3

3 Answers

1
votes

This:

char *s = (char *) malloc(sizeof(char)*200);
s = get_lame_version();

is wrong, it leaks memory. Remove the malloc() call, you're not using the allocated memory anyway since you overwrite the pointer with whatever get_lame_version() returns.

Also, don't cast the return value of malloc() in C, and avoid sizeof (char) since it's always 1. If you want to "lock" the allocation to the pointer type, use:

s = malloc(200 * sizeof *s);

To be more specific about your code, the 1d/2d arrays are just plain scary, it's impossible to know if that code is correct without knowledge of the LAME API, which I don't have. It might be related to mono/stereo, since that seems to be what it does.

Not sure if MP3 is fine with having a silent channel, that might be illegal input data for some reason (that generates clicks).

0
votes

Here is what I did to solve the problem:
Tried to play the output audio file in Audacity. Decreasing the "speed" of that file by 50% got the correct output. This means that the problem could be that the input and output sampling rates are different and the resampling operation does not take place within lame_encode_buffer_int. Changed that routine to lame_encode_buffer which handles resampling among other routines. That made the encoding work.

0
votes

I think that your problem was indeed the fact that LAME does not support 16kHz and that the lame_encode_buffer_int() function would not automatically resample the data.

It could also be that setting the output sample rate:

lame_set_out_samplerate(gfp, sampleRate);

when not compatible with MP3 creates a problem.

The valid MP3 (MPEG-1, Layer III) sample rates are:

  • 32kHz,
  • 44.1kHz (LAME uses this by default), and
  • 48kHz.

LAME probably selected 32kHz, which is why halving the rate in Audacity restored your audio to the perfect rate.


In regard to the mono/stereo concerns, just do not specify a right buffer (use NULL) whenever you only have mono data. That works as long as you defined a single channel:

lame_set_num_channels(gfp, 1);
...snip...
lame_encode_buffer_int(gfp, pcm_samples_1d, NULL,
                       read, mp3buffer, sizeof(mp3buffer));

The lame_encode_buffer() can also have the right buffer set to NULL.

lame_encode_buffer(gfp, pcm_samples_1d, NULL,
                   read, mp3buffer, sizeof(mp3buffer));

Internally, that means your 1d buffer is going to be used for both channels, but the low level sampling will anyway ignore the right buffer.


As unwind mentioned in his answer, that code also has a few problems not directly related to LAME... if you still have it and you're interested and want a review, you could try to post it on the code review stack.