1
votes

I've built a console app that reads a text file in two languages (english and russian) so i can make my own audio lessons.

Everything works fine, except that now I have modified it to convert the Wav audio stream to MP3 using Naudio, I'm missing around 20% of the recording at the end.

I've run code to track the capacity of the memory stream after each utterance, and sure enough, it is missing data at the end.

I'm suspecting it has something to do with the RIFF header created in the Wav stream.

I'll post the code, if I can work out how!

using System;
using System.Text;
using System.Speech.Synthesis;
using System.Speech.AudioFormat;
using System.IO;
using NAudio.Wave;
using NAudio.Lame;
using NAudio;
using System.Threading.Tasks;


namespace Immercia
{

    class Program
    {



    public static void ConvertWavStreamToMp3File(ref MemoryStream ms, string savetofilename)
    {
        //rewind to beginning of stream
        ms.Seek(0, SeekOrigin.Begin);

        using (var rdr = new WaveFileReader(ms))
        using (var wtr = new LameMP3FileWriter(savetofilename, rdr.WaveFormat, 128/* LAMEPreset.VBR_90*/))

        {
            rdr.CopyTo(wtr);

        }
    }


    public static void Main(string[] args)
    {
        Console.Title = "Immercia";

        MemoryStream ms = new MemoryStream();


        //Create two instances of speech Synthesizer, set voices, and volume -rate is set in the function below
        SpeechSynthesizer synthE = new SpeechSynthesizer();
        SpeechSynthesizer synthR = new SpeechSynthesizer();
        synthE.SelectVoice("Microsoft Anna");
        synthR.SelectVoice("IVONA 2 Tatyana OEM");
        //new SpeechAudioFormatInfo(22000, AudioBitsPerSample.Sixteen, AudioChannel.Mono);



        synthE.Volume = 100;
        synthR.Volume = 85;
        synthE.Rate = -4;
        synthE.SetOutputToWaveStream(ms);
        synthR.SetOutputToWaveStream(ms);



        //  Set up the input text file to be read, and the output .wav file to be created
        Console.WriteLine("Enter exact lesson name to be read (case sensitive), " +
                                                "do not include path or file extention");
        string LessonName = Console.ReadLine();

        //Console.WriteLine("Enter unique Wav File name to be created (case sensitive), " +
        //                                        "do not include path or file extention ");
        string WavFileName = LessonName;

        //The output mp3 file of the recorded audio
        string savetofilename = @"C:\RussianLessons\" + WavFileName + ".mp3";

        Console.OutputEncoding = Encoding.UTF8;

        /*Source file for text to speech. must be created with an asterix before the english, 
        and a 4 digit "pause time" entry before the russian in milliseconds i.e."1400"
        Creation note: file mus be saved as a txt file in unicode  UFT8 or UFT 16 to support cryllic.*/

        string[] lesson = File.ReadAllLines(@"C:\RussianLessons\" + LessonName + ".txt");

        //this fixes the problem of the MP3 not recording the first word/phrase of the lesson,but doesn't actually get recorded???? weird stuff.
        synthR.Speak("Здравствуйте");

        try
        {


            // Loop over strings.
            foreach (string word in lesson)
            {
                //Trim off english indicator asterix and create new string "english" for speaking. 
                if (word.Contains("*") == true)
                {
                    char trim = '*';
                    string english = word.Trim(trim);

                    //write english word to console
                    Console.WriteLine(english);

                    //speak english word

                    synthE.Speak(english);

                    //Test for debug of end file loss
                    Console.WriteLine(
                   "Capacity = {0}, Length = {1}, Position = {2}\n",
                    ms.Capacity.ToString(),
                    ms.Length.ToString(),
                    ms.Position.ToString());

                }
                if (word.Contains("#") == true)
                {

                    //Trim off pause time instructions to leave crylllic to be written to console
                    char[] charsToTrim = { '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '#' };
                    string TrimmedRussian = word.TrimStart(charsToTrim);

                    //extract pause time entry from the begining of the russian word string
                    string milli = word.Substring(0, 4);
                    string pause = string.Concat('"' + milli + "ms\"");

                    //create SSML string to allow for "pause time" to be read automatically by the speech synthesizer
                    string russian;
                    russian = "<speak version=\"1.0\"";
                    russian += " xmlns=\"http://www.w3.org/2001/10/synthesis \"";
                    russian += " xml:lang=\"ru-RU\">";
                    russian += TrimmedRussian;
                    russian += "<break time= ";
                    russian += pause;
                    russian += " />";
                    russian += "</speak>";

                    //Write russian word to console
                    Console.WriteLine(TrimmedRussian);

                    //speak russian word
                    synthR.Rate = -6;

                    synthR.SpeakSsml(russian);
                    //Test for debug of end file loss
                    Console.WriteLine(
                    "Capacity = {0}, Length = {1}, Position = {2}\n",
                     ms.Capacity.ToString(),
                     ms.Length.ToString(),
                     ms.Position.ToString());

                    synthR.Rate = -4;
                    // repeat russian word
                    synthR.SpeakSsml(russian);

                    //Test for debug of end file loss
                    Console.WriteLine(
                    "Capacity = {0}, Length = {1}, Position = {2}\n",
                     ms.Capacity.ToString(),
                     ms.Length.ToString(),
                     ms.Position.ToString());

                }
                /* if (word.Contains("end of file") == true)
                 {

                 synthR.Speak("До свидания, До свидания, До свидания");

                 }*/

            }

            //This is not being spoken???? Trying to fix the loss of 20% of the bytes from the end of the MP3.
            //Having synthR speak before the foreach fixed the loss of the first 20%....

            //synthR.Speak("Здравствуйте");

        }

        catch (FileNotFoundException)
        {
            // Write error.
            Console.WriteLine("Lesson file not found or spelt incorrectly, " +
               "please restart program and check the file name and remember it is case sensitive.");
        }



        //Test for debug of end file loss
        Console.WriteLine(
        "Capacity = {0}, Length = {1}, Position = {2}\n",
        ms.Capacity.ToString(),
        ms.Length.ToString(),
        ms.Position.ToString());



        ConvertWavStreamToMp3File(ref ms, savetofilename);

        ms.Close();

        //Stop fucking around with C# and learn Russian, NOW....
        Console.WriteLine("\n That is all, press any key to exit");
        Console.ReadKey();


    }

}

}

1
Whoops, was hoping to post the code...Andrew Haas
I found this comment by Mark Heath answering a question about wav file headers. It seems to be what I'm looking for. Now I just need to find out how to actually do what he says. stackoverflow.com/questions/39328396/…Andrew Haas
Hi Andrew, just a tip - if you tag your question with the appropriate language tag, it will automatically show your code with correct highlights so it's easier for everyone to read... (I.e., "c#", or whatever)Ian
Thanks Ian! Will do from now on. :)Andrew Haas
Looks better ;)Ian

1 Answers

0
votes

I ended up adding another speech event into the foreach function. it's not what i was hoping for, but it worked for some reason. I do remember reading about "if" statements failing to loop, but I still suspect it's the RIFF header in the Wav stream. That's a mystery for another day. –

if (word.Contains("end") == true)
                {
                    string millis = "1000";
                    string pauses = string.Concat('"' + millis + "ms\"");
                    //create SSML string to be read automatically by the 
speech synthesizer
                    string endfile;
                    endfile = "<speak version=\"1.0\"";
                    endfile += " xmlns=\"http://www.w3.org/2001/10/synthesis \"";
                    endfile += " xml:lang=\"ru-RU\">";
                    endfile += "до свидания";
                    endfile += "<break time= ";
                    endfile += pauses;
                    endfile += " />";
                    endfile += "</speak>";

                    //Write russian word to console
                    Console.WriteLine(endfile);

                    //speak russian word
                    synthR.Rate = -6;

                    synthR.SpeakSsml(endfile);

                    //Test for debug of end file loss
                    Console.WriteLine(
                    "Capacity = {0}, Length = {1}, Position = {2}\n",
                     ms.Capacity.ToString(),
                     ms.Length.ToString(),
                     ms.Position.ToString());

                    synthR.Rate = -4;
                    // repeat russian word
                    synthR.SpeakSsml(endfile);

                    //Test for debug of end file loss
                    Console.WriteLine(
                    "Capacity = {0}, Length = {1}, Position = {2}\n",
                     ms.Capacity.ToString(),
                     ms.Length.ToString(),
                     ms.Position.ToString());

                }