Speech Recognition api with Project Oxford

Question

I downloaded a sample app from the Project Oxford site and am trying to make it work. The code works when it is given a WAV file as input, but it does not work when I try to use the microphone. So far I have tried the following:

Ran VS in Admin mode
Checked Microphone privacy setting
Modified code in various ways to make it work
The primary/secondary key should be valid, since recognition works with a WAV file; it only fails with the microphone.

Here is the sample code as I modified it:

using MicrosoftProjectOxford;
using System;
using System.Configuration;
using System.Diagnostics;
using System.IO;
using System.Windows;

namespace MicrosoftProjectOxfordExample
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml
    /// </summary>
    public partial class MainWindow : Window
    {
        // Subscription key read from the "primaryKey" appSettings entry; handed to the client factory in Intialize().
        string m_primaryOrSecondaryKey = ConfigurationManager.AppSettings["primaryKey"];
        // Wave file names; not referenced anywhere in the code shown in this class.
        private const string LongWaveFile = @"batman.wav";
        private const string ShortWaveFile = @"whatstheweatherlike.wav";
        // Not referenced anywhere in the code shown in this class.
        private string m_filename = null;
        // Recognition mode handed to the client factory; also consulted by button2_Click and OnResponseReceivedHandler.
        private SpeechRecognitionMode m_recoMode = SpeechRecognitionMode.ShortPhrase;
        // true: Intialize() creates the microphone client; false: it creates the data client.
        private bool m_isMicrophoneReco = true;

        // Only assigned in Intialize() when m_isMicrophoneReco is false; set back to null by button2_Click.
        private DataRecognitionClient m_dataClient;
        // Only assigned in Intialize() when m_isMicrophoneReco is true; set back to null by button2_Click.
        private MicrophoneRecognitionClient m_micClient;

        /// <summary>
        /// Initializes a new instance of the <see cref="MainWindow"/> class:
        /// runs the component initialization first, then sets up the
        /// speech recognition client via <c>Intialize</c>.
        /// </summary>
        public MainWindow()
        {
            this.InitializeComponent();

            // Client creation and event wiring live in Intialize().
            this.Intialize();
        }

        /// <summary>
        /// Creates the recognition client selected by <c>m_isMicrophoneReco</c>
        /// (microphone client when true, data client otherwise) and subscribes
        /// the response, partial-response and error handlers to it.
        /// The microphone client additionally gets the microphone-status handler.
        /// </summary>
        private void Intialize()
        {
            if (m_isMicrophoneReco)
            {
                m_micClient = SpeechRecognitionServiceFactory.CreateMicrophoneClient(m_recoMode, "en-us", m_primaryOrSecondaryKey);

                // Event handlers for speech recognition results
                m_micClient.OnResponseReceived += this.OnResponseReceivedHandler;
                m_micClient.OnPartialResponseReceived += this.OnPartialResponseReceivedHandler;
                m_micClient.OnConversationError += this.OnConversationErrorHandler;
                m_micClient.OnMicrophoneStatus += this.OnMicrophoneStatus;
            }
            else
            {
                m_dataClient = SpeechRecognitionServiceFactory.CreateDataClient(m_recoMode, "en-us", m_primaryOrSecondaryKey);

                // Event handlers for speech recognition results
                m_dataClient.OnResponseReceived += this.OnResponseReceivedHandler;
                m_dataClient.OnPartialResponseReceived += this.OnPartialResponseReceivedHandler;
                m_dataClient.OnConversationError += this.OnConversationErrorHandler;
            }
        }

        /// <summary>
        /// Handles the Click event of the button1 control: clears the output
        /// box, disables the button and starts microphone recognition.
        /// </summary>
        /// <param name="sender">The source of the event.</param>
        /// <param name="e">The <see cref="RoutedEventArgs"/> instance containing the event data.</param>
        private void button1_Click(object sender, RoutedEventArgs e)
        {
            // m_micClient is null when Intialize() created the data client instead,
            // or after button2_Click disposed it; the response/error handlers can
            // still re-enable this button afterwards, so guard the dereference.
            if (m_micClient == null)
            {
                this.WriteLine("Microphone client is not available.");
                return;
            }

            this.editText1.Text = String.Empty;
            this.button1.IsEnabled = false;
            m_micClient.StartMicAndRecognition();
        }
        /// <summary>
        /// Handles the Click event of the button3 control: waits up to 15 seconds
        /// for the final response, then stops the microphone and recognition.
        /// </summary>
        /// <param name="sender">The source of the event.</param>
        /// <param name="e">The <see cref="RoutedEventArgs"/> instance containing the event data.</param>
        private void button3_Click(object sender, RoutedEventArgs e)
        {
            // m_micClient is null in data-client mode or after button2_Click disposed it.
            if (m_micClient == null)
            {
                this.WriteLine("Microphone client is not available.");
                return;
            }

            int waitSeconds = 15;

            // NOTE(review): this wait runs on the thread that raised the click
            // event, so the window is unresponsive for up to waitSeconds.
            bool isReceivedResponse = m_micClient.WaitForFinalResponse(waitSeconds * 1000);

            // We are done sending audio.  Final recognition results will arrive in OnResponseReceived event call.
            m_micClient.EndMicAndRecognition();
            this.WriteLine("App Done.");

            // Report a timeout the same way button2_Click does instead of dropping the result.
            if (!isReceivedResponse)
            {
                this.WriteLine("{0}: Timed out waiting for conversation response after {1} ms",
                               DateTime.UtcNow, waitSeconds * 1000);
            }
        }

        /// <summary>
        /// Handles the Click event of the button2 control: waits for the final
        /// response of whichever client exists, then disposes that client and
        /// clears its field. Logs a timeout message when no final response
        /// arrived within the wait period.
        /// </summary>
        /// <param name="sender">The source of the event.</param>
        /// <param name="e">The <see cref="RoutedEventArgs"/> instance containing the event data.</param>
        private void button2_Click(object sender, RoutedEventArgs e)
        {
            if (null == m_dataClient && null == m_micClient)
            {
                // Both clients are already gone (e.g. a second click). Nothing is
                // waited on, so a "timed out" message would be misleading.
                this.button1.IsEnabled = false;
                this.WriteLine("No active recognition client.");
                return;
            }

            bool isReceivedResponse = false;

            int waitSeconds = (m_recoMode == SpeechRecognitionMode.LongDictation) ? 200 : 15;

            // sleep until the final result in OnResponseReceived event call, or waitSeconds, whichever is smaller.
            if (null != m_dataClient)
            {
                isReceivedResponse = m_dataClient.WaitForFinalResponse(waitSeconds * 1000);
                this.WriteLine("App Done.");
                this.button1.IsEnabled = false;
                m_dataClient.Dispose();
                m_dataClient = null;
            }

            if (null != m_micClient)
            {
                isReceivedResponse = m_micClient.WaitForFinalResponse(waitSeconds * 1000);
                // We are done sending audio.  Final recognition results will arrive in OnResponseReceived event call.
                m_micClient.EndMicAndRecognition();
                this.WriteLine("App Done.");
                this.button1.IsEnabled = false;
                m_micClient.Dispose();
                m_micClient = null;
            }

            if (!isReceivedResponse)
            {
                this.button1.IsEnabled = false;
                this.WriteLine("{0}: Timed out waiting for conversation response after {1} ms",
                               DateTime.UtcNow, waitSeconds * 1000);
            }
        }

        /// <summary>
        /// Writes an empty line by delegating to the formatting overload
        /// with an empty format string.
        /// </summary>
        void WriteLine()
        {
            WriteLine(String.Empty);
        }

        /// <summary>
        /// Formats a message, writes it to the trace listeners and appends it
        /// (followed by a newline) to the <c>editText1</c> text box.
        /// </summary>
        /// <param name="format">The composite format string.</param>
        /// <param name="args">The arguments substituted into <paramref name="format"/>.</param>
        void WriteLine(string format, params object [] args)
        {
            var formattedStr = string.Format(format, args);
            Trace.WriteLine(formattedStr);

            // Queue the UI update instead of blocking on it: the click handlers
            // hold the UI thread inside WaitForFinalResponse, and a synchronous
            // Dispatcher.Invoke from another thread would stall that caller
            // until the wait returns.
            // NOTE(review): assumes the SDK raises its events off the UI thread - confirm.
            this.Dispatcher.BeginInvoke((Action)(() => {
                this.editText1.Text += (formattedStr + "\n");
            }));
        }

        /// <summary>
        ///     Called when a final response is received. Re-enables button1 and
        ///     ends microphone recognition once recognition is finished (every
        ///     response in ShortPhrase mode, or an end-of-dictation status in
        ///     LongDictation mode), and logs the n-best results for every
        ///     response that is not the end-of-dictation marker.
        /// </summary>
        /// <param name="sender">The source of the event.</param>
        /// <param name="e">The event data carrying the phrase response.</param>
        void OnResponseReceivedHandler(object sender, SpeechResponseEventArgs e)
        {
            bool isFinalDictationMessage = m_recoMode == SpeechRecognitionMode.LongDictation &&
                                           (e.PhraseResponse.RecognitionStatus == RecognitionStatus.EndOfDictation ||
                                            e.PhraseResponse.RecognitionStatus == RecognitionStatus.DictationEndSilenceTimeout);

            bool recognitionFinished = (m_recoMode == SpeechRecognitionMode.ShortPhrase) || isFinalDictationMessage;

            if (recognitionFinished) {
                this.Dispatcher.Invoke((Action)(() => {
                    this.button1.IsEnabled = true;
                }));
            }

            // button2_Click disposes the microphone client and sets the field to
            // null; read it once into a local so a late response cannot
            // dereference null.
            MicrophoneRecognitionClient micClient = m_micClient;
            if (m_isMicrophoneReco && recognitionFinished && micClient != null) {
                // we got the final result, so we can end the mic reco.  No need to do this
                // for dataReco, since we already called endAudio() on it as soon as we were done
                // sending all the data.
                micClient.EndMicAndRecognition();
            }

            if (!isFinalDictationMessage)
            {
                this.WriteLine("********* Final NBEST Results *********");
                for (int i = 0; i < e.PhraseResponse.Results.Length; i++)
                {
                    this.WriteLine("[{0}] Confidence={1} Text=\"{2}\"",
                                   i, e.PhraseResponse.Results[i].Confidence,
                                   e.PhraseResponse.Results[i].DisplayText);
                }
                this.WriteLine();
            }
        }

        /// <summary>
        ///     Logs the payload of a final response whose intent has been parsed.
        ///     None of the clients created in this class subscribe this handler
        ///     in the code shown here.
        /// </summary>
        void OnIntentHandler(object sender, SpeechIntentEventArgs e)
        {
            WriteLine("********* Final Intent *********");
            WriteLine("{0}", e.Payload);
            WriteLine();
        }

        /// <summary>
        ///     Logs each partial recognition result as it arrives.
        /// </summary>
        void OnPartialResponseReceivedHandler(object sender, PartialSpeechResponseEventArgs e)
        {
            WriteLine("********* Partial Result *********");
            WriteLine("{0}", e.PartialResult);
            WriteLine();
        }

        /// <summary>
        ///     Called when an error is received: re-enables button1, then logs
        ///     the error code and the error text.
        /// </summary>
        void OnConversationErrorHandler(object sender, SpeechErrorEventArgs e)
        {
            // The button is touched through the dispatcher, as elsewhere in this class.
            Action enableButton = () => {
                this.button1.IsEnabled = true;
            };
            this.Dispatcher.Invoke(enableButton);

            WriteLine("********* Error Detected *********");
            WriteLine("{0}", e.SpeechErrorCode.ToString());
            WriteLine("{0}", e.SpeechErrorText);
            WriteLine();
        }

        /// <summary>
        ///     Called when the microphone status has changed; logs whether the
        ///     microphone is currently recording.
        /// </summary>
        void OnMicrophoneStatus(object sender, MicrophoneEventArgs e)
        {
            // Status changes are only logged. An earlier, disabled variant also
            // ended recognition and re-enabled button1 when recording stopped.
            WriteLine("********* Microphone status: {0} *********", e.Recording);
            WriteLine();
        }


    }
}

user1749865 user1749865 · Accepted Answer · 2015-07-25T21:50:16

I was having the same problem. I ended up disabling all the recording devices (except my microphone) from Windows Sound Panel to get it to work.

Speech Recognition api with Project Oxford

2 Answers