Cannot authenticate Watson Assistant credentials using Unity SDK

Question

Every time I try to connect to my Watson Assistant workspace using the Unity SDK, I get an error that looks like this:

[RESTConnector.ProcessRequestQueue()][ERROR] URL: https://gateway.watsonplatform.net/assistant/api/v2/assistants/5332aca6-7a09-4485-ae17-be60a002ce39/sessions//message?version=2018-11-08, ErrorCode: 404, Error: 404 Not Found, Response: {"error":"Resource not found","code":404}

I've tried deleting and regenerating the credentials. I've triple checked that I'm using all the right credentials from bluemix. I am configured to the US-south so I believe I am using the correct url (https://gateway.watsonplatform.net/assistant/api). This is really baffling because I am not having any problems with speech-to-text or text-to-speech. My only other thought it that it is a problem with the Assistant V2 beta but I don't see an option to create an assistant with V1.

Here is my code for reference:

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using IBM.Watson.DeveloperCloud.Services.TextToSpeech.v1;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.Connection;
using CrazyMinnow.SALSA; // Import SALSA from the CrazyMinnow namespace
//using IBM.Watson.DeveloperCloud.Services.Assistant.v1;
using IBM.WatsonDeveloperCloud.Assistant.v2;
using FullSerializer;
using UnityEngine.UI;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.DataTypes;
using IBM.Watson.DeveloperCloud.Services.Assistant.v2;


public class WatsonTTS : MonoBehaviour
{
    public string myName;
    public RandomEyes3D randomEyes;
    public GameObject[] lookTargets;

    #region PLEASE SET THESE VARIABLES IN THE INSPECTOR
    [SerializeField]
    private string _STT_username;
    [SerializeField]
    private string _STT_password;
    [SerializeField]
    private string _STT_url;
    [SerializeField]
    private string _TTS_username;
    [SerializeField]
    private string _TTS_password;
    [SerializeField]
    private string _TTS_url;
    [SerializeField]
    private string _Assistant_username;
    [SerializeField]
    private string _Assistant_password;
    [SerializeField]
    private string _Assistant_url;
    [SerializeField]
    private string _Assistant_iamApikey;
    [SerializeField]
    private string _Assistant_iamUrl;
    [SerializeField]
    private string _Assistant_workspaceId;
    [SerializeField]
    private string _Assistant_versionDate;
    #endregion
    //as this field is Public it is set in the Inspector
    public Text ResultsField;

    private bool _createSessionTested = false;
    private bool _messageTested = false;
    private bool _deleteSessionTested = false;
    private string _sessionId;

    private int _recordingRoutine = 0;
    private string _microphoneID = null;
    private AudioClip _recording = null;
    private int _recordingBufferSize = 1;
    private int _recordingHZ = 22050;

    private SpeechToText _speechToText;
    private Assistant _service;
    //private bool _messageTested = false;

    private fsSerializer _serializer = new fsSerializer();
    private MessageContext _context = null;
    private bool _waitingForResponse = true;
    private float wait;
    private bool check;

    TextToSpeech _textToSpeech;
    private AudioClip audioClip; // Link an AudioClip for Salsa3D to play
    private Salsa3D salsa3D;
    private AudioSource audioSrc;

    private string TTS_content = "";
    public bool play = false;

    // Use this for initialization
    void Start()
    {

        LogSystem.InstallDefaultReactors();

        audioSrc = GetComponent<AudioSource>(); // Get the SALSA AudioSource from this GameObject

        //  Create credential and instantiate service
        /*Credentials CONVcredentials = new Credentials(_CONV_username, _CONV_password, _CONV_url);
        _service = new Assistant(CONVcredentials);
        _service.VersionDate = _CONV_versionDate;*/

        //  Create credential and instantiate service
        Credentials TTScredentials = new Credentials(_TTS_username, _TTS_password, _TTS_url);
        _textToSpeech = new TextToSpeech(TTScredentials);

        //  Create credential and instantiate service
        Credentials STTcredentials = new Credentials(_STT_username, _STT_password, _STT_url);
        _speechToText = new SpeechToText(STTcredentials);

        LogSystem.InstallDefaultReactors();
        Runnable.Run(CreateService());



    }

    private IEnumerator CreateService()
    {
        //  Create credential and instantiate service
        Credentials credentials = null;
        if (!string.IsNullOrEmpty(_Assistant_username) && !string.IsNullOrEmpty(_Assistant_password))
        {
            //  Authenticate using username and password
            credentials = new Credentials(_Assistant_username, _Assistant_password, _Assistant_url);
        }
        else if (!string.IsNullOrEmpty(_Assistant_iamApikey))
        {
            //  Authenticate using iamApikey
            TokenOptions tokenOptions = new TokenOptions()
            {
                IamApiKey = _Assistant_iamApikey,
                IamUrl = _Assistant_iamUrl
            };

            credentials = new Credentials(tokenOptions, _Assistant_url);

            //  Wait for tokendata
            while (!credentials.HasIamTokenData())
                yield return null;
        }
        else
        {
            throw new WatsonException("Please provide either username and password or IAM apikey to authenticate the service.");
        }

        _service = new Assistant(credentials);
        _service.VersionDate = _Assistant_versionDate;

        //  Message
        MessageInput input = new MessageInput();
        //Say HELLO - start the conversation
        input.Text = "first hello";
        MessageRequest messageRequest = new MessageRequest()
        {
            Input = input
        };
        _service.Message(OnMessage, OnFail, _Assistant_workspaceId, _sessionId);

    }

    private void OnDeleteSession(object response, Dictionary<string, object> customData)
    {
        Log.Debug("ExampleAssistantV2.OnDeleteSession()", "Session deleted.");
        _createSessionTested = true;
    }


    private void OnCreateSession(SessionResponse response, Dictionary<string, object> customData)
    {
        Log.Debug("ExampleAssistantV2.OnCreateSession()", "Session: {0}", response.SessionId);
        _sessionId = response.SessionId;
        _createSessionTested = true;
    }

    private void OnFail(RESTConnector.Error error, Dictionary<string, object> customData)
    {
        Log.Debug("ExampleAssistantV2.OnFail()", "Call failed: {0}: {1}", error.ErrorCode, error.ErrorMessage);
    }

    public bool Active
    {
        get { return _speechToText.IsListening; }
        set
        {
            if (value && !_speechToText.IsListening)
            {
                _speechToText.DetectSilence = true;
                _speechToText.EnableWordConfidence = true;
                _speechToText.EnableTimestamps = true;
                _speechToText.SilenceThreshold = 0.01f;
                _speechToText.MaxAlternatives = 0;
                _speechToText.EnableInterimResults = true;
                _speechToText.OnError = OnSTTError;
                _speechToText.InactivityTimeout = -1;
                _speechToText.ProfanityFilter = false;
                _speechToText.SmartFormatting = true;
                _speechToText.SpeakerLabels = false;
                _speechToText.WordAlternativesThreshold = null;
                _speechToText.StartListening(OnSTTRecognize, OnSTTRecognizeSpeaker);
            }
            else if (!value && _speechToText.IsListening)
            {
                _speechToText.StopListening();
            }
        }
    }


    private void StartRecording()
    {
        if (_recordingRoutine == 0)
        {
            UnityObjectUtil.StartDestroyQueue();
            _recordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    private void StopRecording()
    {
        if (_recordingRoutine != 0)
        {
            Microphone.End(_microphoneID);
            Runnable.Stop(_recordingRoutine);
            _recordingRoutine = 0;
        }
    }

    private void OnSTTError(string error)
    {
        Active = false;
        Log.Debug("STT.OnSTTError()", "Error! {0}", error);
    }


    private IEnumerator RecordingHandler()
    {
        //      Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", Microphone.devices);
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return null;      // let _recordingRoutine get set..

        if (_recording == null)
        {
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int midPoint = _recording.samples / 2;
        float[] samples = null;

        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("STT.RecordingHandler()", "Microphone disconnected.");

                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint)
                || (!bFirstBlock && writePos < midPoint))
            {
                // front block is recorded, make a RecordClip and pass it onto our callback.
                samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);

                _speechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we ready for a block of audio, 
                // and wait that amount of time it will take to record.
                int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;

                yield return new WaitForSeconds(timeRemaining);
            }

        }

        yield break;
    }

    //  private void OnSTTRecognize(SpeechRecognitionEvent result)
    //  updated for Watson SDK 2.4.0 compatability
    private void OnSTTRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
                    //                  Log.Debug("STT.OnSTTRecognize()", text);
                    ResultsField.text = text;

                    //only send to CONV once we know the user has stopped talking
                    if (res.final)
                    {
                        string _conversationString = alt.transcript;
                        //We can now call the CONV service?
                        Log.Debug("STT.OnSTTRecognize()", _conversationString);

                        Active = false;  //Stop Microphone from listening

                        //  Message
                        MessageInput input = new MessageInput();

                        input.Text = _conversationString;
                        MessageRequest messageRequest = new MessageRequest()
                        {
                            Input = input,
                            Context = _context
                        };
                        _service.Message(OnMessage, OnFail, _Assistant_workspaceId, _sessionId);

                    }
                }

                if (res.keywords_result != null && res.keywords_result.keyword != null)
                {
                    foreach (var keyword in res.keywords_result.keyword)
                    {
                        Log.Debug("STT.OnSTTRecognize()", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
                    }
                }

            }
        }
    }

    //potentially useful to detect difference between different people speaking?
    //  private void OnSTTRecognizeSpeaker(SpeakerRecognitionEvent result)
    //  updated for Watson SDK 2.4.0 compatability
    private void OnSTTRecognizeSpeaker(SpeakerRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result != null)
        {
            foreach (SpeakerLabelsResult labelResult in result.speaker_labels)
            {
                Log.Debug("ExampleStreaming.OnRecognize()", string.Format("speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence));
            }
        }
    }


    private void OnMessage(MessageResponse response, Dictionary<string, object> customData)
    {
        //  Log.Debug("Assistant.OnMessage()", "Response: {0}", customData["json"].ToString());
        //  Convert resp to fsdata
        fsData fsdata = null;
        fsResult r = _serializer.TrySerialize(response.GetType(), response, out fsdata);
        if (!r.Succeeded)
            throw new WatsonException(r.FormattedMessages);

        //  Convert fsdata to MessageResponse
        MessageResponse messageResponse = new MessageResponse();
        object obj = messageResponse;
        r = _serializer.TryDeserialize(fsdata, obj.GetType(), ref obj);
        if (!r.Succeeded)
            throw new WatsonException(r.FormattedMessages);

        //  Set context for next round of messaging
        MessageContext _tempContext = null;
        //(response as Dictionary<string, object>).TryGetValue("context", out _tempContext)
        _tempContext = response.Context;

        if (_tempContext != null)
            _context = _tempContext;
        else
            Log.Debug("ExampleConversation.OnMessage()", "Failed to get context");

        //  Get intent
        object tempIntentsObj = null;
        //(response as Dictionary<string, object>).TryGetValue("intents", out tempIntentsObj);
        tempIntentsObj = response.Output.Intents;

        //Need to wrap this in try/catch so don't trigger exception is has no content for some reason
        object _tempText = null;
        //(messageResponse.Output as Dictionary<string, object>).TryGetValue("text", out _tempText);
        _tempText = response.Output.Generic;
        object _tempTextObj = (_tempText as List<object>)[0];
        string output = _tempTextObj.ToString();

        if (output != null)
        {
            //replace any <waitX> tags with the value expected by the TTS service
            string replaceActionTags = output.ToString();
            int pos3 = replaceActionTags.IndexOf("<wait3>");
            if (pos3 != -1)
            {
                replaceActionTags = output.Replace("<wait3>", "<break time='3s'/>");
            }
            int pos4 = replaceActionTags.IndexOf("<wait4>");
            if (pos4 != -1)
            {
                replaceActionTags = output.Replace("<wait4>", "<break time='4s'/>");
            }
            int pos5 = replaceActionTags.IndexOf("<wait5>");
            if (pos5 != -1)
            {
                replaceActionTags = output.Replace("<wait5>", "<break time='5s'/>");
            }
            output = replaceActionTags;
        }
        else
        {
            Log.Debug("Extract outputText", "Failed to extract outputText and set for speaking");
        }

        TTS_content = output;
        //trigger the Update to PLAY the TTS message
        play = true;
    }


    /*private void OnCONVFail(RESTConnector.Error error, Dictionary<string, object> customData)
    {
        Log.Error("ExampleConversation.OnFail()", "Error received: {0}", error.ToString());
        _messageTested = false;
    }*/

    //called by Update() when play=true;
    private void GetTTS()
    {
        //  Synthesize
        //      Log.Debug("WatsonTTS", "Attempting synthesize.");
        _textToSpeech.Voice = VoiceType.fr_FR_Renee; // .en_US_Allison; //.en_GB_Kate;
        _textToSpeech.ToSpeech(HandleToSpeechCallback, OnTTSFail, TTS_content, true);
    }

    void HandleToSpeechCallback(AudioClip clip, Dictionary<string, object> customData = null)
    {
        if (Application.isPlaying && clip != null && audioSrc != null)
        {
            audioSrc.spatialBlend = 0.0f;
            audioSrc.clip = clip;
            audioSrc.Play();

            //set flag values that can be picked up in the Update() loop
            wait = clip.length;
            check = true;
        }
    }

    private void OnTTSFail(RESTConnector.Error error, Dictionary<string, object> customData)
    {
        Log.Error("WatsonTTS.OnFail()", "Error received: {0}", error.ToString());
    }


    /// <summary>
    /// A coroutine to track a GameObject with a pre-delay and a track duration
    /// </summary>
    /// <param name="preDelay">Pre delay.</param>
    /// <param name="duration">Duration.</param>
    /// <param name="customShapeIndex">Custom shape index.</param>
    IEnumerator Look(float preDelay, float duration, GameObject lookTarget)
    {
        yield return new WaitForSeconds(preDelay);

        Debug.Log("Look=" + "LEFT/RIGHT");
        randomEyes.SetLookTarget(lookTarget);

        yield return new WaitForSeconds(duration);

        randomEyes.SetLookTarget(null);
    }

    // Update is called once per frame
    void Update()
    {

        if (play)
        {
            Debug.Log("play=true");
            play = false;
            Active = false;
            GetTTS();
        }

        if (check)
        {
            //          Debug.Log ("Update() check=true");
            wait -= Time.deltaTime; //reverse count
        }

        if ((wait < 0f) && (check))
        { //check that clip is not playing      
          //            Debug.Log ("Speech has finished");
            check = false;
            //Now let's start listening again.....
            Active = true;
            StartRecording();
        }
    }
}

In the url there are two // here: /sessions//message?version=2018-11-08, Also, what if you try to access the assistant with v1 API? You don't create v1 or v2 assistant the API version you are using is only telling how will you access the same assistant - or at least as far as I know. — Michal Bida

taj taj · Accepted Answer · 2018-11-12T15:40:33

You need to create a session and send the sessionId into the message operation. I will add a null check for the sessionId in the message operation.

void Start()
{
    _assistant.CreateSession(OnCreateSession, OnFail, _assistantId);

}

private void OnCreateSession(SessionResponse response, Dictionary<string, object> customData)
{
    _sessionId = response.SessionId;
    _assistant.Message(OnMessage, OnFail, _assistantId, _sessionId);
}

Cannot authenticate Watson Assistant credentials using Unity SDK

1 Answers