I am trying to build a continuous speech recognition from microphone using the Microsoft Cognitive Speech for Xamarin Android. I don't think there is library for Xamarin. The documentation are : https://docs.microsoft.com/en-us/azure/cognitive-services/speech/api-reference-rest/websocketprotocol
I already done the websocket connection thing, now I am pretty stuck on sending the message to the websocket server. I noticed on the documentation that
We have to send Headers on a specific Path everytime we send a Message
for e.g these headers are to set up the first configuration for speech protocol,
Path : speech.config
X-Timestamp : Client UTC clock time stamp in ISO 8601 format
Content-Type : application/json; charset=utf-8
I am using WebSocketClient but I don't find any way to set up headers or change path. Is there any way to set up the headers and/or changing path so I can send message properly to the server? Or do I have a wrong perception?
My second problem is WebSocketClient doesnt have any event handler to receive message, what I do is :
private static async Task DataReceiving(ClientWebSocket ws)
{
while (true)
{
ArraySegment<byte> bytesReceived = new ArraySegment<byte>(new byte[1024]);
WebSocketReceiveResult result = await ws.ReceiveAsync(
bytesReceived, CancellationToken.None);
Log.Info("SOCKETRECEIVED",Encoding.UTF8.GetString(bytesReceived.Array, 0, result.Count));
if (ws.State != WebSocketState.Open)
{
Log.Info("SOCKETCLOSED", "CLOSED");
break;
}
}
}
but I did not receive any message or anything.
EDIT :
Here is my code for Headers,
//List<Tuple<string, string>> Headers <<Contains [Title] and [Content]
foreach (var item in Headers)
{
message += item.Item1 + " : " + item.Item2 + Environment.NewLine;
}
message += Environment.Newline; // ensure double carriage return
EDIT : Here is my code for sending WAV Header :
using (MemoryStream stream = new MemoryStream())
{
short channelCount = 1;
int sampleRate = 1024;
int bitsPerSample = 16;
using (var writer = new BinaryWriter(stream, Encoding.UTF8))
{
writer.Write("Path: audio"+Environment.NewLine);
writer.Write("X-Timestamp: " + DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffffffZ"+Environment.NewLine));
writer.Write("Content-Type : audio/x-wav"+Environment.NewLine);
writer.Write("X-RequestId: " + Guid.NewGuid().ToString().Replace("-",string.Empty)+Environment.NewLine);
writer.Write(Environment.NewLine);
//chunk ID
writer.Write('R');
writer.Write('I');
writer.Write('F');
writer.Write('F');
writer.Write(-1); // -1 - Unknown size
//format
writer.Write('W');
writer.Write('A');
writer.Write('V');
writer.Write('E');
//subchunk 1 ID
writer.Write('f');
writer.Write('m');
writer.Write('t');
writer.Write(' ');
writer.Write(16); //subchunk 1 (fmt) size
writer.Write((short)1); //PCM audio format
writer.Write((short)channelCount);
writer.Write(sampleRate);
writer.Write(sampleRate * 2);
writer.Write((short)2); //block align
writer.Write((short)bitsPerSample);
//subchunk 2 ID
writer.Write('d');
writer.Write('a');
writer.Write('t');
writer.Write('a');
//subchunk 2 (data) size
writer.Write(-1); // -1 - Unknown size
}
byte[] result;
//using (MemoryStream ms = new MemoryStream())
//{
// stream.CopyTo(ms);
// result = ms.ToArray();
//}
result = stream.ToArray();
ArraySegment<byte> byteresult = new ArraySegment<byte>(result);
await _socketclient.SendAsync(byteresult, WebSocketMessageType.Binary, false, CancellationToken.None);
Log.Info("SENDINGWAV", System.Text.Encoding.UTF8.GetString(result));
}
Here is my code for Sending data byte,
public async Task SendByteHeader(byte[] data)
{
string s = "";
s+=("Path: audio" + Environment.NewLine);
s +=("X-Timestamp: " + DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffffffZ" + Environment.NewLine));
s +=("Content-Type : audio/x-wav" + Environment.NewLine);
s +=("X-RequestId: " + Guid.NewGuid().ToString().Replace("-", string.Empty) + Environment.NewLine);
s +=(Environment.NewLine);
byte[] array = Encoding.UTF8.GetBytes(s);
List<byte> endres = new List<byte>(array);
endres.AddRange(data);
ArraySegment<byte> byteresult = new ArraySegment<byte>(endres.ToArray());
await _socketclient.SendAsync(byteresult, WebSocketMessageType.Binary, false, CancellationToken.None);
Log.Info("SENDINGBYTE", Encoding.UTF8.GetString(data));
}
I run this on the start of connection :
Task.Run(()=>DataReceiving(_socketclient));
So, I sent Wav header first, then start sending audio byte from recording ( I am using Plugin.AudioRecording). I still haven't got any message / response yet.
EDIT :
I sent some data to the server every 200 ms to make it "real time", but i noticed that after 5-6 sending, all of my SendAsync got crash on this code :
await _socketclient.SendAsync(byteresult, WebSocketMessageType.Binary, false, CancellationToken.None);
The error is "Cannot access disposable object (the websocket)) ". It seems the websocket got disposed? Or the connection got terminated?