I'm building a speech-to-text demo that calls the Azure Speech API from the browser, served by Node.js. According to the API documentation here, the service requires .wav or .ogg audio. However, the example there makes the API call by sending raw byte data to the API.
I have already captured the data from the microphone as a byte array. Is converting it to bytes and sending it to the API the right approach? Or would it be better to save it as a .wav file and then send that to the API?
My code is below.
This is the part that captures the stream from the microphone:
// Ask the browser for microphone access and pass the resulting stream to
// handlerFunction. getUserMedia rejects when the user denies permission or no
// input device is available, so the rejection is logged instead of being left
// as an unhandled promise rejection.
navigator.mediaDevices.getUserMedia({ audio: true })
  .then((stream) => {
    handlerFunction(stream);
  })
  .catch((err) => {
    console.log("Error", err);
  });
/**
 * Wire a MediaRecorder to the given microphone stream. Once recording has
 * stopped, the recorded audio is exposed for playback and its bytes are
 * passed to getText.
 *
 * Uses names defined outside this function: `rec` (the shared recorder
 * reference), `audioChunks` (array collecting recorded chunks),
 * `recordedAudio` (object receiving `src`/`controls`/`autoplay`) and
 * `getText`.
 *
 * @param {MediaStream} stream - Stream obtained from getUserMedia.
 */
function handlerFunction(stream) {
  const recorder = new MediaRecorder(stream);
  // Keep the shared `rec` reference assigned, exactly as before.
  rec = recorder;

  recorder.ondataavailable = (event) => {
    audioChunks.push(event.data);

    // Only assemble and upload once the recorder has stopped.
    if (recorder.state !== "inactive") {
      return;
    }

    // NOTE(review): this type string only labels the Blob; it does not
    // transcode the recorder's output. MediaRecorder typically emits a
    // compressed container (e.g. WebM/Opus or Ogg/Opus), so these bytes are
    // probably not 16 kHz PCM WAV - the likely cause of an
    // "Unsupported audio format" response. Confirm with recorder.mimeType.
    const blob = new Blob(audioChunks, { type: 'audio/wav; codec=audio/pcm; samplerate=16000' });
    recordedAudio.src = URL.createObjectURL(blob);
    recordedAudio.controls = true;
    recordedAudio.autoplay = true;
    console.log(blob);

    const reader = new FileReader();
    // Attach the handler before starting the read.
    reader.onloadend = () => {
      // loadend fires on failure too; do not upload an empty buffer.
      if (reader.error) {
        console.log("Error", reader.error);
        return;
      }
      const byteArray = new Uint8Array(reader.result);
      console.log("reader result" + reader.result);
      setTimeout(() => getText(byteArray), 1000);
    };
    reader.readAsArrayBuffer(blob);
  };
}
This is the API call part:
/**
 * POST recorded audio to the Azure Speech-to-Text REST endpoint and pass the
 * parsed JSON result to `callback`.
 *
 * Uses two names defined outside this function: `YOUR_API_KEY` (sent as the
 * subscription key header) and `time` (the send time of the newest request
 * whose response has been accepted; responses to older requests are dropped).
 *
 * @param {Uint8Array} audio - Audio bytes used as the request body (the caller
 *   in this file passes a Uint8Array). NOTE(review): the bytes must really be
 *   in the format announced by the Content-type header - confirm upstream.
 * @param {Function} [callback] - Optional; called with the parsed JSON
 *   response when the request succeeded and is not stale.
 * @returns {Promise<void>} Settles when the response has been handled. It does
 *   not reject: network, HTTP and parsing failures are logged.
 */
function getText(audio, callback) {
  console.log(`in function audio ${audio}`);
  console.log(`how many byte?: ${audio.byteLength}`);
  const sendTime = Date.now();

  return fetch('https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US', {
    method: "POST",
    headers: {
      'Accept': 'application/json',
      'Ocp-Apim-Subscription-Key': YOUR_API_KEY,
      // 'Transfer-Encoding' and 'Expect' are forbidden request headers in
      // browser fetch, so they are intentionally not set here.
      // The service documents the parameter as "codecs" (plural).
      'Content-type': 'audio/wav; codecs=audio/pcm; samplerate=16000',
    },
    body: audio,
  })
    .then((r) => {
      if (!r.ok) {
        // Surface the HTTP failure (e.g. 400 Bad Request) with the body the
        // service returned instead of treating it as a recognition result.
        return r.text().then((detail) => {
          throw new Error(`Speech API request failed: ${r.status} ${r.statusText} ${detail}`);
        });
      }
      return r.json();
    })
    .then((response) => {
      // Drop responses that belong to a request older than the last accepted one.
      if (sendTime < time) {
        return;
      }
      time = sendTime;
      if (typeof callback === 'function') {
        callback(response);
      }
    })
    .catch((e) => {
      console.log("Error", e);
    });
}
It returns 400 (Bad Request) and says:
{Message: "Unsupported audio format"}
