0
votes

I need some guidance. My Azure Function (written in Node.js) converts some random text to speech and then uploads the speech output to a blob. I would like to do so without using an intermediate local file. The BlockBlobClient.upload method requires a Blob, string, ArrayBuffer, ArrayBufferView, or a function that returns a new Readable stream, and it also requires the content length. I am not able to get these from the RequestPromise object returned by the call to TTS (as of now I am using request-promise to call TTS). Any suggestions will be really appreciated.

Thank you

Adding a code sample that can be tested as "node TTSSample.js". The sample code is based on:

  1. Azure Blob stream related code shared at https://github.com/Azure-Samples/azure-sdk-for-js-storage-blob-stream-nodejs/blob/master/v12/routes/index.js

  2. Azure Text to speech sample code at https://github.com/Azure-Samples/Cognitive-Speech-TTS/tree/master/Samples-Http/NodeJS

  3. Replace appropriate keys and parameters in the enclosed settings.js

  4. I am using node.js runtime v12.18.3

  5. Input text and the output blob name are hard coded in this code sample.

    // TTSSample.js
    // Converts given text to Audio and uploads to Azure Blob storage
    // Ultimately to be used in an Azure function
    // As of now input text as well as the target BLobStorage object are hard coded.
    
    // To install dependencies, run: npm install
    const xmlbuilder = require('xmlbuilder');
    // request-promise has a dependency on request
    const rp = require('request-promise');
    const fs = require('fs');                     // NOTE(review): unused in this sample (only the commented-out pipe uses it)
    const readline = require('readline-sync');    // NOTE(review): unused — the input text is hard coded below
    const settings = require('./settings.js');
    
    // NOTE(review): multer/into-stream come from the Express upload sample this
    // was adapted from; uploadStrategy is never used in this script.
    const multer = require('multer');
    const inMemoryStorage = multer.memoryStorage();
    const uploadStrategy = multer({ storage: inMemoryStorage }).single('image');
    const getStream = require('into-stream');
    
    const ONE_MEGABYTE = 1024 * 1024;
    // Chunking parameters for BlockBlobClient.uploadStream.
    const uploadOptions = { bufferSize: 4 * ONE_MEGABYTE, maxBuffers: 20 };
    const ONE_MINUTE = 60 * 1000;                 // NOTE(review): unused in this sample
    // Blob Storage
    const { BlobServiceClient } = require('@azure/storage-blob');
    
    // Gets an access token.
    function getAccessToken(subscriptionKey) {
            let options = {
                    method: 'POST',
                    uri: settings.issueTokenUri,
                    headers: {
                            'Ocp-Apim-Subscription-Key': subscriptionKey
                    }
            }
            return rp(options);
    }
    
    // Converts text to speech using the input from readline.
    function textToSpeech_rp(accessToken, text) {
            // Create the SSML request.
            let xml_body = xmlbuilder.create('speak')
                    .att('version', '1.0')
                    .att('xml:lang', 'en-us')
                    .ele('voice')
                    .att('xml:lang', 'en-us')
                    .att('name', 'en-US-Guy24kRUS') // Short name for 'Microsoft Server Speech Text to Speech Voice (en-US, Guy24KRUS)'
                    .txt(text)
                    .end();
            // Convert the XML into a string to send in the TTS request.
            let body = xml_body.toString();
            //console.log("xml string is done");
            let options = {
                    method: 'POST',
                    baseUrl: settings.cognitiveUri,
                    url: 'cognitiveservices/v1',
                    headers: {
                            'Authorization': 'Bearer ' + accessToken, 
                            'cache-control': 'no-cache',
                            'User-Agent': settings.cognitiveResource,
                            'X-Microsoft-OutputFormat': 'riff-24khz-16bit-mono-pcm',
                            'Content-Type': 'application/ssml+xml'
                    },
                    body: body
            }
            console.log(options);
            let request = rp(options)
                    .on('response', async (response) =>  {
                            if (response.statusCode === 200) {
                                    console.log("Inside response");
                                    const stream = getStream(response);
    
         //request.pipe(fs.createWriteStream('TTSOutput.wav'));
    
                                    const AZURE_STORAGE_CONNECTION_STRING = settings.storageConnectionString;
                                    const storageAccount = settings.storageAccount;
                                    const storageKey = settings.storageKey;
                                    const containerName = settings.audioContainer;
                                    // Create the BlobServiceClient object which will be used to create a container client
                                    const blobServiceClient = BlobServiceClient.fromConnectionString(AZURE_STORAGE_CONNECTION_STRING);
    
                                    // Get a reference to a container
                                    const containerClient = blobServiceClient.getContainerClient(containerName);
    
                                    // Create a unique name for the blob
                                    //const blobName = id + '.mp3';
                                    const blobName = 'audio1.mp3';
    
                                    // Get a block blob client
                                    const blockBlobClient = containerClient.getBlockBlobClient(blobName);
                                    //blockBlobClient.upload(buffer,61000);
                                    //blockBlobClient.upload(request.body);
                                    try {
                                             await blockBlobClient.uploadStream(stream,
                                              uploadOptions.bufferSize, uploadOptions.maxBuffers,
                                              { blobHTTPHeaders: { blobContentType: "audio/mpeg3" } });
                                            //res.render('success', { message: 'File uploaded to Azure Blob storage.' });
                                            console.log('Success');
                                      } catch (err) {
                                            //res.render('error', { message: err.message });
                                            console.log("Failure", err.stack);
                                      }
                                    console.log("I am done");
                            }
                    });
            return request;
    
    };
    
    // Use async and await to get the token before attempting
    // to convert text to speech.
    // Entry point: fetch an access token, then synthesize the hard-coded
    // text and upload the resulting audio to Blob storage.
    async function main() {
            const key = settings.subscriptionKey;
            if (!key) {
                    throw new Error('Environment variable for your subscription key is not set.')
            }
            try {
                    const token = await getAccessToken(key);
                    await textToSpeech_rp(token, "Hello there");
            } catch (err) {
                    console.log(`Something went wrong: ${err}`);
                    console.log(err.stack);
            }
    }
    
    // Kick off the application.
    main();
    
    
    
    
     //============ Settings.js
    
    (function() {
    "use strict";
    module.exports = {  
    // Replace with your own subscription key, service region (e.g., "westus"),
    // and recognition language.
    subscriptionKey:   "CognitiveServiceSubsctiptionKey",
    serviceRegion:     "eastus", // e.g., "westus"
    language:          "en-US",
    issueTokenUri:     'https://eastus.api.cognitive.microsoft.com/sts/v1.0/issuetoken',
    cognitiveUri: 'https://eastus.tts.speech.microsoft.com/',
    cognitiveResource: 'CognitiveResourceName',
    
    // Storage
    storageAccount: "StorageAccount",
    storageKey: "sN/StorageKey==",
    storageConnectionString: "StorageConnectionString",
    audioContainer: "AudioContainer",
    };
    }());
    
1
Please show us a minimal reproducible example; add your code. – OfirD
I have added the code. – user14197269

1 Answer

0
votes

Regarding the issue, please refer to the following code:

// Dependencies for the Azure Function sample below.
const xmlbuilder = require("xmlbuilder"); // builds the SSML request body
const rp = require("request-promise"); // HTTP client; its return value is also pipeable
const azure = require("azure-storage"); // older-generation Blob storage SDK (provides createWriteStreamToBlockBlob)
// Speech service key — replace the placeholder before deploying.
const subscriptionKey = "<your speech service subscription key>";

module.exports = async function (context, req) {
  context.log("JavaScript HTTP trigger function processed a request.");
  try {
    const accessToken = await getAccessToken(subscriptionKey);

    const blobService = azure.createBlobService(
      "<connection string>",
    );
    const writableStream = blobService.createWriteStreamToBlockBlob(
      "test",
      "test.mp3",
      {
        blockIdPrefix: "block",
        contentSettings: {
          contentType: "audio/mpeg",
        },
      },
    );

    const text = "Hi";
    const data = await textToSpeech(accessToken, text, writableStream);

    context.res = { body: data };
  } catch (err) {
    console.log(`Something went wrong: ${err}`);
    context.res = {
      status: 500,
      body: err,
    };
  }
};

// Obtain a bearer token for the speech service.
// Resolves with the raw token string from the issueToken endpoint.
function getAccessToken(subscriptionKey) {
  const tokenRequest = {
    method: "POST",
    uri:
      "https://southeastasia.api.cognitive.microsoft.com/sts/v1.0/issueToken",
    headers: { "Ocp-Apim-Subscription-Key": subscriptionKey },
  };
  return rp(tokenRequest);
}

// Converts text to speech using the input from readline.
function textToSpeech(accessToken, text, writableStream) {
  return new Promise((resolve, reject) => {
    try {
      let xml_body = xmlbuilder
        .create("speak")
        .att("version", "1.0")
        .att("xml:lang", "en-us")
        .ele("voice")
        .att("xml:lang", "en-us")
        .att("name", "en-US-Guy24kRUS")
        .txt(text)
        .end();
      // Convert the XML into a string to send in the TTS request.
      let body = xml_body.toString();

      let options = {
        method: "POST",
        baseUrl: "https://southeastasia.tts.speech.microsoft.com/",
        url: "cognitiveservices/v1",
        headers: {
          Authorization: "Bearer " + accessToken,
          "cache-control": "no-cache",
          "User-Agent": "YOUR_RESOURCE_NAME",
          "X-Microsoft-OutputFormat": "audio-16khz-64kbitrate-mono-mp3",
          "Content-Type": "application/ssml+xml",
        },
        body: body,
      };

      rp(options)
        .pipe(writableStream)
        .on("finish", () => {
          resolve("done");
        });
    } catch (error) {
      reject(error);
    }
  });
}

(Screenshot of the successful run omitted.)