0
votes

I need some guidance. My Azure Function (written in Node.js) converts some random text to speech and then uploads the speech output to a blob. I would like to do so without using an intermediate local file. The BlockBlobClient.upload method requires a Blob, string, ArrayBuffer, ArrayBufferView, or a function that returns a new Readable stream, and it also requires the content length. I am not able to get these from the RequestPromise object returned by the call to TTS (as of now I am using request-promise to call TTS). Any suggestions will be really appreciated.

Thank you

Adding a code sample that can be tested as "node TTSSample.js". The sample code is based on:

  1. Azure Blob stream related code shared at https://github.com/Azure-Samples/azure-sdk-for-js-storage-blob-stream-nodejs/blob/master/v12/routes/index.js

  2. Azure Text to speech sample code at https://github.com/Azure-Samples/Cognitive-Speech-TTS/tree/master/Samples-Http/NodeJS

  3. Replace appropriate keys and parameters in the enclosed settings.js

  4. I am using node.js runtime v12.18.3

  5. Input text and the output blob name are hard coded in this code sample.

    // TTSSample.js
    // Converts given text to Audio and uploads to Azure Blob storage
    // Ultimately to be used in an Azure function
    // As of now input text as well as the target BLobStorage object are hard coded.
    
    // To install dependencies, run: npm install
    const xmlbuilder = require('xmlbuilder');
    // request-promise has a dependency on request
    const rp = require('request-promise');
    const fs = require('fs');                     // NOTE(review): unused in this sample (only the commented-out pipe uses it)
    const readline = require('readline-sync');    // NOTE(review): unused — the input text is hard coded below
    const settings = require('./settings.js');
    
    // NOTE(review): multer/into-stream come from the Express upload sample this
    // was adapted from; uploadStrategy is never used in this script.
    const multer = require('multer');
    const inMemoryStorage = multer.memoryStorage();
    const uploadStrategy = multer({ storage: inMemoryStorage }).single('image');
    const getStream = require('into-stream');
    
    const ONE_MEGABYTE = 1024 * 1024;
    // Chunking parameters for BlockBlobClient.uploadStream.
    const uploadOptions = { bufferSize: 4 * ONE_MEGABYTE, maxBuffers: 20 };
    const ONE_MINUTE = 60 * 1000;                 // NOTE(review): unused in this sample
    // Blob Storage
    const { BlobServiceClient } = require('@azure/storage-blob');
    
    // Gets an access token.
    function getAccessToken(subscriptionKey) {
            let options = {
                    method: 'POST',
                    uri: settings.issueTokenUri,
                    headers: {
                            'Ocp-Apim-Subscription-Key': subscriptionKey
                    }
            }
            return rp(options);
    }
    
    // Converts text to speech using the input from readline.
    function textToSpeech_rp(accessToken, text) {
            // Create the SSML request.
            let xml_body = xmlbuilder.create('speak')
                    .att('version', '1.0')
                    .att('xml:lang', 'en-us')
                    .ele('voice')
                    .att('xml:lang', 'en-us')
                    .att('name', 'en-US-Guy24kRUS') // Short name for 'Microsoft Server Speech Text to Speech Voice (en-US, Guy24KRUS)'
                    .txt(text)
                    .end();
            // Convert the XML into a string to send in the TTS request.
            let body = xml_body.toString();
            //console.log("xml string is done");
            let options = {
                    method: 'POST',
                    baseUrl: settings.cognitiveUri,
                    url: 'cognitiveservices/v1',
                    headers: {
                            'Authorization': 'Bearer ' + accessToken, 
                            'cache-control': 'no-cache',
                            'User-Agent': settings.cognitiveResource,
                            'X-Microsoft-OutputFormat': 'riff-24khz-16bit-mono-pcm',
                            'Content-Type': 'application/ssml+xml'
                    },
                    body: body
            }
            console.log(options);
            let request = rp(options)
                    .on('response', async (response) =>  {
                            if (response.statusCode === 200) {
                                    console.log("Inside response");
                                    const stream = getStream(response);
    
         //request.pipe(fs.createWriteStream('TTSOutput.wav'));
    
                                    const AZURE_STORAGE_CONNECTION_STRING = settings.storageConnectionString;
                                    const storageAccount = settings.storageAccount;
                                    const storageKey = settings.storageKey;
                                    const containerName = settings.audioContainer;
                                    // Create the BlobServiceClient object which will be used to create a container client
                                    const blobServiceClient = BlobServiceClient.fromConnectionString(AZURE_STORAGE_CONNECTION_STRING);
    
                                    // Get a reference to a container
                                    const containerClient = blobServiceClient.getContainerClient(containerName);
    
                                    // Create a unique name for the blob
                                    //const blobName = id + '.mp3';
                                    const blobName = 'audio1.mp3';
    
                                    // Get a block blob client
                                    const blockBlobClient = containerClient.getBlockBlobClient(blobName);
                                    //blockBlobClient.upload(buffer,61000);
                                    //blockBlobClient.upload(request.body);
                                    try {
                                             await blockBlobClient.uploadStream(stream,
                                              uploadOptions.bufferSize, uploadOptions.maxBuffers,
                                              { blobHTTPHeaders: { blobContentType: "audio/mpeg3" } });
                                            //res.render('success', { message: 'File uploaded to Azure Blob storage.' });
                                            console.log('Success');
                                      } catch (err) {
                                            //res.render('error', { message: err.message });
                                            console.log("Failure", err.stack);
                                      }
                                    console.log("I am done");
                            }
                    });
            return request;
    
    };
    
    // Use async and await to get the token before attempting
    // to convert text to speech.
    // Entry point: fetch an access token, then synthesize the hard-coded
    // text and upload the resulting audio to Blob storage.
    async function main() {
            const key = settings.subscriptionKey;
            if (!key) {
                    throw new Error('Environment variable for your subscription key is not set.')
            }
            try {
                    const token = await getAccessToken(key);
                    await textToSpeech_rp(token, "Hello there");
            } catch (err) {
                    console.log(`Something went wrong: ${err}`);
                    console.log(err.stack);
            }
    }
    
    // Kick off the application.
    main();
    
    
    
    
     //============ Settings.js
    
    (function() {
    "use strict";
    module.exports = {  
    // Replace with your own subscription key, service region (e.g., "westus"),
    // and recognition language.
    subscriptionKey:   "CognitiveServiceSubsctiptionKey",
    serviceRegion:     "eastus", // e.g., "westus"
    language:          "en-US",
    issueTokenUri:     'https://eastus.api.cognitive.microsoft.com/sts/v1.0/issuetoken',
    cognitiveUri: 'https://eastus.tts.speech.microsoft.com/',
    cognitiveResource: 'CognitiveResourceName',
    
    // Storage
    storageAccount: "StorageAccount",
    storageKey: "sN/StorageKey==",
    storageConnectionString: "StorageConnectionString",
    audioContainer: "AudioContainer",
    };
    }());
    
1
Please show us a minimal reproducible example; add your code. – OfirD
I have added the code. – user14197269

1 Answer

0
votes

Regarding the issue, please refer to the following code:

// Dependencies for the Azure Function sample below.
const xmlbuilder = require("xmlbuilder"); // builds the SSML request body
const rp = require("request-promise"); // HTTP client; its return value is also pipeable
const azure = require("azure-storage"); // older-generation Blob storage SDK (provides createWriteStreamToBlockBlob)
// Speech service key — replace the placeholder before deploying.
const subscriptionKey = "<your speech service subscription key>";

module.exports = async function (context, req) {
  context.log("JavaScript HTTP trigger function processed a request.");
  try {
    const accessToken = await getAccessToken(subscriptionKey);

    const blobService = azure.createBlobService(
      "<connection string>",
    );
    const writableStream = blobService.createWriteStreamToBlockBlob(
      "test",
      "test.mp3",
      {
        blockIdPrefix: "block",
        contentSettings: {
          contentType: "audio/mpeg",
        },
      },
    );

    const text = "Hi";
    const data = await textToSpeech(accessToken, text, writableStream);

    context.res = { body: data };
  } catch (err) {
    console.log(`Something went wrong: ${err}`);
    context.res = {
      status: 500,
      body: err,
    };
  }
};

// Obtain a bearer token for the speech service.
// Resolves with the raw token string from the issueToken endpoint.
function getAccessToken(subscriptionKey) {
  const tokenRequest = {
    method: "POST",
    uri:
      "https://southeastasia.api.cognitive.microsoft.com/sts/v1.0/issueToken",
    headers: { "Ocp-Apim-Subscription-Key": subscriptionKey },
  };
  return rp(tokenRequest);
}

// Converts text to speech using the input from readline.
function textToSpeech(accessToken, text, writableStream) {
  return new Promise((resolve, reject) => {
    try {
      let xml_body = xmlbuilder
        .create("speak")
        .att("version", "1.0")
        .att("xml:lang", "en-us")
        .ele("voice")
        .att("xml:lang", "en-us")
        .att("name", "en-US-Guy24kRUS")
        .txt(text)
        .end();
      // Convert the XML into a string to send in the TTS request.
      let body = xml_body.toString();

      let options = {
        method: "POST",
        baseUrl: "https://southeastasia.tts.speech.microsoft.com/",
        url: "cognitiveservices/v1",
        headers: {
          Authorization: "Bearer " + accessToken,
          "cache-control": "no-cache",
          "User-Agent": "YOUR_RESOURCE_NAME",
          "X-Microsoft-OutputFormat": "audio-16khz-64kbitrate-mono-mp3",
          "Content-Type": "application/ssml+xml",
        },
        body: body,
      };

      rp(options)
        .pipe(writableStream)
        .on("finish", () => {
          resolve("done");
        });
    } catch (error) {
      reject(error);
    }
  });
}

(Screenshot of the successful run omitted.)