I need help streaming large JSON files from Firebase Storage to Firestore using a Firebase Function.
I want to transfer several large newline-delimited JSON files (11 x 700MB) to Firestore. I'm attempting to load them from Firebase Storage, stream each file, and write its contents to a Firestore collection.
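Each file is newline-delimited JSON, so every line is a standalone JSON object that should become one Firestore document. Roughly this shape (a made-up example; my real fields differ):

// One line of an input file might look like this (hypothetical data):
// {"userId": "abc123", "score": 42}
// After parsing, each line should be written as its own document:
const parsedLine = { userId: 'abc123', score: 42 };
admin.firestore().collection('test').add(parsedLine);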
I'm currently getting an error on the file read (from Storage) while testing with a very small JSON file. I do have read and write access, and I can see Firestore documents being created (but only sometimes).
I'm getting this error in my Firebase Functions console:
Error: Deadline Exceeded at /user_code/node_modules/firebase-admin/node_modules/grpc/src/client.js:554:15
This also seems to be coming from the read from Storage, as I've set up an alert on read errors and it is being triggered.
const functions = require('firebase-functions');
const admin = require('firebase-admin');
admin.initializeApp(functions.config().firebase);
const es = require('event-stream')
const Parser = require('newline-json').Parser
const gcs = require('@google-cloud/storage')();
const path = require('path');
// [START function]
exports.generateData = functions.storage.object().onChange(event => {
  const object = event.data; // The Storage object.
  const fileBucket = object.bucket; // The Storage bucket that contains the file.
  const filePath = object.name; // File path in the bucket.
  const contentType = object.contentType; // File content type.
  const resourceState = object.resourceState; // The resourceState is 'exists' or 'not_exists' (for file/folder deletions).
  const metageneration = object.metageneration; // Number of times metadata has been generated. New objects have a value of 1.

  // Exit if this is triggered on a file that is not JSON.
  if (!contentType.endsWith('json')) {
    console.log('This is not a json file.');
    return;
  }

  // Exit if this is a move or deletion event.
  if (resourceState === 'not_exists') {
    console.log('This is a deletion event.');
    return;
  }

  // Exit if file exists but is not new and is only being triggered
  // because of a metadata change.
  if (resourceState === 'exists' && metageneration > 1) {
    console.log('This is a metadata change event.');
    return;
  }

  // Download file from bucket.
  const bucket = gcs.bucket(fileBucket);
  let buf = []

  const getStream = function () {
    let stream = bucket.file(filePath).createReadStream()
      .on('error', () => { console.log('Read Error') })
      .on('end', () => { console.log('Successful Read') })
    let parser = new Parser()
    return stream.pipe(parser)
  }
  getStream()
    .pipe(es.mapSync(function (data) {
      buf.push(data)
      pump()
    }))
    .on('end', () => {
      console.log('Stream Finished')
      return true
    })
    .on('error', () => {
      console.log('Stream Error')
      return false
    })

  // Drain the buffer, writing each parsed line to Firestore.
  function pump() {
    let pos;
    while ((pos = buf.length) >= 1) {
      processLine(buf.pop(0))
    }
  }

  // Write a single parsed line as a new document in the 'test' collection.
  function processLine(line) {
    admin.firestore().collection('test').add(line)
  }
});
I'm seeing Read Error in the logs, so the read operation has to be dying. I don't know what to do at this point, but I would appreciate any help.
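One thing I plan to try next is logging the actual error object instead of a fixed string, so I can see what is really failing on the read. An untested sketch of that change to the read stream handler:

// Sketch (not yet deployed): log the underlying error from the read stream
let stream = bucket.file(filePath).createReadStream()
  .on('error', (err) => {
    // err.message and err.code should show the real cause, not just my generic 'Read Error' line
    console.error('Read Error:', err)
  })
  .on('end', () => console.log('Successful Read'))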