I am trying to write a Cloud Function that loads GCS files matching a wildcard into BigQuery. All the samples I can find, including the API reference, call the load method with a specific filename via bigquery.dataset().table().load().
What method can I use to trigger a load using wildcards? I know I can specify the source URI in the job metadata, but I can't see a load method example to go along with it. Any help appreciated.
Here is my code.
// Requires live at module scope so Cloud Functions can reuse the clients
// across invocations.
const {BigQuery} = require('@google-cloud/bigquery');
const {Storage} = require('@google-cloud/storage');

exports.importReadyToIngestFiles = (event, context) => {
  const pubsubMessage = event.data;
  const bucketName = Buffer.from(pubsubMessage, 'base64').toString();
  console.log(bucketName);

  // Specify the project ID and BigQuery dataset ID below.
  const projectId = 'test-3';
  const datasetId = 'Data';
  // Wildcard: match every object in the bucket whose name starts with "p_".
  const filename = 'p_*';
  const tableId = 'numeric';

  const bigquery = new BigQuery({projectId});
  const storage = new Storage({projectId});

  let job;
  // Loads data from Google Cloud Storage files into the table.
  return bigquery
    .dataset(datasetId)
    .table(tableId)
    .load(storage.bucket(bucketName).file(filename))
    .then(results => {
      job = results[0];
      console.log(`Job ${job.id} started.`);
      // Wait for the job to finish.
      return job.promise();
    })
    .then(() => job.getMetadata())
    .then(([metadata]) => {
      // Check the job's status for errors.
      const errors = metadata.status.errors;
      if (errors && errors.length > 0) {
        throw errors;
      }
      console.log(`Job ${job.id} completed.`);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
};
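For what it's worth, the closest I can get to specifying the source URI in the metadata myself is the lower-level createJob call with an explicit load configuration. A minimal sketch of what I mean, assuming the load configuration accepts the same fields as the REST API's JobConfigurationLoad (untested outline):

// Sketch: build the load job configuration by hand so sourceUris can
// carry the wildcard URI directly.
bigquery
  .createJob({
    configuration: {
      load: {
        sourceUris: [`gs://${bucketName}/p_*`],
        destinationTable: {
          projectId: projectId,
          datasetId: datasetId,
          tableId: tableId,
        },
      },
    },
  })
  .then(([job]) => job.promise())
  .then(() => console.log('Load job finished.'))
  .catch(err => console.error('ERROR:', err));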
UPDATE: Aah, I figured it out by looking at the BigQuery job history error: my file is pipe-delimited, so there was a column mismatch. Now, where can I find how to specify the delimiter? Is it just me, or is the Node.js BigQuery API documentation a bit unclear?
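For anyone else who lands here: it looks like the second argument to table.load() takes the load job metadata, and a fieldDelimiter option there appears to map onto the REST API's configuration.load.fieldDelimiter. A minimal sketch, assuming a pipe-delimited file:

// Sketch: pass load-job metadata as the second argument to load();
// sourceFormat stays 'CSV' even for pipe-delimited files.
bigquery
  .dataset(datasetId)
  .table(tableId)
  .load(storage.bucket(bucketName).file('p_*'), {
    sourceFormat: 'CSV',
    fieldDelimiter: '|',
  })
  .then(results => {
    const job = results[0];
    console.log(`Job ${job.id} started.`);
    return job.promise();
  });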
"gs://my_bucket/*.csv"? - Graham Polley