0
votes

We are attempting to use the node.js google_cloud sdk to load data into BigQuery from Google Cloud Storage. Here is a snippet of the Load code:

// Initialize the Google Cloud Storage client.
// NOTE(review): assumes `bq_projectId` and `keyfile` are defined earlier in the file.
const gcs = require("@google-cloud/storage")({
  projectId: bq_projectId,
  keyFilename: "./" + keyfile
});

// Initialize the Google BigQuery client with the same credentials.
const bq = require("@google-cloud/bigquery")({
  projectId: bq_projectId,
  keyFilename: "./" + keyfile
});

// Load-job target: the dataset, the destination table, and the
// name of the CSV source object in Cloud Storage.
const datasetId = "my-dataset";
const tableId = "data_load";
const fileName = "data-20170518-082238.csv";

// Kick off a BigQuery load job importing the CSV object from GCS into the
// destination table. The callback fires when the job is CREATED, not when it
// finishes — apiResponse.status.state will typically still be "RUNNING".
// NOTE(review): `bucketName` is assumed to be defined earlier in the file.
bq
  .dataset(datasetId)
  .table(tableId)
  .import(gcs.bucket(bucketName).file(fileName), (err, job, apiResponse) => {
    if (err) {
      // `throw` exits the callback by itself; the original had an
      // unreachable `return;` after it, which has been removed.
      throw err;
    }

    console.log(apiResponse);
    console.log(`Job ${job.id} started.`);
  });

The job kicks off and returns a jobId, no error is thrown and the apiResponse is:

{ kind: 'bigquery#job',
  etag: '"TcVKUQ2ft7DS9Q8U3noJdmpEDQ4/tFIJTWM2yuacXB5EvzWR1ffuKig"',
  id: 'my-project:job_FTO4_Jb5ctr2oEy2IsDSAUCWrgw',
  selfLink: 'https://www.googleapis.com/bigquery/v2/projects/my-project/jobs/job_FTO4_Jb5ctr2oEy2IsDSAUCWrgw',
  jobReference:
   { projectId: 'my-project',
     jobId: 'job_FTO4_Jb5ctr2oEy2IsDSAUCWrgw' },
  configuration:
   { load:
      { sourceUris: [Object],
        schema: [Object],
        destinationTable: [Object],
        sourceFormat: 'CSV' } },
  status: { state: 'RUNNING' },
  statistics: { creationTime: '1495151832686', startTime: '1495151833523' },
  user_email: 'service-account' }

The exact same file loads fine from the BigQuery interface into the specified table.

1
The job is in the RUNNING state. You need to use the jobId you've got and periodically check the status of that job until you get the state DONE. When it is done, you will see the errors, if any! — Mikhail Berlyant
Ah yes, I just included the initial output; we are certainly waiting for the job to complete before checking the table. It completes successfully. — jasonsirota

1 Answer

1
votes

It turns out that errors in the BigQuery load process do not throw via callback or promise.catch.

In this case, you need to read the job.status.errors object in order to determine if there were errors in the load, here is some sample code:

// Start the load job, wait for it to reach the DONE state via job.promise(),
// then inspect the completed job's status for load errors. BigQuery load
// failures do NOT reject the promise chain — they are reported on
// job.status.errorResult and job.status.errors of the finished job.
bq
  .dataset(datasetId)
  .table(tableId)
  .import(gcs.bucket(bucketName).file(fileName), {
    skipLeadingRows: 1
  })
  .then(results => {
    console.log("initial results:");
    console.log(results);
    const job = results[0];
    console.log(`Job ${job.id} started.`);
    // job.promise() resolves once the job completes on the server side.
    return job.promise();
  })
  .then(results => {
    console.log("completed results:");
    console.log(results);

    const job = results[0];

    // job.status.errors is an ARRAY, so check `.length` — the original
    // `.count` is always undefined, so that condition could never fire.
    // Fall back to errorResult in the log in case `errors` is absent.
    if (job.status.errorResult || (job.status.errors && job.status.errors.length > 0)) {
      console.error(JSON.stringify(job.status.errors || job.status.errorResult));
      return;
    }

    console.log(`Job ${job.id} completed successfully`);
  })
  .catch(err => {
    // Only transport/API errors land here — not per-row load errors.
    console.error(err);
  });