0
votes

I am having trouble writing a Python script that loads or exports a file from Google Cloud Storage to Google BigQuery.

#standardSQL
import json
import argparse
import time
import uuid

from google.cloud import bigquery
from google.cloud import storage

# NOTE(review): despite the name, this value looks like a project ID rather
# than a BigQuery dataset name (the bucket below shares the same prefix, and
# load_data_from_gcs uses the dataset 'FirebaseArchive') -- confirm.
dataset = 'dataworks-356fa'
# Cloud Storage URI (gs://bucket/object) of the JSON file to be loaded.
source = 'gs://dataworks-356fa-backups/pullnupload.json'

# def load_data_from_gcs(dataset, source): 
# #     load_data_from_gcs(dataworks-356fa, 'test10', gs://dataworks-356fa-backups/pullnupload.json):
#     bigquery_client = bigquery.Client('dataworks-356fa')
#     dataset = bigquery_client.dataset(FirebaseArchive)
#     table = dataset.table(test10)
#     job_name = str(uuid.uuid4())
# 
#     job = bigquery_client.load_table_from_storage(
#         job_name, test10, 'gs://dataworks-356fa-backups/pullnupload.json')
# 
#     job.source_format = 'NEWLINE_DELIMITED_JSON'
#     job.begin()

def load_data_from_gcs(dataset, test10, source ):
    """Start a BigQuery load job for a newline-delimited JSON file in Cloud Storage.

    Args:
        dataset: Passed straight to ``bigquery.Client`` as its first argument
            (presumably the project ID -- confirm). The name is then rebound
            to the 'FirebaseArchive' dataset object.
        test10: Name of the destination table within 'FirebaseArchive'.
        source: Currently unused; the gs:// URI is hard-coded in the
            ``load_table_from_storage`` call below.

    NOTE(review): nothing in the visible snippet calls this function, which
    on its own would explain the script exiting without doing anything.
    """
    bigquery_client = bigquery.Client(dataset)
    # Rebinds the ``dataset`` parameter: from here on it is a dataset object,
    # not the string that was passed in.
    dataset = bigquery_client.dataset('FirebaseArchive')
    table = dataset.table(test10)
    # Random UUID used as the job name so each run gets a distinct job.
    job_name = str(uuid.uuid4())

    # The URI is a literal here; the ``source`` argument is ignored.
    job = bigquery_client.load_table_from_storage(
        job_name, table, "gs://dataworks-356fa-backups/pullnupload.json")

    job.source_format = 'NEWLINE_DELIMITED_JSON'
    # NOTE(review): begin() presumably only submits the job; nothing in this
    # function waits for it to finish before ``job.errors`` is read.
    job.begin()
    # Bare expression: the value is read and discarded, so no error is ever
    # printed or returned to the caller.
    job.errors

This is my code so far. The file runs, but it neither loads anything into BigQuery nor reports an error; it simply finishes and returns me to the terminal prompt.

1

1 Answer

2
votes

From your previous question, you have the wait_for_job function. You should call it before checking the job for errors, like so:

def load_data_from_gcs(dataset, test10, source):
    """Load a newline-delimited JSON file from Cloud Storage into BigQuery.

    Submits a load job, blocks until ``wait_for_job`` returns, then prints
    the job's final state and any errors it reported.

    Args:
        dataset: Passed to ``bigquery.Client`` as its first argument
            (presumably the project ID -- confirm against the caller).
        test10: Name of the destination table inside the 'FirebaseArchive'
            dataset.
        source: ``gs://bucket/object`` URI of the file to load.
    """
    bigquery_client = bigquery.Client(dataset)
    # Use a separate name instead of rebinding the ``dataset`` parameter to
    # a different kind of object.
    dataset_ref = bigquery_client.dataset('FirebaseArchive')
    table = dataset_ref.table(test10)
    # Random UUID used as the job name so each run gets a distinct job.
    job_name = str(uuid.uuid4())

    # Honour the caller-supplied URI rather than a hard-coded one.
    job = bigquery_client.load_table_from_storage(job_name, table, source)

    job.source_format = 'NEWLINE_DELIMITED_JSON'
    job.begin()
    # Errors are only meaningful once the job has finished, so wait first.
    wait_for_job(job)
    # format() instead of "+": job.errors is not a string (None or a list of
    # error mappings), so concatenation would raise TypeError.
    print("state of job is: {}".format(job.state))
    print("errors: {}".format(job.errors))

You can also use IPython to run each step by hand and observe what results on each line.

Note that job.state must reach 'DONE' before you check job.errors; until the job has finished, no errors will have been reported.