1
votes

I am trying to load json file with more then 100 columns into Bigquery. Some of these columns has special character within their name i.e. dollar sign($) and period(.) Rows/Records content also varies - meaning not all columns may be present in each row/record which is totally acceptable json format.

I have search similar posts

How to manage/handle schema changes while loading JSON file into BigQuery table

BigQuery: Create column of JSON datatype

which suggest to load the data into single "STRING" column as CSV format first and parse out columns using JSON_EXTRACT() function to target table. Hence I have created a BigQuery table with following schema definition:

[ { "name": "data", "type": "STRING" } ]

then I have ran following CLI command:

bq load --source_format=CSV test.bq_load_test ./data_file.json ./bq_load_test_schema.json

which result into following error:

Error Message: BigQuery error in load operation: Error processing job : Error while reading data, error message: CSV table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the errors[] collection for more details. Failure details: - Error while reading data, error message: Too many values in row starting at position: 0.

Here's the data file layout:

root
|-- $insert_id: string (nullable = true)
|-- $schema: long (nullable = true)
|-- adid: string (nullable = true)
|-- vendor_attribution_ids: array (nullable = true)
|    |-- element: string (containsNull = true)
|-- vendor_event_type: string (nullable = true)
|-- vendor_id: long (nullable = true)
|-- app: long (nullable = true)
|-- city: string (nullable = true)
|-- client_event_time: string (nullable = true)
|-- client_upload_time: string (nullable = true)
|-- country: string (nullable = true)
|-- data: struct (nullable = true)
|    |-- first_event: boolean (nullable = true)
|-- device_brand: string (nullable = true)
|-- device_carrier: string (nullable = true)
|-- device_family: string (nullable = true)
|-- device_id: string (nullable = true)
|-- device_manufacturer: string (nullable = true)
|-- device_model: string (nullable = true)
|-- device_type: string (nullable = true)
|-- dma: string (nullable = true)
|-- event_id: long (nullable = true)
|-- event_properties: struct (nullable = true)
|    |-- U.vf: string (nullable = true)
|    |-- app.name: string (nullable = true)
|    |-- app.pillar: string (nullable = true)
|    |-- app.version: string (nullable = true)
|    |-- categories: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- cmfAppId: string (nullable = true)
|    |-- content.area: string (nullable = true)
|    |-- content.authenticated: boolean (nullable = true)
|    |-- content.authors: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- content.cms: string (nullable = true)
|    |-- content.id: string (nullable = true)
|    |-- content.keywords.collections: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- content.keywords.company: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- content.keywords.location: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- content.keywords.organization: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- content.keywords.person: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- content.keywords.subject: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- content.keywords.tag: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- content.media.audiovideo: string (nullable = true)
|    |-- content.media.contentarea: string (nullable = true)
|    |-- content.media.duration: long (nullable = true)
|    |-- content.media.episodenumber: string (nullable = true)
|    |-- content.media.genre: string (nullable = true)
|    |-- content.media.length: double (nullable = true)
|    |-- content.media.liveondemand: string (nullable = true)
|    |-- content.media.region: string (nullable = true)
|    |-- content.media.seasonnumber: string (nullable = true)
|    |-- content.media.show: string (nullable = true)
|    |-- content.media.sport: string (nullable = true)
|    |-- content.media.streamtitle: string (nullable = true)
|    |-- content.media.type: string (nullable = true)
|    |-- content.originaltitle: string (nullable = true)
|    |-- content.pubdate: long (nullable = true)
|    |-- content.publishedtime: string (nullable = true)
|    |-- content.subsection1: string (nullable = true)
|    |-- content.subsection2: string (nullable = true)
|    |-- content.subsection3: string (nullable = true)
|    |-- content.subsection4: string (nullable = true)
|    |-- content.tier: string (nullable = true)
|    |-- content.title: string (nullable = true)
|    |-- content.type: string (nullable = true)
|    |-- content.updatedtime: string (nullable = true)
|    |-- content.url: string (nullable = true)
|    |-- custom.DNT: boolean (nullable = true)
|    |-- custom.cookiesenabled: boolean (nullable = true)
|    |-- custom.engine: string (nullable = true)
|    |-- feature.name: string (nullable = true)
|    |-- feature.position: string (nullable = true)
|    |-- lastupdate: string (nullable = true)
|    |-- pubdate: string (nullable = true)
|    |-- referrer.campaign: string (nullable = true)
|    |-- referrer.url: string (nullable = true)
|    |-- syndicate: string (nullable = true)
|    |-- user.interests.explicit.no: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- user.interests.explicit.yes: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- user.tier: string (nullable = true)
|    |-- userTier: string (nullable = true)
|-- event_time: string (nullable = true)
|-- event_type: string (nullable = true)
|-- idfa: string (nullable = true)
|-- ip_address: string (nullable = true)
|-- is_attribution_event: boolean (nullable = true)
|-- language: string (nullable = true)
|-- library: string (nullable = true)
|-- location_lat: double (nullable = true)
|-- location_lng: double (nullable = true)
|-- os_name: string (nullable = true)
|-- os_version: string (nullable = true)
|-- paying: string (nullable = true)
|-- platform: string (nullable = true)
|-- processed_time: string (nullable = true)
|-- region: string (nullable = true)
|-- sample_rate: string (nullable = true)
|-- server_received_time: string (nullable = true)
|-- server_upload_time: string (nullable = true)
|-- session_id: long (nullable = true)
|-- start_version: string (nullable = true)
|-- user_creation_time: string (nullable = true)
|-- user_id: string (nullable = true)
|-- user_properties: struct (nullable = true)
|    |-- internal.userID: string (nullable = true)
|    |-- internal.userTier: string (nullable = true)
|    |-- experiment.id: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- experiment.variant: array (nullable = true)
|    |    |-- element: string (containsNull = true)
|    |-- location.news: string (nullable = true)
|    |-- location.radio: string (nullable = true)
|    |-- location.region: string (nullable = true)
|    |-- location.tv: string (nullable = true)
|    |-- location.weather: string (nullable = true)
|    |-- referrer.campaign: string (nullable = true)
|    |-- user.id: string (nullable = true)
|    |-- user.id.internalvisitor: string (nullable = true)
|    |-- user.tier: string (nullable = true)
|-- uuid: string (nullable = true)
|-- version_name: string (nullable = true)
|-- feature_origin: string (nullable = true)

Here's the snippet of the data file:

{"server_received_time":"2019-01-17 15:00:00.482000","app":161,"device_carrier":null,"$schema":12,"city":"Caro","user_id":null,"uuid":"9018","event_time":"2019-01-17 15:00:00.045000","platform":"Web","os_version":"49","vendor_id":711,"processed_time":"2019-01-17 15:00:00.817195","user_creation_time":"2018-11-01 19:16:34.971000","version_name":null,"ip_address":null,"paying":null,"dma":null,"group_properties":{},"user_properties":{"location.radio":"ca","vendor.userTier":"free","vendor.userID":"a989","user.id":"a989","user.tier":"free","location.region":"ca"},"client_upload_time":"2019-01-17 15:00:00.424000","$insert_id":"e8410","event_type":"LOADED","library":"amp\/4.5.2","vendor_attribution_ids":null,"device_type":"Mac","device_manufacturer":null,"start_version":null,"location_lng":null,"server_upload_time":"2019-01-17 15:00:00.493000","event_id":64,"location_lat":null,"os_name":"Chrome","vendor_event_type":null,"device_brand":null,"groups":{},"event_properties":{"content.authenticated":false,"content.subsection1":"regions","custom.DNT":true,"content.subsection2":"ca","referrer.url":"","content.url":"","content.type":"index","content.title":"","custom.cookiesenabled":true,"app.pillar":"feed","content.area":"news","app.name":"oc"},"data":{},"device_id":"","language":"English","device_model":"Mac","country":"","region":"","is_attribution_event":false,"adid":null,"session_id":15,"device_family":"Mac","sample_rate":null,"idfa":null,"client_event_time":"2019-01-17 14:59:59.987000"}
{"server_received_time":"2019-01-17 15:00:00.913000","app":161,"device_carrier":null,"$schema":12,"city":"Fo","user_id":null,"uuid":"9052","event_time":"2019-01-17 15:00:00.566000","platform":"Web","os_version":"71","vendor_id":797,"processed_time":"2019-01-17 15:00:01.301936","user_creation_time":"2019-01-17 15:00:00.566000","version_name":null,"ip_address":null,"paying":null,"dma":"CO","group_properties":{},"user_properties":{"user.tier":"free"},"client_upload_time":"2019-01-17 15:00:00.157000","$insert_id":"69ae","event_type":"START WEB SESSION","library":"amp\/4.5.2","vendor_attribution_ids":null,"device_type":"Android","device_manufacturer":null,"start_version":null,"location_lng":null,"server_upload_time":"2019-01-17 15:00:00.925000","event_id":1,"location_lat":null,"os_name":"Chrome Mobile","vendor_event_type":null,"device_brand":null,"groups":{},"event_properties":{"content.subsection3":"home","content.subsection2":"archives","content.title":"","content.keywords.subject":["Lifestyle\/Recreation and leisure\/Outdoor recreation\/Boating","Lifestyle\/Relationships\/Couples","General news\/Weather","Oddities"],"content.publishedtime":154687,"app.name":"oc","referrer.url":"","content.subsection1":"archives","content.url":"","content.authenticated":false,"content.keywords.location":["Ot"],"content.originaltitle":"","content.type":"story","content.authors":["Archives"],"app.pillar":"feed","content.area":"news","content.id":"1.49","content.updatedtime":1546878600538,"content.keywords.tag":["24 1","boat house","Ot","Rockcliffe","River","m"],"content.keywords.person":["Ber","Shi","Jea","Jean\u00e9tien"]},"data":{"first_event":true},"device_id":"","language":"English","device_model":"Android","country":"","region":"","is_attribution_event":false,"adid":null,"session_id":15477,"device_family":"Android","sample_rate":null,"idfa":null,"client_event_time":"2019-01-17 14:59:59.810000"}
{"server_received_time":"2019-01-17 15:00:00.913000","app":16,"device_carrier":null,"$schema":12,"city":"","user_id":null,"uuid":"905","event_time":"2019-01-17 15:00:00.574000","platform":"Web","os_version":"71","vendor_id":7973,"processed_time":"2019-01-17 15:00:01.301957","user_creation_time":"2019-01-17 15:00:00.566000","version_name":null,"ip_address":null,"paying":null,"dma":"DCO","group_properties":{},"user_properties":{"user.tier":"free"},"client_upload_time":"2019-01-17 15:00:00.157000","$insert_id":"d045","event_type":"LOADED","library":"am-js\/4.5.2","vendor_attribution_ids":null,"device_type":"Android","device_manufacturer":null,"start_version":null,"location_lng":null,"server_upload_time":"2019-01-17 15:00:00.925000","event_id":2,"location_lat":null,"os_name":"Chrome Mobile","vendor_event_type":null,"device_brand":null,"groups":{},"event_properties":{"content.subsection3":"home","content.subsection2":"archives","content.subsection1":"archives","content.keywords.subject":["Lifestyle\/Recreation and leisure\/Outdoor recreation\/Boating","Lifestyle\/Relationships\/Couples","General news\/Weather","Oddities"],"content.type":"story","content.keywords.location":["Ot"],"app.pillar":"feed","app.name":"oc","content.authenticated":false,"custom.DNT":false,"content.id":"1.4","content.keywords.person":["Ber","Shi","Jea","Je\u00e9tien"],"content.title":"","content.url":"","content.originaltitle":"","custom.cookiesenabled":true,"content.authors":["Archives"],"content.publishedtime":1546878600538,"referrer.url":"","content.area":"news","content.updatedtime":1546878600538,"content.keywords.tag":["24 1","boat house","O","Rockcliffe","River","pr"]},"data":{},"device_id":"","language":"English","device_model":"Android","country":"","region":"","is_attribution_event":false,"adid":null,"session_id":1547737199081,"device_family":"Android","sample_rate":null,"idfa":null,"client_event_time":"2019-01-17 14:59:59.818000"}

Any input? What am I missing here?

1

1 Answers

0
votes

You have to specify the right delimiter for CSV file. Notice that the default value for this flag is ',' and your data has ',', therefore, every row is interpreted as multiple fields. I tested with your data and this worked for me:

bq load --source_format=CSV -F ';' test.bq_load_test ./data_file.json 

Notice that ';' worked because the snippet data does not contain ';'