At the time of this writing, there is no way to do this using SQL functions in BigQuery unless you can impose a hard limit on the number of values in the JSON array; see the relevant issue tracker item. Your options are:
- Process the data differently (e.g. using Cloud Dataflow or another tool) so that you can load it from newline-delimited JSON into BigQuery.
- Use a JavaScript UDF that takes the input JSON and returns the desired type; this is fairly straightforward but generally uses more CPU (and hence may require a higher billing tier).
- Use SQL functions with the understanding that the solution breaks down if there are too many elements.
Here is the approach using a JavaScript UDF:
-- Parses an order document given as a JSON string and returns it as a typed
-- STRUCT. The JavaScript body only parses the text; the conversion of the
-- resulting object into the declared STRUCT/ARRAY shape is left to BigQuery.
--   input: JSON text of one order (order_id, customer_id, items[]).
CREATE TEMP FUNCTION JsonToItems(input STRING)
RETURNS STRUCT<
  order_id INT64,
  customer_id STRING,
  items ARRAY<STRUCT<
    line STRING,
    ref_ids ARRAY<STRING>,
    sku STRING,
    amount INT64
  >>
>
LANGUAGE js AS """
  const parsedOrder = JSON.parse(input);
  return parsedOrder;
""";
-- Demo: run the UDF over one inline sample order and expand the returned
-- STRUCT into top-level columns.
WITH sample_order AS (
  SELECT
    '{"order_id":"123456","customer_id":"2abcd", "items":[{"line":"1","ref_ids":["66b56e60","9e7ca2b7"],"sku":"1111","amount":40 },{"line":"2","ref_ids":["7777h0","8888j0"],"sku":"2222","amount":10 }]}' AS payload
)
SELECT
  JsonToItems(payload).*
FROM sample_order;
If you do want to try the SQL-based approach without JavaScript, here is something of a hack to use until the feature request above is resolved; it requires that each JSON array contain no more than 10 elements:
-- Returns the scalar values found at `$.ref_ids[0]` .. `$.ref_ids[9]` of the
-- given JSON text as an ARRAY<STRING>, in their original JSON order.
--   json: JSON text of a single item object.
-- Only the first 10 positions are probed; any further elements are silently
-- dropped. Positions that are missing (or do not hold a scalar) come back as
-- NULL from JSON_EXTRACT_SCALAR and are filtered out by IGNORE NULLS.
-- ARRAY_AGG yields NULL rather than an empty array when nothing remains.
CREATE TEMP FUNCTION JsonExtractRefIds(json STRING) AS (
  (
    -- UNNEST does not guarantee row order and ARRAY_AGG without ORDER BY is
    -- non-deterministic, so carry each element's offset and sort on it to
    -- keep the output aligned with the JSON array.
    SELECT ARRAY_AGG(v IGNORE NULLS ORDER BY pos)
    FROM UNNEST([
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[0]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[1]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[2]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[3]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[4]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[5]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[6]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[7]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[8]'),
      JSON_EXTRACT_SCALAR(json, '$.ref_ids[9]')
    ]) AS v WITH OFFSET AS pos
  )
);
-- Converts the JSON text of one order item into a typed STRUCT.
--   json: JSON text of a single item object, or NULL.
-- Returns NULL for NULL input, so callers can probe array positions that do
-- not exist and discard the resulting NULLs afterwards.
CREATE TEMP FUNCTION JsonToItem(json STRING)
RETURNS STRUCT<
  line STRING,
  ref_ids ARRAY<STRING>,
  sku STRING,
  amount INT64
>
AS (
  CASE
    WHEN json IS NULL THEN NULL
    ELSE STRUCT(
      JSON_EXTRACT_SCALAR(json, '$.line') AS line,
      JsonExtractRefIds(json) AS ref_ids,
      JSON_EXTRACT_SCALAR(json, '$.sku') AS sku,
      -- amount arrives as text from JSON_EXTRACT_SCALAR; cast to the declared type.
      CAST(JSON_EXTRACT_SCALAR(json, '$.amount') AS INT64) AS amount
    )
  END
);
-- Converts the JSON text of a whole order into a STRUCT with fields
-- order_id (INT64), customer_id (STRING) and items (array of item structs).
--   json: JSON text of one order.
-- Only `$.items[0]` .. `$.items[9]` are probed; any further items are
-- silently dropped. JsonToItem returns NULL for positions that do not exist,
-- and IGNORE NULLS removes them. ARRAY_AGG yields NULL rather than an empty
-- array when no item remains.
CREATE TEMP FUNCTION JsonToItems(json STRING) AS (
  (
    SELECT AS STRUCT
      CAST(JSON_EXTRACT_SCALAR(json, '$.order_id') AS INT64) AS order_id,
      JSON_EXTRACT_SCALAR(json, '$.customer_id') AS customer_id,
      (
        -- UNNEST does not guarantee row order and ARRAY_AGG without ORDER BY
        -- is non-deterministic, so sort on the element offset to keep the
        -- items in the same order as in the JSON document.
        SELECT ARRAY_AGG(v IGNORE NULLS ORDER BY pos)
        FROM UNNEST([
          JsonToItem(JSON_EXTRACT(json, '$.items[0]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[1]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[2]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[3]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[4]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[5]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[6]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[7]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[8]')),
          JsonToItem(JSON_EXTRACT(json, '$.items[9]'))
        ]) AS v WITH OFFSET AS pos
      ) AS items
  )
);
-- Demo: run the SQL-only JsonToItems over one inline sample order and expand
-- the returned STRUCT into top-level columns.
WITH sample_order AS (
  SELECT
    '{"order_id":"123456","customer_id":"2abcd", "items":[{"line":"1","ref_ids":["66b56e60","9e7ca2b7"],"sku":"1111","amount":40 },{"line":"2","ref_ids":["7777h0","8888j0"],"sku":"2222","amount":10 }]}' AS payload
)
SELECT
  JsonToItems(payload).*
FROM sample_order;