I am running the following query to return the top purchased category per user, this works fine on date ranges of 30 days and 60 days, but when I try and run it over the past 100 days I get the error:
Array index 1 is out of bounds (overflow)
Why is the widening the date range causing this error?
WITH `query_result` AS (
SELECT customDimension.value AS UserID,
SUM(CASE WHEN LOWER(hits_product.v2ProductName) LIKE "% mens%" THEN 1 ELSE 0 END) AS mens,
SUM(CASE WHEN LOWER(hits_product.v2ProductName) LIKE "%womens%" THEN 1 ELSE 0 END) AS womens,
SUM(CASE WHEN LOWER(hits_product.v2ProductName) LIKE "%boys%"
OR LOWER(hits_product.v2ProductName) LIKE "%girls%"
THEN 1 ELSE 0 END) AS kids
FROM `xxx.xxx.ga_sessions_20*` AS t
CROSS JOIN UNNEST(hits) AS hits
CROSS JOIN UNNEST(t.customdimensions) AS customDimension
CROSS JOIN UNNEST(hits.product) AS hits_product
WHERE parse_date('%y%m%d', _table_suffix) between
DATE_sub(current_date(), interval 100 day) and
DATE_sub(current_date(), interval 1 day)
AND customDimension.index = 2
AND hits.eCommerceAction.action_type = "6"
GROUP BY UserID
HAVING
SUM(CASE WHEN LOWER(hits_product.v2ProductName) LIKE "%boys%"
OR LOWER(hits_product.v2ProductName) LIKE "%girls%"
THEN 1 ELSE 0 END) > 0
)
SELECT *,
ARRAY_TO_STRING(ARRAY(
SELECT SPLIT(kv, ':')[OFFSET(0)]
FROM UNNEST(SPLIT(REGEXP_REPLACE(TO_JSON_STRING(t), r'[{"}]', ''))) kv
WHERE LOWER(SPLIT(kv, ':')[OFFSET(0)]) <> LOWER('UserID')
ORDER BY CAST(SPLIT(kv, ':')[OFFSET(1)] AS INT64) DESC
LIMIT 1
), ',') top_purchased_gender
FROM `query_result` t
kv
values doesn't have the format you're expecting? You could try SAFE_OFFSET instead. – Elliott Brossard