0
votes

I'm using Firebase to analyze a mobile app. Data from Firebase is exported to BigQuery.

I'm trying to use the following query, written in standard SQL, to count purchasers relevant to a specific content:

SELECT COUNT(user_dim.app_info.app_instance_id) AS purchasers
    ,eventParams.value.string_value AS eventName
    ,event.DATE AS eventDate
FROM `id.TABLE.app_events_ * `
    ,UNNEST(event_dim) AS event
    ,UNNEST(event.params) AS eventParams
WHERE (
        event.NAME = 'select_content'
        AND eventParams.KEY = 'item_id'
        )
    AND user_dim.app_info.app_instance_id IN (
        SELECT [user_dim.app_info.app_instance_id]
        FROM `id.TABLE.app_events_ * `
            ,UNNEST(event_dim) AS event
            ,UNNEST(event.params) AS eventParams
        WHERE event.NAME = 'in_app_purchase'
            AND eventParams.KEY = 'quantity'
        )
GROUP BY eventDate
    ,eventName
ORDER BY eventDate DESC

But I get the error message: 'Cannot execute IN subquery with uncomparable types STRING and ARRAY'

I have tried to use 'UNNEST' function after the 'IN' operator. But then I got the error: 'Scalar subquery produced more than one element'

Please, help me to find out where my mistake is.

3

3 Answers

2
votes

You just need to remove the brackets on order for the query to work:

AND user_dim.app_info.app_instance_id IN (
    SELECT user_dim.app_info.app_instance_id
    FROM `id.TABLE.app_events_ * `
        ,UNNEST(event_dim) AS event
        ,UNNEST(event.params) AS eventParams
    WHERE event.NAME = 'in_app_purchase'
        AND eventParams.KEY = 'quantity'
    )

The problem is that you were creating an array by putting the brackets around the column name.

0
votes

Maybe you could optimize your query while eliminating this error, like so:

WITH data AS(
  SELECT STRUCT<user_id STRING, app_info STRUCT<app_instance_id STRING> > ('user_1', STRUCT('id_1' as app_instance_id)) user_dim, ARRAY<STRUCT<name STRING, date STRING, params ARRAY<STRUCT<key STRING, value STRUCT<string_value STRING> >> > > [STRUCT('select_content' as name, "20170610" as date, [STRUCT('item_id' as key, STRUCT('str_value1' as string_value) as value), STRUCT('item_id' as key, STRUCT('str_value2' as string_value) as value)] as params), STRUCT('in_app_purchase' as name, "20170610" as date, [STRUCT('quantity' as key, STRUCT('str_value1' as string_value) as value)] as params), STRUCT('select_content' as name, "20170611" as date, [STRUCT('item_id' as key, STRUCT('str_value1' as string_value) as value)] as params), STRUCT('select_content' as name, "20170611" as date, [STRUCT('item_id' as key, STRUCT('str_value2' as string_value) as value)] as params)] event_dim UNION ALL
  SELECT STRUCT<user_id STRING, app_info STRUCT<app_instance_id STRING> > ('user_2', STRUCT('id_2' as app_instance_id)) user_dim, ARRAY<STRUCT<name STRING, date STRING, params ARRAY<STRUCT<key STRING, value STRUCT<string_value STRING> >> > > [STRUCT('select_content' as name, "20170610" as date, [STRUCT('item_id' as key, STRUCT('str_value1' as string_value) as value), STRUCT('item_id' as key, STRUCT('str_value2' as string_value) as value)] as params), STRUCT('in_app_purchase' as name, "20170610" as date, [STRUCT('quantity' as key, STRUCT('str_value2' as string_value) as value)] as params), STRUCT('select_content' as name, "20170612" as date, [STRUCT('item_id' as key, STRUCT('str_value2' as string_value) as value)] as params), STRUCT('select_content' as name, "20170610" as date, [STRUCT('item_id' as key, STRUCT('str_value2' as string_value) as value)] as params)] event_dim union all
  SELECT STRUCT<user_id STRING, app_info STRUCT<app_instance_id STRING> > ('user_2', STRUCT('id_2' as app_instance_id)) user_dim, ARRAY<STRUCT<name STRING, date STRING, params ARRAY<STRUCT<key STRING, value STRUCT<string_value STRING> >> > > [STRUCT('select_content' as name, "20170610" as date, [STRUCT('item_id' as key, STRUCT('str_value1' as string_value) as value)] as params), STRUCT('select_content' as name, "20170612" as date, [STRUCT('item_id' as key, STRUCT('str_value2' as string_value) as value)] as params), STRUCT('select_content' as name, "20170610" as date, [STRUCT('item_id' as key, STRUCT('str_value2' as string_value) as value)] as params)] event_dim
)


SELECT
  date,
  eventName,
  COUNT(1) purchases
FROM(
  SELECT
  ARRAY(SELECT AS STRUCT event.date date, params.value.string_value eventName FROM UNNEST(event_dim) event, UNNEST(event.params) params GROUP BY date, eventName) data
FROM data
WHERE EXISTS(SELECT 1 FROM UNNEST(event_dim) event WHERE event.name = 'in_app_purchase' AND (SELECT COUNT(1) FROM UNNEST(event.params) params WHERE params.key = 'quantity') > 0)
),
UNNEST(data)
GROUP BY
  date, eventName

Not only it should run faster but it should also be cheaper as it queries over less columns of data.

0
votes

I've created the query which helped me to sort purchasers by content. It's in Legacy SQL:

`

SELECT
  user_dim.app_info.app_platform as appPlatform,
  event_dim.date as eventDate,
  COUNT(event_dim.name) as purchasers,
  user_dim.app_info.app_instance_id as appInstanceID,
  event_dim.params.value.string_value as contentID,
  event_dim.value_in_usd as revenue,
  CASE
    WHEN event_dim.params.value.string_value = 'content id1'  THEN 'content name1'
    WHEN event_dim.params.value.string_value = 'content id2'  THEN 'content name2'
    ELSE 'Undefined'
  END as contentName
FROM
  TABLE_QUERY([table_IOS], 'table_id CONTAINS "app_events_"'),
  TABLE_QUERY([table_ANDROID], 'table_id CONTAINS "app_events_"')
WHERE`enter code here`
  (event_dim.name = 'in_app_purchase' AND event_dim.params.key = 'product_id' OR event_dim.params.key = 'price')
  AND event_dim.params.value.string_value != 'null'
GROUP BY
  appPlatform, eventDate, appInstanceID, contentID, revenue, contentName 
ORDER BY
  appPlatform, eventDate DESC

`