I am converting legal SQL Query to Standard SQL Query in Bigquery to calculate google analytics bounce rate. But on Converting the query there is some difference in the output result.
Legacy SQL Query
SELECT session_bounceRate,
t1.source as source,t1.medium as medium,
total_session
FROM (
SELECT
IFNULL(t1.session_bounceSessionCount, 0) / t2.session_sessionId_distinct_count AS session_bounceRate,
t1.source,t1.medium,
t2.total_session_distinct_count AS total_session
FROM (
SELECT
INTEGER(session_bounceSession_distinct_count) AS session_bounceSessionCount,
source,
medium
FROM (
SELECT
COUNT(DISTINCT session_sessionId, 10000000) AS session_bounceSession_distinct_count, --Changes done (Count(x) in legacy sql stands for approx ,
source, --,so replaced it with APPROX_COUNT_DISTINCT to get approx count
medium from (
SELECT
SUM(IF(session_hitsType = 'event'
AND session_isInteraction_first = 1, 1, 0)) AS session_isEventInteraction_sum,
session_sessionId AS session_sessionId,
SUM(session_pageViews_sum) AS session_pageViews_sum_sum,
source,
medium
FROM (
SELECT
hits.type AS session_hitsType,
sessionId AS session_sessionId,
SUM(totals.pageviews) AS session_pageViews_sum,
FIRST(hits.isInteraction) AS session_isInteraction_first,
trafficSource.source AS source,
trafficSource.medium AS medium,
FROM
TABLE_DATE_RANGE([[test:test.session_streaming_], TIMESTAMP('2018-04-01'), TIMESTAMP('2018-04-30')) AS session_streaming
GROUP BY
source,
medium,
session_hitsType,
session_sessionId )
GROUP BY
source,
medium,
session_sessionId )
WHERE
(session_isEventInteraction_sum = 0
AND session_pageViews_sum_sum = 1)
GROUP BY
source,
medium ) ) AS t1
JOIN EACH (
SELECT
COUNT(DISTINCT sessionId, 10000000) AS session_sessionId_distinct_count,
trafficSource.source AS source,
trafficSource.medium AS medium,
COUNT(DISTINCT sessionId, 10000000) AS total_session_distinct_count
FROM
TABLE_DATE_RANGE([test:Test.session_streaming_], TIMESTAMP('2018-04-01'), TIMESTAMP('2018-04-30')) AS session_streaming
GROUP BY
source,
medium ) AS t2
ON
t1.source = t2.source
and t1.medium=t2.medium)
where t1.medium='zadv_display'
ORDER BY
total_session DESC
Standard SQL Query
SELECT
IFNULL(t1.session_bounceSessionCount, 0) / t2.session_sessionId_distinct_count AS session_bounceRate,
t1.source,
t1.medium,
t2.total_session_distinct_count AS total_session
FROM (
SELECT
CAST(session_bounceSession_distinct_count AS INT64) AS session_bounceSessionCount,
source,
medium
FROM (
SELECT
APPROX_COUNT_DISTINCT(DISTINCT session_sessionId) AS session_bounceSession_distinct_count,
source,
medium
FROM (
SELECT
SUM(IF(session_hitsType = 'event'
AND session_isInteraction_first = 1, 1, 0)) AS session_isEventInteraction_sum,
session_sessionId AS session_sessionId,
SUM(session_pageViews_sum) AS session_pageViews_sum_sum,
source,
medium
FROM (
SELECT
session_hitsType,
session_sessionId,
source,
medium,
CASE
WHEN session_isInteraction_first = TRUE THEN 1
ELSE 0
END AS session_isInteraction_first,
SUM(session_pageViews_sum) AS session_pageViews_sum
FROM (
SELECT
hits.Type AS session_hitsType,
sessionId AS session_sessionId,
trafficSource.source AS source,
trafficSource.medium AS medium,
totals.pageviews AS session_pageViews_sum,
FIRST_VALUE(hits.isInteraction) OVER(PARTITION BY sessionId ORDER BY TIMESTAMP_SECONDS(hits.Time)) AS session_isInteraction_first
FROM
`test.Test.session_streaming_*`,
unNEST(hits) hits
WHERE
_table_suffix BETWEEN '20180401'
AND '20180430' )
GROUP BY
session_hitsType,
session_sessionId,
source,
medium,
session_isInteraction_first )
GROUP BY
source,
medium,
session_sessionId)
WHERE
(session_isEventInteraction_sum = 0
AND session_pageViews_sum_sum = 1)
GROUP BY
source,
medium)) AS t1
JOIN (
SELECT
APPROX_COUNT_DISTINCT(DISTINCT sessionId) AS session_sessionId_distinct_count,
trafficSource.source AS source,
trafficSource.medium AS medium,
APPROX_COUNT_DISTINCT(DISTINCT sessionId) AS total_session_distinct_count
FROM
`test.Test.session_streaming_*`
WHERE _table_suffix BETWEEN '20180401' AND '20180430'
GROUP BY
source,
medium
) AS t2
ON
t1.source = t2.source
AND t1.medium=t2.medium
where t1.medium='zadv_display'
order by total_session desc
We have replace First function in value with First_value in standard sql that is the noticeable change made in the query.
Could Someone guide me is there some issue on conversion as standard sql query query output should match with legacy output?