I have a query using analytic function for a day partitioned table. I would expect it to read only data in partitions filtered in where clause but it reads all partitions in the table.
WITH query AS (
SELECT
* EXCEPT(rank)
FROM (
SELECT
*,
RANK() OVER (PARTITION BY day order by num_mean_temp_samples) AS rank
FROM (
SELECT
FORMAT_DATE("%Y%m%d", _PARTITIONDATE) AS day,
*
FROM
`mydataset.gsod_partitioned` ) q_nested
) q
WHERE
rank < 1000
)
SELECT
num_mean_temp_samples ,
count(1) as samples
FROM query
WHERE
day in ( '20100101', '20100103')
GROUP BY 1 ORDER BY 1
I verified partition pruning works without analytic function:
WITH query AS (
SELECT
FORMAT_DATE("%Y%m%d", _PARTITIONDATE) AS day,
*
FROM
`mydataset.gsod_partitioned`
)
or after adding UNION ALL nested select:
WITH query AS (
SELECT
* EXCEPT(rank)
FROM (
SELECT
*,
RANK() OVER (PARTITION BY day order by num_mean_temp_samples) AS rank
FROM (
SELECT
FORMAT_DATE("%Y%m%d", _PARTITIONDATE) AS day,
*
FROM
`mydataset.gsod_partitioned` WHERE _PARTITIONDATE < "1970-01-01" ) q_nested1
UNION ALL SELECT
*,
RANK() OVER (PARTITION BY day order by num_mean_temp_samples) AS rank
FROM (
SELECT
FORMAT_DATE("%Y%m%d", _PARTITIONDATE) AS day,
*
FROM
`mydataset.gsod_partitioned` WHERE _PARTITIONDATE >= "1970-01-01" ) q_nested2
) q
WHERE
rank < 1000
)
Table mydataset.gsod_partitioned is public dataset gsod based where day=20100101 partition is created as follows:
bq query --destination_table 'private.gsod_partitioned$20100101' --time_partitioning_type=DAY --use_legacy_sql=false
'SELECT station_number, mean_temp, num_mean_temp_samples FROM `bigquery-public-data.samples.gsod` where year=2010 and month=01 and day=01'
Could you find a way to enable partition pruning for the analytic function without adding extra union to the query?