I have table in bigquery which has over 100 columns and has more than 70 Million row. I want to find out if I can write a query to extract rows where any of the column contains a value 'FINISHED'.
1
votes
1 Answers
3
votes
Below is for BigQuery Standard SQL Should be good start for you :)
#standardSQL
SELECT <columns to output>
FROM yourTable AS t
WHERE REGEXP_CONTAINS(LOWER(TO_JSON_STRING(t)), 'finished')
You can test/play with it with below dummy data
#standardSQL
WITH yourTable AS (
SELECT 'a' AS x, 'b' AS y, 'c' AS z UNION ALL
SELECT 'finished', '', '' UNION ALL
SELECT '', 'Bigquery Select from table where any column contains "FINISHED"','' UNION ALL
SELECT '', '', 'aaa' UNION ALL
SELECT 'finished', 'bbb', 'finished'
)
SELECT *
FROM yourTable AS t
WHERE REGEXP_CONTAINS(LOWER(TO_JSON_STRING(t)), 'finished')
Update
Note: if you have your search word as a part of at least one column name - above will return all rows! To address this - you would need to invest a little bit more coding
For example, for simple schema (with no record or repeated) this would be a
#standardSQL
SELECT <columns to output>
FROM yourTable AS t
WHERE (SELECT COUNTIF(SPLIT(zzz, ':')[SAFE_OFFSET(1)] LIKE '%finished%')
FROM UNNEST(SPLIT(SUBSTR(LOWER(TO_JSON_STRING(t)),2,LENGTH(TO_JSON_STRING(t))-2))) AS zzz
) > 0
You can test this with below
#standardSQL
WITH yourTable AS (
SELECT 'a' AS x, 'b' AS y, 'c' AS col_finished UNION ALL
SELECT 'finished', '', '' UNION ALL
SELECT '', 'Bigquery Select from table where any column contains "FINISHED"','' UNION ALL
SELECT '', '', 'aaa' UNION ALL
SELECT 'finished', 'bbb', 'finished'
)
SELECT *
FROM yourTable AS t
WHERE (SELECT COUNTIF(SPLIT(zzz, ':')[SAFE_OFFSET(1)] LIKE '%finished%')
FROM UNNEST(SPLIT(SUBSTR(LOWER(TO_JSON_STRING(t)),2,LENGTH(TO_JSON_STRING(t))-2))) AS zzz
) > 0