2
votes

Table users:

id STRING
group STRING
score INTEGER

Query for a single group:

WITH users_with_positions AS (
  SELECT
    id,
    group,
    ROW_NUMBER() OVER (ORDER BY score DESC) AS score_position
  FROM gbq.users
  WHERE group = 'group_1'
)

SELECT
  id,
  group,
  (
    CASE
      WHEN position > thresholds.high_threshold THEN 'high'
      WHEN position > thresholds.medium_threshold THEN 'med'
      ELSE 'low'
  ) AS rank
FROM users_with_positions, (
  SELECT
    (total_positions / 3 * 2) AS high_threshold,
    (total_positions / 3) AS medium_threshold
  FROM (
    SELECT
      MAX(score_position) AS total_positions
    FROM users_with_positions
  )
) thresholds

score_position is group-specific. Means, if users are related to different groups, then their score_positions being calculated independently.

So, the query above works well for a single group. What do i need to do, to get output across all groups in users table?

1

1 Answers

2
votes

Below is for BigQuery Standard SQL

First, I want to correct/fix query presented in question (as it didn't worked most likely because of copy/pasting and fields name obfuscation, etc)

So, I think below is what works for one group to start with

Query for a single group:

#standardSQL
WITH `gbq.users` AS (
  SELECT '1' id, 'group_1' grp, 5 score UNION ALL
  SELECT '2', 'group_1', 3 UNION ALL
  SELECT '3', 'group_1', 4 UNION ALL
  SELECT '4', 'group_1', 2 UNION ALL
  SELECT '5', 'group_1', 5 UNION ALL
  SELECT '6', 'group_1', 4 UNION ALL
  SELECT '2', 'group_2', 3 UNION ALL
  SELECT '3', 'group_2', 4 UNION ALL
  SELECT '7', 'group_2', 2 UNION ALL
  SELECT '8', 'group_2', 5 UNION ALL
  SELECT '9', 'group_2', 4 
), users_with_positions AS (
  SELECT
    id,
    grp,
    ROW_NUMBER() OVER (ORDER BY score DESC) AS score_position
  FROM `gbq.users`
  WHERE grp = 'group_1'
)
SELECT
  id,
  grp,
  (
    CASE
      WHEN score_position > thresholds.high_threshold THEN 'high'
      WHEN score_position > thresholds.medium_threshold THEN 'med'
      ELSE 'low'
    END
  ) AS RANK
FROM users_with_positions, (
  SELECT
    (total_positions / 3 * 2) AS high_threshold,
    (total_positions / 3) AS medium_threshold
  FROM (
    SELECT
      MAX(score_position) AS total_positions
    FROM users_with_positions
  )
) thresholds
ORDER BY grp, id  

with result as below

id      grp         RANK     
1       group_1     low  
2       group_1     high     
3       group_1     med  
4       group_1     high     
5       group_1     low  
6       group_1     med    

So, now

What do i need to do, to get output across all groups in users table?

#standardSQL
WITH `gbq.users` AS (
  SELECT '1' id, 'group_1' grp, 5 score UNION ALL
  SELECT '2', 'group_1', 3 UNION ALL
  SELECT '3', 'group_1', 4 UNION ALL
  SELECT '4', 'group_1', 2 UNION ALL
  SELECT '5', 'group_1', 5 UNION ALL
  SELECT '6', 'group_1', 4 UNION ALL
  SELECT '2', 'group_2', 3 UNION ALL
  SELECT '3', 'group_2', 4 UNION ALL
  SELECT '7', 'group_2', 2 UNION ALL
  SELECT '8', 'group_2', 5 UNION ALL
  SELECT '9', 'group_2', 4 
), users_with_positions AS (
  SELECT
    id,
    grp,
    ROW_NUMBER() OVER (PARTITION BY grp ORDER BY score DESC) AS score_position
  FROM `gbq.users`
)
SELECT
  id,
  grp,
  (
    CASE
      WHEN score_position > thresholds.high_threshold THEN 'high'
      WHEN score_position > thresholds.medium_threshold THEN 'med'
      ELSE 'low'
    END
  ) AS RANK
FROM users_with_positions 
JOIN (
  SELECT grp,
    (total_positions / 3 * 2) AS high_threshold,
    (total_positions / 3) AS medium_threshold
  FROM (
    SELECT grp,
      MAX(score_position) AS total_positions
    FROM users_with_positions
    GROUP BY grp
  )
) thresholds
USING (grp)
ORDER BY grp, id

with result as

id      grp         RANK     
1       group_1     low  
2       group_1     high     
3       group_1     med  
4       group_1     high     
5       group_1     low  
6       group_1     med  
2       group_2     high     
3       group_2     med  
7       group_2     high     
8       group_2     low  
9       group_2     med    

Note: both single group and multiple group versions assume that user appears at most once in any given group