Bored Analyst
SQL

Window Functions

This is the section that rusts fastest and matters most in interviews.

Anatomy of a Window Function

FUNCTION(args) OVER (
  PARTITION BY col
  ORDER BY col
  ROWS/RANGE BETWEEN ...
)

PARTITION BY is like GROUP BY but does not collapse rows.

ROW_NUMBER: Latest per group

WITH ranked AS (
  SELECT
    account_id,
    score_value,
    score_date,
    ROW_NUMBER() OVER (
      PARTITION BY account_id
      ORDER BY score_date DESC
    ) AS rn
  FROM risk_scores
)
SELECT account_id, score_value, score_date
FROM ranked
WHERE rn = 1;

RANK vs DENSE_RANK vs ROW_NUMBER

SELECT
  name, salary,
  ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num,
  RANK()       OVER (ORDER BY salary DESC) AS rank,
  DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;
  • ROW_NUMBER() for exactly N rows
  • RANK() for competition ranking (gaps after ties)
  • DENSE_RANK() for Nth highest value (no gaps)

LAG and LEAD

WITH monthly AS (
  SELECT
    DATE_TRUNC('month', transaction_date)::DATE AS month,
    SUM(amount) AS total
  FROM transactions
  GROUP BY 1
)
SELECT
  month,
  total,
  LAG(total) OVER (ORDER BY month) AS prev_month,
  ROUND(
    (total - LAG(total) OVER (ORDER BY month)) * 100.0
    / NULLIF(LAG(total) OVER (ORDER BY month), 0),
    1
  ) AS mom_change_pct
FROM monthly;

Running Totals

SELECT
  order_date,
  amount,
  SUM(amount) OVER (ORDER BY order_date) AS running_total
FROM orders;

Partitioned version:

SUM(amount) OVER (PARTITION BY category ORDER BY order_date) AS running_total_by_category

NTILE Bucketing

WITH quartiled AS (
  SELECT
    account_id,
    current_balance,
    NTILE(4) OVER (ORDER BY current_balance) AS quartile
  FROM account
  WHERE account_status = 'OPEN'
)
SELECT
  quartile,
  COUNT(*) AS account_count,
  MIN(current_balance) AS min_bal,
  MAX(current_balance) AS max_bal,
  ROUND(AVG(current_balance), 2) AS avg_bal
FROM quartiled
GROUP BY quartile
ORDER BY quartile;

Percent of Total

SELECT
  region,
  COUNT(*) AS customer_count,
  ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 1) AS pct_of_total,
  ROUND(
    SUM(COUNT(*)) OVER (ORDER BY COUNT(*) DESC) * 100.0
    / SUM(COUNT(*)) OVER (),
    1
  ) AS cumulative_pct
FROM customers
GROUP BY region
ORDER BY customer_count DESC;

On this page