SQL
Window Functions
This is the section that rusts fastest and matters most in interviews.
Anatomy of a Window Function
FUNCTION(args) OVER (
PARTITION BY col
ORDER BY col
ROWS/RANGE BETWEEN ...
)PARTITION BY is like GROUP BY but does not collapse rows.
ROW_NUMBER: Latest per group
WITH ranked AS (
SELECT
account_id,
score_value,
score_date,
ROW_NUMBER() OVER (
PARTITION BY account_id
ORDER BY score_date DESC
) AS rn
FROM risk_scores
)
SELECT account_id, score_value, score_date
FROM ranked
WHERE rn = 1;RANK vs DENSE_RANK vs ROW_NUMBER
SELECT
name, salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num,
RANK() OVER (ORDER BY salary DESC) AS rank,
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;ROW_NUMBER()for exactly N rowsRANK()for competition ranking (gaps after ties)DENSE_RANK()for Nth highest value (no gaps)
LAG and LEAD
WITH monthly AS (
SELECT
DATE_TRUNC('month', transaction_date)::DATE AS month,
SUM(amount) AS total
FROM transactions
GROUP BY 1
)
SELECT
month,
total,
LAG(total) OVER (ORDER BY month) AS prev_month,
ROUND(
(total - LAG(total) OVER (ORDER BY month)) * 100.0
/ NULLIF(LAG(total) OVER (ORDER BY month), 0),
1
) AS mom_change_pct
FROM monthly;Running Totals
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) AS running_total
FROM orders;Partitioned version:
SUM(amount) OVER (PARTITION BY category ORDER BY order_date) AS running_total_by_categoryNTILE Bucketing
WITH quartiled AS (
SELECT
account_id,
current_balance,
NTILE(4) OVER (ORDER BY current_balance) AS quartile
FROM account
WHERE account_status = 'OPEN'
)
SELECT
quartile,
COUNT(*) AS account_count,
MIN(current_balance) AS min_bal,
MAX(current_balance) AS max_bal,
ROUND(AVG(current_balance), 2) AS avg_bal
FROM quartiled
GROUP BY quartile
ORDER BY quartile;Percent of Total
SELECT
region,
COUNT(*) AS customer_count,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 1) AS pct_of_total,
ROUND(
SUM(COUNT(*)) OVER (ORDER BY COUNT(*) DESC) * 100.0
/ SUM(COUNT(*)) OVER (),
1
) AS cumulative_pct
FROM customers
GROUP BY region
ORDER BY customer_count DESC;