← Back to Home

SQL Cheatsheet Data Analysts & Data Engineers

1. Basic SQL (Fundamentals for Everyone)

SELECT Statement (Retrieving Data)

SELECT column1, column2 FROM table_name;
SELECT * FROM customers;

WHERE Clause (Filtering Data)

SELECT * FROM sales WHERE region = 'North America';
SELECT * FROM employees WHERE salary > 50000;

ORDER BY (Sorting Results)

SELECT * FROM products ORDER BY price DESC;

LIMIT (Fetching Specific Rows)

SELECT * FROM orders LIMIT 10;

DISTINCT (Removing Duplicates)

SELECT DISTINCT category FROM products;

2. Intermediate SQL (For Data Analysis & Aggregation)

Aggregations (SUM, AVG, COUNT, MIN, MAX)

SELECT COUNT(*) AS total_orders FROM orders;
SELECT department, AVG(salary) AS avg_salary FROM employees GROUP BY department;

GROUP BY & HAVING Clause

SELECT region, SUM(sales_amount) AS total_sales
FROM sales
GROUP BY region
HAVING SUM(sales_amount) > 100000;

CASE Statement (Conditional Logic)

SELECT customer_name, purchase_amount,
    CASE 
        WHEN purchase_amount > 1000 THEN 'VIP'
        WHEN purchase_amount BETWEEN 500 AND 1000 THEN 'Regular'
        ELSE 'Basic'
    END AS customer_category
FROM customers;

3. Joins & Relationships Between Tables

INNER JOIN

SELECT e.name, d.department_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.id;

LEFT JOIN (Keep all records from the left table)

SELECT e.name, d.department_name
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id;

FULL OUTER JOIN (Combine both tables)

SELECT e.name, d.department_name
FROM employees e
FULL OUTER JOIN departments d ON e.department_id = d.id;

Self-Join (Finding Duplicates or Relationships)

SELECT a.name AS employee, b.name AS manager
FROM employees a
JOIN employees b ON a.manager_id = b.id;

4. Advanced SQL for Data Analysts

Window Functions (Ranking, Moving Averages)

SELECT employee_id, department, salary,
    RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS salary_rank
FROM employees;
SELECT order_id, customer_id, order_date, 
    SUM(order_amount) OVER (PARTITION BY customer_id ORDER BY order_date) AS running_total
FROM orders;

Common Table Expressions (CTE)

WITH SalesSummary AS (
    SELECT customer_id, SUM(amount) AS total_spent
    FROM sales
    GROUP BY customer_id
)
SELECT * FROM SalesSummary WHERE total_spent > 5000;

Recursive CTE (Handling Hierarchical Data)

WITH RECURSIVE EmployeeHierarchy AS (
    SELECT id, name, manager_id, 1 AS level
    FROM employees WHERE manager_id IS NULL
    UNION ALL
    SELECT e.id, e.name, e.manager_id, h.level + 1
    FROM employees e
    INNER JOIN EmployeeHierarchy h ON e.manager_id = h.id
)
SELECT * FROM EmployeeHierarchy;

Full-Text Search (PostgreSQL, MySQL 5.7+, MariaDB 10.2+)

SELECT * FROM articles 
WHERE to_tsvector(title || ' ' || content) @@ to_tsquery('database & optimization');

5. Advanced SQL for Data Engineers

Handling JSON Data (PostgreSQL, MySQL 5.7+, MariaDB 10.2+)

SELECT 
    data->>'name' AS employee_name, 
    data->>'age' AS employee_age
FROM employees_json;

Data Deduplication (Finding & Removing Duplicates)

DELETE FROM sales
WHERE id IN (
    SELECT id FROM (
        SELECT id, ROW_NUMBER() OVER (PARTITION BY customer_id, order_date ORDER BY id) AS row_num
        FROM sales
    ) AS t WHERE row_num > 1
);

Indexing for Performance Optimization

CREATE INDEX idx_orders_customer ON orders (customer_id);

Partitioning for Large Datasets (PostgreSQL)

CREATE TABLE sales_partitioned (
    id SERIAL PRIMARY KEY,
    sale_date DATE NOT NULL,
    amount DECIMAL(10,2) NOT NULL
) PARTITION BY RANGE (sale_date);

6. Data Cleaning & Preparation

Handling NULL Values

SELECT 
    name, 
    COALESCE(email, 'No Email') AS email 
FROM customers;

String Transformation

SELECT 
    LOWER(name) AS name_lowercase,
    UPPER(name) AS name_uppercase,
    TRIM(name) AS name_trimmed
FROM employees;

Extracting Data from Timestamps

SELECT 
    EXTRACT(YEAR FROM order_date) AS order_year,
    EXTRACT(MONTH FROM order_date) AS order_month
FROM orders;