SQL Cheatsheet Data Analysts & Data Engineers
1. Basic SQL (Fundamentals for Everyone)
SELECT Statement (Retrieving Data)
SELECT column1, column2 FROM table_name;
SELECT * FROM customers;
WHERE Clause (Filtering Data)
SELECT * FROM sales WHERE region = 'North America';
SELECT * FROM employees WHERE salary > 50000;
ORDER BY (Sorting Results)
SELECT * FROM products ORDER BY price DESC;
LIMIT (Fetching Specific Rows)
SELECT * FROM orders LIMIT 10;
DISTINCT (Removing Duplicates)
SELECT DISTINCT category FROM products;
2. Intermediate SQL (For Data Analysis & Aggregation)
Aggregations (SUM, AVG, COUNT, MIN, MAX)
SELECT COUNT(*) AS total_orders FROM orders;
SELECT department, AVG(salary) AS avg_salary FROM employees GROUP BY department;
GROUP BY & HAVING Clause
SELECT region, SUM(sales_amount) AS total_sales
FROM sales
GROUP BY region
HAVING SUM(sales_amount) > 100000;
CASE Statement (Conditional Logic)
SELECT customer_name, purchase_amount,
CASE
WHEN purchase_amount > 1000 THEN 'VIP'
WHEN purchase_amount BETWEEN 500 AND 1000 THEN 'Regular'
ELSE 'Basic'
END AS customer_category
FROM customers;
3. Joins & Relationships Between Tables
INNER JOIN
SELECT e.name, d.department_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.id;
LEFT JOIN (Keep all records from the left table)
SELECT e.name, d.department_name
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id;
FULL OUTER JOIN (Combine both tables)
SELECT e.name, d.department_name
FROM employees e
FULL OUTER JOIN departments d ON e.department_id = d.id;
Self-Join (Finding Duplicates or Relationships)
SELECT a.name AS employee, b.name AS manager
FROM employees a
JOIN employees b ON a.manager_id = b.id;
4. Advanced SQL for Data Analysts
Window Functions (Ranking, Moving Averages)
SELECT employee_id, department, salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS salary_rank
FROM employees;
SELECT order_id, customer_id, order_date,
SUM(order_amount) OVER (PARTITION BY customer_id ORDER BY order_date) AS running_total
FROM orders;
Common Table Expressions (CTE)
WITH SalesSummary AS (
SELECT customer_id, SUM(amount) AS total_spent
FROM sales
GROUP BY customer_id
)
SELECT * FROM SalesSummary WHERE total_spent > 5000;
Recursive CTE (Handling Hierarchical Data)
WITH RECURSIVE EmployeeHierarchy AS (
SELECT id, name, manager_id, 1 AS level
FROM employees WHERE manager_id IS NULL
UNION ALL
SELECT e.id, e.name, e.manager_id, h.level + 1
FROM employees e
INNER JOIN EmployeeHierarchy h ON e.manager_id = h.id
)
SELECT * FROM EmployeeHierarchy;
Full-Text Search (PostgreSQL, MySQL 5.7+, MariaDB 10.2+)
SELECT * FROM articles
WHERE to_tsvector(title || ' ' || content) @@ to_tsquery('database & optimization');
5. Advanced SQL for Data Engineers
Handling JSON Data (PostgreSQL, MySQL 5.7+, MariaDB 10.2+)
SELECT
data->>'name' AS employee_name,
data->>'age' AS employee_age
FROM employees_json;
Data Deduplication (Finding & Removing Duplicates)
DELETE FROM sales
WHERE id IN (
SELECT id FROM (
SELECT id, ROW_NUMBER() OVER (PARTITION BY customer_id, order_date ORDER BY id) AS row_num
FROM sales
) AS t WHERE row_num > 1
);
Indexing for Performance Optimization
CREATE INDEX idx_orders_customer ON orders (customer_id);
Partitioning for Large Datasets (PostgreSQL)
CREATE TABLE sales_partitioned (
id SERIAL PRIMARY KEY,
sale_date DATE NOT NULL,
amount DECIMAL(10,2) NOT NULL
) PARTITION BY RANGE (sale_date);
6. Data Cleaning & Preparation
Handling NULL Values
SELECT
name,
COALESCE(email, 'No Email') AS email
FROM customers;
String Transformation
SELECT
LOWER(name) AS name_lowercase,
UPPER(name) AS name_uppercase,
TRIM(name) AS name_trimmed
FROM employees;
Extracting Data from Timestamps
SELECT
EXTRACT(YEAR FROM order_date) AS order_year,
EXTRACT(MONTH FROM order_date) AS order_month
FROM orders;