Skip to main content

First Queries

After connecting to ScramDB (see Quick Start), let's explore SQL with joins, subqueries, and analytical functions.

Create Sample Tables

CREATE TABLE customers (
id INTEGER PRIMARY KEY,
name VARCHAR(100),
country VARCHAR(50),
created_at DATE
);

CREATE TABLE orders (
id BIGINT PRIMARY KEY,
customer_id INTEGER,
amount DOUBLE PRECISION,
status VARCHAR(20),
order_date DATE
);

INSERT INTO customers VALUES
(1, 'Alice', 'US', '2024-01-10'),
(2, 'Bob', 'UK', '2024-02-15'),
(3, 'Charlie', 'US', '2024-03-20'),
(4, 'Diana', 'DE', '2024-01-05');

INSERT INTO orders VALUES
(1, 1, 250.00, 'completed', '2024-03-01'),
(2, 1, 175.00, 'completed', '2024-03-15'),
(3, 2, 500.00, 'completed', '2024-03-10'),
(4, 3, 89.99, 'pending', '2024-03-20'),
(5, 1, 320.00, 'completed', '2024-04-01'),
(6, 4, 450.00, 'cancelled', '2024-03-25');

Joins

-- Inner join: customers with their orders
SELECT c.name, o.amount, o.order_date
FROM customers c
INNER JOIN orders o ON c.id = o.customer_id
WHERE o.status = 'completed'
ORDER BY o.amount DESC;
-- Left join: all customers, including those without orders
SELECT c.name, COALESCE(SUM(o.amount), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id AND o.status = 'completed'
GROUP BY c.name
ORDER BY total_spent DESC;

Subqueries

-- Customers who have spent more than average
SELECT c.name, SUM(o.amount) AS total
FROM customers c
JOIN orders o ON c.id = o.customer_id
GROUP BY c.name
HAVING SUM(o.amount) > (
SELECT AVG(amount) FROM orders WHERE status = 'completed'
);
-- EXISTS: customers with at least one completed order
SELECT name FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.id AND o.status = 'completed'
);

Window Functions

-- Running total per customer
SELECT
c.name,
o.order_date,
o.amount,
SUM(o.amount) OVER (
PARTITION BY c.id ORDER BY o.order_date
) AS running_total
FROM customers c
JOIN orders o ON c.id = o.customer_id
ORDER BY c.name, o.order_date;

Aggregation with CASE

SELECT
c.country,
COUNT(*) AS total_orders,
SUM(CASE WHEN o.status = 'completed' THEN 1 ELSE 0 END) AS completed,
SUM(CASE WHEN o.status = 'pending' THEN 1 ELSE 0 END) AS pending,
SUM(CASE WHEN o.status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled
FROM customers c
JOIN orders o ON c.id = o.customer_id
GROUP BY c.country;

Create an Index

-- Speed up lookups by customer_id
CREATE INDEX idx_orders_customer ON orders (customer_id);

-- The optimizer automatically uses the index when it's beneficial
SELECT * FROM orders WHERE customer_id = 1;

Next Steps