PostgreSQL - queries by examples
Basic queries
-- select
select first_name, last_name, email
from customer;
-- distinct: both the following two forms work
select distinct rating
from film;
select distinct(rating)
from film;
-- count
select count(distinct first_name )
from actor;
-- readable
select count(distinct(first_name))
from actor;
-- this doesn't work, parentheses are required
-- select count distinct first_name
-- from actor;
-- where
select email
from customer
where first_name = 'Nancy' and last_name = 'Thomas';
-- limit
select customer_id
from payment
order by payment_date ASC
limit 10;
-- between and
--- both inclusive
--- not between and
--- when used for timestamp, it includes the time part
select count(*)
from payment
where amount between 5 and 6;
select count(*)
from payment
where amount not between 5 and 6;
select *
from payment
where payment_date between '2007-05-12' and '2007-05-15'
order by payment_date asc;
-- in, not in
select *
from city
where city in ('Hoshiarpur', 'Hino');
information_schema
select *
from city
where city not in ('Hoshiarpur', 'Hino');
-- like, ilike
--- like: case sensitive
--- ilike: case insensitive
--- like: match any characters
--- _: match only one character
select *
from customer
where first_name like 'M%' and last_name ilike 's%';
select *
from customer
where first_name like '_her%';
Date time
-- PostgreSQL time data types
--- TIME: contains only time
--- DATE: contains only date
--- TIMESTAMP: contains date and time
--- TIMESTAMPTZ: contains date, time and timezone
--- related functions and operators: TIMEZONE, NOW, TIMEOFDAY, CURRENT_TIME, CURRENT_DATE
--- show the values of all configuration parameters, with descriptions
SHOW ALL;
--- display the current setting of run-time parameters
SHOW TIMEZONE;
--- date and time functions
SELECT now(), timeofday(), CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP;
------------------------------------------------------------------------------------------
-- extract information from a time based data type using
--- EXTRACT(): retrieves subfields such as year/month/hour from date/time values
--- AGE(): subtract argument from current_date
--- TO_CHAR(): convert various data types (date/time, numeric) to formatted strings, and convert from formatted strings to specific data types.
SELECT extract(year from payment_date) as payment_year,
extract(quarter from payment_date) as payment_quarter,
extract(month from payment_date) as payment_month,
extract(day from payment_date) as payment_day,
extract(hour from payment_date) as payment_hour,
extract(minute from payment_date) as payment_minute,
extract(second from payment_date) as payment_second,
extract(century from payment_date) as payment_century,
extract(dow from payment_date) as payment_dow
FROM payment;
--- example result: 16 years 4 mons 22 days 1 hours 34 mins 13.003423 secs
SELECT age(payment_date)
FROM payment;
--- example result: 02-15-2007,THURSDAY /FEBRUARY /2007, 7.99
SELECT to_char(payment_date, 'mm-dd-YYYY'),
to_char(payment_date, 'DAY/MONTH/YYYY'),
to_char(amount, '999D00')
FROM payment;
--- During which months did payments occur? Format your answer to return back the full month name.
SELECT distinct to_char(payment_date, 'MONTH')
FROM payment;
--- How many payments occurred on a Monday using extract()
SELECT count(*)
FROM payment
WHERE extract(dow from payment_date) = 1; -- The day of the week as Monday (1) to Sunday (7)
--- How many payments occurred on a Monday using to_char()
SELECT count(*)
FROM payment
WHERE to_char(payment_date, 'D') = '2'; -- day of the week, Sunday (1) to Saturday (7)
-- date()
select b.starttime, f.name
from bookings as b inner join facilities f on f.facid = b.facid
where f.name like 'Tennis Court%'
and date(b.starttime) = '2012-09-21';
Group By
-- most common aggregate functions
--- COUNT(): simply returns the number of rows
--- AVG(): returns a floating point, you can use ROUND() to specify precision after the decimal
--- MAX(): returns the maximum element
--- MIN(): returns the minimum element
--- SUM(): returns the sum
--- aggregate functions can appear in SELECT, HAVING and ORDER BY clauses, but not WHERE statements
select min(replacement_cost), max(replacement_cost), round(avg(replacement_cost), 2), sum(replacement_cost)
from film;
-- group by: aggregate on one or more columns
--- in the SELECT statement, columns must either have an aggregate function or be in the GROUP BY call
--- WHERE statements should not refer to the aggregation result, should use HAVING to filter on aggregation result
--- can use ORDER BY to sort results based on the aggregate, but make sure to reference the entire aggregate function
-- the total amount per customer per staff
select customer_id, staff_id, sum(amount)
from payment
group by customer_id, staff_id
order by customer_id, staff_id, sum(amount);
select date(payment_date), sum(amount)
from payment
group by date(payment_date)
order by date(payment_date), sum(amount) desc;
select staff_id, count(*)
from payment
group by staff_id
order by count(*) desc;
select rating, round(avg(replacement_cost), 2)
from film
group by rating;
select customer_id, sum(amount)
from payment
group by customer_id
order by sum(amount) desc
limit 5;
-- HAVING: filter on the aggregate
--- we can use filter on non-aggregate columns in WHERE because they're valid before GROUP BY
--- we can NOT use filter on aggregate columns in WHERE because they're valid AFTER GROUP BY
select customer_id, sum(amount)
from payment
where customer_id not in (184, 477, 550)
group by customer_id
having sum(amount) > 100
order by sum(amount) desc;
select customer_id, count(*)
from payment
group by customer_id
having count(*) >= 40;
select customer_id, sum(amount)
from payment
where staff_id = 2
group by customer_id
having sum(amount) > 110;
Join
-- AS
--- it's an alias of a column or an aggregate
--- it's executed at the very end of a query, so we cannot use the alias inside a WHERE operator
select count(*) as num_transactions
from payment;
select customer_id, sum(amount) as total_spent
from payment
group by customer_id
having sum(amount) > 100; -- cannot use 'total_spent' here because it doesn't exist yet
-- INNER JOIN
--- results with a set of records that match in both tables
--- the order of the tables to INNER JOIN doesn't matter
--- if you use just JOIN without the INNER, PostgreSQL will treat it as an INNER JOIN
select payment_id, customer.customer_id, first_name, last_name -- we need to prefix the table if the join column appears in SELECT
from customer inner join payment on customer.customer_id = payment.customer_id;
---------------------------------------------------------------------------------------------------------------------
-- FULL [OUTER] JOIN
--- OUTER is optional
--- grab everything in both tables, columns that don't match will be filled with NULL
---- if table A has 200 records, table B has 300 records, and the matched records is 150, then the result will be:
---- 150 + (200 - 150) + (300 - 150) = 350
--- the table order doesn't matter
--- use WHERE to get rows unique to either table (rows not found in both tables), this is opposite to INNER JOIN
--- get everything from customer and payment
--- rows in both tables, rows in only one table and the value for the other table fields are null
select *
from customer full outer join payment
on customer.customer_id = payment.customer_id;
--- do we have any customer without payment, or any payment without customer?
--- this is opposite to INNER JOIN
select *
from customer full outer join payment
on customer.customer_id = payment.customer_id
where customer.customer_id is null or payment.payment_id is null;
---------------------------------------------------------------------------------------------------------------------
-- A LEFT [OUTER] JOIN B
--- OUTER is optional
--- return records in table A, if there is no match with table B, the the column values are null
--- the table order does matter
--- use WHERE to get rows unique to table A
--- get all films and their inventory (even the films don't have inventory)
select film.film_id, film.title, inventory.inventory_id, inventory.store_id
from film left join inventory on film.film_id = inventory.film_id;
--- do we have any film that are not linked to inventory?
select film.film_id, film.title, inventory.inventory_id, inventory.store_id
from film left join inventory on film.film_id = inventory.film_id
where inventory.film_id is null;
---------------------------------------------------------------------------------------------------------------------
-- A RIGHT [OUTER] JOIN B
--- OUTER is optional
--- essentially the same as LEFT JOIN, except the tables are switched
--- return records in table B, if there is no match with table A, the the column values are null
--- do we have any film that are not linked to inventory?
select film.film_id, film.title, inventory.inventory_id, inventory.store_id
from inventory right join film on film.film_id = inventory.film_id
where inventory.film_id is null;
---------------------------------------------------------------------------------------------------------------------
-- UNION
--- combine the result-set of two or more SELECT statements
--- directly concatenate two results together
--- the columns in SELECT statements MUST be exactly the same
select customer_id
from payment
union
select customer_id
from customer;
---------------------------------------------------------------------------------------------------------------------
-- self join is using standard join to join two copies of the same table on different columns.
--- find the film pair whose film length is the same
select f1.title, f2.title
from film as f1
inner join film as f2
on f1.film_id != f2.film_id -- if f1.film_id = f2.film_id, then the same film will be mapped to itself
and f1.length = f2.length;
Math functions
--- operators: +, -, *, /, %, |/, @
--- functions: abs, ceil, ceiling, div, gcd, floor, lcm, round, mod, power, scale, sqrt, trunc
SELECT round(rental_rate/replacement_cost, 4) * 100 as percent_cost,
trunc(rental_rate/replacement_cost, 4) * 100 as percent_cost2
FROM film;
String
-- String Functions and Operators
--- doc: https://www.postgresql.org/docs/9.1/functions-string.html
--- operators: ||
--- functions: lower(), upper(), substring(), position(), trim(), btrim(), length(), left(), convert(), ltrim(), right(), rtrim()
SELECT first_name || ' ' || last_name AS customer_name
FROM customer;
select lower(left(first_name, 1)) || lower(last_name) || '@gmail.com'
from customer;
select rtrim(email, '.org'), position('@' in email), substring(email from position('@' in email))
from customer;
-- to_char() and left()
select b.starttime, f.name
from bookings as b inner join facilities f on f.facid = b.facid
where to_char(b.starttime, 'YYYY-mm-dd') = '2012-09-21'
and left(f.name, 12) = 'Tennis Court';
-- substring()
select b.starttime, f.name
from bookings as b inner join facilities f on f.facid = b.facid
where to_char(b.starttime, 'YYYY-mm-dd') = '2012-09-21'
and substring(f.name from 1 for 12) = 'Tennis Court';
-- ||
select distinct b.starttime
from bookings as b inner join members m on m.memid = b.memid
where (m.firstname || ' ' || m.surname) = 'David Farrell';
Sub queries
-- Sub query
--- sub queries are executed first.
--- it is unwise to write a sub-query that has side effects (such as calling sequence functions)
select title, rental_rate
from film
where rental_rate > (select avg(rental_rate) from film);
-- Exists
EXISTS (subquery)
-- The subquery is evaluated to determine whether it returns any rows. If it returns at least one row, the result of EXISTS is “true”; if the subquery returns no rows, the result of EXISTS is “false”.
-- Since the result depends only on whether any rows are returned, and not on the contents of those rows, the output list of the subquery is normally unimportant. A common coding convention is to write all EXISTS tests in the form `EXISTS(SELECT 1 WHERE ...)`.
-- For each row in the surrounding query, only if `EXISTS(subquery)` evaluates true, the row is returned.
-- for each customer row, if `exists(subquery)` is true, which means its payment amount > 10, the row is returned.
select first_name, last_name
from customer as c
where exists(
select *
from payment as p
where c.customer_id = p.customer_id
and p.amount > 10
);
-- can use `NOT EXISTS()` to reverse the condition
select first_name, last_name
from customer as c
where not exists(
select *
from payment as p
where c.customer_id = p.customer_id
and p.amount > 10
);
-- IN
expression IN (subquery)
-- The right-hand side is a parenthesized subquery, which must return exactly one column.
select film_id, title
from film
where film_id in (
select film_id
from inventory inner join rental on inventory.inventory_id = rental.inventory_id
where rental.rental_date > '2005-05-29' and rental.rental_date < '2005-05-31'
)
order by film_id;
-- not in
select film_id, title
from film
where film_id not in (
select film_id
from inventory inner join rental on inventory.inventory_id = rental.inventory_id
where rental.rental_date > '2005-05-29' and rental.rental_date < '2005-05-31'
)
order by film_id;
-- ANY/SOME
--- expression operator ANY (subquery)
--- expression operator SOME (subquery)
--- The right-hand side is a parenthesized subquery, which must return exactly one column.
--- The left-hand expression is evaluated and compared to each row of the subquery result using the given operator, which must yield a Boolean result.
--- The result of ANY is “true” if any true result is obtained. The result is “false” if no true result is found (including the case where the subquery returns no rows).
--- `SOME` is a synonym for `ANY`. `IN` is equivalent to `= ANY`.
--- IN is equivalent to = ANY
select film_id, title
from film
where film_id = any (
select film_id
from inventory inner join rental on inventory.inventory_id = rental.inventory_id
where rental.rental_date > '2005-05-29' and rental.rental_date < '2005-05-31'
)
order by film_id;
--- SOME is equivalent to ANY
select film_id, title
from film
where film_id > some (
select film_id
from inventory inner join rental on inventory.inventory_id = rental.inventory_id
where rental.rental_date > '2005-05-29' and rental.rental_date < '2005-05-31'
and film_id > 600
)
order by film_id;
-- ALL
--- expression operator ALL (subquery)
--- The right-hand side is a parenthesized subquery, which must return exactly one column.
--- The result of ALL is “true” if all rows yield true (including the case where the subquery returns no rows). The result is “false” if any false result is found.
select film_id, title
from film
where film_id <= ALL (
select film_id
from inventory inner join rental on inventory.inventory_id = rental.inventory_id
where rental.rental_date > '2005-05-29' and rental.rental_date < '2005-05-31'
and film_id > 100 and film_id < 200
)
order by film_id;