SQL Cheat Sheet

Create/Drop tables

1
-- Comments in SQL are preceded
2
-- by two hyphens
3
create table Students (
4
  studentId integer,
5
  name TEXT
6
  birthDate DATE
7
);
8
/* SQL also supports
9
C-style comments */
10
drop table Students;
11
drop table if exists Students;
12
drop table if exists Students cascade;

Data Types

Untitled

Null Values:

Result of a comparison operation involving null value is unknown
Result of an arithmetic operation involving null value is null

Check for null:

x IS NULL
x IS NOT NULL = NOT (x IS NULL)

x IS DISTINCT FROM y: similar to x <> y but works for null

Constraints in Data Deﬁnitions

A constraint is violated if it evaluates to false

Constraint Types:

Not-null constraints: is violated if $\exists x \in Students$ where x.name IS NOT NULL evaluates to false
Unique constraints: is violated if $\exists x, y \in Students$ where x.id <> y.id evaluates to false
Primary key constraints
Foreign key constraints:
- REFERENCES Room(level, number) does not check partially null foreign key
- REFERENCES Room(level, number) MATCH FULL to fix this (but still allows all null value like (null, null))

Check constraints

1
--Column constraint
2
day SMALLINT CHECK (day IN (1,2,3,4,5)),
3

4
--Table constraint
5
CHECK (
6
  (day IN (1,2,3,4,5) AND hour >= 8 AND hour <= 17)
7
  OR
8
  (day = 6 AND hour IN (9,10,11))
9
)

Constraint Names:

1
--Column constraint
2
pname VARCHAR(50)
3
  CONSTRAINT lectures_pname CHECK (pname IS NOT NULL),
4

5
--Table constraint:
6
CONSTRAINT lectures_pri_key PRIMARY KEY (cname, day, hour),

Database Modifications

Insert:

1
CREATE TABLE Students (
2
  studentId INTEGER PRIMARY KEY,
3
  name VARCHAR(100) NOT NULL,
4
  birthDate date,
5
  dept VARCHAR(20) DEFAULT 'CS'
6
);
7

8
INSERT INTO Students
9
VALUES (12345, 'Alice', '1999-12-25', 'Maths');
10

11
INSERT INTO Students (name, studentId)
12
VALUES ('Bob', 67890), ('Carol', 11122);

Delete:

1
-- Remove all students from Maths department
2
DELETE FROM Students
3
WHERE dept = 'math';

Update:

1
-- Add $500 to account 12345 & change name to 'Alice'
2
UPDATE Accounts
3
SET balance = balance + 500, name = 'Alice'
4
WHERE accountId = 12345;

Foreign Key Constraint Violations

NO ACTION: rejects delete/update if it violates constraint (default option)
RESTRICT: similar to NO ACTION except that constraint checking can’t be deferred
CASCADE: propagates delete/update to referencing tuples
SET DEFAULT: updates foreign keys of referencing tuples to some default value
SET NULL: updates foreign keys of referencing tuples to null value

1
foreign key (sid) references Students
2
  on delete cascade
3
  on update cascade

Transaction

begin command starts a new transaction. Each transaction must end with either a commit or rollback command

1
begin;
2
update Accounts
3
set balance = balance + 1000
4
where accountId = 2;
5

6
update Accounts
7
set balance = balance - 1000
8
where accountId = 1;
9
commit;

Deferrable Constraints: By default, constraints are checked immediately at the end of SQL statement execution. A violation will cause the statement to be rollbacked. The checking could also be deferred for some constraints to the end of transaction execution. Deferrable constraints: UNIQUE, PRIMARY KEY, FOREIGN KEY

deferrable initially deferred
deferrable initially immediate: deferrable but immediate by default, can change to deferred later

1
create table Employees (
2
  eid integer primary key,
3
  ename varchar(100),
4
  managerId integer,
5
  constraint employees_fkey foreign key (managerId) references Employees
6
    deferrable initially immediate
7
);
8

9
insert into Employees values (1, 'Alice', null), (2, 'Bob', 1), (3, 'Carol', 2);
10

11
begin;
12
set constraints employees_fkey deferred;
13
delete from Employees where eid = 2;
14
update Employees set managerId = 1 where eid = 3;
15
commit;

Modifying Schema

1
--Add/remove/modify columns
2
alter table Students alter column dept drop default;
3
alter table Students drop column dept;
4
alter table Students add column faculty varchar(20);
5

6
--Add/remove constraints
7
alter table Students add constraint fk_grade foreign key(grade) references Grades;

Simple Queries

1
select ∗ from Sells
2
where ((price < 20) and (rname <> 'Pizza King'))
3
or
4
rname = 'Corleone Corner';
5

6
select 'Price of '|| pizza || ' is ' || round(price / 1.3) || ' USD' as menu
7
from Sells
8
where rname = 'Corleone Corner';

Set Operations

1
select cname from Customers
2
union
3
select rname from Restaurants;
4

5
select pizza from Contains where ingredient = 'cheese'
6
intersect
7
select pizza from Contains where ingredient = 'chili';
8

9
select pizza from Contains where ingredient = 'cheese'
10
except
11
select pizza from Contains where ingredient = 'chilli';

union all, intersect all, and except all preserves duplicate records

Multi-relation Queries

1
select cname, rname
2
from Customers as C, Restaurants as R
3
where C.area = R.area;
4
--is the same as
5
select cname, rname
6
from Customers C cross join Restaurants R
7
where C.area = R.area;
8

9
select cname, rname
10
from Customers C join Restaurants R
11
  on C.area = R.area;
12

13
--Find distinct restaurant pairs (R1,R2) where R1 < R2 and they sell some common pizza
14
select distinct S1.rname, S2.rname
15
from Sells S1, Sells S2
16
where S1.rname < S2.rname
17
  and S1.pizza = S2.pizza;
18
--or
19
select distinct S1.rname, S2.rname
20
from Sells S1 join Sells S2
21
on (S1.rname < S2.rname)
22
and (S1.pizza = S2.pizza);

Subquery Expression

EXISTS (subquery)/NOT EXISTS (subquery)

Returns true if the the output of the subquery is non-empty; otherwise, false

1
--Find distinct customers who like some pizza sold by “Corleone Corner”
2
select distinct cname
3
from Likes L
4
where exists (
5
  select 1
6
  from Sells S
7
  where S.rname = 'Corleone Corner'
8
    and S.pizza = L.pizza
9
  );

expr IN (subquerry)

Subquery must return exactly one column
Returns false if the output of the subquery is empty; otherwise return the result of the boolean expression

((v=v_1) \vee (v=v_2) \vee \dots (v=v_n))

where $v$ denote the result of the expression, $\{v_1, v_2, \dots, v_n\}$ denote the output of the subquerry

1
select distinct cname
2
from Likes
3
  where pizza in (
4
  select pizza
5
  from Sells
6
  where rname = ’Corleone Corner’
7
  );
8

9
--Find pizzas that contain ham or seafood
10
select distinct pizza from Contains
11
where ingredient in (’ham’, ’seafood’);

expr op ANY/SOME (subquery)

Subquery must return exactly one column
Returns false if the output of the subquery is empty; otherwise return the result of the boolean expression

((v\ op\ v_1) \vee (v\ op\ v_2) \vee \dots (v\ op\ v_n))

where $v$ denote the result of the expression, $\{v_1, v_2, \dots, v_n\}$ denote the output of the subquery, and $op$ denote comparison operator (i.e., =, <>, <, >, <=, >=)

1
select distinct rname
2
from Sells
3
where rname <> ’Corleone Corner’
4
  and
5
  price > any (
6
    select price
7
    from Sells
8
    where rname = ’Corleone Corner’
9
    );

expr op ALL (subquery)

same as ANY/SOME but use $\wedge$ instead of $\vee$

Row Constructors:

Possible to use subqueries that return more than one column

1
select pname, day, hour
2
from Lectures L
3
where row(day,hour) <= all (
4
  select day, hour
5
  from Lectures L2
6
  where L2.pname = L.pname
7
);

Scalar Subqueries:

A scalar subquery is a subquery that returns at most one tuple with one column. If the subquery’s output is empty, its return value is null.
A scalar subquery can be used as a scalar expression.
Non-scalar subquery expressions can be used in different parts of SQL queries: WHERE, FROM, HAVING clause.

Database modification:

1
insert into Enrolls (sid, cid)
2
select studentId, 101
3
from Students S
4
where S.year = 1;

`ORDER BY`/`LIMIT`/`OFFSET` Clause

1
select ∗
2
from Restaurants, Sells
3
where Restaurants.rname = Sells.rname
4
order by area asc, price desc;
5

6
select ∗
7
from Restaurants, Sells
8
where Restaurants.rname = Sells.rname
9
order by area, price desc;

1
select pizza, rname, price
2
from Sells
3
order by price desc
4
limit 3;
5

6
select pizza, rname, price
7
from Sells
8
order by price desc
9
offset 3
10
limit 2;

Aggregate functions

Untitled

1
select min (price), max (price), avg (price)
2
from Sells
3
where rname = ’Corleone Corner’

Let R be an empty relation and S be a relation with cardinality = n where all values of attribute A are null values

Untitled

Aggregate functions can be used in different parts of SQL queries: SELECT, HAVING, and ORDER BY clause

1
select count(∗), max(price ∗ qty)
2
from Orders;
3

4
select pizza, rname
5
from Sells
6
where price = (select max(price) from Sells);

`GROUP BY` Clause

In a query with GROUP BY a1, a2, · · · , an, two tuples t & t′ belong to the same group if the following expression evaluates to true:

1
(t.a1 IS NOT DISTINCT FROM t′.a1) AND · · · AND (t.an IS NOT DISTINCT FROM t′.an)

Each output tuple corresponds to one group

For each column A in relation R that appears in the SELECT clause, one of the following conditions must hold:

A appears in the GROUP BY clause,
A appears in an aggregated expression in the SELECT clause (e.g., min(A)), or
the primary (~~or a candidate~~) key of R appears in the GROUP BY clause

1
--Find the number of students for each (dept,year) combination.
2
--Show the output in ascending order of (dept,year).
3
select dept, year, count(∗) as num
4
from Students
5
group by dept, year;
6
order by dept, year;
7

8
--Show all restaurants in descending order of their average pizza price.
9
--Exclude restaurants that do not sell any pizza.
10
select rname
11
from Sells
12
group by rname
13
order by avg(price) desc;
14

15
--invalid query
16
select year, count(∗) --year is ambiguos
17
from Students
18
group by dept;
19

20
select rname, min(price), max(price) --cannot include rname if dont have GROUP BY
21
from Sells
22

23
select distinct rname
24
from Sells
25
order by price; --price is ambiguous, cannot order by price

`HAVING` Clause

similar to WHERRE clause but for GROUP BY

1
--Find restaurants located in the ‘East’ area that sell
2
--pizzas with an average selling price higher than the
3
--minimum selling price at Pizza King
4
select rname
5
from Sells
6
where rname in (
7
  select rname
8
  from Restaurants
9
  where area = 'East'
10
)
11
group by rname
12
having avg(price) >
13
  (select min(price)
14
  from Sells
15
  where rname = 'Pizza King');

For each column A in relation R that appears in the HAVING clause, one of the following conditions must hold:

A appears in the GROUP BY clause,
A appears in an aggregated expression in the HAVING clause, or
the primary (~~or a candidate~~) key of R appears in the GROUP BY clause

Conceptual Evaluation of Queries

1
SELECT DISTINCT '<select-list>'
2
FROM '<from-list>'
3
WHERE '<where-condition>'
4
GROUP BY '<groupby-list>'
5
HAVING '<having-condition>'
6
ORDER BY '<orderby-list>'
7
OFFSET '<offset-specification>'
8
LIMIT '<limit-specification>'

Compute the cross-product of the tables in from-list
Select the tuples in the cross-product that evaluate to true for the where-condition
Partition the selected tuples into groups using the groupby-list
Select the groups that evaluate to true for the having-condition condition
For each selected group, generate an output tuple by selecting/computing the attributes/expressions that appear in the select-list
Remove any duplicate output tuples
Sort the output tuples based on the orderby-list
Remove the appropriate output tuples based on the offset-speciﬁcation and limit-speciﬁcation

Common Table Expressions (CTEs)

1
WITH
2
  R1 AS (Q1),
3
  R2 AS (Q2),
4
  ...,
5
  Rn AS (Qn)
6
SELECT/INSERT/DELETE/UPDATE '<smth>'

Views: Providing Logical Data Independence

1
create view CourseInfo as
2
select cname, pname, lectureTime,numUGrad+numPGrad+numExchange+numAudit as numEnrolled
3
from Courses natural join Profs natural join Enrollment;

Conditional Expressions

1
--Case 1
2
case
3
  when condition_1 then result_1
4
  ...
5
  when condition_n then result_n
6
  else result_0
7
end
8

9
--Case 2
10
case expression
11
  when value_1 then result_1
12
  ...
13
  when value_n then result_n
14
  else result_0
15
end
16

17
--Example
18
select name, case
19
  when marks >= 70 then ’A’
20
  when marks >= 60 then ’B’
21
  when marks >= 50 then ’C’
22
  else ’D’
23
end as grade
24
from Scores;

COALESCE(a, b, ...) returns the first non-null value in its arguments. Returns null if all the arguments are null

NULLIF(value1, value2) returns null if value1 is equal to value2; otherwise returns value1

1
select name, nullif(result,’absent’) as status
2
from Tests;

Pattern Matching with `LIKE` Operator

1
select cname from Customers where cname like '___%e';

Underscore _ matches any single character
Percent % matches any sequence of 0 or more characters
string not like '<pattern>' is equivalent to not (string like '<pattern>')
For more advanced regular expressions, use similar to operator

Queries with Universal Quantiﬁcation

Example: Find the names of all students who have enrolled in all the courses offered by CS department

1
/* Courses (courseId, name, dept)
2
Students (studentId, name, birthDate)
3
Enrolls (sid, cid, grade) */
4

5
--F(x): set of courseIds of CS courses that are not enrolled by student with studentId x
6
select courseId
7
from Courses C
8
where dept = 'CS' and not exists (
9
  select 1
10
  from Enrolls E
11
  where E.cid = C.courseId and E.sid = x
12
);
13

14
--Names of students who have enrolled in all CS Courses
15
select name
16
from Students S
17
where not exists (
18
  select courseId
19
  from Courses C
20
  where dept = 'CS' and not exists (
21
    select 1
22
    from Enrolls E
23
    where E.cid = C.courseId
24
    and E.sid = S.studentId
25
  )
26
);

Window Functions

SUM(), COUNT(), AVG()

1
SELECT start_terminal,
2
       duration_seconds,
3
       SUM(duration_seconds) OVER
4
         (PARTITION BY start_terminal) AS running_total,
5
       COUNT(duration_seconds) OVER
6
         (PARTITION BY start_terminal) AS running_count,
7
       AVG(duration_seconds) OVER
8
         (PARTITION BY start_terminal) AS running_avg
9
FROM tutorial.dc_bikeshare_q1_2012
10
WHERE start_time < '2012-01-08';

ROW_NUMBER(): same value different number; RANK(): same value same rank

RANK() would give the identical rows a rank of 2, then skip ranks 3 and 4, so the next result would be 5

DENSE_RANK() would still give all the identical rows a rank of 2, but the following row would be 3—no ranks would be skipped.

1
SELECT depname, empno, salary,
2
       rank() OVER (PARTITION BY depname ORDER BY salary DESC)
3
FROM empsalary;

NTILE(#buckets) identifies what percentile (or quartile when #buckets = 4, or any other subdivision) a given row falls into

1
SELECT start_terminal,
2
       duration_seconds,
3
       NTILE(4) OVER
4
         (PARTITION BY start_terminal ORDER BY duration_seconds)
5
          AS quartile,
6
       NTILE(5) OVER
7
         (PARTITION BY start_terminal ORDER BY duration_seconds)
8
         AS quintile,
9
       NTILE(100) OVER
10
         (PARTITION BY start_terminal ORDER BY duration_seconds)
11
         AS percentile
12
FROM tutorial.dc_bikeshare_q1_2012
13
WHERE start_time < '2012-01-08'
14
ORDER BY start_terminal, duration_seconds;

LAG LEAD create columns that pull values from other rows—all you need to do is enter which column to pull from and how many rows away you’d like to do the pull. LAG pulls from previous rows and LEAD pulls from following rows

1
SELECT start_terminal,
2
       duration_seconds,
3
       LAG(duration_seconds, 1) OVER
4
         (PARTITION BY start_terminal ORDER BY duration_seconds) AS lag,
5
       LEAD(duration_seconds, 1) OVER
6
         (PARTITION BY start_terminal ORDER BY duration_seconds) AS lead
7
FROM tutorial.dc_bikeshare_q1_2012
8
ORDER BY start_terminal, duration_seconds;
9

10
  -- Calculate differences between rows
11
SELECT start_terminal,
12
       duration_seconds,
13
       duration_seconds -LAG(duration_seconds, 1) OVER
14
          (PARTITION BY start_terminal ORDER BY duration_seconds)
15
          AS difference
16
FROM tutorial.dc_bikeshare_q1_2012
17
WHERE start_time < '2012-01-08'
18
ORDER BY start_terminal, duration_seconds;
19

20
-- Remove NULL from boundary columns
21
SELECT *
22
FROM (
23
  SELECT start_terminal,
24
          duration_seconds,
25
          duration_seconds -LAG(duration_seconds, 1) OVER
26
            (PARTITION BY start_terminal ORDER BY duration_seconds)
27
            AS difference
28
  FROM tutorial.dc_bikeshare_q1_2012
29
  WHERE start_time < '2012-01-08'
30
  ORDER BY start_terminal, duration_seconds
31
) sub
32
WHERE sub.difference IS NOT NULL;

Window Alias: If you’re planning to write several window functions in to the same query, using the same window, you can create an alias

1
SELECT start_terminal,
2
     duration_seconds,
3
     NTILE(4) OVER
4
       (PARTITION BY start_terminal ORDER BY duration_seconds)
5
       AS quartile,
6
     NTILE(5) OVER
7
       (PARTITION BY start_terminal ORDER BY duration_seconds)
8
       AS quintile,
9
     NTILE(100) OVER
10
       (PARTITION BY start_terminal ORDER BY duration_seconds)
11
       AS percentile
12
FROM tutorial.dc_bikeshare_q1_2012
13
WHERE start_time < '2012-01-08'
14
ORDER BY start_terminal, duration_seconds;
15

16
-- Using window alias
17
SELECT start_terminal,
18
        duration_seconds,
19
        NTILE(4) OVER ntile_window AS quartile,
20
        NTILE(5) OVER ntile_window AS quintile,
21
        NTILE(100) OVER ntile_window AS percentile
22
FROM tutorial.dc_bikeshare_q1_2012
23
WHERE start_time < '2012-01-08'
24
WINDOW ntile_window AS # always come after WHERE
25
      (PARTITION BY start_terminal ORDER BY duration_seconds)
26
ORDER BY start_terminal, duration_seconds;

Pivot

1
--  MSSQL
2
SELECT Subscription_plan,
3
    [2023-06-01],
4
    [2023-06-02],
5
    [2023-06-03],
6
FROM (SELECT Subscription_plan,[Subscribed_customers], DATE FROM customers) AS IQ
7
PIVOT (
8
  SUM([Subscribed_customers])
9
  FOR Date IN([2023-06-01],[2023-06-02],[2023-06-03])
10
) AS PT;
11

12
--  MySQL
13
SELECT Subscription_plan,
14
    SUM(CASE
15
      WHEN Date = STR_TO_DATE('06/01/2023','%m/%d/%Y') THEN Subscribed_customers ELSE 0 END
16
    ) AS '06/01/2023',
17
    SUM(CASE
18
      WHEN Date = STR_TO_DATE('06/02/2023','%m/%d/%Y') THEN Subscribed_customers ELSE 0 END
19
    ) AS '06/02/2023',
20
    SUM(CASE
21
      WHEN Date = STR_TO_DATE('06/03/2023','%m/%d/%Y') THEN Subscribed_customers ELSE 0 END
22
    ) AS '06/03/2023',
23
FROM customers
24
GROUP BY Subscription_plan;
25

26
--  PostgreSQL
27
CREATE EXTENSION IF NOT EXISTS tablefunc;
28

29
SELECT *
30
FROM crosstab('select Subscription_plan, Date, Subscribed_customers from customers ORDER BY 1,2') AS ct (
31
  Subscription_plan varchar(50),
32
  "06/01/2023" int,
33
  "06/02/2023" int,
34
  "06/03/2023" int,
35
  "06/04/2023" int,
36
  "06/05/2023" int,
37
  "06/06/2023" int,
38
  "06/07/2023" int
39
);

SQL Cheat Sheet

Create/Drop tables

Data Types

Constraints in Data Deﬁnitions

Database Modifications

Foreign Key Constraint Violations

Transaction

Modifying Schema

Simple Queries

Set Operations

Multi-relation Queries

Subquery Expression

ORDER BY/LIMIT/OFFSET Clause

Aggregate functions

GROUP BY Clause

HAVING Clause

Conceptual Evaluation of Queries

Common Table Expressions (CTEs)

Views: Providing Logical Data Independence

Conditional Expressions

Pattern Matching with LIKE Operator

Queries with Universal Quantiﬁcation

Window Functions

Pivot

`ORDER BY`/`LIMIT`/`OFFSET` Clause

`GROUP BY` Clause

`HAVING` Clause

Pattern Matching with `LIKE` Operator