Please send questions to
st10@humboldt.edu .
-------
-- 315lab09-projected.sql
-- example SQL and SQL*Plus commands for CIS 315 lab, Week 9
--
-- last modified: 10-20-10 (pre-lab)
-------
-- if needed: (and if you copy set-up-ex-tbls.sql into the directory
-- where this script is, and uncomment the following line)
-- @ set-up-ex-tbls
-- SET UP for WEEK 9 LAB EXERCISE:
-- * ssh to nrs-labs
-- * mkdir 315lab9
-- * chmod 700 315lab9
-- * cd 315lab9
-- * nano lab9.sql
-- * ...and start up today's lab script with:
-- * comments containing your name, Week 9 lab, today's date
-- * also spool on and spool off commands:
-- spool lab9-results.txt
-- spool off
-- REMEMBER: put the queries for today's lab exercise
-- IN BETWEEN the spool and spool off commands!
spool lab09-results.txt
-----
-- ORDER BY --- to specify what order you want rows in a result
-- to be displayed in (DOESN'T change what's stored in database!!)
-----
-- note: it only makes sense for an "outermost" select...
-----
-- let's see all employee info in increasing order of salary:
select *
from empl
order by salary;
-- oh, let's see them in order of hiredate now...
select *
from empl
order by hiredate;
-- how about in order of job_title?
select *
from empl
order by job_title;
-- (etc. --- MANY orders are possible, so can base on what you want to
-- DO with the data...)
-- note that order by does NOT affect what columns are projected, or
-- the "order" that those columns appear;
-- for example, I don't even have to project the column I'm ordering
-- by:
select empl_last_name
from empl
order by salary;
-- also: keep the select semantics in mind; order-by is a late stage!
-- you only order the selected rows...
select salary, empl_last_name
from empl
where job_title = 'Manager'
order by empl_last_name;
-- LAB EXERCISE #1
-- write a query that selects all of the rows of the dept table,
-- displaying the rows in order of department location
-- LAB EXERCISE #2
-- write a query that selects all of the rows of the dept table,
-- displaying the rows in order of department name
-- if you give multiple attributes in the order by clause, you are
-- saying what to sort by in case of TIES in the previous attributes
-- given in the order by
-- say that I want to sort the employees by job_title,
-- and if they have the same job_title, sort by mgr number,
-- and if they have the same job_title and mgr number, sort
-- by hiredate;
select *
from empl
order by job_title, mgr, hiredate;
-- what DOES happen with null columns in ordering???
select *
from empl
order by commission;
select *
from empl
order by mgr;
-- by default, order by gives you ascending order --- to get DESCENDING
-- order, use keyword DESC
-- how about with highest salary first? (DESCENDING order)
select *
from empl
order by salary desc;
-- put the desc after EACH attribute that you want in descending
-- order --- for example, to get employees in descending alpha
-- order by job_title, but by increasing order of mgr within that
-- job_title, and by descending hiredate with the same manager
-- and job_title...
select *
from empl
order by job_title desc, mgr, hiredate desc;
-- and, for those with the same salary, in decreasing order of hiredate
-- (thus: in order of INCREASING salary, but if salary is the same,
-- then in order of DECREASING hiredate)
select *
from empl
order by salary, hiredate desc;
-- do null values show up in a different place when desc?
select *
from empl
order by commission desc;
-- LAB EXERCISE #3
-- write a query that projects just the empl_last_name, dept_name,
-- salary, hiredate from the join of the empl and dept tables,
-- displaying the resulting rows in primary order of decreasing
-- salary and in secondary order by increasing hiredate.
-- DO NOT use order-by in a sub-select! A) it is not good style, and
-- B) it doesn't make sense anyway, if you REALLY think about it...
-- It is ONLY reasonable at the END of the "outermost" select.
-- (note: this is now a COURSE SQL CODING standard)
-- it'll go AFTER and OUTSIDE a nested select, for example:
select *
from empl
where salary >
(select min(salary)
from empl
where job_title = 'Manager')
order by salary;
-- this does NOT make sense, and in SOME SQL's you will
-- get an ERROR --- what happens in ours?
-- BAD STYLE!! DON'T DO THIS!!! --
-- (F10 - you do get an error message -- missing right parenthesis?!)
select *
from empl
where salary >
(select min(salary)
from empl
where job_title = 'Manager'
order by salary);
-- question: can you order by, say, the second character of a string?
-- YUP: consider the following (ref: section 2.4.8, Sunderraman)
select *
from empl
order by substr(empl_last_name, 2, 1);
-----
-- GROUP BY
-----
-- group by: a way to group rows sharing common characteristics...
-- here's how you get the average salary of ALL employees, right?
select avg(salary)
from empl;
-- here's how you get the average salary of each job_title...
select avg(salary)
from empl
group by job_title;
-- (and this prints WHICH job_title has each average)
select job_title, avg(salary)
from empl
group by job_title;
-- and, can group by whatever you want...
select dept_name, avg(salary)
from empl e, dept d
where e.dept_num = d.dept_num
group by dept_name
order by avg(salary);
select dept_name, avg(salary)
from empl e join dept d on e.dept_num = d.dept_num
group by dept_name
order by avg(salary);
-- what if I want the minimum and maximum salaries and
-- other computations for each dept_num?
select count(*), dept_num, min(salary), max(salary), min(hiredate),
max(hiredate), sum(salary)
from empl
group by dept_num
order by min(salary);
-- IMPORTANT group-by LIMITATIONS:
--
-- you can only project columns/info that SQL knows are IDENTICAL for all
-- in each group --- for example, THIS FAILS:
-- (remember, group by essentially gives you "one row" per group;)
-- THIS FAILS...
select dept_num, empl_last_name, min(salary), max(salary),
min(hiredate), max(hiredate)
from empl
group by dept_num;
-- this fails because, even though I know dept_name is the same
-- for all rows with the same dept_num, Oracle doesn't know that...
-- BAD QUERY - WON'T WORK
select d.dept_num, dept_name, min(salary), max(salary),
min(hiredate), max(hiredate)
from empl e, dept d
where e.dept_num = d.dept_num
group by d.dept_num;
-- BAD QUERY - WON'T WORK
select d.dept_num, dept_name, min(salary), max(salary),
min(hiredate), max(hiredate)
from empl e join dept d on e.dept_num = d.dept_num
group by d.dept_num;
-- if you group by more than one column, you get the groups for
-- rows with the same value in BOTH of those columns (or
-- as many columns as are specified in the group-by clause)
select d.dept_num, dept_name, min(salary), max(salary),
min(hiredate), max(hiredate)
from empl e, dept d
where e.dept_num = d.dept_num
group by d.dept_num, dept_name;
select d.dept_num, dept_name, min(salary), max(salary),
min(hiredate), max(hiredate)
from empl e join dept d on e.dept_num = d.dept_num
group by d.dept_num, dept_name;
-- be careful, though -- each distinct PAIR is a group...
select job_title, mgr, avg(salary), count(*)
from empl
group by job_title, mgr;
-- SO --- what if I *really* want to show dept_name and dept_num? Group-by
-- *both*... (both being candidate keys, anyway, it turns out quite nicely)
select d.dept_num, dept_name, min(salary), max(salary),
min(hiredate), max(hiredate)
from empl e, dept d
where e.dept_num = d.dept_num
group by d.dept_num, dept_name;
select d.dept_num, dept_name, min(salary), max(salary),
min(hiredate), max(hiredate)
from empl e join dept d on e.dept_num = d.dept_num
group by d.dept_num, dept_name;
-- BUT be careful --- remember that it is really giving the min salary,
-- max salary, min hiredate, etc. for each distinct *set* of attributes
-- grouped by --- consider:
-- (why are the results what they are?)
select dept_num, empl_last_name, min(salary), max(salary),
min(hiredate), max(hiredate)
from empl
group by dept_num, empl_last_name;
-- another GROUP-BY STYLE STANDARD: you should be grouping-by for
-- a REASON (usually, that's for a computation based on the grouping...)
-- poor style:
select dept_name, job_title
from empl e, dept d
where e.dept_num = d.dept_num
group by dept_name, job_title;
-- also poor style:
select dept_name, job_title
from empl e join dept d on e.dept_num = d.dept_num
group by dept_name, job_title;
-- better style: (if you want to avoid duplicates -- just use DISTINCT
-- already, it's clearer and MAY be more efficient;)
select distinct dept_name, job_title
from empl e, dept d
where e.dept_num = d.dept_num;
select distinct dept_name, job_title
from empl e join dept d on e.dept_num = d.dept_num;
-- and, the below lets you see how many are in each (department, job_title)
-- pair, and their average salary:
select dept_name, job_title, count(*), avg(salary)
from empl e, dept d
where e.dept_num = d.dept_num
group by dept_name, job_title;
select dept_name, job_title, count(*), avg(salary)
from empl e join dept d on e.dept_num = d.dept_num
group by dept_name, job_title;
-- LAB EXERCISE #4
-- write a query that projects, for each value of mgr,
-- the earliest hiredate for those empl rows with that
-- value of mgr. (Project two columns in this query, the mgr
-- value, and the earliest hiredate)
select mgr, min(hiredate), count(*)
from empl
group by mgr;
-- can group by be in a nested select? Yes, if you are careful...
-- to see which employees make more than or equal to the average for
-- any one department (even if not theirs)
select empl_last_name, salary
from empl
where salary >=
(select min(avg(salary))
from empl
group by dept_num);
-- note, IF you are using group-by, then you can have an aggregate
-- function call on the group'ed computations...!
-- The possibilities are probably numerous!
-- what's the minimum *average* salary for a department?
select min(avg(salary))
from empl
group by dept_num;
-- (and which department(s) have this minimum average salary?)
-- (NOT as easy as it sounds! 8-S )
-- Here are two example solutions; note the use of having!
select dept_name
from dept
where dept_num in
(select dept_num
from empl
group by dept_num
having avg(salary) in
(select min(avg(salary))
from empl
group by dept_num));
select dept_name
from dept d, empl e
where d.dept_num = e.dept_num
group by dept_name -- only OK because a candidate key...
having avg(salary) in
(select min(avg(salary))
from empl
group by dept_num);
select dept_name
from dept d join empl e on d.dept_num = e.dept_num
group by dept_name -- only OK because a candidate key...
having avg(salary) in
(select min(avg(salary))
from empl
group by dept_num);
-----
-- do NOT confuse order-by and group-by!
-----
-- if you want your results in a certain order, you STILL need order-by
-- (even with group-by)
-- what if I want the earlier (good) example ordered by minimum salary?
select dept_num, min(salary), max(salary), min(hiredate),
max(hiredate), sum(salary)
from empl
group by dept_num
order by min(salary);
-- if you have a where clause, that SELECTION is done before the
-- GROUPING...
-- what's the average salary for each department ONLY for those
-- hired after July 15, 1991?
-- note that the selection based on hiredate is done BEFORE the
-- grouping based on dept_num:
select dept_num, avg(salary), count(*)
from empl
where hiredate > '15-Jul-1991'
group by dept_num
order by count(*);
-- note that that result IS different from this one:
select dept_num, avg(salary), count(*)
from empl
group by dept_num
order by avg(salary);
-----
-- HAVING
-----
-- "having" is to groups what "where" is to rows...a way to
-- limit which GROUPS you see in the result;
-- for example, if I want dept_num's, and average salaries for each
-- dept_num, ONLY for dept_num's with an average salary greater than
-- 1500, I MUST use HAVING:
select dept_num, avg(salary)
from empl
group by dept_num
having avg(salary) > 1500;
-- let's see that in order of decreasing (descending) average salary:
select dept_num, avg(salary)
from empl
group by dept_num
having avg(salary) > 1500
order by avg(salary) desc;
-- I can limit the groups by other criteria --- BUT those criteria
-- have to be "related" to the group --- based on the grouped-by
-- attributes (or expressions using those attributes?),
-- OR to computations on the group;
-- such as, I only want to see dept_num and average salary for
-- departments whose latest hiredate is after Jan 1, 1992:
select dept_num, avg(salary)
from empl
group by dept_num
having max(hiredate) > '01-Jan-1992'
order by avg(salary) desc;
-- and --- what if I only am interested in average salaries within each
-- dept of employees hired after July 15, 1991, and only for departments
-- with average salary greater than 1500 (and still in order of descending
-- average salary)
select dept_num, avg(salary)
from empl
where hiredate > '15-Jul-1991'
group by dept_num
having avg(salary) > 1500
order by avg(salary) desc;
-- now --- gee, we'd like to use department name with the above
-- instead of dept_num...
select dept_name, avg(salary)
from empl e, dept d
where e.dept_num = d.dept_num
and hiredate > '15-Jul-1991'
group by dept_name
having avg(salary) > 1500
order by avg(salary) desc;
select dept_name, avg(salary)
from empl e join dept d on e.dept_num = d.dept_num
where hiredate > '15-Jul-1991'
group by dept_name
having avg(salary) > 1500
order by avg(salary) desc;
-- and, having clause can be as "interesting" as we'd like...
select dept_name, avg(salary)
from empl e, dept d
where e.dept_num = d.dept_num
group by dept_name
having avg(salary) > 1500
and min(salary) < 4000
order by avg(salary) desc;
select dept_name, avg(salary)
from empl e join dept d on e.dept_num = d.dept_num
group by dept_name
having avg(salary) > 1500
and min(salary) < 4000
order by avg(salary) desc;
-- LAB EXERCISE #5
-- write a query that projects, for each value of mgr, the earliest
-- hiredate for those empl rows with that value of mgr,
-- BUT only for those mgr values for which the earliest
-- hiredate is before May 1, 1991 (project 2 columns in this query,
-- the mgr value and the earliest hiredate)
select mgr, min(hiredate)
from empl
group by mgr
having min(hiredate) < '01-MAY-1991';
-----
-- a little bit more on AGGREGATE FUNCTIONS (intro'd w/nested selects)
-----
-- but, interesting additional tidbit --- can use distinct INSIDE
-- the parentheses of a call of one of these...
-- if you use distinct "normally", it removes any duplicate rows in
-- the result:
select count(job_title)
from empl;
-- but: distinct WITHIN an aggregate function is different: how many
-- DISTINCT values?
-- for example: to find out how many DIFFERENT job_titles are used in
-- empl table:
select count(distinct job_title)
from empl;
-- this is useless, right? distinct for a single-row result??
select distinct count(job_title)
from empl;
-- making the result a little prettier...
select count(distinct job_title) "How Many Job-titles"
from empl;
-- notice that number of non-null commissions is NOT 14...
select count(commission)
from empl;
select count(distinct commission)
from empl;
spool off
-- end of 315lab09-projected.sql