315lab09-projected

Please send questions to st10@humboldt.edu .

-------
-- 315lab09-projected.sql
-- example SQL and SQL*Plus commands for CIS 315 lab, Week 9
--
-- last modified: 10-20-10 (pre-lab)
-------

-- if needed: (and if you copy set-up-ex-tbls.sql into the directory
--    where this script is, and uncomment the following line)
-- @ set-up-ex-tbls

-- SET UP for WEEK 9 LAB EXERCISE:
-- *   ssh to nrs-labs
-- *   mkdir 315lab9
-- *   chmod 700 315lab9
-- *   cd 315lab9
-- *   nano lab9.sql
-- *   ...and start up today's lab script with:
--     *   comments containing your name, Week 9 lab, today's date
--     *   also spool on and spool off commands:
--         spool lab9-results.txt
--         spool off
-- REMEMBER: put the queries for today's lab exercise
--    IN BETWEEN the spool and spool off commands!

spool lab09-results.txt

-----
-- ORDER BY --- to specify what order you want rows in a result
-- to be displayed in (DOESN'T change what's stored in database!!)
-----
-- note: it only makes sense for an "outermost" select...
-----

-- let's see all employee info in increasing order of salary:

select	 *
from	 empl
order by salary;

-- oh, let's see them in order of hiredate now...

select   *
from	 empl
order by hiredate;

-- how about in order of job_title?

select   *
from	 empl
order by job_title;

-- (etc. --- MANY orders are possible, so can base on what you want to
-- DO with the data...)

-- note that order by does NOT affect what columns are projected, or
-- the "order" that those columns appear;
-- for example, I don't even have to project the column I'm ordering
-- by:

select	 empl_last_name
from	 empl
order by salary;

-- also: keep the select semantics in mind; order-by is a late stage!
-- you only order the selected rows...

select   salary, empl_last_name
from     empl
where    job_title = 'Manager'
order by empl_last_name;

-- LAB EXERCISE #1 
-- write a query that selects all of the rows of the dept table,
-- displaying the rows in order of department location

-- LAB EXERCISE #2
-- write a query that selects all of the rows of the dept table,
-- displaying the rows in order of department name

-- if you give multiple attributes in the order by clause, you are
-- saying what to sort by in case of TIES in the previous attributes
-- given in the order by

-- say that I want to sort the employees by job_title,
-- and if they have the same job_title, sort by mgr number,
-- and if they have the same job_title and mgr number, sort
-- by hiredate;

select	 *
from	 empl
order by job_title, mgr, hiredate;

-- what DOES happen with null columns in ordering???

select 	 *
from   	 empl
order by commission;

select	 *
from	 empl
order by mgr;

-- by default, order by gives you ascending order --- to get DESCENDING
-- order, use keyword DESC

-- how about with highest salary first? (DESCENDING order)

select   *
from     empl
order by salary desc;

-- put the desc after EACH attribute that you want in descending
-- order --- for example, to get employees in descending alpha
-- order by job_title, but by increasing order of mgr within that
-- job_title, and by descending hiredate with the same manager
-- and job_title...

select   *
from	 empl
order by job_title desc, mgr, hiredate desc;

-- and, for those with the same salary, in decreasing order of hiredate
-- (thus: in order of INCREASING salary, but if salary is the same,
-- then in order of DECREASING hiredate)

select	 *
from	 empl
order by salary, hiredate desc;

-- do null values show up in a different place when desc?

select 	 *
from	 empl
order by commission desc;

-- LAB EXERCISE #3
-- write a query that projects just the empl_last_name, dept_name,
--    salary, hiredate from the join of the empl and dept tables,
--    displaying the resulting rows in primary order of decreasing
--    salary and in secondary order by increasing hiredate.

-- DO NOT use order-by in a sub-select! A) it is not good style, and
-- B) it doesn't make sense anyway, if you REALLY think about it...
-- It is ONLY reasonable at the END of the "outermost" select.
-- (note: this is now a COURSE SQL CODING standard)

-- it'll go AFTER and OUTSIDE a nested select, for example:

select   *
from     empl
where    salary >
         (select min(salary)
          from   empl
	  where  job_title = 'Manager')
order by salary;

-- this does NOT make sense, and in SOME SQL's you will
-- get an ERROR --- what happens in ours?

-- BAD STYLE!! DON'T DO THIS!!! --
-- (F10 - you do get an error message -- missing right parenthesis?!)
select   *
from     empl
where    salary >
         (select   min(salary)
          from     empl
          where    job_title = 'Manager'
          order by salary);

-- question: can you order by, say, the second character of a string?

-- YUP: consider the following (ref: section 2.4.8, Sunderraman)

select	 *
from	 empl
order by substr(empl_last_name, 2, 1);

-----
-- GROUP BY
-----

-- group by: a way to group rows sharing common characteristics...

-- here's how you get the average salary of ALL employees, right?

select avg(salary)
from   empl;

-- here's how you get the average salary of each job_title...

select	 avg(salary)
from	 empl
group by job_title;

-- (and this prints WHICH job_title has each average)

select	 job_title, avg(salary)
from	 empl
group by job_title;

-- and, can group by whatever you want...

select	 dept_name, avg(salary)
from	 empl e, dept d
where	 e.dept_num = d.dept_num
group by dept_name
order by avg(salary);

select	 dept_name, avg(salary)
from	 empl e join dept d on e.dept_num = d.dept_num
group by dept_name
order by avg(salary);

-- what if I want the minimum and maximum salaries and
-- other computations for each dept_num?

select	 count(*), dept_num, min(salary), max(salary), min(hiredate), 
         max(hiredate), sum(salary)
from	 empl
group by dept_num
order by min(salary);

-- IMPORTANT group-by LIMITATIONS:
--
-- you can only project columns/info that SQL knows are IDENTICAL for all 
-- in each group --- for example, THIS FAILS:
-- (remember, group by essentially gives you "one row" per group;)

-- THIS FAILS... 

select	 dept_num, empl_last_name, min(salary), max(salary), 
	 min(hiredate), max(hiredate)
from	 empl
group by dept_num;

-- this fails because, even though I know dept_name is the same
-- for all rows with the same dept_num, Oracle doesn't know that...

-- BAD QUERY - WON'T WORK 

select   d.dept_num, dept_name, min(salary), max(salary), 
	 min(hiredate), max(hiredate)
from	 empl e, dept d
where	 e.dept_num = d.dept_num
group by d.dept_num;

-- BAD QUERY - WON'T WORK 

select   d.dept_num, dept_name, min(salary), max(salary), 
	 min(hiredate), max(hiredate)
from	 empl e join dept d on e.dept_num = d.dept_num
group by d.dept_num;

-- if you group by more than one column, you get the groups for
-- rows with the same value in BOTH of those columns (or
-- as many columns as are specified in the group-by clause)

select	 d.dept_num, dept_name, min(salary), max(salary), 
	 min(hiredate), max(hiredate)
from	 empl e, dept d
where	 e.dept_num = d.dept_num
group by d.dept_num, dept_name;

select   d.dept_num, dept_name, min(salary), max(salary),
         min(hiredate), max(hiredate)
from     empl e join dept d on e.dept_num = d.dept_num
group by d.dept_num, dept_name;

-- be careful, though -- each distinct PAIR is a group...

select   job_title, mgr, avg(salary), count(*)
from     empl
group by job_title, mgr;       

-- SO --- what if I *really* want to show dept_name and dept_num? Group-by
-- *both*... (both being candidate keys, anyway, it turns out quite nicely)

select	 d.dept_num, dept_name, min(salary), max(salary), 
	 min(hiredate), max(hiredate)
from	 empl e, dept d
where    e.dept_num = d.dept_num
group by d.dept_num, dept_name;

select	 d.dept_num, dept_name, min(salary), max(salary), 
	 min(hiredate), max(hiredate)
from	 empl e join dept d on e.dept_num = d.dept_num
group by d.dept_num, dept_name;

-- BUT be careful --- remember that it is really giving the min salary, 
-- max salary, min hiredate, etc. for each distinct *set* of attributes 
-- grouped by --- consider:
-- (why are the results what they are?)

select   dept_num, empl_last_name, min(salary), max(salary),
         min(hiredate), max(hiredate)
from     empl
group by dept_num, empl_last_name;

-- another GROUP-BY STYLE STANDARD: you should be grouping-by for
--    a REASON (usually, that's for a computation based on the grouping...)

-- poor style:

select   dept_name, job_title
from     empl e, dept d
where    e.dept_num = d.dept_num
group by dept_name, job_title;

-- also poor style:

select   dept_name, job_title
from     empl e join dept d on e.dept_num = d.dept_num
group by dept_name, job_title;

-- better style: (if you want to avoid duplicates -- just use DISTINCT
--    already, it's clearer and MAY be more efficient;)

select   distinct dept_name, job_title
from     empl e, dept d
where    e.dept_num = d.dept_num;

select   distinct dept_name, job_title
from     empl e join dept d on e.dept_num = d.dept_num;

-- and, the below lets you see how many are in each (department, job_title)
-- pair, and their average salary:

select   dept_name, job_title, count(*), avg(salary)
from     empl e, dept d
where    e.dept_num = d.dept_num
group by dept_name, job_title;

select   dept_name, job_title, count(*), avg(salary)
from     empl e join dept d on e.dept_num = d.dept_num
group by dept_name, job_title;

-- LAB EXERCISE #4
-- write a query that projects, for each value of mgr,
--    the earliest hiredate for those empl rows with that
--    value of mgr. (Project two columns in this query, the mgr
--    value, and the earliest hiredate)

select   mgr, min(hiredate), count(*)
from     empl
group by mgr;

-- can group by be in a nested select? Yes, if you are careful...

-- to see which employees make more than or equal to the average for 
-- any one department (even if not theirs)

select empl_last_name, salary
from   empl
where  salary >=
       (select   min(avg(salary))
        from     empl
        group by dept_num);

-- note, IF you are using group-by, then you can have an aggregate
--    function call on the group'ed computations...!
-- The possibilities are probably numerous!

-- what's the minimum *average* salary for a department?

select   min(avg(salary))
from     empl
group by dept_num;

-- (and which department(s) have this minimum average salary?)
-- (NOT as easy as it sounds! 8-S ) 
-- Here are two example solutions; note the use of having!

select dept_name
from   dept
where  dept_num in
       (select dept_num
        from   empl
        group  by dept_num
        having avg(salary) in
               (select min(avg(salary))
                from empl
                group by dept_num));
              
select   dept_name
from     dept d, empl e
where    d.dept_num = e.dept_num
group by dept_name                  -- only OK because a candidate key...
having   avg(salary) in
         (select   min(avg(salary))
          from     empl
          group by dept_num);

select   dept_name
from     dept d join empl e on d.dept_num = e.dept_num
group by dept_name                  -- only OK because a candidate key...
having   avg(salary) in
         (select   min(avg(salary))
          from     empl
          group by dept_num);

-----
-- do NOT confuse order-by and group-by! 
-----

-- if you want your results in a certain order, you STILL need order-by
-- (even with group-by)

-- what if I want the earlier (good) example ordered by minimum salary?

select	 dept_num, min(salary), max(salary), min(hiredate), 
         max(hiredate), sum(salary)
from	 empl
group by dept_num
order by min(salary);

-- if you have a where clause, that SELECTION is done before the
-- GROUPING...

-- what's the average salary for each department ONLY for those
-- hired after July 15, 1991?

-- note that the selection based on hiredate is done BEFORE the
-- grouping based on dept_num:

select	 dept_num, avg(salary), count(*)
from	 empl
where	 hiredate > '15-Jul-1991'
group by dept_num
order by count(*);

-- note that that result IS different from this one:

select	 dept_num, avg(salary), count(*)
from	 empl
group by dept_num
order by avg(salary);

-----
-- HAVING
-----

-- "having" is to groups what "where" is to rows...a way to
-- limit which GROUPS you see in the result;

-- for example, if I want dept_num's, and average salaries for each 
-- dept_num, ONLY for dept_num's with an average salary greater than
-- 1500, I MUST use HAVING:

select	 dept_num, avg(salary)
from	 empl
group by dept_num
having	 avg(salary) > 1500;

-- let's see that in order of decreasing (descending) average salary:

select   dept_num, avg(salary)
from     empl
group by dept_num
having   avg(salary) > 1500
order by avg(salary) desc;

-- I can limit the groups by other criteria --- BUT those criteria
--    have to be "related" to the group --- based on the grouped-by
--    attributes (or expressions using those attributes?), 
--    OR to computations on the group;

-- such as, I only want to see dept_num and average salary for
--    departments whose latest hiredate is after Jan 1, 1992:

select   dept_num, avg(salary) 
from     empl 
group by dept_num 
having   max(hiredate) > '01-Jan-1992' 
order by avg(salary) desc; 

-- and --- what if I only am interested in average salaries within each
-- dept of employees hired after July 15, 1991, and only for departments
-- with average salary greater than 1500 (and still in order of descending
-- average salary)

select   dept_num, avg(salary)
from     empl
where	 hiredate > '15-Jul-1991'
group by dept_num
having   avg(salary) > 1500
order by avg(salary) desc;

-- now --- gee, we'd like to use department name with the above
-- instead of dept_num...

select   dept_name, avg(salary)
from     empl e, dept d
where	 e.dept_num = d.dept_num
         and hiredate > '15-Jul-1991'
group by dept_name
having   avg(salary) > 1500
order by avg(salary) desc;

select   dept_name, avg(salary)
from     empl e join dept d on e.dept_num = d.dept_num
where    hiredate > '15-Jul-1991'
group by dept_name
having   avg(salary) > 1500
order by avg(salary) desc;

-- and, having clause can be as "interesting" as we'd like...

select   dept_name, avg(salary)
from     empl e, dept d
where    e.dept_num = d.dept_num
group by dept_name
having   avg(salary) > 1500
         and min(salary) < 4000
order by avg(salary) desc;

select   dept_name, avg(salary)
from     empl e join dept d on e.dept_num = d.dept_num
group by dept_name
having   avg(salary) > 1500
         and min(salary) < 4000
order by avg(salary) desc;

-- LAB EXERCISE #5
-- write a query that projects, for each value of mgr, the earliest
--    hiredate for those empl rows with that value of mgr,
--    BUT only for those mgr values for which the earliest
--    hiredate is before May 1, 1991 (project 2 columns in this query,
--    the mgr value and the earliest hiredate)

select   mgr, min(hiredate)
from     empl
group by mgr
having   min(hiredate) < '01-MAY-1991';

-----
-- a little bit more on AGGREGATE FUNCTIONS (intro'd w/nested selects)
-----

-- but, interesting additional tidbit --- can use distinct INSIDE 
-- the parentheses of a call of one of these...

-- if you use distinct "normally", it removes any duplicate rows in
-- the result:

select count(job_title)
from   empl;

-- but: distinct WITHIN an aggregate function is different: how many
-- DISTINCT values?
-- for example: to find out how many DIFFERENT job_titles are used in
-- empl table:

select count(distinct job_title)
from   empl;

-- this is useless, right? distinct for a single-row result??

select distinct count(job_title)
from   empl;

-- making the result a little prettier...

select count(distinct job_title) "How Many Job-titles"
from   empl;

-- notice that number of non-null commissions is NOT 14...

select count(commission)
from   empl;

select  count(distinct commission)
from	empl;

spool off

-- end of 315lab09-projected.sql